[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics
|
From: |
Fei Wu |
|
Subject: |
[PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics |
|
Date: |
Mon, 29 May 2023 19:49:41 +0800 |
This collects all the statistics for TBStatistics, not only for the
whole emulation but for each TB.
Signed-off-by: Vanderson M. do Rosario <vandersonmr2@gmail.com>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Fei Wu <fei2.wu@intel.com>
---
accel/tcg/monitor.c | 20 ++++-
accel/tcg/tb-stats.c | 146 ++++++++++++++++++++++++++++++++++
accel/tcg/tcg-accel-ops.c | 7 ++
accel/tcg/translate-all.c | 76 +++++++++++++++++-
accel/tcg/translator.c | 5 +-
include/exec/tb-stats-flags.h | 2 +
include/exec/tb-stats.h | 46 +++++++++++
include/qemu/timer.h | 6 ++
include/tcg/tcg.h | 45 ++++++++++-
softmmu/runstate.c | 9 +++
tcg/tcg.c | 82 +++++++++++++++++--
tests/qtest/qmp-cmd-test.c | 3 +
12 files changed, 435 insertions(+), 12 deletions(-)
diff --git a/accel/tcg/monitor.c b/accel/tcg/monitor.c
index e903dd1d2e..2bc87f2642 100644
--- a/accel/tcg/monitor.c
+++ b/accel/tcg/monitor.c
@@ -15,6 +15,7 @@
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
+#include "exec/tb-stats.h"
#include "internal.h"
@@ -69,6 +70,11 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
{
g_autoptr(GString) buf = g_string_new("");
+ if (!tb_stats_collection_enabled()) {
+ error_setg(errp, "TB information not being recorded.");
+ return NULL;
+ }
+
if (!tcg_enabled()) {
error_setg(errp,
"Opcode count information is only available with
accel=tcg");
@@ -80,11 +86,23 @@ HumanReadableText *qmp_x_query_opcount(Error **errp)
return human_readable_text_from_str(buf);
}
+#ifdef CONFIG_TCG
+HumanReadableText *qmp_x_query_profile(Error **errp)
+{
+ g_autoptr(GString) buf = g_string_new("");
+
+ dump_jit_exec_time_info(dev_time, buf);
+ dev_time = 0;
+
+ return human_readable_text_from_str(buf);
+}
+#else
HumanReadableText *qmp_x_query_profile(Error **errp)
{
- error_setg(errp, "Internal profiler not compiled");
+ error_setg(errp, "TCG should be enabled!");
return NULL;
}
+#endif
static void hmp_tcg_register(void)
{
diff --git a/accel/tcg/tb-stats.c b/accel/tcg/tb-stats.c
index 143a52ef5c..78a3104c7f 100644
--- a/accel/tcg/tb-stats.c
+++ b/accel/tcg/tb-stats.c
@@ -9,6 +9,11 @@
#include "qemu/osdep.h"
#include "disas/disas.h"
+#include "exec/exec-all.h"
+#include "tcg/tcg.h"
+
+#include "qemu/qemu-print.h"
+#include "qemu/timer.h"
#include "exec/tb-stats.h"
#include "tb-context.h"
@@ -24,6 +29,147 @@ enum TBStatsStatus {
static enum TBStatsStatus tcg_collect_tb_stats;
static uint32_t default_tbstats_flag;
+uint64_t dev_time;
+
+struct jit_profile_info {
+ uint64_t translations;
+ uint64_t aborted;
+ uint64_t ops;
+ unsigned ops_max;
+ uint64_t del_ops;
+ uint64_t temps;
+ unsigned temps_max;
+ uint64_t host;
+ uint64_t guest;
+ uint64_t search_data;
+
+ uint64_t interm_time;
+ uint64_t code_time;
+ uint64_t restore_count;
+ uint64_t restore_time;
+ uint64_t opt_time;
+ uint64_t la_time;
+};
+
+/*
+ * qht_iter() callback: fold the counters of one TBStatistics entry (@p)
+ * into the struct jit_profile_info accumulator passed as @userp.
+ * @hash is not used.
+ */
+static void collect_jit_profile_info(void *p, uint32_t hash, void *userp)
+{
+    TBStatistics *tbs = p;
+    struct jit_profile_info *acc = userp;
+    unsigned long ops_per_tb = stat_per_translation(tbs, code.num_tcg_ops);
+    unsigned long temps_per_tb = stat_per_translation(tbs, code.temps);
+
+    /* Counters that are summed as-is across all TBStatistics entries. */
+    acc->translations += tbs->translations.total;
+    acc->ops += tbs->code.num_tcg_ops;
+    acc->del_ops += tbs->code.deleted_ops;
+    acc->temps += tbs->code.temps;
+    acc->host += tbs->code.out_len;
+    acc->guest += tbs->code.in_len;
+    acc->search_data += tbs->code.search_out_len;
+
+    /* Remember the largest per-translation averages seen so far. */
+    if (ops_per_tb > acc->ops_max) {
+        acc->ops_max = ops_per_tb;
+    }
+    if (temps_per_tb > acc->temps_max) {
+        acc->temps_max = temps_per_tb;
+    }
+
+    /* Generation times are added as per-translation averages. */
+    acc->interm_time += stat_per_translation(tbs, gen_times.ir);
+    acc->opt_time += stat_per_translation(tbs, gen_times.ir_opt);
+    acc->la_time += stat_per_translation(tbs, gen_times.la);
+    acc->code_time += stat_per_translation(tbs, gen_times.code);
+
+    /*
+     * The restore time is how long was spent restoring state from this
+     * TB (e.g. recovering from a fault).  It is therefore independent
+     * of the number of translations and is added without averaging.
+     */
+    acc->restore_time += tbs->tb_restore_time;
+    acc->restore_count += tbs->tb_restore_count;
+}
+
+/*
+ * Append two timing lines to @buf:
+ *  - "async time": the caller-supplied @dev_time;
+ *  - "qemu time": the growth of tcg_cpu_exec_time() since the previous
+ *    call of this function (the last value is kept in a static).
+ * Each value is printed raw and divided by NANOSECONDS_PER_SECOND.
+ */
+void dump_jit_exec_time_info(uint64_t dev_time, GString *buf)
+{
+    static uint64_t last_cpu_exec_time;
+    uint64_t cpu_exec_time;
+    uint64_t delta;
+
+    cpu_exec_time = tcg_cpu_exec_time();
+    delta = cpu_exec_time - last_cpu_exec_time;
+
+    /* Both values are uint64_t, so they are printed with PRIu64. */
+    g_string_append_printf(buf, "async time %" PRIu64 " (%0.3f)\n",
+                           dev_time,
+                           dev_time / (double)NANOSECONDS_PER_SECOND);
+    g_string_append_printf(buf, "qemu time %" PRIu64 " (%0.3f)\n",
+                           delta, delta / (double)NANOSECONDS_PER_SECOND);
+    last_cpu_exec_time = cpu_exec_time;
+}
+
+/*
+ * Dump JIT statistics using TCGProfile and TBStats.
+ *
+ * Walks every entry of tb_ctx.tb_stats with collect_jit_profile_info()
+ * and appends a human readable summary to @buf.  Nothing is appended
+ * when TB stats collection is disabled or when no translation has been
+ * recorded.  @s may be NULL, in which case the "cpu exec time" line is
+ * left out.
+ */
+void dump_jit_profile_info(TCGProfile *s, GString *buf)
+{
+    /* Small fixed-size accumulator: no need for a heap allocation. */
+    struct jit_profile_info jpi = { 0 };
+    uint64_t tot;
+
+    if (!tb_stats_collection_enabled()) {
+        return;
+    }
+
+    qht_iter(&tb_ctx.tb_stats, collect_jit_profile_info, &jpi);
+
+    if (!jpi.translations) {
+        return;
+    }
+
+    /* The counters are unsigned, hence PRIu64 / %u below. */
+    g_string_append_printf(buf, "translated TBs %" PRIu64 "\n",
+                           jpi.translations);
+    g_string_append_printf(buf, "avg ops/TB %0.1f max=%u\n",
+                           jpi.ops / (double)jpi.translations, jpi.ops_max);
+    g_string_append_printf(buf, "deleted ops/TB %0.2f\n",
+                           jpi.del_ops / (double)jpi.translations);
+    g_string_append_printf(buf, "avg temps/TB %0.2f max=%u\n",
+                           jpi.temps / (double)jpi.translations,
+                           jpi.temps_max);
+    g_string_append_printf(buf, "avg host code/TB %0.1f\n",
+                           jpi.host / (double)jpi.translations);
+    g_string_append_printf(buf, "avg search data/TB %0.1f\n",
+                           jpi.search_data / (double)jpi.translations);
+
+    /*
+     * NOTE(review): the times summed here are profile_getclock() deltas;
+     * confirm that labelling them as cycles at 2.4 GHz is intended.
+     */
+    tot = jpi.interm_time + jpi.code_time;
+
+    g_string_append_printf(buf, "JIT cycles %" PRIu64
+                           " (%0.3fs at 2.4 GHz)\n",
+                           tot, tot / 2.4e9);
+    g_string_append_printf(buf, " cycles/op %0.1f\n",
+                           jpi.ops ? (double)tot / jpi.ops : 0);
+    g_string_append_printf(buf, " cycles/in byte %0.1f\n",
+                           jpi.guest ? (double)tot / jpi.guest : 0);
+    g_string_append_printf(buf, " cycles/out byte %0.1f\n",
+                           jpi.host ? (double)tot / jpi.host : 0);
+    g_string_append_printf(buf, " cycles/search byte %0.1f\n",
+                           jpi.search_data ?
+                           (double)tot / jpi.search_data : 0);
+
+    /* Avoid dividing by zero in the percentages below. */
+    if (tot == 0) {
+        tot = 1;
+    }
+
+    g_string_append_printf(buf, " gen_interm time %0.1f%%\n",
+                           (double)jpi.interm_time / tot * 100.0);
+    g_string_append_printf(buf, " gen_code time %0.1f%%\n",
+                           (double)jpi.code_time / tot * 100.0);
+
+    g_string_append_printf(buf, " optim./code time %0.1f%%\n",
+                           (double)jpi.opt_time /
+                           (jpi.code_time ? jpi.code_time : 1) * 100.0);
+    g_string_append_printf(buf, " liveness/code time %0.1f%%\n",
+                           (double)jpi.la_time /
+                           (jpi.code_time ? jpi.code_time : 1) * 100.0);
+
+    g_string_append_printf(buf, "cpu_restore count %" PRIu64 "\n",
+                           jpi.restore_count);
+    g_string_append_printf(buf, " avg cycles %0.1f\n",
+                           jpi.restore_count ?
+                           (double)jpi.restore_time / jpi.restore_count : 0);
+
+    if (s) {
+        /* cpu_exec_time is an int64_t, so PRId64 is correct here. */
+        g_string_append_printf(buf, "cpu exec time %" PRId64 " (%0.3fs)\n",
+                               s->cpu_exec_time,
+                               s->cpu_exec_time /
+                               (double)NANOSECONDS_PER_SECOND);
+    }
+}
+
void init_tb_stats_htable(void)
{
if (!tb_ctx.tb_stats.map && tb_stats_collection_enabled()) {
diff --git a/accel/tcg/tcg-accel-ops.c b/accel/tcg/tcg-accel-ops.c
index 3973591508..749ad182f2 100644
--- a/accel/tcg/tcg-accel-ops.c
+++ b/accel/tcg/tcg-accel-ops.c
@@ -70,10 +70,17 @@ void tcg_cpus_destroy(CPUState *cpu)
int tcg_cpus_exec(CPUState *cpu)
{
int ret;
+ uint64_t ti;
+
assert(tcg_enabled());
+ ti = profile_getclock();
+
cpu_exec_start(cpu);
ret = cpu_exec(cpu);
cpu_exec_end(cpu);
+
+ qatomic_add(&tcg_ctx->prof.cpu_exec_time, profile_getclock() - ti);
+
return ret;
}
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index dadf49954f..ec72b5121e 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -204,6 +204,12 @@ void cpu_restore_state_from_tb(CPUState *cpu,
TranslationBlock *tb,
uintptr_t host_pc)
{
uint64_t data[TARGET_INSN_START_WORDS];
+ uint64_t ti = 0;
+
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ ti = profile_getclock();
+ }
+
int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
if (insns_left < 0) {
@@ -220,6 +226,15 @@ void cpu_restore_state_from_tb(CPUState *cpu,
TranslationBlock *tb,
}
cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
+
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ TBStatistics *ts = tb->tb_stats;
+ uint64_t elapsed = profile_getclock() - ti;
+ qemu_mutex_lock(&ts->jit_stats_lock);
+ ts->tb_restore_time += elapsed;
+ ts->tb_restore_count++;
+ qemu_mutex_unlock(&ts->jit_stats_lock);
+ }
}
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
@@ -267,8 +282,9 @@ void page_init(void)
*/
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
target_ulong pc, void *host_pc,
- int *max_insns, int64_t *ti)
+ int *max_insns)
{
+ TCGProfile *prof = &tcg_ctx->prof;
int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
if (unlikely(ret != 0)) {
return ret;
@@ -282,6 +298,10 @@ static int setjmp_gen_code(CPUArchState *env,
TranslationBlock *tb,
tcg_ctx->cpu = NULL;
*max_insns = tb->icount;
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_ir_done_time = profile_getclock();
+ }
+
return tcg_gen_code(tcg_ctx, tb, pc);
}
@@ -296,6 +316,8 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc,
target_ulong pc,
new_stats->cs_base = cs_base;
new_stats->flags = flags;
new_stats->stats_enabled = get_default_tbstats_flag();
+ new_stats->tbs = g_ptr_array_sized_new(4);
+ qemu_mutex_init(&new_stats->jit_stats_lock);
/*
* All initialisation must be complete before we insert into qht
@@ -309,6 +331,7 @@ static TBStatistics *tb_get_stats(tb_page_addr_t phys_pc,
target_ulong pc,
* If there is already a TBStatistic for this TB from a previous flush
* then just make the new TB point to the older TBStatistic
*/
+ g_ptr_array_free(new_stats->tbs, true);
g_free(new_stats);
return existing_stats;
} else {
@@ -327,7 +350,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
tb_page_addr_t phys_pc;
tcg_insn_unit *gen_code_buf;
int gen_code_size, search_size, max_insns;
- int64_t ti;
+ TCGProfile *prof = &tcg_ctx->prof;
void *host_pc;
assert_memory_lock();
@@ -389,11 +412,14 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
if (tb_stats_collection_enabled() &&
qemu_log_in_addr_range(tb->pc)) {
tb->tb_stats = tb_get_stats(phys_pc, pc, cs_base, flags);
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_start_time = profile_getclock();
+ }
} else {
tb->tb_stats = NULL;
}
- gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
+ gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns);
if (unlikely(gen_code_size < 0)) {
switch (gen_code_size) {
case -1:
@@ -445,6 +471,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
*/
perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_code_done_time = profile_getclock();
+ }
+
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
qemu_log_in_addr_range(pc)) {
FILE *logfile = qemu_log_trylock();
@@ -547,6 +577,46 @@ TranslationBlock *tb_gen_code(CPUState *cpu,
return tb;
}
+ /*
+ * Collect JIT stats when enabled. We batch them all up here to
+ * avoid spamming the cache with atomic accesses
+ */
+ if (tb_stats_enabled(tb, (TB_JIT_STATS | TB_JIT_TIME))) {
+ TBStatistics *ts = tb->tb_stats;
+ qemu_mutex_lock(&ts->jit_stats_lock);
+
+ if (tb_stats_enabled(tb, TB_JIT_STATS)) {
+ ts->code.num_guest_inst += prof->translation.nb_guest_insns;
+ ts->code.num_tcg_ops += prof->translation.nb_ops_pre_opt;
+ ts->code.num_tcg_ops_opt += tcg_ctx->nb_ops;
+ ts->code.spills += prof->translation.nb_spills;
+ ts->code.temps += prof->translation.temp_count;
+ ts->code.deleted_ops += prof->translation.del_op_count;
+ ts->code.in_len += tb->size;
+ ts->code.out_len += tb->tc.size;
+ ts->code.search_out_len += search_size;
+
+ ts->translations.total++;
+ if (tb_page_addr1(tb) != -1) {
+ ts->translations.spanning++;
+ }
+
+ g_ptr_array_add(ts->tbs, tb);
+ }
+
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ ts->gen_times.ir += prof->gen_ir_done_time - prof->gen_start_time;
+ ts->gen_times.ir_opt +=
+ prof->gen_opt_done_time - prof->gen_ir_done_time;
+ ts->gen_times.la +=
+ prof->gen_la_done_time - prof->gen_opt_done_time;
+ ts->gen_times.code +=
+ prof->gen_code_done_time - prof->gen_la_done_time;
+ }
+
+ qemu_mutex_unlock(&ts->jit_stats_lock);
+ }
+
/*
* Insert TB into the corresponding region tree before publishing it
* through QHT. Otherwise rewinding happened in the TB might fail to
diff --git a/accel/tcg/translator.c b/accel/tcg/translator.c
index 80ffbfb455..f7c8f3f1f4 100644
--- a/accel/tcg/translator.c
+++ b/accel/tcg/translator.c
@@ -19,7 +19,7 @@
#include "exec/plugin-gen.h"
#include "exec/replay-core.h"
-static void gen_tb_exec_count(TranslationBlock *tb)
+static inline void gen_tb_exec_count(TranslationBlock *tb)
{
if (tb_stats_enabled(tb, TB_EXEC_STATS)) {
TCGv_ptr ptr = tcg_temp_ebb_new_ptr();
@@ -147,6 +147,9 @@ void translator_loop(CPUState *cpu, TranslationBlock *tb,
int *max_insns,
tb->size = db->pc_next - db->pc_first;
tb->icount = db->num_insns;
+ /* Save number of guest instructions for TB_JIT_STATS */
+ tcg_ctx->prof.translation.nb_guest_insns = db->num_insns;
+
if (qemu_loglevel_mask(CPU_LOG_TB_IN_ASM)
&& qemu_log_in_addr_range(db->pc_first)) {
FILE *logfile = qemu_log_trylock();
diff --git a/include/exec/tb-stats-flags.h b/include/exec/tb-stats-flags.h
index fa71eb6f0c..f29eff7576 100644
--- a/include/exec/tb-stats-flags.h
+++ b/include/exec/tb-stats-flags.h
@@ -13,6 +13,8 @@
#define TB_NOTHING (1 << 0)
#define TB_EXEC_STATS (1 << 1)
+#define TB_JIT_STATS (1 << 2)
+#define TB_JIT_TIME (1 << 3)
/* TBStatistic collection controls */
void enable_collect_tb_stats(void);
diff --git a/include/exec/tb-stats.h b/include/exec/tb-stats.h
index eb1fa92a4e..d93d42e085 100644
--- a/include/exec/tb-stats.h
+++ b/include/exec/tb-stats.h
@@ -34,6 +34,9 @@
#define tb_stats_enabled(tb, JIT_STATS) \
(tb && tb->tb_stats && (tb->tb_stats->stats_enabled & JIT_STATS))
+/*
+ * Average of the accumulated counter @name over the number of
+ * translations recorded in @stat; evaluates to 0 when no translation
+ * has been recorded.  @stat is evaluated more than once, so it must
+ * not have side effects.
+ */
+#define stat_per_translation(stat, name)                             \
+    ((stat)->translations.total ?                                    \
+     (stat)->name / (stat)->translations.total : 0)
+
typedef struct TBStatistics TBStatistics;
/*
@@ -60,10 +63,53 @@ struct TBStatistics {
unsigned long atomic;
} executions;
+ /* JIT Stats - protected by lock */
+ QemuMutex jit_stats_lock;
+
+ /* Sum of all operations for all translations */
+ struct {
+ unsigned num_guest_inst;
+ unsigned num_tcg_ops;
+ unsigned num_tcg_ops_opt;
+ unsigned spills;
+
+ /* CONFIG_PROFILE */
+ unsigned temps;
+ unsigned deleted_ops;
+ unsigned in_len;
+ unsigned out_len;
+ unsigned search_out_len;
+ } code;
+
+ struct {
+ unsigned long total;
+ unsigned long uncached;
+ unsigned long spanning;
+ } translations;
+
+ /*
+ * All persistent (cached) TranslationBlocks using
+ * this TBStats structure. Has to be reset on a tb_flush.
+ */
+ GPtrArray *tbs;
+
+ /* Recover state from TB */
+ uint64_t tb_restore_time;
+ uint64_t tb_restore_count;
+
+ struct {
+ uint64_t ir;
+ uint64_t ir_opt;
+ uint64_t la;
+ uint64_t code;
+ } gen_times;
};
bool tb_stats_cmp(const void *ap, const void *bp);
void init_tb_stats_htable(void);
+void dump_jit_profile_info(TCGProfile *s, GString *buf);
+void dump_jit_exec_time_info(uint64_t dev_time, GString *buf);
+
#endif
diff --git a/include/qemu/timer.h b/include/qemu/timer.h
index 9a91cb1248..ad0da18a5f 100644
--- a/include/qemu/timer.h
+++ b/include/qemu/timer.h
@@ -989,4 +989,10 @@ static inline int64_t cpu_get_host_ticks(void)
}
#endif
+static inline int64_t profile_getclock(void)
+{
+ return get_clock();
+}
+
+extern uint64_t dev_time;
#endif
diff --git a/include/tcg/tcg.h b/include/tcg/tcg.h
index 083cbd6580..fbd633322b 100644
--- a/include/tcg/tcg.h
+++ b/include/tcg/tcg.h
@@ -522,6 +522,46 @@ static inline TCGRegSet output_pref(const TCGOp *op,
unsigned i)
return i < ARRAY_SIZE(op->output_pref) ? op->output_pref[i] : 0;
}
+/*
+ * The TCGProfile structure holds data for analysing the quality of
+ * the code generation. The data is split between stuff that is valid
+ * for the lifetime of a single translation and things that are valid
+ * for the lifetime of the translator. As the former is reset for each
+ * new translation so it should be copied elsewhere if you want to
+ * keep it.
+ *
+ * The structure is safe to access within the context of translation
+ * but accessing the data from elsewhere should be done with safe
+ * work.
+ */
+typedef struct TCGProfile {
+
+ struct {
+ int nb_guest_insns;
+ int nb_spills;
+ int nb_ops_pre_opt;
+
+ int del_op_count;
+ int temp_count;
+ } translation;
+
+ int64_t cpu_exec_time;
+ int64_t op_count; /* total insn count */
+ int64_t code_in_len;
+ int64_t code_out_len;
+ int64_t search_out_len;
+
+ /* Timestamps during translation */
+ uint64_t gen_start_time;
+ uint64_t gen_ir_done_time;
+ uint64_t gen_opt_done_time;
+ uint64_t gen_la_done_time;
+ uint64_t gen_code_done_time;
+
+ /* Lifetime count of TCGOps per TCGContext */
+ uint64_t table_op_count[NB_OPS];
+} TCGProfile;
+
struct TCGContext {
uint8_t *pool_cur, *pool_end;
TCGPool *pool_first, *pool_current, *pool_first_large;
@@ -548,6 +588,8 @@ struct TCGContext {
tcg_insn_unit *code_buf; /* pointer for start of tb */
tcg_insn_unit *code_ptr; /* pointer for running end of tb */
+ TCGProfile prof;
+
#ifdef CONFIG_DEBUG_TCG
int goto_tb_issue_mask;
const TCGOpcode *vecop_list;
@@ -608,6 +650,7 @@ struct TCGContext {
/* Exit to translator on overflow. */
sigjmp_buf jmp_trans;
+ TranslationBlock *current_tb;
};
static inline bool temp_readonly(TCGTemp *ts)
@@ -885,7 +928,7 @@ static inline TCGv_ptr tcg_temp_new_ptr(void)
return temp_tcgv_ptr(t);
}
-int64_t tcg_cpu_exec_time(void);
+uint64_t tcg_cpu_exec_time(void);
void tcg_dump_info(GString *buf);
void tcg_dump_op_count(GString *buf);
diff --git a/softmmu/runstate.c b/softmmu/runstate.c
index bd50062ed0..37390799f1 100644
--- a/softmmu/runstate.c
+++ b/softmmu/runstate.c
@@ -728,9 +728,18 @@ static bool main_loop_should_exit(int *status)
int qemu_main_loop(void)
{
int status = EXIT_SUCCESS;
+#ifdef CONFIG_TCG
+ uint64_t ti;
+#endif
while (!main_loop_should_exit(&status)) {
+#ifdef CONFIG_TCG
+ ti = profile_getclock();
+#endif
main_loop_wait(false);
+#ifdef CONFIG_TCG
+ dev_time += profile_getclock() - ti;
+#endif
}
return status;
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 47befdfcae..9ae0e093e1 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -41,6 +41,7 @@
#define NO_CPU_IO_DEFS
#include "exec/exec-all.h"
+#include "exec/tb-stats.h"
#include "tcg/tcg-op.h"
#if UINTPTR_MAX == UINT32_MAX
@@ -1511,6 +1512,9 @@ void tcg_func_start(TCGContext *s)
s->nb_labels = 0;
s->current_frame_offset = s->frame_start;
+ s->prof.translation.nb_spills = 0;
+ s->prof.translation.del_op_count = 0;
+ s->prof.translation.temp_count = 0;
#ifdef CONFIG_DEBUG_TCG
s->goto_tb_issue_mask = 0;
#endif
@@ -2983,6 +2987,8 @@ void tcg_op_remove(TCGContext *s, TCGOp *op)
QTAILQ_REMOVE(&s->ops, op, link);
QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
s->nb_ops--;
+ /* ? won't this end up op_opt - op = del_op_count ? */
+ s->prof.translation.del_op_count++;
}
void tcg_remove_ops_after(TCGOp *op)
@@ -4148,6 +4154,7 @@ static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet
required_regs,
}
/* We must spill something. */
+ s->prof.translation.nb_spills++;
for (j = f; j < 2; j++) {
TCGRegSet set = reg_ct[j];
@@ -5852,22 +5859,55 @@ static void tcg_out_st_helper_args(TCGContext *s, const
TCGLabelQemuLdst *ldst,
tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
-void tcg_dump_op_count(GString *buf)
+/* avoid copy/paste errors */
+#define PROF_ADD(to, from, field) \
+ do { \
+ (to)->field += qatomic_read(&((from)->field)); \
+ } while (0)
+
+/*
+ * Add the profiling counters (cpu_exec_time and the per-opcode table)
+ * of every TCGContext in tcg_ctxs[] to @prof.  @prof is only added to,
+ * so the caller must zero it beforehand.
+ */
+static void collect_tcg_profiler(TCGProfile *prof)
{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+    unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+    unsigned int i, j;
+
+    for (i = 0; i < n_ctxs; i++) {
+        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+        const TCGProfile *orig = &s->prof;
+
+        PROF_ADD(prof, orig, cpu_exec_time);
+
+        /* Separate index: reusing i here would clobber the outer loop. */
+        for (j = 0; j < NB_OPS; j++) {
+            PROF_ADD(prof, orig, table_op_count[j]);
+        }
+    }
}
-int64_t tcg_cpu_exec_time(void)
+uint64_t tcg_cpu_exec_time(void)
{
- error_report("%s: TCG profiler not compiled", __func__);
- exit(EXIT_FAILURE);
+ unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
+ unsigned int i;
+ uint64_t ret = 0;
+
+ for (i = 0; i < n_ctxs; i++) {
+ const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
+ const TCGProfile *prof = &s->prof;
+
+ ret += qatomic_read(&prof->cpu_exec_time);
+ }
+ return ret;
}
int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
{
+ TCGProfile *prof = &s->prof;
int i, num_insns;
TCGOp *op;
+ s->current_tb = tb;
+ /* save pre-optimisation op count */
+ prof->translation.nb_ops_pre_opt = s->nb_ops;
+ prof->translation.temp_count = s->nb_temps;
+
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
&& qemu_log_in_addr_range(pc_start))) {
FILE *logfile = qemu_log_trylock();
@@ -5879,6 +5919,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
}
}
+
#ifdef CONFIG_DEBUG_TCG
/* Ensure all labels referenced have been emitted. */
{
@@ -5898,6 +5939,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
tcg_optimize(s);
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_opt_done_time = profile_getclock();
+ }
+
reachable_code_pass(s);
liveness_pass_0(s);
liveness_pass_1(s);
@@ -5921,6 +5966,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
}
}
+ if (tb_stats_enabled(tb, TB_JIT_TIME)) {
+ prof->gen_la_done_time = profile_getclock();
+ }
+
if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
&& qemu_log_in_addr_range(pc_start))) {
FILE *logfile = qemu_log_trylock();
@@ -5955,6 +6004,13 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
s->pool_labels = NULL;
#endif
+ if (tb_stats_collection_enabled()) {
+ QTAILQ_FOREACH(op, &s->ops, link) {
+ TCGOpcode opc = op->opc;
+ s->prof.table_op_count[opc]++;
+ }
+ }
+
num_insns = -1;
QTAILQ_FOREACH(op, &s->ops, link) {
TCGOpcode opc = op->opc;
@@ -6053,9 +6109,23 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb,
uint64_t pc_start)
return tcg_current_code_size(s);
}
+/*
+ * Append one "<opcode name> <count>" line per TCG opcode to @buf, using
+ * the counts summed by collect_tcg_profiler().
+ */
+void tcg_dump_op_count(GString *buf)
+{
+    TCGProfile prof = {};
+    int i;
+
+    collect_tcg_profiler(&prof);
+    for (i = 0; i < NB_OPS; i++) {
+        /* table_op_count[] is uint64_t, hence PRIu64 and not PRId64. */
+        g_string_append_printf(buf, "%s %" PRIu64 "\n",
+                               tcg_op_defs[i].name, prof.table_op_count[i]);
+    }
+}
+
void tcg_dump_info(GString *buf)
{
- g_string_append_printf(buf, "[TCG profiler not compiled]\n");
+ TCGProfile prof = {};
+ collect_tcg_profiler(&prof);
+ dump_jit_profile_info(&prof, buf);
}
#ifdef ELF_HOST_MACHINE
diff --git a/tests/qtest/qmp-cmd-test.c b/tests/qtest/qmp-cmd-test.c
index 73a670e8fa..749aafe4da 100644
--- a/tests/qtest/qmp-cmd-test.c
+++ b/tests/qtest/qmp-cmd-test.c
@@ -46,6 +46,9 @@ static int query_error_class(const char *cmd)
{ "query-balloon", ERROR_CLASS_DEVICE_NOT_ACTIVE },
{ "query-hotpluggable-cpus", ERROR_CLASS_GENERIC_ERROR },
{ "query-vm-generation-id", ERROR_CLASS_GENERIC_ERROR },
+#ifndef CONFIG_TCG
+ { "x-query-profile", ERROR_CLASS_GENERIC_ERROR },
+#endif
/* Only valid with a USB bus added */
{ "x-query-usb", ERROR_CLASS_GENERIC_ERROR },
/* Only valid with accel=tcg */
--
2.25.1
- [PATCH v13 00/10] TCG code quality tracking, Fei Wu, 2023/05/29
- [PATCH v13 01/10] accel/tcg: remove CONFIG_PROFILER, Fei Wu, 2023/05/29
- [PATCH v13 02/10] accel/tcg: introduce TBStatistics structure, Fei Wu, 2023/05/29
- [PATCH v13 03/10] accel: collecting TB execution count, Fei Wu, 2023/05/29
- [PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics,
Fei Wu <=
- Re: [PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics, Richard Henderson, 2023/05/30
- Re: [PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics, Wu, Fei, 2023/05/30
- Re: [PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics, Wu, Fei, 2023/05/30
- Re: [PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics, Alex Bennée, 2023/05/30
- Re: [PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics, Wu, Fei, 2023/05/30
- Re: [PATCH v13 04/10] accel/tcg: add jit stats and time to TBStatistics, Wu, Fei, 2023/05/30
[PATCH v13 06/10] monitor: adding tb_stats hmp command, Fei Wu, 2023/05/29
[PATCH v13 05/10] debug: add -d tb_stats to control TBStatistics collection:, Fei Wu, 2023/05/29
[PATCH v13 07/10] tb-stats: reset the tracked TBs on a tb_flush, Fei Wu, 2023/05/29
[PATCH v13 08/10] Adding info [tb-list|tb] commands to HMP (WIP), Fei Wu, 2023/05/29