[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH v7 40/52] translate-all: report correct avg host TB size
From: Richard Henderson
Subject: [Qemu-devel] [PATCH v7 40/52] translate-all: report correct avg host TB size
Date: Fri, 20 Oct 2017 16:20:11 -0700
From: "Emilio G. Cota" <address@hidden>
Since commit 6e3b2bfd6 ("tcg: allocate TB structs before the
corresponding translated code") we are not fully utilizing
code_gen_buffer for translated code, and therefore are
incorrectly reporting the amount of translated code as well as
the average host TB size. Address this by:
- Making the conscious choice of misreporting the total translated code;
  doing otherwise would mislead users into thinking "-tb-size" is not
  honoured.
- Expanding tb_tree_stats to accurately count the bytes of translated code on
  the host, and using this for reporting the average tb host size,
  as well as the expansion ratio.
In the future we might want to consider reporting the accurate numbers for
the total translated code, together with a "bookkeeping/overhead" field to
account for the TB structs.
Reviewed-by: Richard Henderson <address@hidden>
Signed-off-by: Emilio G. Cota <address@hidden>
Signed-off-by: Richard Henderson <address@hidden>
---
accel/tcg/translate-all.c | 32 +++++++++++++++++++++++---------
1 file changed, 23 insertions(+), 9 deletions(-)
diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c
index e929ccb30b..7e2c0cdb98 100644
--- a/accel/tcg/translate-all.c
+++ b/accel/tcg/translate-all.c
@@ -921,6 +921,15 @@ static void page_flush_tb(void)
}
}
+static gboolean tb_host_size_iter(gpointer key, gpointer value, gpointer data)
+{
+ const TranslationBlock *tb = value;
+ size_t *size = data;
+
+ *size += tb->tc.size;
+ return false;
+}
+
/* flush all the translation blocks */
static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
{
@@ -935,11 +944,12 @@ static void do_tb_flush(CPUState *cpu, run_on_cpu_data tb_flush_count)
if (DEBUG_TB_FLUSH_GATE) {
size_t nb_tbs = g_tree_nnodes(tcg_ctx.tb_ctx.tb_tree);
+ size_t host_size = 0;
- printf("qemu: flush code_size=%td nb_tbs=%zu avg_tb_size=%td\n",
+ g_tree_foreach(tcg_ctx.tb_ctx.tb_tree, tb_host_size_iter, &host_size);
+ printf("qemu: flush code_size=%td nb_tbs=%zu avg_tb_size=%zu\n",
tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer, nb_tbs,
- nb_tbs > 0 ?
- (tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer) / nb_tbs : 0);
+ nb_tbs > 0 ? host_size / nb_tbs : 0);
}
if ((unsigned long)(tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer)
> tcg_ctx.code_gen_buffer_size) {
@@ -1882,6 +1892,7 @@ static void print_qht_statistics(FILE *f, fprintf_function cpu_fprintf,
}
struct tb_tree_stats {
+ size_t host_size;
size_t target_size;
size_t max_target_size;
size_t direct_jmp_count;
@@ -1894,6 +1905,7 @@ static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
const TranslationBlock *tb = value;
struct tb_tree_stats *tst = data;
+ tst->host_size += tb->tc.size;
tst->target_size += tb->size;
if (tb->size > tst->max_target_size) {
tst->max_target_size = tb->size;
@@ -1922,6 +1934,11 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
g_tree_foreach(tcg_ctx.tb_ctx.tb_tree, tb_tree_stats_iter, &tst);
/* XXX: avoid using doubles ? */
cpu_fprintf(f, "Translation buffer state:\n");
+ /*
+ * Report total code size including the padding and TB structs;
+ * otherwise users might think "-tb-size" is not honoured.
+ * For avg host size we use the precise numbers from tb_tree_stats though.
+ */
cpu_fprintf(f, "gen code size %td/%zd\n",
tcg_ctx.code_gen_ptr - tcg_ctx.code_gen_buffer,
tcg_ctx.code_gen_highwater - tcg_ctx.code_gen_buffer);
@@ -1929,12 +1946,9 @@ void dump_exec_info(FILE *f, fprintf_function cpu_fprintf)
cpu_fprintf(f, "TB avg target size %zu max=%zu bytes\n",
nb_tbs ? tst.target_size / nb_tbs : 0,
tst.max_target_size);
- cpu_fprintf(f, "TB avg host size %td bytes (expansion ratio: %0.1f)\n",
- nb_tbs ? (tcg_ctx.code_gen_ptr -
- tcg_ctx.code_gen_buffer) / nb_tbs : 0,
- tst.target_size ? (double) (tcg_ctx.code_gen_ptr -
- tcg_ctx.code_gen_buffer) /
- tst.target_size : 0);
+ cpu_fprintf(f, "TB avg host size %zu bytes (expansion ratio: %0.1f)\n",
+ nb_tbs ? tst.host_size / nb_tbs : 0,
+ tst.target_size ? (double)tst.host_size / tst.target_size : 0);
cpu_fprintf(f, "cross page TB count %zu (%zu%%)\n", tst.cross_page,
nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
cpu_fprintf(f, "direct jump count %zu (%zu%%) (2 jumps=%zu %zu%%)\n",
--
2.13.6
- [Qemu-devel] [PATCH v7 26/52] tcg: convert tb->cflags reads to tb_cflags(tb), (continued)
- [Qemu-devel] [PATCH v7 26/52] tcg: convert tb->cflags reads to tb_cflags(tb), Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 32/52] target/sh4: check CF_PARALLEL instead of parallel_cpus, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 31/52] target/s390x: check CF_PARALLEL instead of parallel_cpus, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 33/52] target/sparc: check CF_PARALLEL instead of parallel_cpus, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 34/52] tcg: check CF_PARALLEL instead of parallel_cpus, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 35/52] cpu-exec: lookup/generate TB outside exclusive region during step_atomic, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 36/52] tcg: Add CF_LAST_IO + CF_USE_ICOUNT to CF_HASH_MASK, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 37/52] tcg: Remove CF_IGNORE_ICOUNT, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 39/52] exec-all: rename tb_free to tb_remove, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 38/52] translate-all: use a binary search tree to track TBs in TBContext, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 40/52] translate-all: report correct avg host TB size,
Richard Henderson <=
- [Qemu-devel] [PATCH v7 41/52] tcg: take tb_ctx out of TCGContext, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 44/52] tcg: introduce **tcg_ctxs to keep track of all TCGContext's, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 45/52] tcg: distribute profiling counters across TCGContext's, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 42/52] tcg: define tcg_init_ctx and make tcg_ctx a pointer, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 46/52] tcg: allocate optimizer temps with tcg_malloc, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 43/52] gen-icount: fold exitreq_label into TCGContext, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 47/52] osdep: introduce qemu_mprotect_rwx/none, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 49/52] tcg: introduce regions to split code_gen_buffer, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 50/52] tcg: enable multiple TCG contexts in softmmu, Richard Henderson, 2017/10/20
- [Qemu-devel] [PATCH v7 52/52] translate-all: exit from tb_phys_invalidate if qht_remove fails, Richard Henderson, 2017/10/20