[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH] RFC: TCG constant propagation.
From: |
Pablo Virolainen |
Subject: |
Re: [Qemu-devel] [PATCH] RFC: TCG constant propagation. |
Date: |
Wed, 05 Aug 2009 11:13:38 +0300 |
User-agent: |
Thunderbird 2.0.0.22 (X11/20090608) |
Filip Navara kirjoitti:
Add support for constant propagation to TCG. This has to be paired with the
liveness
analysis to remove the dead code. Not all possible operations are covered, but
the
most common ones are. This improves the code generation for several ARM
instructions,
like MVN (immediate), and it may help other targets as well.
On my small benchmark, qemu-system-sh4 was about 3% slower on an Intel Xeon
[CPU spec garbled by the list archiver's address munger]. I'm running in
64-bit mode. My mini benchmark is to build zlib 1.2.3, so it's a 'real
world' workload. I ran the benchmark several times and the results seem to
be pretty consistent.
ps. I added INDEX_op_*_i64 cases to the evaluation part. I'm not
completely sure whether those `& mask` operations should be there.
Pablo Virolainen
--- qemu-0.11.0-rc1_orig/tcg/tcg.c 2009-07-30 03:38:26.000000000 +0300
+++ qemu-0.11.0-rc1/tcg/tcg.c 2009-08-05 10:43:48.000000000 +0300
@@ -1021,7 +1021,194 @@
#endif
tdefs++;
}
+}
+static void tcg_const_analysis(TCGContext *s)
+{
+    /* Forward pass over the generated opcode stream: track which
+     * temporaries hold known constants and rewrite operations whose
+     * inputs are all constant into movi ops.  Relies on the following
+     * liveness pass to delete movi ops whose results become dead.
+     */
+    int nb_cargs, nb_iargs, nb_oargs, dest, src, src2, del_args, i;
+    TCGArg *args;
+    uint16_t op, movi_op;
+    uint16_t *opc_ptr;
+    const TCGOpDef *def;
+    uint8_t *const_temps;           /* per-temp flag: value is known */
+    tcg_target_ulong *temp_values;  /* known value, stored zero-extended */
+    tcg_target_ulong val, mask;
+    tcg_target_ulong dest_val, src_val, src2_val;
+
+    const_temps = tcg_malloc(s->nb_temps);
+    memset(const_temps, 0, s->nb_temps);
+    /* Fix: was sizeof(uint32_t), which under-allocates the value array
+     * on 64-bit hosts where tcg_target_ulong is 8 bytes. */
+    temp_values = tcg_malloc(s->nb_temps * sizeof(tcg_target_ulong));
+
+    opc_ptr = gen_opc_buf;
+    args = gen_opparam_buf;
+    while (opc_ptr < gen_opc_ptr) {
+        op = *opc_ptr;
+        def = &tcg_op_defs[op];
+        nb_oargs = def->nb_oargs;
+        nb_iargs = def->nb_iargs;
+        nb_cargs = def->nb_cargs;
+        del_args = 0;
+        /* mask stays all-ones for 64-bit ops (and on 32-bit hosts) and
+         * is narrowed to 32 bits for _i32 ops on 64-bit hosts, so every
+         * folded result is stored zero-extended.  movi_op is the opcode
+         * a folded op is rewritten to; fixed below for 64-bit ops. */
+        mask = ~(tcg_target_ulong)0;
+        movi_op = INDEX_op_movi_i32;
+
+        switch(op) {
+        case INDEX_op_movi_i32:
+#if TCG_TARGET_REG_BITS == 64
+            mask = 0xffffffff;
+            /* fallthrough */
+        case INDEX_op_movi_i64:
+#endif
+            dest = args[0];
+            val = args[1];
+            const_temps[dest] = 1;
+            /* Store zero-extended so later right shifts on the recorded
+             * value are not polluted by high bits of the TCGArg. */
+            temp_values[dest] = val & mask;
+            break;
+        case INDEX_op_mov_i32:
+#if TCG_TARGET_REG_BITS == 64
+        case INDEX_op_mov_i64:
+#endif
+            dest = args[0];
+            src = args[1];
+            const_temps[dest] = const_temps[src];
+            temp_values[dest] = temp_values[src];
+            break;
+        case INDEX_op_not_i32:
+#if TCG_TARGET_REG_BITS == 64
+            mask = 0xffffffff;
+            /* fallthrough */
+        case INDEX_op_not_i64:
+#endif
+            dest = args[0];
+            src = args[1];
+            if (const_temps[src]) {
+                const_temps[dest] = 1;
+                dest_val = ~temp_values[src];
+#if TCG_TARGET_REG_BITS == 64
+                /* Fix: a folded 64-bit op must become movi_i64, not
+                 * movi_i32, or the constant would be truncated. */
+                if (mask == ~(tcg_target_ulong)0) {
+                    movi_op = INDEX_op_movi_i64;
+                }
+#endif
+                *opc_ptr = movi_op;
+                args[1] = temp_values[dest] = dest_val & mask;
+            } else {
+                const_temps[dest] = 0;
+            }
+            break;
+        case INDEX_op_add_i32:
+        case INDEX_op_sub_i32:
+        case INDEX_op_mul_i32:
+        case INDEX_op_and_i32:
+        case INDEX_op_or_i32:
+        case INDEX_op_xor_i32:
+        case INDEX_op_shl_i32:
+        case INDEX_op_shr_i32:
+#if TCG_TARGET_REG_BITS == 64
+            mask = 0xffffffff;
+            /* fallthrough */
+        case INDEX_op_add_i64:
+        case INDEX_op_sub_i64:
+        case INDEX_op_mul_i64:
+        case INDEX_op_and_i64:
+        case INDEX_op_or_i64:
+        case INDEX_op_xor_i64:
+        case INDEX_op_shl_i64:
+        case INDEX_op_shr_i64:
+#endif
+            dest = args[0];
+            src = args[1];
+            src2 = args[2];
+            if (const_temps[src] && const_temps[src2]) {
+                src_val = temp_values[src];
+                src2_val = temp_values[src2];
+                const_temps[dest] = 1;
+                /* The final "& mask" below truncates _i32 results on
+                 * 64-bit hosts, so the i32/i64 pairs of most ops can
+                 * share one evaluation.  Fix: the _i64 case labels must
+                 * be guarded like the outer switch, or this does not
+                 * compile on 32-bit hosts where those opcodes do not
+                 * exist. */
+                switch (op) {
+                case INDEX_op_add_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_add_i64:
+#endif
+                    dest_val = src_val + src2_val;
+                    break;
+                case INDEX_op_sub_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_sub_i64:
+#endif
+                    dest_val = src_val - src2_val;
+                    break;
+                case INDEX_op_mul_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_mul_i64:
+#endif
+                    dest_val = src_val * src2_val;
+                    break;
+                case INDEX_op_and_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_and_i64:
+#endif
+                    dest_val = src_val & src2_val;
+                    break;
+                case INDEX_op_or_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_or_i64:
+#endif
+                    dest_val = src_val | src2_val;
+                    break;
+                case INDEX_op_xor_i32:
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_xor_i64:
+#endif
+                    dest_val = src_val ^ src2_val;
+                    break;
+                case INDEX_op_shl_i32:
+                    /* Mask the count: shifting by >= the type width is
+                     * undefined behavior in host C, and TCG leaves an
+                     * out-of-range shift unspecified anyway. */
+                    dest_val = src_val << (src2_val & 31);
+                    break;
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_shl_i64:
+                    dest_val = src_val << (src2_val & 63);
+                    break;
+#endif
+                case INDEX_op_shr_i32:
+                    /* src_val is stored zero-extended, so this is a
+                     * correct logical 32-bit shift on 64-bit hosts. */
+                    dest_val = src_val >> (src2_val & 31);
+                    break;
+#if TCG_TARGET_REG_BITS == 64
+                case INDEX_op_shr_i64:
+                    dest_val = src_val >> (src2_val & 63);
+                    break;
+#endif
+                default:
+                    fprintf(stderr, "index op %i\n", op);
+                    tcg_abort();
+                    return;
+                }
+#if TCG_TARGET_REG_BITS == 64
+                /* Fix: was unconditionally movi_i32 (see not case). */
+                if (mask == ~(tcg_target_ulong)0) {
+                    movi_op = INDEX_op_movi_i64;
+                }
+#endif
+                *opc_ptr = movi_op;
+                args[1] = temp_values[dest] = dest_val & mask;
+                del_args = 1;   /* movi takes 2 params, these ops take 3 */
+            } else {
+                const_temps[dest] = 0;
+            }
+            break;
+        case INDEX_op_call:
+            /* Calls encode their argument counts in the first param. */
+            nb_oargs = args[0] >> 16;
+            nb_iargs = args[0] & 0xffff;
+            nb_cargs = def->nb_cargs;
+            args++;
+            /* NOTE(review): only the explicit outputs are invalidated
+             * here; confirm that no tracked temp can be clobbered
+             * indirectly by the call (cf. liveness analysis). */
+            for (i = 0; i < nb_oargs; i++) {
+                const_temps[args[i]] = 0;
+            }
+            break;
+        case INDEX_op_nopn:
+            /* variable number of arguments, count held in args[0] */
+            nb_cargs = args[0];
+            break;
+        case INDEX_op_set_label:
+            /* A label can be reached from elsewhere: forget everything. */
+            memset(const_temps, 0, s->nb_temps);
+            break;
+        default:
+            if (def->flags & TCG_OPF_BB_END) {
+                /* End of basic block: constants do not survive. */
+                memset(const_temps, 0, s->nb_temps);
+            } else {
+                /* Unknown op: its outputs are no longer constant. */
+                for (i = 0; i < nb_oargs; i++) {
+                    const_temps[args[i]] = 0;
+                }
+            }
+            break;
+        }
+        opc_ptr++;
+        args += nb_iargs + nb_oargs + nb_cargs - del_args;
+        if (del_args > 0) {
+            /* Close the gap left by the dropped operand.  O(n) per
+             * folded op; acceptable for an RFC. */
+            gen_opparam_ptr -= del_args;
+            memmove(args, args + del_args, (gen_opparam_ptr - args) *
+sizeof(*args));
+        }
+    }
+
+    if (args != gen_opparam_ptr)
+        tcg_abort();
}
#ifdef USE_LIVENESS_ANALYSIS
@@ -1891,6 +2078,8 @@
}
#endif
+ tcg_const_analysis(s);
+
#ifdef CONFIG_PROFILER
s->la_time -= profile_getclock();
#endif
- Re: [Qemu-devel] [PATCH] RFC: TCG constant propagation.,
Pablo Virolainen <=