[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [RFC 03/14] tcg: Use per-temp state data in liveness
From: |
Richard Henderson |
Subject: |
[Qemu-devel] [RFC 03/14] tcg: Use per-temp state data in liveness |
Date: |
Wed, 16 Nov 2016 20:51:39 +0100 |
Signed-off-by: Richard Henderson <address@hidden>
---
tcg/tcg.c | 219 +++++++++++++++++++++++++++++++++-----------------------------
tcg/tcg.h | 16 +++++
2 files changed, 132 insertions(+), 103 deletions(-)
diff --git a/tcg/tcg.c b/tcg/tcg.c
index 7118bce..8ae41a6 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1368,42 +1368,55 @@ TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
/* liveness analysis: end of function: all temps are dead, and globals
should be in memory. */
-static inline void tcg_la_func_end(TCGContext *s, uint8_t *temp_state)
+static void tcg_la_func_end(TCGContext *s)
{
- memset(temp_state, TS_DEAD | TS_MEM, s->nb_globals);
- memset(temp_state + s->nb_globals, TS_DEAD, s->nb_temps - s->nb_globals);
+ int ng = s->nb_globals;
+ int nt = s->nb_temps;
+ int i;
+
+ for (i = 0; i < ng; ++i) {
+ s->temps[i].state = TS_DEAD | TS_MEM;
+ }
+ for (i = ng; i < nt; ++i) {
+ s->temps[i].state = TS_DEAD;
+ }
}
/* liveness analysis: end of basic block: all temps are dead, globals
and local temps should be in memory. */
-static inline void tcg_la_bb_end(TCGContext *s, uint8_t *temp_state)
+static void tcg_la_bb_end(TCGContext *s)
{
- int i, n;
+ int ng = s->nb_globals;
+ int nt = s->nb_temps;
+ int i;
- tcg_la_func_end(s, temp_state);
- for (i = s->nb_globals, n = s->nb_temps; i < n; i++) {
- if (s->temps[i].temp_local) {
- temp_state[i] |= TS_MEM;
- }
+ for (i = 0; i < ng; ++i) {
+ s->temps[i].state = TS_DEAD | TS_MEM;
+ }
+ for (i = ng; i < nt; ++i) {
+ s->temps[i].state = (s->temps[i].temp_local
+ ? TS_DEAD | TS_MEM
+ : TS_DEAD);
}
}
/* Liveness analysis : update the opc_arg_life array to tell if a
given input arguments is dead. Instructions updating dead
temporaries are removed. */
-static void liveness_pass_1(TCGContext *s, uint8_t *temp_state)
+static void liveness_pass_1(TCGContext *s)
{
int nb_globals = s->nb_globals;
int oi, oi_prev;
- tcg_la_func_end(s, temp_state);
+ tcg_la_func_end(s);
for (oi = s->gen_op_buf[0].prev; oi != 0; oi = oi_prev) {
int i, nb_iargs, nb_oargs;
TCGOpcode opc_new, opc_new2;
bool have_opc_new2;
TCGLifeData arg_life = 0;
- TCGArg arg;
+ TCGTemp *arg_ts;
+ TCGArg arg_idx;
TCGOp * const op = &s->gen_op_buf[oi];
TCGArg * const args = &s->gen_opparam_buf[op->args];
@@ -1424,8 +1437,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
/* pure functions can be removed if their result is unused */
if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
for (i = 0; i < nb_oargs; i++) {
- arg = args[i];
- if (temp_state[arg] != TS_DEAD) {
+ arg_ts = arg_temp(args[i]);
+ if (arg_ts->state != TS_DEAD) {
goto do_not_remove_call;
}
}
@@ -1435,41 +1448,45 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
/* output args are dead */
for (i = 0; i < nb_oargs; i++) {
- arg = args[i];
- if (temp_state[arg] & TS_DEAD) {
+ arg_ts = arg_temp(args[i]);
+ if (arg_ts->state & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
- if (temp_state[arg] & TS_MEM) {
+ if (arg_ts->state & TS_MEM) {
arg_life |= SYNC_ARG << i;
}
- temp_state[arg] = TS_DEAD;
+ arg_ts->state = TS_DEAD;
}
if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
TCG_CALL_NO_READ_GLOBALS))) {
/* globals should go back to memory */
- memset(temp_state, TS_DEAD | TS_MEM, nb_globals);
+ for (i = 0; i < nb_globals; i++) {
+ s->temps[i].state = TS_DEAD | TS_MEM;
+ }
} else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
/* globals should be synced to memory */
for (i = 0; i < nb_globals; i++) {
- temp_state[i] |= TS_MEM;
+ s->temps[i].state |= TS_MEM;
}
}
/* record arguments that die in this helper */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
- arg = args[i];
- if (arg != TCG_CALL_DUMMY_ARG) {
- if (temp_state[arg] & TS_DEAD) {
+ arg_idx = arg_index(args[i]);
+ if (arg_idx != TCG_CALL_DUMMY_ARG) {
+ arg_ts = arg_temp(args[i]);
+ if (arg_ts->state & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
}
}
/* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
- arg = args[i];
- if (arg != TCG_CALL_DUMMY_ARG) {
- temp_state[arg] &= ~TS_DEAD;
+ arg_idx = arg_index(args[i]);
+ if (arg_idx != TCG_CALL_DUMMY_ARG) {
+ arg_ts = arg_temp(args[i]);
+ arg_ts->state &= ~TS_DEAD;
}
}
}
@@ -1479,7 +1496,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
break;
case INDEX_op_discard:
/* mark the temporary as dead */
- temp_state[args[0]] = TS_DEAD;
+ arg_temp(args[0])->state = TS_DEAD;
break;
case INDEX_op_add2_i32:
@@ -1500,8 +1517,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
the low part. The result can be optimized to a simple
add or sub. This happens often for x86_64 guest when the
cpu mode is set to 32 bit. */
- if (temp_state[args[1]] == TS_DEAD) {
- if (temp_state[args[0]] == TS_DEAD) {
+ if (arg_temp(args[1])->state == TS_DEAD) {
+ if (arg_temp(args[0])->state == TS_DEAD) {
goto do_remove;
}
/* Replace the opcode and adjust the args in place,
@@ -1538,8 +1555,8 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
do_mul2:
nb_iargs = 2;
nb_oargs = 2;
- if (temp_state[args[1]] == TS_DEAD) {
- if (temp_state[args[0]] == TS_DEAD) {
+ if (arg_temp(args[1])->state == TS_DEAD) {
+ if (arg_temp(args[0])->state == TS_DEAD) {
/* Both parts of the operation are dead. */
goto do_remove;
}
@@ -1547,7 +1564,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
op->opc = opc = opc_new;
args[1] = args[2];
args[2] = args[3];
- } else if (temp_state[args[0]] == TS_DEAD && have_opc_new2) {
+ } else if (arg_temp(args[0])->state == TS_DEAD && have_opc_new2) {
/* The low part of the operation is dead; generate the high. */
op->opc = opc = opc_new2;
args[0] = args[1];
@@ -1570,7 +1587,7 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
implies side effects */
if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
for (i = 0; i < nb_oargs; i++) {
- if (temp_state[args[i]] != TS_DEAD) {
+ if (arg_temp(args[i])->state != TS_DEAD) {
goto do_not_remove;
}
}
@@ -1580,36 +1597,36 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
do_not_remove:
/* output args are dead */
for (i = 0; i < nb_oargs; i++) {
- arg = args[i];
- if (temp_state[arg] & TS_DEAD) {
+ arg_ts = arg_temp(args[i]);
+ if (arg_ts->state & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
- if (temp_state[arg] & TS_MEM) {
+ if (arg_ts->state & TS_MEM) {
arg_life |= SYNC_ARG << i;
}
- temp_state[arg] = TS_DEAD;
+ arg_ts->state = TS_DEAD;
}
/* if end of basic block, update */
if (def->flags & TCG_OPF_BB_END) {
- tcg_la_bb_end(s, temp_state);
+ tcg_la_bb_end(s);
} else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
/* globals should be synced to memory */
for (i = 0; i < nb_globals; i++) {
- temp_state[i] |= TS_MEM;
+ s->temps[i].state |= TS_MEM;
}
}
/* record arguments that die in this opcode */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
- arg = args[i];
- if (temp_state[arg] & TS_DEAD) {
+ arg_ts = arg_temp(args[i]);
+ if (arg_ts->state & TS_DEAD) {
arg_life |= DEAD_ARG << i;
}
}
/* input arguments are live for preceding opcodes */
for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
- temp_state[args[i]] &= ~TS_DEAD;
+ arg_temp(args[i])->state &= ~TS_DEAD;
}
}
break;
@@ -1619,16 +1636,12 @@ static void liveness_pass_1(TCGContext *s, uint8_t
*temp_state)
}
/* Liveness analysis: Convert indirect regs to direct temporaries. */
-static bool liveness_pass_2(TCGContext *s, uint8_t *temp_state)
+static bool liveness_pass_2(TCGContext *s)
{
int nb_globals = s->nb_globals;
- int16_t *dir_temps;
int i, oi, oi_next;
bool changes = false;
- dir_temps = tcg_malloc(nb_globals * sizeof(int16_t));
- memset(dir_temps, 0, nb_globals * sizeof(int16_t));
-
/* Create a temporary for each indirect global. */
for (i = 0; i < nb_globals; ++i) {
TCGTemp *its = &s->temps[i];
@@ -1636,12 +1649,12 @@ static bool liveness_pass_2(TCGContext *s, uint8_t
*temp_state)
TCGTemp *dts = tcg_temp_alloc(s);
dts->type = its->type;
dts->base_type = its->base_type;
- dir_temps[i] = temp_idx(dts);
+ its->state_ptr = dts;
}
+ /* All globals begin dead. */
+ its->state = TS_DEAD;
}
- memset(temp_state, TS_DEAD, nb_globals);
-
for (oi = s->gen_op_buf[0].next; oi != 0; oi = oi_next) {
TCGOp *op = &s->gen_op_buf[oi];
TCGArg *args = &s->gen_opparam_buf[op->args];
@@ -1649,7 +1662,8 @@ static bool liveness_pass_2(TCGContext *s, uint8_t
*temp_state)
const TCGOpDef *def = &tcg_op_defs[opc];
TCGLifeData arg_life = op->life;
int nb_iargs, nb_oargs, call_flags;
- TCGArg arg, dir;
+ TCGTemp *arg_ts, *dir_ts;
+ size_t arg_idx;
oi_next = op->next;
@@ -1677,24 +1691,24 @@ static bool liveness_pass_2(TCGContext *s, uint8_t
*temp_state)
/* Make sure that input arguments are available. */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
- arg = args[i];
+ arg_idx = arg_index(args[i]);
/* Note this unsigned test catches TCG_CALL_ARG_DUMMY too. */
- if (arg < nb_globals) {
- dir = dir_temps[arg];
- if (dir != 0 && temp_state[arg] == TS_DEAD) {
- TCGTemp *its = &s->temps[arg];
- TCGOpcode lopc = (its->type == TCG_TYPE_I32
+ if (arg_idx < nb_globals) {
+ arg_ts = arg_temp(args[i]);
+ dir_ts = arg_ts->state_ptr;
+ if (dir_ts != 0 && arg_ts->state == TS_DEAD) {
+ TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
? INDEX_op_ld_i32
: INDEX_op_ld_i64);
TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
TCGArg *largs = &s->gen_opparam_buf[lop->args];
- largs[0] = dir;
- largs[1] = temp_idx(its->mem_base);
- largs[2] = its->mem_offset;
+ largs[0] = temp_idx(dir_ts);
+ largs[1] = temp_idx(arg_ts->mem_base);
+ largs[2] = arg_ts->mem_offset;
/* Loaded, but synced with memory. */
- temp_state[arg] = TS_MEM;
+ arg_ts->state = TS_MEM;
}
}
}
@@ -1703,14 +1717,15 @@ static bool liveness_pass_2(TCGContext *s, uint8_t
*temp_state)
No action is required except keeping temp_state up to date
so that we reload when needed. */
for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
- arg = args[i];
- if (arg < nb_globals) {
- dir = dir_temps[arg];
- if (dir != 0) {
- args[i] = dir;
+ arg_idx = arg_index(args[i]);
+ if (arg_idx < nb_globals) {
+ arg_ts = arg_temp(args[i]);
+ dir_ts = arg_ts->state_ptr;
+ if (dir_ts != 0) {
+ args[i] = temp_idx(dir_ts);
changes = true;
if (IS_DEAD_ARG(i)) {
- temp_state[arg] = TS_DEAD;
+ arg_ts->state = TS_DEAD;
}
}
}
@@ -1724,52 +1739,54 @@ static bool liveness_pass_2(TCGContext *s, uint8_t
*temp_state)
for (i = 0; i < nb_globals; ++i) {
/* Liveness should see that globals are synced back,
that is, either TS_DEAD or TS_MEM. */
- tcg_debug_assert(dir_temps[i] == 0
- || temp_state[i] != 0);
+ arg_ts = &s->temps[i];
+ tcg_debug_assert(arg_ts->state_ptr == 0
+ || arg_ts->state != 0);
}
} else {
for (i = 0; i < nb_globals; ++i) {
/* Liveness should see that globals are saved back,
that is, TS_DEAD, waiting to be reloaded. */
- tcg_debug_assert(dir_temps[i] == 0
- || temp_state[i] == TS_DEAD);
+ arg_ts = &s->temps[i];
+ tcg_debug_assert(arg_ts->state_ptr == 0
+ || arg_ts->state == TS_DEAD);
}
}
/* Outputs become available. */
for (i = 0; i < nb_oargs; i++) {
- arg = args[i];
- if (arg >= nb_globals) {
+ arg_idx = arg_index(args[i]);
+ if (arg_idx >= nb_globals) {
continue;
}
- dir = dir_temps[arg];
- if (dir == 0) {
+ arg_ts = arg_temp(args[i]);
+ dir_ts = arg_ts->state_ptr;
+ if (dir_ts == 0) {
continue;
}
- args[i] = dir;
+ args[i] = temp_idx(dir_ts);
changes = true;
/* The output is now live and modified. */
- temp_state[arg] = 0;
+ arg_ts->state = 0;
/* Sync outputs upon their last write. */
if (NEED_SYNC_ARG(i)) {
- TCGTemp *its = &s->temps[arg];
- TCGOpcode sopc = (its->type == TCG_TYPE_I32
+ TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
? INDEX_op_st_i32
: INDEX_op_st_i64);
TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
TCGArg *sargs = &s->gen_opparam_buf[sop->args];
- sargs[0] = dir;
- sargs[1] = temp_idx(its->mem_base);
- sargs[2] = its->mem_offset;
+ sargs[0] = temp_idx(dir_ts);
+ sargs[1] = temp_idx(arg_ts->mem_base);
+ sargs[2] = arg_ts->mem_offset;
- temp_state[arg] = TS_MEM;
+ arg_ts->state = TS_MEM;
}
/* Drop outputs that are dead. */
if (IS_DEAD_ARG(i)) {
- temp_state[arg] = TS_DEAD;
+ arg_ts->state = TS_DEAD;
}
}
}
@@ -2531,27 +2548,23 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
s->la_time -= profile_getclock();
#endif
- {
- uint8_t *temp_state = tcg_malloc(s->nb_temps + s->nb_indirects);
-
- liveness_pass_1(s, temp_state);
+ liveness_pass_1(s);
- if (s->nb_indirects > 0) {
+ if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
- if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
- && qemu_log_in_addr_range(tb->pc))) {
- qemu_log_lock();
- qemu_log("OP before indirect lowering:\n");
- tcg_dump_ops(s);
- qemu_log("\n");
- qemu_log_unlock();
- }
+ if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
+ && qemu_log_in_addr_range(tb->pc))) {
+ qemu_log_lock();
+ qemu_log("OP before indirect lowering:\n");
+ tcg_dump_ops(s);
+ qemu_log("\n");
+ qemu_log_unlock();
+ }
#endif
- /* Replace indirect temps with direct temps. */
- if (liveness_pass_2(s, temp_state)) {
- /* If changes were made, re-run liveness. */
- liveness_pass_1(s, temp_state);
- }
+ /* Replace indirect temps with direct temps. */
+ if (liveness_pass_2(s)) {
+ /* If changes were made, re-run liveness. */
+ liveness_pass_1(s);
}
}
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 5a65204..723e06f 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -410,6 +410,12 @@ typedef struct TCGTemp {
struct TCGTemp *mem_base;
intptr_t mem_offset;
const char *name;
+
+ /* Pass-specific information that can be stored for a temporary.
+ One word worth of integer data, and one pointer to data
+ allocated separately. */
+ uintptr_t state;
+ void *state_ptr;
} TCGTemp;
typedef struct TCGv_i32_d {
@@ -696,6 +702,16 @@ static inline uintptr_t temp_idx(TCGTemp *ts)
return n;
}
+static inline size_t arg_index(TCGArg a)
+{
+ return a;
+}
+
+static inline TCGTemp *arg_temp(TCGArg a)
+{
+ return &tcg_ctx.temps[a];
+}
+
static inline TCGv_i32 QEMU_ARTIFICIAL MAKE_TCGV_I32(uintptr_t i)
{
return (TCGv_i32)&tcg_ctx.temps[i];
--
2.7.4
- [Qemu-devel] [RFC 00/14] tcg: Use TCGTemp pointers instead of indices, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 01/14] tcg: Use NULL for TCGV_UNUSED_*, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 04/14] tcg: Add temp_global bit to TCGTemp, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 02/14] tcg: Define actual structures for TCGv_*, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 05/14] tcg: Avoid loops against variable bounds, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 03/14] tcg: Use per-temp state data in liveness,
Richard Henderson <=
- [Qemu-devel] [RFC 06/14] tcg: More use of arg_temp, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 07/14] tcg: Change temp_allocate_frame arg to TCGTemp, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 08/14] tcg: Remove unused TCG_CALL_DUMMY_TCGV, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 09/14] tcg: More use of arg_index, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 10/14] tcg: Map TCG_CALL_DUMMY_ARG to NULL, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 13/14] tcg: Use temp number 0 again, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 11/14] tcg: Introduce temp_info for the optimize pass, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 12/14] tcg: Store pointers to temporaries directly in TCGArg, Richard Henderson, 2016/11/16
- [Qemu-devel] [RFC 14/14] tcg/optimize: Fold movcond 0/1 into setcond, Richard Henderson, 2016/11/16