qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PULL 1/7] tcg: Compress liveness data to 16 bits


From: Richard Henderson
Subject: [Qemu-devel] [PULL 1/7] tcg: Compress liveness data to 16 bits
Date: Sat, 6 Aug 2016 07:17:48 +0530

This reduces both memory usage and per-insn cacheline usage
during code generation.

Reviewed-by: Aurelien Jarno <address@hidden>
Signed-off-by: Richard Henderson <address@hidden>
---
 tcg/tcg.c | 58 ++++++++++++++++++++++------------------------------------
 tcg/tcg.h | 16 ++++++++++------
 2 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/tcg/tcg.c b/tcg/tcg.c
index 0c46c43..4aa1933 100644
--- a/tcg/tcg.c
+++ b/tcg/tcg.c
@@ -1341,7 +1341,7 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
     }
 }
 
-/* Liveness analysis : update the opc_dead_args array to tell if a
+/* Liveness analysis : update the op_arg_life array to tell if a
    given input arguments is dead. Instructions updating dead
    temporaries are removed. */
 static void tcg_liveness_analysis(TCGContext *s)
@@ -1350,9 +1350,8 @@ static void tcg_liveness_analysis(TCGContext *s)
     int oi, oi_prev, nb_ops;
 
     nb_ops = s->gen_next_op_idx;
-    s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
-    s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
-    
+    s->op_arg_life = tcg_malloc(nb_ops * sizeof(TCGLifeData));
+
     dead_temps = tcg_malloc(s->nb_temps);
     mem_temps = tcg_malloc(s->nb_temps);
     tcg_la_func_end(s, dead_temps, mem_temps);
@@ -1361,8 +1360,7 @@ static void tcg_liveness_analysis(TCGContext *s)
         int i, nb_iargs, nb_oargs;
         TCGOpcode opc_new, opc_new2;
         bool have_opc_new2;
-        uint16_t dead_args;
-        uint8_t sync_args;
+        TCGLifeData arg_life = 0;
         TCGArg arg;
 
         TCGOp * const op = &s->gen_op_buf[oi];
@@ -1394,15 +1392,13 @@ static void tcg_liveness_analysis(TCGContext *s)
                 do_not_remove_call:
 
                     /* output args are dead */
-                    dead_args = 0;
-                    sync_args = 0;
                     for (i = 0; i < nb_oargs; i++) {
                         arg = args[i];
                         if (dead_temps[arg]) {
-                            dead_args |= (1 << i);
+                            arg_life |= DEAD_ARG << i;
                         }
                         if (mem_temps[arg]) {
-                            sync_args |= (1 << i);
+                            arg_life |= SYNC_ARG << i;
                         }
                         dead_temps[arg] = 1;
                         mem_temps[arg] = 0;
@@ -1423,7 +1419,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                         arg = args[i];
                         if (arg != TCG_CALL_DUMMY_ARG) {
                             if (dead_temps[arg]) {
-                                dead_args |= (1 << i);
+                                arg_life |= DEAD_ARG << i;
                             }
                         }
                     }
@@ -1432,8 +1428,6 @@ static void tcg_liveness_analysis(TCGContext *s)
                         arg = args[i];
                         dead_temps[arg] = 0;
                     }
-                    s->op_dead_args[oi] = dead_args;
-                    s->op_sync_args[oi] = sync_args;
                 }
             }
             break;
@@ -1544,15 +1538,13 @@ static void tcg_liveness_analysis(TCGContext *s)
             } else {
             do_not_remove:
                 /* output args are dead */
-                dead_args = 0;
-                sync_args = 0;
                 for (i = 0; i < nb_oargs; i++) {
                     arg = args[i];
                     if (dead_temps[arg]) {
-                        dead_args |= (1 << i);
+                        arg_life |= DEAD_ARG << i;
                     }
                     if (mem_temps[arg]) {
-                        sync_args |= (1 << i);
+                        arg_life |= SYNC_ARG << i;
                     }
                     dead_temps[arg] = 1;
                     mem_temps[arg] = 0;
@@ -1570,7 +1562,7 @@ static void tcg_liveness_analysis(TCGContext *s)
                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                     arg = args[i];
                     if (dead_temps[arg]) {
-                        dead_args |= (1 << i);
+                        arg_life |= DEAD_ARG << i;
                     }
                 }
                 /* input arguments are live for preceding opcodes */
@@ -1578,11 +1570,10 @@ static void tcg_liveness_analysis(TCGContext *s)
                     arg = args[i];
                     dead_temps[arg] = 0;
                 }
-                s->op_dead_args[oi] = dead_args;
-                s->op_sync_args[oi] = sync_args;
             }
             break;
         }
+        s->op_arg_life[oi] = arg_life;
     }
 }
 #else
@@ -1921,11 +1912,11 @@ static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
     save_globals(s, allocated_regs);
 }
 
-#define IS_DEAD_ARG(n) ((dead_args >> (n)) & 1)
-#define NEED_SYNC_ARG(n) ((sync_args >> (n)) & 1)
+#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
+#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
 
 static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
-                               uint16_t dead_args, uint8_t sync_args)
+                               TCGLifeData arg_life)
 {
     TCGTemp *ots;
     tcg_target_ulong val;
@@ -1954,8 +1945,7 @@ static void tcg_reg_alloc_movi(TCGContext *s, const TCGArg *args,
 }
 
 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
-                              const TCGArg *args, uint16_t dead_args,
-                              uint8_t sync_args)
+                              const TCGArg *args, TCGLifeData arg_life)
 {
     TCGRegSet allocated_regs;
     TCGTemp *ts, *ots;
@@ -2040,8 +2030,7 @@ static void tcg_reg_alloc_mov(TCGContext *s, const TCGOpDef *def,
 
 static void tcg_reg_alloc_op(TCGContext *s, 
                              const TCGOpDef *def, TCGOpcode opc,
-                             const TCGArg *args, uint16_t dead_args,
-                             uint8_t sync_args)
+                             const TCGArg *args, TCGLifeData arg_life)
 {
     TCGRegSet allocated_regs;
     int i, k, nb_iargs, nb_oargs;
@@ -2206,8 +2195,7 @@ static void tcg_reg_alloc_op(TCGContext *s,
 #endif
 
 static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
-                               const TCGArg * const args, uint16_t dead_args,
-                               uint8_t sync_args)
+                               const TCGArg * const args, TCGLifeData arg_life)
 {
     int flags, nb_regs, i;
     TCGReg reg;
@@ -2427,8 +2415,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         TCGArg * const args = &s->gen_opparam_buf[op->args];
         TCGOpcode opc = op->opc;
         const TCGOpDef *def = &tcg_op_defs[opc];
-        uint16_t dead_args = s->op_dead_args[oi];
-        uint8_t sync_args = s->op_sync_args[oi];
+        TCGLifeData arg_life = s->op_arg_life[oi];
 
         oi_next = op->next;
 #ifdef CONFIG_PROFILER
@@ -2438,11 +2425,11 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
         switch (opc) {
         case INDEX_op_mov_i32:
         case INDEX_op_mov_i64:
-            tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
+            tcg_reg_alloc_mov(s, def, args, arg_life);
             break;
         case INDEX_op_movi_i32:
         case INDEX_op_movi_i64:
-            tcg_reg_alloc_movi(s, args, dead_args, sync_args);
+            tcg_reg_alloc_movi(s, args, arg_life);
             break;
         case INDEX_op_insn_start:
             if (num_insns >= 0) {
@@ -2467,8 +2454,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
             tcg_out_label(s, arg_label(args[0]), s->code_ptr);
             break;
         case INDEX_op_call:
-            tcg_reg_alloc_call(s, op->callo, op->calli, args,
-                               dead_args, sync_args);
+            tcg_reg_alloc_call(s, op->callo, op->calli, args, arg_life);
             break;
         default:
             /* Sanity check that we've not introduced any unhandled opcodes. */
@@ -2478,7 +2464,7 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
             /* Note: in order to speed up the code, it would be much
                faster to have specialized register allocator functions for
                some common argument patterns */
-            tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
+            tcg_reg_alloc_op(s, def, opc, args, arg_life);
             break;
         }
 #ifdef CONFIG_DEBUG_TCG
diff --git a/tcg/tcg.h b/tcg/tcg.h
index 6046dcd..7c0a138 100644
--- a/tcg/tcg.h
+++ b/tcg/tcg.h
@@ -575,6 +575,14 @@ typedef struct TCGTempSet {
     unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
 } TCGTempSet;
 
+/* While we limit helpers to 6 arguments, for 32-bit hosts, with padding,
+   this implies a max of 6*2 (64-bit in) + 2 (64-bit out) = 14 operands.
+   There are never more than 2 outputs, which means that we can store all
+   dead + sync data within 16 bits.  */
+#define DEAD_ARG  4
+#define SYNC_ARG  1
+typedef uint16_t TCGLifeData;
+
 typedef struct TCGOp {
     TCGOpcode opc   : 8;
 
@@ -608,12 +616,8 @@ struct TCGContext {
     uintptr_t *tb_jmp_target_addr; /* tb->jmp_target_addr if !USE_DIRECT_JUMP */
 
     /* liveness analysis */
-    uint16_t *op_dead_args; /* for each operation, each bit tells if the
-                               corresponding argument is dead */
-    uint8_t *op_sync_args;  /* for each operation, each bit tells if the
-                               corresponding output argument needs to be
-                               sync to memory. */
-    
+    TCGLifeData *op_arg_life;
+
     TCGRegSet reserved_regs;
     intptr_t current_frame_offset;
     intptr_t frame_start;
-- 
2.7.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]