qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH] tcg/arm: Expand epilogue inline


From: Philippe Mathieu-Daudé
Subject: Re: [PATCH] tcg/arm: Expand epilogue inline
Date: Tue, 15 Oct 2019 12:06:10 +0200
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.1.0

Hi Richard,

On 10/15/19 3:29 AM, Richard Henderson wrote:
It is, after all, just two instructions.

Profiling on a cortex-a15, using -d nochain to increase the number
of exit_tb that are executed, shows a minor improvement of 0.5%.

Signed-off-by: Richard Henderson <address@hidden>
---
  tcg/arm/tcg-target.inc.c | 32 +++++++++++++-------------------
  1 file changed, 13 insertions(+), 19 deletions(-)

diff --git a/tcg/arm/tcg-target.inc.c b/tcg/arm/tcg-target.inc.c
index 94d80d79d1..2a9ebfe25a 100644
--- a/tcg/arm/tcg-target.inc.c
+++ b/tcg/arm/tcg-target.inc.c
@@ -1745,24 +1745,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
  #endif
  }
-static tcg_insn_unit *tb_ret_addr;
+static void tcg_out_epilogue(TCGContext *s);
-static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
-                const TCGArg *args, const int *const_args)
+static void tcg_out_op(TCGContext *s, TCGOpcode opc,
+                       const TCGArg *args, const int *const_args)
  {
      TCGArg a0, a1, a2, a3, a4, a5;
      int c;
      switch (opc) {
      case INDEX_op_exit_tb:
-        /* Reuse the zeroing that exists for goto_ptr.  */
-        a0 = args[0];
-        if (a0 == 0) {
-            tcg_out_goto(s, COND_AL, s->code_gen_epilogue);
-        } else {
-            tcg_out_movi32(s, COND_AL, TCG_REG_R0, args[0]);
-            tcg_out_goto(s, COND_AL, tb_ret_addr);
-        }
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, args[0]);
+        tcg_out_epilogue(s);
          break;
      case INDEX_op_goto_tb:
          {
@@ -2284,19 +2278,17 @@ static void tcg_out_nop_fill(tcg_insn_unit *p, int count)
        + TCG_TARGET_STACK_ALIGN - 1) \
       & -TCG_TARGET_STACK_ALIGN)
+#define STACK_ADDEND (FRAME_SIZE - PUSH_SIZE)
+
  static void tcg_target_qemu_prologue(TCGContext *s)
  {
-    int stack_addend;
-
      /* Calling convention requires us to save r4-r11 and lr.  */
      /* stmdb sp!, { r4 - r11, lr } */
      tcg_out32(s, (COND_AL << 28) | 0x092d4ff0);
      /* Reserve callee argument and tcg temp space.  */
-    stack_addend = FRAME_SIZE - PUSH_SIZE;
-
      tcg_out_dat_rI(s, COND_AL, ARITH_SUB, TCG_REG_CALL_STACK,
-                   TCG_REG_CALL_STACK, stack_addend, 1);
+                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
      tcg_set_frame(s, TCG_REG_CALL_STACK, TCG_STATIC_CALL_ARGS_SIZE,
                    CPU_TEMP_BUF_NLONGS * sizeof(long));
@@ -2310,11 +2302,13 @@ static void tcg_target_qemu_prologue(TCGContext *s)
       */
      s->code_gen_epilogue = s->code_ptr;
      tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_R0, 0);
+    tcg_out_epilogue(s);
+}
-    /* TB epilogue */
-    tb_ret_addr = s->code_ptr;
+static void tcg_out_epilogue(TCGContext *s)

Would you mind splitting this patch in two?
First introduce and use tcg_out_epilogue(), then optimize tcg_out_op().

+{
      tcg_out_dat_rI(s, COND_AL, ARITH_ADD, TCG_REG_CALL_STACK,
-                   TCG_REG_CALL_STACK, stack_addend, 1);
+                   TCG_REG_CALL_STACK, STACK_ADDEND, 1);
      /* ldmia sp!, { r4 - r11, pc } */
      tcg_out32(s, (COND_AL << 28) | 0x08bd8ff0);





reply via email to

[Prev in Thread] Current Thread [Next in Thread]