qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Qemu-devel] [PATCH 23/25] target-openrisc: Optimize for r0 being zero


From: Richard Henderson
Subject: [Qemu-devel] [PATCH 23/25] target-openrisc: Optimize for r0 being zero
Date: Mon, 13 Jun 2016 16:58:23 -0700

Signed-off-by: Richard Henderson <address@hidden>
---
 target-openrisc/cpu.h              |  5 +++-
 target-openrisc/exception_helper.c |  1 +
 target-openrisc/translate.c        | 47 ++++++++++++++++++++++++++++++++++++++
 3 files changed, 52 insertions(+), 1 deletion(-)

diff --git a/target-openrisc/cpu.h b/target-openrisc/cpu.h
index ca5181d..16c1458 100644
--- a/target-openrisc/cpu.h
+++ b/target-openrisc/cpu.h
@@ -394,6 +394,7 @@ int cpu_openrisc_get_phys_data(OpenRISCCPU *cpu,
 #include "exec/cpu-all.h"
 
 #define TB_FLAGS_DFLAG 1
+#define TB_FLAGS_R0_0  2
 #define TB_FLAGS_OVE   SR_OVE
 
 static inline void cpu_get_tb_cpu_state(CPUOpenRISCState *env,
@@ -402,7 +403,9 @@ static inline void cpu_get_tb_cpu_state(CPUOpenRISCState 
*env,
 {
     *pc = env->pc;
     *cs_base = 0;
-    *flags = env->dflag | (env->sr & SR_OVE);
+    *flags = (env->dflag
+              | (env->gpr[0] == 0 ? TB_FLAGS_R0_0 : 0)
+              | (env->sr & SR_OVE));
 }
 
 static inline int cpu_mmu_index(CPUOpenRISCState *env, bool ifetch)
diff --git a/target-openrisc/exception_helper.c 
b/target-openrisc/exception_helper.c
index 28b9d44..db77d30 100644
--- a/target-openrisc/exception_helper.c
+++ b/target-openrisc/exception_helper.c
@@ -19,6 +19,7 @@
 
 #include "qemu/osdep.h"
 #include "cpu.h"
+#include "exec/exec-all.h"
 #include "exec/helper-proto.h"
 #include "exception.h"
 
diff --git a/target-openrisc/translate.c b/target-openrisc/translate.c
index 95e12ef..cb9267d 100644
--- a/target-openrisc/translate.c
+++ b/target-openrisc/translate.c
@@ -52,6 +52,7 @@ typedef struct DisasContext {
 static TCGv_env cpu_env;
 static TCGv cpu_sr;
 static TCGv cpu_R[32];
+static TCGv cpu_R0;
 static TCGv cpu_pc;
 static TCGv jmp_pc;            /* l.jr/l.jalr temp pc */
 static TCGv cpu_ppc;
@@ -110,6 +111,7 @@ void openrisc_translate_init(void)
                                       offsetof(CPUOpenRISCState, gpr[i]),
                                       regnames[i]);
     }
+    cpu_R0 = cpu_R[0];
 }
 
 static void gen_exception(DisasContext *dc, unsigned int excp)
@@ -144,6 +146,15 @@ static void gen_illegal_exception(DisasContext *dc)
         return;                         \
     } while (0)
 
+/* We're about to write to REG.  On the off-chance that the user is
+   writing to R0, re-instate the architectural register.  */
+#define check_r0_write(reg)             \
+    do {                                \
+        if (unlikely(reg == 0)) {       \
+            cpu_R[0] = cpu_R0;          \
+        }                               \
+    } while (0)
+
 static inline bool use_goto_tb(DisasContext *dc, target_ulong dest)
 {
     if (unlikely(dc->singlestep_enabled)) {
@@ -731,6 +742,7 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
 
     case 0x02:    /* l.adrp */
         LOG_DIS("l.adrp r%d,%d\n", rd, N21);
+        check_r0_write(rd);
         tcg_gen_movi_tl(cpu_R[rd], ((dc->pc >> 13) + N21) << 13);
         break;
 
@@ -791,6 +803,7 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
 
     case 0x1b: /* l.lwa */
         LOG_DIS("l.lwa r%d, r%d, %d\n", rd, ra, I16);
+        check_r0_write(rd);
         gen_lwa(dc, cpu_R[rd], cpu_R[ra], I16);
         break;
 
@@ -856,6 +869,7 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
         mop = MO_TESW;
         goto do_load;
     do_load:
+        check_r0_write(rd);
         t0 = tcg_temp_new();
         tcg_gen_addi_tl(t0, cpu_R[ra], I16);
         tcg_gen_qemu_ld_tl(cpu_R[rd], t0, dc->mem_idx, mop);
@@ -864,6 +878,7 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
 
     case 0x27:    /* l.addi */
         LOG_DIS("l.addi r%d, r%d, %d\n", rd, ra, I16);
+        check_r0_write(rd);
         t0 = tcg_const_tl(I16);
         gen_add(dc, cpu_R[rd], cpu_R[ra], t0);
         tcg_temp_free(t0);
@@ -871,6 +886,7 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
 
     case 0x28:    /* l.addic */
         LOG_DIS("l.addic r%d, r%d, %d\n", rd, ra, I16);
+        check_r0_write(rd);
         t0 = tcg_const_tl(I16);
         gen_addc(dc, cpu_R[rd], cpu_R[ra], t0);
         tcg_temp_free(t0);
@@ -878,21 +894,25 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
 
     case 0x29:    /* l.andi */
         LOG_DIS("l.andi r%d, r%d, %d\n", rd, ra, K16);
+        check_r0_write(rd);
         tcg_gen_andi_tl(cpu_R[rd], cpu_R[ra], K16);
         break;
 
     case 0x2a:    /* l.ori */
         LOG_DIS("l.ori r%d, r%d, %d\n", rd, ra, K16);
+        check_r0_write(rd);
         tcg_gen_ori_tl(cpu_R[rd], cpu_R[ra], K16);
         break;
 
     case 0x2b:    /* l.xori */
         LOG_DIS("l.xori r%d, r%d, %d\n", rd, ra, I16);
+        check_r0_write(rd);
         tcg_gen_xori_tl(cpu_R[rd], cpu_R[ra], I16);
         break;
 
     case 0x2c:    /* l.muli */
         LOG_DIS("l.muli r%d, r%d, %d\n", rd, ra, I16);
+        check_r0_write(rd);
         t0 = tcg_const_tl(I16);
         gen_mul(dc, cpu_R[rd], cpu_R[ra], t0);
         tcg_temp_free(t0);
@@ -900,6 +920,7 @@ static void dec_misc(DisasContext *dc, uint32_t insn)
 
     case 0x2d:    /* l.mfspr */
         LOG_DIS("l.mfspr r%d, r%d, %d\n", rd, ra, K16);
+        check_r0_write(rd);
         {
             TCGv_i32 tmp = tcg_temp_new_i32();
             tcg_gen_trunc_tl_i32(tmp, cpu_R[ra]);
@@ -1000,6 +1021,7 @@ static void dec_logic(DisasContext *dc, uint32_t insn)
     L6 = extract32(insn, 0, 6);
     S6 = L6 & (TARGET_LONG_BITS - 1);
 
+    check_r0_write(rd);
     switch (op0) {
     case 0x00:    /* l.slli */
         LOG_DIS("l.slli r%d, r%d, %d\n", rd, ra, L6);
@@ -1036,6 +1058,7 @@ static void dec_M(DisasContext *dc, uint32_t insn)
     rd = extract32(insn, 21, 5);
     K16 = extract32(insn, 0, 16);
 
+    check_r0_write(rd);
     switch (op0) {
     case 0x0:    /* l.movhi */
         LOG_DIS("l.movhi  r%d, %d\n", rd, K16);
@@ -1243,48 +1266,56 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     switch (op0) {
     case 0x00: /* lf.add.s */
         LOG_DIS("lf.add.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_add_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
 
     case 0x01: /* lf.sub.s */
         LOG_DIS("lf.sub.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_sub_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
 
     case 0x02: /* lf.mul.s */
         LOG_DIS("lf.mul.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_mul_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
 
     case 0x03: /* lf.div.s */
         LOG_DIS("lf.div.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_div_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
 
     case 0x04: /* lf.itof.s */
         LOG_DIS("lf.itof r%d, r%d\n", rd, ra);
+        check_r0_write(rd);
         gen_helper_itofs(cpu_R[rd], cpu_env, cpu_R[ra]);
         gen_helper_update_fpcsr(cpu_env);
         break;
 
     case 0x05: /* lf.ftoi.s */
         LOG_DIS("lf.ftoi r%d, r%d\n", rd, ra);
+        check_r0_write(rd);
         gen_helper_ftois(cpu_R[rd], cpu_env, cpu_R[ra]);
         gen_helper_update_fpcsr(cpu_env);
         break;
 
     case 0x06: /* lf.rem.s */
         LOG_DIS("lf.rem.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_rem_s(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
 
     case 0x07: /* lf.madd.s */
         LOG_DIS("lf.madd.s r%d, r%d, r%d\n", rd, ra, rb);
+        check_r0_write(rd);
         gen_helper_float_madd_s(cpu_R[rd], cpu_env, cpu_R[rd],
                                 cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
@@ -1331,6 +1362,7 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x10: /* lf.add.d */
         LOG_DIS("lf.add.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_add_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
@@ -1338,6 +1370,7 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x11: /* lf.sub.d */
         LOG_DIS("lf.sub.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_sub_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
@@ -1345,6 +1378,7 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x12: /* lf.mul.d */
         LOG_DIS("lf.mul.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_mul_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
@@ -1352,6 +1386,7 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x13: /* lf.div.d */
         LOG_DIS("lf.div.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_div_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
@@ -1359,6 +1394,7 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x14: /* lf.itof.d */
         LOG_DIS("lf.itof r%d, r%d\n", rd, ra);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_itofd(cpu_R[rd], cpu_env, cpu_R[ra]);
         gen_helper_update_fpcsr(cpu_env);
         break;
@@ -1366,6 +1402,7 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x15: /* lf.ftoi.d */
         LOG_DIS("lf.ftoi r%d, r%d\n", rd, ra);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_ftoid(cpu_R[rd], cpu_env, cpu_R[ra]);
         gen_helper_update_fpcsr(cpu_env);
         break;
@@ -1373,6 +1410,7 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x16: /* lf.rem.d */
         LOG_DIS("lf.rem.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_rem_d(cpu_R[rd], cpu_env, cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
         break;
@@ -1380,6 +1418,7 @@ static void dec_float(DisasContext *dc, uint32_t insn)
     case 0x17: /* lf.madd.d */
         LOG_DIS("lf.madd.d r%d, r%d, r%d\n", rd, ra, rb);
         check_of64s(dc);
+        check_r0_write(rd);
         gen_helper_float_madd_d(cpu_R[rd], cpu_env, cpu_R[rd],
                                 cpu_R[ra], cpu_R[rb]);
         gen_helper_update_fpcsr(cpu_env);
@@ -1518,6 +1557,14 @@ void gen_intermediate_code(CPUOpenRISCState *env, struct 
TranslationBlock *tb)
 
     gen_tb_start(tb);
 
+    /* Allow the TCG optimizer to see that R0 == 0,
+       when it's true, which is the common case.  */
+    if (dc->tb_flags & TB_FLAGS_R0_0) {
+        cpu_R[0] = tcg_const_tl(0);
+    } else {
+        cpu_R[0] = cpu_R0;
+    }
+
     do {
         tcg_gen_insn_start(dc->pc, (dc->delayed_branch ? 1 : 0)
                           | (num_insns ? 2 : 0));
-- 
2.5.5




reply via email to

[Prev in Thread] Current Thread [Next in Thread]