lightning
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH v2 8/8] mips: Fill delay slots in jit_bger, jit_bgei, jit_bltr, jit_blti


From: Paul Cercueil
Subject: [PATCH v2 8/8] mips: Fill delay slots in jit_bger, jit_bgei, jit_bltr, jit_blti
Date: Mon, 9 Jan 2023 23:04:11 +0000

Fill the delay slots with the opcode that precedes the branch opcode, if
possible.

To simplify things, the code has also been factored into two shared
functions: _bger() for register operands and _bgei() for immediates.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
---
 lib/jit_mips-cpu.c | 233 ++++++++++++++++-----------------------------
 lib/jit_mips.c     |  16 ++--
 2 files changed, 89 insertions(+), 160 deletions(-)

diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index a9266ae..9a62776 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -673,14 +673,10 @@ static void 
_gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
 static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1)
-static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bltr_u(i0,r0,r1)               _bltr_u(_jit,i0,r0,r1)
-static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1)
-static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define blti_u(i0,r0,i1)               _blti_u(_jit,i0,r0,i1)
-static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bltr(i0,r0,r1,no_flag)         bger(i0,r1,r0,no_flag)
+#define bltr_u(i0,r0,r1,no_flag)       bger_u(i0,r1,r0,no_flag)
+#define blti(i0,r0,i1,no_flag)         _bgei(_jit,i0,r0,i1,0,1,no_flag)
+#define blti_u(i0,r0,i1,no_flag)       _bgei(_jit,i0,r0,i1,1,1,no_flag)
 #define bler(i0,r0,r1,no_flag)         _bgtr(_jit,i0,r1,r0,0,1,no_flag)
 #define bler_u(i0,r0,r1,no_flag)       _bgtr(_jit,i0,r1,r0,1,1,no_flag)
 #define blei(i0,r0,i1,no_flag)         _bgti(_jit,i0,r0,i1,0,1,no_flag)
@@ -689,14 +685,13 @@ static jit_word_t 
_blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
 static jit_word_t 
_beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,jit_bool_t);
 #define beqi(i0,r0,i1,no_flag)         _beqi(_jit,i0,r0,i1,no_flag)
 static jit_word_t 
_beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,jit_bool_t);
-#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1)
-static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bger_u(i0,r0,r1)               _bger_u(_jit,i0,r0,r1)
-static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1)
-static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define bgei_u(i0,r0,i1)               _bgei_u(_jit,i0,r0,i1)
-static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1,no_flag)         _bger(_jit,i0,r0,r1,0,no_flag)
+#define bger_u(i0,r0,r1,no_flag)       _bger(_jit,i0,r0,r1,1,no_flag)
+static jit_word_t 
_bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,jit_bool_t,jit_bool_t);
+#define bgei(i0,r0,i1,no_flag)         _bgei(_jit,i0,r0,i1,0,0,no_flag)
+#define bgei_u(i0,r0,i1,no_flag)       _bgei(_jit,i0,r0,i1,1,0,no_flag)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,
+                       jit_bool_t,jit_bool_t,jit_bool_t);
 #define bgtr(i0,r0,r1,no_flag)         _bgtr(_jit,i0,r0,r1,0,0,no_flag)
 #define bgtr_u(i0,r0,r1,no_flag)       _bgtr(_jit,i0,r0,r1,1,0,no_flag)
 static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,
@@ -2149,91 +2144,6 @@ _nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, 
jit_word_t i0)
        SLTU(r0, _ZERO_REGNO, r1);
 }
 
-static jit_word_t
-_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr);
-    SLT(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_word_t         d;
-    jit_int32_t                reg;
-    jit_bool_t         zero_p;
-
-    if (!(zero_p = i1 == 0))
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       if (!zero_p)
-           SLTI(rn(reg), r0, i1);
-       w = _jit->pc.w;
-       d = ((i0 - w) >> 2) - 1;
-       if (!zero_p)
-           BNE(rn(reg), _ZERO_REGNO, d);
-       else
-           BLTZ(r0, d);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bltr(i0, r0, rn(reg));
-    }
-    if (!zero_p)
-       jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
-
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       SLTIU(rn(reg), r0, i1);
-       w = _jit->pc.w;
-       BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bltr_u(i0, r0, rn(reg));
-    }
-    jit_unget_reg(reg);
-
-    return (w);
-}
-
 static jit_word_t
 _beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1, 
jit_bool_t no_flag)
 {
@@ -2296,86 +2206,105 @@ _beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t 
r0, jit_word_t i1, jit_bool_
 }
 
 static jit_word_t
-_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+      jit_bool_t sltu, jit_bool_t no_flag)
 {
     jit_word_t         w;
-    jit_int32_t                reg;
+    jit_int32_t                reg, prev, offset;
+    jit_bool_t         swap_ds;
+
+    offset = ((jit_word_t)_jit->pc.ui - (jit_word_t)_jit->code.ptr) / 
sizeof(jit_instr_t);
+    swap_ds = no_flag
+           && (offset < 2 || !has_delay_slot((jit_instr_t)*(_jit->pc.ui - 2)))
+           && !op_writes_register((jit_instr_t)*(_jit->pc.ui - 1), r0)
+           && !op_writes_register((jit_instr_t)*(_jit->pc.ui - 1), r1);
 
     reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLT(rn(reg), r0, r1);
-    w = _jit->pc.w;
-    BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
-    jit_unget_reg(reg);
 
-    return (w);
-}
+    if (swap_ds)
+       prev = *--_jit->pc.ui;
 
-static jit_word_t
-_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
+    if (sltu)
+       SLTU(rn(reg), r0, r1);
+    else
+       SLT(rn(reg), r0, r1);
 
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    SLTU(rn(reg), r0, r1);
     w = _jit->pc.w;
     BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-    NOP(1);
+
+    if (swap_ds)
+       ii(prev);
+    else
+       NOP(1);
+
     jit_unget_reg(reg);
 
     return (w);
 }
 
 static jit_word_t
-_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+      jit_bool_t sltiu, jit_bool_t bne, jit_bool_t no_flag)
 {
     jit_word_t         w;
     jit_word_t         d;
-    jit_int32_t                reg;
-    jit_bool_t         zero_p;
+    jit_int32_t                reg, prev, offset;
+    jit_bool_t         zero_p, swap_ds;
+
+    offset = ((jit_word_t)_jit->pc.ui - (jit_word_t)_jit->code.ptr) / 
sizeof(jit_instr_t);
+    swap_ds = no_flag
+           && (offset < 2 || !has_delay_slot((jit_instr_t)*(_jit->pc.ui - 2)))
+           && !op_writes_register((jit_instr_t)*(_jit->pc.ui - 1), r0);
+    zero_p = !sltiu && i1 == 0;
 
-    if (!(zero_p = i1 == 0))
+    if (!zero_p)
        reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+    if (swap_ds)
+       prev = *--_jit->pc.ui;
+
     if (can_sign_extend_short_p(i1)) {
-       if (!zero_p)
-           SLTI(rn(reg), r0, i1);
+       if (!zero_p) {
+           if (sltiu)
+               SLTIU(rn(reg), r0, i1);
+           else
+               SLTI(rn(reg), r0, i1);
+       }
+
        w = _jit->pc.w;
        d = ((i0 - w) >> 2) - 1;
-       if (!zero_p)
-           BEQ(rn(reg), _ZERO_REGNO, d);
-       else
-           BGEZ(r0, d);
-       NOP(1);
+       if (bne) {
+           if (!zero_p)
+               BNE(rn(reg), _ZERO_REGNO, d);
+           else
+               BLTZ(r0, d);
+       } else {
+           if (!zero_p)
+               BEQ(rn(reg), _ZERO_REGNO, d);
+           else
+               BGEZ(r0, d);
+       }
     }
     else {
        movi(rn(reg), i1);
-       w = bger(i0, r0, rn(reg));
-    }
-    if (!zero_p)
-       jit_unget_reg(reg);
-
-    return (w);
-}
-
-static jit_word_t
-_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
-{
-    jit_word_t         w;
-    jit_int32_t                reg;
+       if (sltiu)
+           SLTU(rn(reg), r0, rn(reg));
+       else
+           SLT(rn(reg), r0, rn(reg));
 
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    if (can_sign_extend_short_p(i1)) {
-       SLTIU(rn(reg), r0, i1);
        w = _jit->pc.w;
-       BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
-       NOP(1);
-    }
-    else {
-       movi(rn(reg), i1);
-       w = bger_u(i0, r0, rn(reg));
+       if (bne)
+           BNE(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
+       else
+           BEQ(rn(reg), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     }
-    jit_unget_reg(reg);
+
+    if (swap_ds)
+       ii(prev);
+    else
+       NOP(1);
+
+    if (!zero_p)
+       jit_unget_reg(reg);
 
     return (w);
 }
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 0b7f544..4170e57 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1511,20 +1511,20 @@ _emit_code(jit_state_t *_jit)
                case_rrw(gt, _u);
                case_rrr(ne,);
                case_rrw(ne,);
-               case_brr(blt,);
-               case_brw(blt,);
-               case_brr(blt, _u);
-               case_brw(blt, _u);
+               case_brrn(blt,, no_flag);
+               case_brwn(blt,, no_flag);
+               case_brrn(blt, _u, no_flag);
+               case_brwn(blt, _u, no_flag);
                case_brrn(ble,, no_flag);
                case_brwn(ble,, no_flag);
                case_brrn(ble, _u, no_flag);
                case_brwn(ble, _u, no_flag);
                case_brrn(beq,, no_flag);
                case_brwn(beq,, no_flag);
-               case_brr(bge,);
-               case_brw(bge,);
-               case_brr(bge, _u);
-               case_brw(bge, _u);
+               case_brrn(bge,, no_flag);
+               case_brwn(bge,, no_flag);
+               case_brrn(bge, _u, no_flag);
+               case_brwn(bge, _u, no_flag);
                case_brrn(bgt,, no_flag);
                case_brwn(bgt,, no_flag);
                case_brrn(bgt, _u, no_flag);
-- 
2.39.0




reply via email to

[Prev in Thread] Current Thread [Next in Thread]