[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH v2 5/8] mips: Fill delay slots in jit_beqr / jit_beqi
From: |
Paul Cercueil |
Subject: |
[PATCH v2 5/8] mips: Fill delay slots in jit_beqr / jit_beqi |
Date: |
Mon, 9 Jan 2023 23:04:08 +0000 |
When we know that the last generated opcode is not the target of a jump,
that it does not itself sit in the delay slot of a previous branch, and
that it does not write to the source registers of the BEQ opcode, we can
swap it with the BEQ opcode, so that it fills the delay slot of the BEQ
opcode instead of a NOP.
Signed-off-by: Paul Cercueil <paul@crapouillou.net>
---
lib/jit_mips-cpu.c | 52 +++++++++++++++++++++++++++++++++++-----------
lib/jit_mips.c | 34 ++++++++++++++++++++++++++++--
2 files changed, 72 insertions(+), 14 deletions(-)
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index ec2745b..65338b3 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -689,10 +689,10 @@ static jit_word_t
_bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
#define blei_u(i0,r0,i1) _blei_u(_jit,i0,r0,i1)
static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
-#define beqr(i0,r0,r1) _beqr(_jit,i0,r0,r1)
-static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#define beqi(i0,r0,i1) _beqi(_jit,i0,r0,i1)
-static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define beqr(i0,r0,r1,no_flag) _beqr(_jit,i0,r0,r1,no_flag)
+static jit_word_t
_beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t,jit_bool_t);
+#define beqi(i0,r0,i1,no_flag) _beqi(_jit,i0,r0,i1,no_flag)
+static jit_word_t
_beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t,jit_bool_t);
#define bger(i0,r0,r1) _bger(_jit,i0,r0,r1)
static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
#define bger_u(i0,r0,r1) _bger_u(_jit,i0,r0,r1)
@@ -2313,32 +2313,60 @@ _blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t
r0, jit_word_t i1)
}
static jit_word_t
-_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
jit_bool_t no_flag)
{
jit_word_t w;
+ jit_int32_t prev, offset;
+ jit_bool_t swap_ds;
+
+ offset = ((jit_word_t)_jit->pc.ui - (jit_word_t)_jit->code.ptr) /
sizeof(jit_instr_t);
+ swap_ds = no_flag
+ && (offset < 2 || !has_delay_slot((jit_instr_t)*(_jit->pc.ui - 2)))
+ && !op_writes_register((jit_instr_t)*(_jit->pc.ui - 1), r0)
+ && !op_writes_register((jit_instr_t)*(_jit->pc.ui - 1), r1);
+
+ if (swap_ds)
+ prev = *--_jit->pc.ui;
w = _jit->pc.w;
BEQ(r0, r1, ((i0 - w) >> 2) - 1);
- NOP(1);
+ if (swap_ds)
+ ii(prev);
+ else
+ NOP(1);
return (w);
}
static jit_word_t
-_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
jit_bool_t no_flag)
{
jit_word_t w;
- jit_int32_t reg;
+ jit_int32_t reg, prev, offset;
+ jit_bool_t swap_ds;
if (i1 == 0) {
- w = _jit->pc.w;
- BEQ(r0, _ZERO_REGNO, ((i0 - w) >> 2) - 1);
- NOP(1);
+ w = beqr(i0, r0, _ZERO_REGNO, no_flag);
}
else {
reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
+
+ swap_ds = no_flag
+ && (offset < 2 || !has_delay_slot((jit_instr_t)*(_jit->pc.ui -
2)))
+ && !op_writes_register((jit_instr_t)*(_jit->pc.ui - 1), r0);
+
+ if (swap_ds)
+ prev = *--_jit->pc.ui;
+
movi(rn(reg), i1);
- w = beqr(i0, r0, rn(reg));
+ w = _jit->pc.w;
+ BEQ(r0, rn(reg), ((i0 - w) >> 2) - 1);
+
+ if (swap_ds)
+ ii(prev);
+ else
+ NOP(1);
+
jit_unget_reg(reg);
}
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index cf917aa..518a540 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -1253,6 +1253,21 @@ _emit_code(jit_state_t *_jit)
patch(word, node); \
} \
break
+#define case_brrn(name, type, no_flag) \
+ case jit_code_##name##r##type: \
+ temp = node->u.n; \
+ assert(temp->code == jit_code_label || \
+ temp->code == jit_code_epilog); \
+ if (temp->flag & jit_flag_patch) \
+ name##r##type(temp->u.w, rn(node->v.w), \
+ rn(node->w.w), no_flag); \
+ else { \
+ word = name##r##type(_jit->pc.w, \
+ rn(node->v.w), rn(node->w.w), \
+ no_flag); \
+ patch(word, node); \
+ } \
+ break
#define case_brw(name, type) \
case jit_code_##name##i##type: \
temp = node->u.n; \
@@ -1267,6 +1282,21 @@ _emit_code(jit_state_t *_jit)
patch(word, node); \
} \
break
+#define case_brwn(name, type, no_flag) \
+ case jit_code_##name##i##type: \
+ temp = node->u.n; \
+ assert(temp->code == jit_code_label || \
+ temp->code == jit_code_epilog); \
+ if (temp->flag & jit_flag_patch) \
+ name##i##type(temp->u.w, \
+ rn(node->v.w), node->w.w, no_flag); \
+ else { \
+ word = name##i##type(_jit->pc.w, \
+ rn(node->v.w), node->w.w, \
+ no_flag); \
+ patch(word, node); \
+ } \
+ break
#define case_brf(name, type, size) \
case jit_code_##name##i##type: \
temp = node->u.n; \
@@ -1489,8 +1519,8 @@ _emit_code(jit_state_t *_jit)
case_brw(ble,);
case_brr(ble, _u);
case_brw(ble, _u);
- case_brr(beq,);
- case_brw(beq,);
+ case_brrn(beq,, no_flag);
+ case_brwn(beq,, no_flag);
case_brr(bge,);
case_brw(bge,);
case_brr(bge, _u);
--
2.39.0
- [PATCH v2 0/9] mips: Fill delay slots v2, Paul Cercueil, 2023/01/09
- [PATCH v2 1/8] mips: Optimize jit_eqr / jit_eqi, Paul Cercueil, 2023/01/09
- [PATCH v2 2/8] mips: Fill delay slots of JR opcodes in jit_jmpr, Paul Cercueil, 2023/01/09
- [PATCH v2 3/8] mips: Fill delay slots of JALR opcodes in jit_callr, Paul Cercueil, 2023/01/09
- [PATCH v2 4/8] mips: Fill delay slots of J in jit_jmpi, Paul Cercueil, 2023/01/09
- [PATCH v2 5/8] mips: Fill delay slots in jit_beqr / jit_beqi,
Paul Cercueil <=
- [PATCH v2 6/8] mips: Fill delay slots in jit_bner / jit_bnei, Paul Cercueil, 2023/01/09
- [PATCH v2 7/8] mips: Fill delay slots in jit_bgtr, jit_bgti, jit_bler, jit_blei, Paul Cercueil, 2023/01/09
- [PATCH v2 8/8] mips: Fill delay slots in jit_bger, jit_bgei, jit_bltr, jit_blti, Paul Cercueil, 2023/01/09
- Re: [PATCH v2 0/9] mips: Fill delay slots v2, Paulo César Pereira de Andrade, 2023/01/10