[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Lightning] `modi' broken on PowerPC
From: |
Paolo Bonzini |
Subject: |
Re: [Lightning] `modi' broken on PowerPC |
Date: |
Thu, 06 Jul 2006 15:31:42 +0200 |
User-agent: |
Thunderbird 1.5.0.4 (Macintosh/20060530) |
(gdb) disassemble result result+100
Dump of assembler code from 0x10011518 to 0x1001157c:
0x10011518 <buffer.2607+0>: mflr r0
0x1001151c <buffer.2607+4>: stwu r1,-96(r1)
0x10011520 <buffer.2607+8>: stmw r23,60(r1)
0x10011524 <buffer.2607+12>: stw r0,100(r1)
0x10011528 <buffer.2607+16>: mr r23,r3
0x1001152c <buffer.2607+20>: mr r10,r23
Everything up to here is the prolog. After that there are several errors:
0x10011530 <buffer.2607+24>: li r24,8
0x10011534 <buffer.2607+28>: mtlr r31
0x10011538 <buffer.2607+32>: li r24,24
should be a no-op "li r24,r24", i.e. the register move "mr r24,r24" (which lightning would not generate); instead the register number 24 was emitted as an immediate
0x1001153c <buffer.2607+36>: divw r31,r10,r24
0x10011540 <buffer.2607+40>: mullw r31,r31,r24
0x10011544 <buffer.2607+44>: subf r10,r24,r10
should be "subf r10,r31,r10"
0x10011548 <buffer.2607+48>: mflr r31
Everything after this is the epilog. For small immediates, however, we can
do better:
li r24, 8
divw r24,r10,r24
mulli r24,r24,8
subf r10,r24,r10 (now it's correct to use this, no r31!)
I am not very familiar with PPC assembly, so I have not yet tried to
investigate it any further. I'd be glad to help as much as I can, though.
Luckily I have a PPC so I could test and commit the attached patch.
Paolo
--- orig/lightning/ppc/core.h
+++ mod/lightning/ppc/core.h
@@ -81,19 +81,24 @@ struct jit_local_state {
#define jit_ubooli2(d, rs, is, jmp) (jit_chk_imu (is, CMPLWIri(rs,
is), CMPLWrr(rs, JIT_AUX)), MFCRr((d)), EXTRWIrrii((d), (d), 1, (jmp)),
XORIrri((d), (d), 1))
#define jit_uboolr2(d, s1, s2, jmp) ( CMPLWrr (s1,
s2), MFCRr((d)), EXTRWIrrii((d), (d), 1, (jmp)),
XORIrri((d), (d), 1))
-/* modulus with immediate
- * movei r26, imm
+/* modulus with big immediate with small immediate
+ * movei r24, imm movei r24, imm
* mtlr r31
- * divw r31, rs, r26 (or divwu)
- * mullw r31, r31, r26
- * sub rs, rs, r26
+ * divw r31, rs, r24 (or divwu) divw r24, rs, r24
+ * mullw r31, r31, r24 mulli r24, r24, imm
+ * sub d, rs, r31 sub d, rs, r24
* mflr r31
- */
+ *
+ *
+ * jit_mod_big expects immediate in JIT_AUX. */
-#define _jit_mod(div, rs, imm) (MOVEIri(JIT_AUX, (imm)),
MTLRr(31), (div), \
- MULLWrrr(31, 31, JIT_AUX),
SUBrrr((rs), (rs), JIT_AUX), \
+#define _jit_mod_big(div, d, rs) (MTLRr(31), div(31, (rs),
JIT_AUX), \
+ MULLWrrr(31, 31, JIT_AUX),
SUBrrr((d), (rs), 31), \
MFLRr(31))
+#define _jit_mod_small(div, d, rs, imm) (MOVEIri(JIT_AUX,
(imm)), div(JIT_AUX, (rs), JIT_AUX), \
+ MULLIrri(JIT_AUX, JIT_AUX,
(imm)), SUBrrr((d), (rs), JIT_AUX))
+
/* Patch a movei instruction made of a LIS at lis_pc and an ORI at ori_pc. */
#define jit_patch_movei(lis_pc, ori_pc, dest) \
(*(lis_pc) &= ~_MASK(16), *(lis_pc) |= _HI(dest), \
@@ -220,8 +225,8 @@ struct jit_local_state {
#define jit_lti_ui(d, rs, is) jit_ubooli ((d), (rs), (is), _lt )
#define jit_ltr_i(d, s1, s2) jit_sboolr ((d), (s1), (s2), _lt )
#define jit_ltr_ui(d, s1, s2) jit_uboolr ((d), (s1), (s2), _lt )
-#define jit_modi_i(d, rs, is) _jit_mod(jit_divi_i (31, (rs),
JIT_AUX), (rs), (is))
-#define jit_modi_ui(d, rs, is) _jit_mod(jit_divi_ui(31, (rs),
JIT_AUX), (rs), (is))
+#define jit_modi_i(d, rs, is) jit_chk_ims ((is),
_jit_mod_small(jit_divr_i , (d), (rs), (is)), _jit_mod_big(jit_divr_i , (d),
(rs)))
+#define jit_modi_ui(d, rs, is) jit_chk_imu15((is),
_jit_mod_small(jit_divr_ui, (d), (rs), (is)), _jit_mod_big(jit_divr_ui, (d),
(rs)))
#define jit_modr_i(d, s1, s2) (DIVWrrr(JIT_AUX, (s1), (s2)),
MULLWrrr(JIT_AUX, JIT_AUX, (s2)), SUBrrr((d), (s1), JIT_AUX))
#define jit_modr_ui(d, s1, s2) (DIVWUrrr(JIT_AUX, (s1), (s2)),
MULLWrrr(JIT_AUX, JIT_AUX, (s2)), SUBrrr((d), (s1), JIT_AUX))
#define jit_movi_i(d, is) MOVEIri((d), (is))
--- orig/tests/Makefile.am
+++ mod/tests/Makefile.am
@@ -1,7 +1,7 @@
AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)
-noinst_PROGRAMS = fibit incr printf printf2 rpn fib fibdelay add bp testfp
funcfp rpnfp
-noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok fib.ok fibdelay.ok
testfp.ok funcfp.ok rpnfp.ok add.ok bp.ok
+noinst_PROGRAMS = fibit incr printf printf2 rpn fib fibdelay add bp testfp
funcfp rpnfp modi
+noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok fib.ok fibdelay.ok
testfp.ok funcfp.ok rpnfp.ok add.ok bp.ok modi.ok
EXTRA_DIST = $(noinst_DATA) run-test
if DISASS
@@ -9,6 +9,6 @@ LDADD = $(top_builddir)/opcode/libdisass
endif
if REGRESSION_TESTING
-TESTS = fib fibit fibdelay incr printf printf2 rpn add bp testfp funcfp rpnfp
+TESTS = fib fibit fibdelay incr printf printf2 rpn add bp testfp funcfp rpnfp
modi
TESTS_ENVIRONMENT=$(srcdir)/run-test
endif
--- /dev/null
+++ mod/tests/modi.c
@@ -0,0 +1,52 @@
+/******************************** -*- C -*- ****************************
+ *
+ * Test jit_modi_i
+ *
+ ***********************************************************************/
+
+
+/* Contributed by Ludovic Courtes. */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include "lightning.h"
+
+typedef int (* mod_t) (int);
+
+mod_t
+generate_modi (int operand)
+{
+ static char buffer[1024];
+ mod_t result;
+ int arg;
+
+ result = (mod_t)(jit_set_ip (buffer).iptr);
+ jit_leaf (1);
+ arg = jit_arg_i ();
+ jit_getarg_i (JIT_R1, arg);
+
+ jit_modi_i (JIT_R2, JIT_R1, operand);
+ jit_movr_i (JIT_RET, JIT_R2);
+
+ jit_ret ();
+ jit_flush_code (buffer, jit_get_ip ().ptr);
+
+ return result;
+}
+
+int
+main (int argc, char *argv[])
+{
+ mod_t mod_eight = generate_modi (8);
+
+ printf ("mod_eight (%i) = %i (vs. %i)\n",
+ 20420, mod_eight (20420), (20420 % 8));
+ printf ("mod_eight (%i) = %i (vs. %i)\n",
+ 216096, mod_eight (216096), (216096 % 8));
+
+ return 0;
+}
--- /dev/null
+++ mod/tests/modi.ok
@@ -0,0 +1,2 @@
+mod_eight (20420) = 4 (vs. 4)
+mod_eight (216096) = 0 (vs. 0)