guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 365/437: X86: Correct load and store in the x32 abi


From: Andy Wingo
Subject: [Guile-commits] 365/437: X86: Correct load and store in the x32 abi
Date: Mon, 2 Jul 2018 05:14:56 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 3a03feae5d5f0ad7766882f772e79a1c8b61bc39
Author: pcpa <address@hidden>
Date:   Sun Feb 15 23:14:27 2015 -0200

    X86: Correct load and store in the x32 abi
    
        * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
        Correct encoding of ldxr* stxr* in the x32 abi. If the
        displacement register is negative, it would generate
        a 64 bit instruction with a 32 bit unsigned displacement.
    
        * check/ranger.tst, check/ranger.ok: New files, implementing
        a test case for negative loads and stores. This is range.tst
        converted to use registers instead of immediate offsets.
    
        * check/Makefile.am: Update for the new test case.
---
 ChangeLog         |  13 ++
 check/Makefile.am |  13 +-
 check/ranger.ok   |   1 +
 check/ranger.tst  | 507 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/jit_x86-cpu.c |  49 ++++++
 lib/jit_x86-sse.c |  88 +++++++++-
 lib/jit_x86-x87.c |  48 ++++++
 7 files changed, 709 insertions(+), 10 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 336bf07..59927ef 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2015-02-15 Paulo Andrade <address@hidden>
+
+       * lib/jit_x86-cpu.c, lib/jit_x86-sse.c, lib/jit_x86-x87.c:
+       Correct encoding of ldxr* stxr* in the x32 abi. If the
+       displacement register is negative, it would generate
+       a 64 bit instruction with a 32 bit unsigned displacement.
+
+       * check/ranger.tst, check/ranger.ok: New files, implementing
+       a test case for negative loads and stores. This is range.tst
+       converted to use registers instead of immediate offsets.
+
+       check/Makefile.am: Update for the new test case.
+
 2015-02-07 Paulo Andrade <address@hidden>
 
        * lib/jit_size.c: Preventively use at least 144 bytes
diff --git a/check/Makefile.am b/check/Makefile.am
index de11b81..301e7da 100644
--- a/check/Makefile.am
+++ b/check/Makefile.am
@@ -92,6 +92,7 @@ EXTRA_DIST =                          \
        qalu_mul.tst    qalu_mul.ok     \
        qalu_div.tst    qalu_div.ok     \
        range.tst       range.ok        \
+       ranger.tst      ranger.ok       \
        ret.tst         ret.ok          \
        tramp.tst       tramp.ok        \
        check.sh                        \
@@ -121,7 +122,7 @@ base_TESTS =                                \
        clobber carry call              \
        float jmpr put                  \
        qalu_mul qalu_div               \
-       range ret tramp
+       range ranger ret tramp
 
 $(base_TESTS): check.sh
        $(LN_S) $(srcdir)/check.sh $@
@@ -193,7 +194,7 @@ arm_TESTS =                                 \
        varargs.arm stack.arm                   \
        clobber.arm carry.arm call.arm          \
        float.arm jmpr.arm tramp.arm range.arm  \
-       put.arm
+       ranger.arm put.arm
 $(arm_TESTS):  check.arm.sh
        $(LN_S) $(srcdir)/check.arm.sh $@
 TESTS += $(arm_TESTS)
@@ -218,7 +219,7 @@ swf_TESTS =                                 \
        varargs.swf stack.swf                   \
        clobber.swf carry.swf call.swf          \
        float.swf jmpr.swf tramp.swf range.swf  \
-       put.swf
+       ranger.swf put.swf
 $(swf_TESTS):  check.swf.sh
        $(LN_S) $(srcdir)/check.swf.sh $@
 TESTS += $(swf_TESTS)
@@ -241,7 +242,7 @@ arm_swf_TESTS =                                             \
        varargs.arm.swf stack.arm.swf                           \
        clobber.arm.swf carry.arm.swf call.arm.swf              \
        float.arm.swf jmpr.arm.swf tramp.arm.swf range.arm.swf  \
-       put.arm.swf
+       ranger.arm.swf put.arm.swf
 $(arm_swf_TESTS):      check.arm.swf.sh
        $(LN_S) $(srcdir)/check.arm.swf.sh $@
 TESTS += $(arm_swf_TESTS)
@@ -265,7 +266,7 @@ arm4_swf_TESTS =                                            \
        varargs.arm4.swf stack.arm4.swf                         \
        clobber.arm4.swf carry.arm4.swf call.arm4.swf           \
        float.arm4.swf jmpr.arm4.swf tramp.arm4.swf             \
-       range.arm4.swf put.arm4.swf
+       range.arm4.swf ranger.arm4.swf put.arm4.swf
 $(arm4_swf_TESTS):     check.arm4.swf.sh
        $(LN_S) $(srcdir)/check.arm4.swf.sh $@
 TESTS += $(arm4_swf_TESTS)
@@ -291,7 +292,7 @@ nodata_TESTS =                                              \
        varargs.nodata stack.nodata                     \
        clobber.nodata carry.nodata call.nodata         \
        float.nodata jmpr.nodata tramp.nodata           \
-       range.nodata put.nodata
+       range.nodata ranger.nodata put.nodata
 $(nodata_TESTS):       check.nodata.sh
        $(LN_S) $(srcdir)/check.nodata.sh $@
 TESTS += $(nodata_TESTS)
diff --git a/check/ranger.ok b/check/ranger.ok
new file mode 100644
index 0000000..9766475
--- /dev/null
+++ b/check/ranger.ok
@@ -0,0 +1 @@
+ok
diff --git a/check/ranger.tst b/check/ranger.tst
new file mode 100644
index 0000000..de80196
--- /dev/null
+++ b/check/ranger.tst
@@ -0,0 +1,507 @@
+#define M64    67108864
+
+#define aB1    (1<<1)
+#define aB2    (1<<2)
+#define aB3    (1<<3)
+#define aB4    (1<<4)
+#define aB5    (1<<5)
+#define aB6    (1<<6)
+#define aB7    (1<<7)
+#define aB8    (1<<8)
+#define aB9    (1<<9)
+#define aB10   (1<<10)
+#define aB11   (1<<11)
+#define aB12   (1<<12)
+#define aB13   (1<<13)
+#define aB14   (1<<14)
+#define aB15   (1<<15)
+#define aB16   (1<<16)
+#define aB17   (1<<17)
+#define aB18   (1<<18)
+#define aB19   (1<<19)
+#define aB20   (1<<20)
+#define aB21   (1<<21)
+#define aB22   (1<<22)
+#define aB23   (1<<23)
+#define aB24   (1<<24)
+#define aB25   (1<<25)
+#define aB26   (1<<26)
+#define bB1    (-aB1)
+#define bB2    (-aB2)
+#define bB3    (-aB3)
+#define bB4    (-aB4)
+#define bB5    (-aB5)
+#define bB6    (-aB6)
+#define bB7    (-aB7)
+#define bB8    (-aB8)
+#define bB9    (-aB9)
+#define bB10   (-aB10)
+#define bB11   (-aB11)
+#define bB12   (-aB12)
+#define bB13   (-aB13)
+#define bB14   (-aB14)
+#define bB15   (-aB15)
+#define bB16   (-aB16)
+#define bB17   (-aB17)
+#define bB18   (-aB18)
+#define bB19   (-aB19)
+#define bB20   (-aB20)
+#define bB21   (-aB21)
+#define bB22   (-aB22)
+#define bB23   (-aB23)
+#define bB24   (-aB24)
+#define bB25   (-aB25)
+#define bB26   (-aB26)
+#define cB1    (aB1-1)
+#define cB2    (aB2-1)
+#define cB3    (aB3-1)
+#define cB4    (aB4-1)
+#define cB5    (aB5-1)
+#define cB6    (aB6-1)
+#define cB7    (aB7-1)
+#define cB8    (aB8-1)
+#define cB9    (aB9-1)
+#define cB10   (aB10-1)
+#define cB11   (aB11-1)
+#define cB12   (aB12-1)
+#define cB13   (aB13-1)
+#define cB14   (aB14-1)
+#define cB15   (aB15-1)
+#define cB16   (aB16-1)
+#define cB17   (aB17-1)
+#define cB18   (aB18-1)
+#define cB19   (aB19-1)
+#define cB20   (aB20-1)
+#define cB21   (aB21-1)
+#define cB22   (aB22-1)
+#define cB23   (aB23-1)
+#define cB24   (aB24-1)
+#define cB25   (aB25-1)
+#define cB26   (aB26-1)
+#define dB1    (-aB1+1)
+#define dB2    (-aB2+1)
+#define dB3    (-aB3+1)
+#define dB4    (-aB4+1)
+#define dB5    (-aB5+1)
+#define dB6    (-aB6+1)
+#define dB7    (-aB7+1)
+#define dB8    (-aB8+1)
+#define dB9    (-aB9+1)
+#define dB10   (-aB10+1)
+#define dB11   (-aB11+1)
+#define dB12   (-aB12+1)
+#define dB13   (-aB13+1)
+#define dB14   (-aB14+1)
+#define dB15   (-aB15+1)
+#define dB16   (-aB16+1)
+#define dB17   (-aB17+1)
+#define dB18   (-aB18+1)
+#define dB19   (-aB19+1)
+#define dB20   (-aB20+1)
+#define dB21   (-aB21+1)
+#define dB22   (-aB22+1)
+#define dB23   (-aB23+1)
+#define dB24   (-aB24+1)
+#define dB25   (-aB25+1)
+#define dB26   (-aB26+1)
+
+#define add(a, b)              $(a + b)
+#define sub(a, b)              $(a - b)
+#define rsb(a, b)              $(b - a)
+#define mul(a, b)              $(a * b)
+#define div(a, b)              $(a / b)
+#define rem(a, b)              $(a % b)
+#define and(a, b)              $(a & b)
+#define or(a, b)               $(a | b)
+#define xor(a, b)              $(a ^ b)
+
+#define alu2(N, X, L, R, V)                                    \
+       movi %r1 L                                              \
+       movi %r2 R                                              \
+       N##r %r0 %r1 %r2                                        \
+       beqi X %r0 V                                            \
+       calli @abort                                            \
+X:
+#define alu1(N, M)                                             \
+       alu2(N, N##M##1, 3, $(M##1), N(3, M##1))                \
+       alu2(N, N##M##2, 3, $(M##2), N(3, M##2))                \
+       alu2(N, N##M##3, 3, $(M##3), N(3, M##3))                \
+       alu2(N, N##M##4, 3, $(M##4), N(3, M##4))                \
+       alu2(N, N##M##5, 3, $(M##5), N(3, M##5))                \
+       alu2(N, N##M##6, 3, $(M##6), N(3, M##6))                \
+       alu2(N, N##M##7, 3, $(M##7), N(3, M##7))                \
+       alu2(N, N##M##8, 3, $(M##8), N(3, M##8))                \
+       alu2(N, N##M##9, 3, $(M##9), N(3, M##9))                \
+       alu2(N, N##M##10, 3, $(M##10), N(3, M##10))             \
+       alu2(N, N##M##11, 3, $(M##11), N(3, M##11))             \
+       alu2(N, N##M##12, 3, $(M##12), N(3, M##12))             \
+       alu2(N, N##M##13, 3, $(M##13), N(3, M##13))             \
+       alu2(N, N##M##14, 3, $(M##14), N(3, M##14))             \
+       alu2(N, N##M##15, 3, $(M##15), N(3, M##15))             \
+       alu2(N, N##M##16, 3, $(M##16), N(3, M##16))             \
+       alu2(N, N##M##17, 3, $(M##17), N(3, M##17))             \
+       alu2(N, N##M##18, 3, $(M##18), N(3, M##18))             \
+       alu2(N, N##M##19, 3, $(M##19), N(3, M##19))             \
+       alu2(N, N##M##20, 3, $(M##20), N(3, M##20))             \
+       alu2(N, N##M##21, 3, $(M##21), N(3, M##21))             \
+       alu2(N, N##M##22, 3, $(M##22), N(3, M##22))             \
+       alu2(N, N##M##23, 3, $(M##23), N(3, M##23))             \
+       alu2(N, N##M##24, 3, $(M##24), N(3, M##24))             \
+       alu2(N, N##M##25, 3, $(M##25), N(3, M##25))             \
+       alu2(N, N##M##26, 3, $(M##26), N(3, M##26))
+
+#define alu(N)                                                 \
+       alu1(N, aB)                                             \
+       alu1(N, bB)                                             \
+       alu1(N, cB)                                             \
+       alu1(N, dB)
+
+#define _lsh(N)                                                        \
+       alu2(lsh, L##N, 3, N, $(3<<N))
+#define _rsh(N)                                                        \
+       alu2(rsh, R##N, $(1<<63), N, $((1<<63)>>N))
+
+#if __WORDSIZE == 32
+#  define xsh64(X)                     /**/
+#else
+#  define xsh64(X)                                             \
+       _##X##sh(32)                                            \
+       _##X##sh(33)                                            \
+       _##X##sh(34)                                            \
+       _##X##sh(35)                                            \
+       _##X##sh(36)                                            \
+       _##X##sh(37)                                            \
+       _##X##sh(38)                                            \
+       _##X##sh(39)                                            \
+       _##X##sh(40)                                            \
+       _##X##sh(41)                                            \
+       _##X##sh(42)                                            \
+       _##X##sh(43)                                            \
+       _##X##sh(44)                                            \
+       _##X##sh(45)                                            \
+       _##X##sh(46)                                            \
+       _##X##sh(47)                                            \
+       _##X##sh(48)                                            \
+       _##X##sh(49)                                            \
+       _##X##sh(50)                                            \
+       _##X##sh(51)                                            \
+       _##X##sh(52)                                            \
+       _##X##sh(53)                                            \
+       _##X##sh(54)                                            \
+       _##X##sh(55)                                            \
+       _##X##sh(56)                                            \
+       _##X##sh(57)                                            \
+       _##X##sh(58)                                            \
+       _##X##sh(59)                                            \
+       _##X##sh(60)                                            \
+       _##X##sh(61)                                            \
+       _##X##sh(62)                                            \
+       _##X##sh(63)
+#endif
+
+#define xsh(X)                                                 \
+       _##X##sh(0)                                             \
+       _##X##sh(1)                                             \
+       _##X##sh(2)                                             \
+       _##X##sh(3)                                             \
+       _##X##sh(4)                                             \
+       _##X##sh(5)                                             \
+       _##X##sh(6)                                             \
+       _##X##sh(7)                                             \
+       _##X##sh(8)                                             \
+       _##X##sh(9)                                             \
+       _##X##sh(10)                                            \
+       _##X##sh(11)                                            \
+       _##X##sh(12)                                            \
+       _##X##sh(13)                                            \
+       _##X##sh(14)                                            \
+       _##X##sh(15)                                            \
+       _##X##sh(16)                                            \
+       _##X##sh(17)                                            \
+       _##X##sh(18)                                            \
+       _##X##sh(19)                                            \
+       _##X##sh(20)                                            \
+       _##X##sh(21)                                            \
+       _##X##sh(22)                                            \
+       _##X##sh(23)                                            \
+       _##X##sh(24)                                            \
+       _##X##sh(25)                                            \
+       _##X##sh(26)                                            \
+       _##X##sh(27)                                            \
+       _##X##sh(28)                                            \
+       _##X##sh(29)                                            \
+       _##X##sh(30)                                            \
+       _##X##sh(31)                                            \
+       xsh64(X)
+
+#define lsh()                                                  \
+       xsh(l)
+#define rsh()                                                  \
+       xsh(r)
+
+#define reset(V)                                               \
+       prepare                                                 \
+               pushargi buf                                    \
+               pushargi V                                      \
+               pushargi $(M64 + 8)                             \
+       finishi @memset
+
+#define stx(T, N, O, V)                                                \
+       movi %r0 V                                              \
+       movi %r1 O                                              \
+       stxr##T %r1 %v0 %r0
+#define stx8(T, M, V)                                          \
+       stx(T, 3, $(M##B3), V)                                  \
+       stx(T, 4, $(M##B4), V)                                  \
+       stx(T, 5, $(M##B5), V)                                  \
+       stx(T, 6, $(M##B6), V)                                  \
+       stx(T, 7, $(M##B7), V)                                  \
+       stx(T, 8, $(M##B8), V)                                  \
+       stx(T, 9, $(M##B9), V)                                  \
+       stx(T, 10, $(M##B10), V)                                \
+       stx(T, 11, $(M##B11), V)                                \
+       stx(T, 12, $(M##B12), V)                                \
+       stx(T, 13, $(M##B13), V)                                \
+       stx(T, 14, $(M##B14), V)                                \
+       stx(T, 15, $(M##B15), V)                                \
+       stx(T, 16, $(M##B16), V)                                \
+       stx(T, 17, $(M##B17), V)                                \
+       stx(T, 18, $(M##B18), V)                                \
+       stx(T, 19, $(M##B19), V)                                \
+       stx(T, 20, $(M##B20), V)                                \
+       stx(T, 21, $(M##B21), V)                                \
+       stx(T, 22, $(M##B22), V)                                \
+       stx(T, 23, $(M##B23), V)                                \
+       stx(T, 24, $(M##B24), V)                                \
+       stx(T, 25, $(M##B25), V)                                \
+       stx(T, 26, $(M##B26), V)
+#define stx4(T, M, V)                                          \
+       stx(T, 2, $(M##B2), V)                                  \
+       stx8(T, M, V)
+#define stx2(T, M, V)                                          \
+       stx(T, 1, $(M##B1), V)                                  \
+       stx4(T, M, V)
+#define ldx(T, N, M, O, V)                                     \
+       movi %r0 0                                              \
+       ldxi##T %r0 %v0 O                                       \
+       beqi ldx##T##N##M %r0 V                                 \
+       calli @abort                                            \
+ldx##T##N##M:
+#define ldx8(T, M, V)                                          \
+       ldx(T, 3, M, $(M##B3), V)                               \
+       ldx(T, 4, M, $(M##B4), V)                               \
+       ldx(T, 5, M, $(M##B5), V)                               \
+       ldx(T, 6, M, $(M##B6), V)                               \
+       ldx(T, 7, M, $(M##B7), V)                               \
+       ldx(T, 8, M, $(M##B8), V)                               \
+       ldx(T, 9, M, $(M##B9), V)                               \
+       ldx(T, 10, M, $(M##B10), V)                             \
+       ldx(T, 11, M, $(M##B11), V)                             \
+       ldx(T, 12, M, $(M##B12), V)                             \
+       ldx(T, 13, M, $(M##B13), V)                             \
+       ldx(T, 14, M, $(M##B14), V)                             \
+       ldx(T, 15, M, $(M##B15), V)                             \
+       ldx(T, 16, M, $(M##B16), V)                             \
+       ldx(T, 17, M, $(M##B17), V)                             \
+       ldx(T, 18, M, $(M##B18), V)                             \
+       ldx(T, 19, M, $(M##B19), V)                             \
+       ldx(T, 20, M, $(M##B20), V)                             \
+       ldx(T, 21, M, $(M##B21), V)                             \
+       ldx(T, 22, M, $(M##B22), V)                             \
+       ldx(T, 23, M, $(M##B23), V)                             \
+       ldx(T, 24, M, $(M##B24), V)                             \
+       ldx(T, 25, M, $(M##B25), V)                             \
+       ldx(T, 26, M, $(M##B26), V)
+#define ldx4(T, M, V)                                          \
+       ldx(T, 2, M, $(M##B2), V)                               \
+       ldx8(T, M, V)
+#define ldx2(T, M, V)                                          \
+       ldx(T, 1, M, $(M##B1), V)                               \
+       ldx4(T, M, V)
+
+#define stf(T, N, O, V)                                                \
+       movi##T %f0 V                                           \
+       movi %r0 O                                              \
+       stxr##T %r0 %v0 %f0
+#define stf8(T, M, V)                                          \
+       stf(T, 3, $(M##B3), V)                                  \
+       stf(T, 4, $(M##B4), V)                                  \
+       stf(T, 5, $(M##B5), V)                                  \
+       stf(T, 6, $(M##B6), V)                                  \
+       stf(T, 7, $(M##B7), V)                                  \
+       stf(T, 8, $(M##B8), V)                                  \
+       stf(T, 9, $(M##B9), V)                                  \
+       stf(T, 10, $(M##B10), V)                                \
+       stf(T, 11, $(M##B11), V)                                \
+       stf(T, 12, $(M##B12), V)                                \
+       stf(T, 13, $(M##B13), V)                                \
+       stf(T, 14, $(M##B14), V)                                \
+       stf(T, 15, $(M##B15), V)                                \
+       stf(T, 16, $(M##B16), V)                                \
+       stf(T, 17, $(M##B17), V)                                \
+       stf(T, 18, $(M##B18), V)                                \
+       stf(T, 19, $(M##B19), V)                                \
+       stf(T, 20, $(M##B20), V)                                \
+       stf(T, 21, $(M##B21), V)                                \
+       stf(T, 22, $(M##B22), V)                                \
+       stf(T, 23, $(M##B23), V)                                \
+       stf(T, 24, $(M##B24), V)                                \
+       stf(T, 25, $(M##B25), V)                                \
+       stf(T, 26, $(M##B26), V)
+#define stf4(T, M, V)                                          \
+       stf(T, 2, $(M##B2), V)                                  \
+       stf8(T, M, V)
+#define ldf(T, N, M, O, V)                                     \
+       movi##T %f0 0                                           \
+       ldxi##T %f0 %v0 O                                       \
+       beqi##T ldf##T##N##M %f0 V                              \
+       calli @abort                                            \
+ldf##T##N##M:
+#define ldf8(T, M, V)                                          \
+       ldf(T, 3, M, $(M##B3), V)                               \
+       ldf(T, 4, M, $(M##B4), V)                               \
+       ldf(T, 5, M, $(M##B5), V)                               \
+       ldf(T, 6, M, $(M##B6), V)                               \
+       ldf(T, 7, M, $(M##B7), V)                               \
+       ldf(T, 8, M, $(M##B8), V)                               \
+       ldf(T, 9, M, $(M##B9), V)                               \
+       ldf(T, 10, M, $(M##B10), V)                             \
+       ldf(T, 11, M, $(M##B11), V)                             \
+       ldf(T, 12, M, $(M##B12), V)                             \
+       ldf(T, 13, M, $(M##B13), V)                             \
+       ldf(T, 14, M, $(M##B14), V)                             \
+       ldf(T, 15, M, $(M##B15), V)                             \
+       ldf(T, 16, M, $(M##B16), V)                             \
+       ldf(T, 17, M, $(M##B17), V)                             \
+       ldf(T, 18, M, $(M##B18), V)                             \
+       ldf(T, 19, M, $(M##B19), V)                             \
+       ldf(T, 20, M, $(M##B20), V)                             \
+       ldf(T, 21, M, $(M##B21), V)                             \
+       ldf(T, 22, M, $(M##B22), V)                             \
+       ldf(T, 23, M, $(M##B23), V)                             \
+       ldf(T, 24, M, $(M##B24), V)                             \
+       ldf(T, 25, M, $(M##B25), V)                             \
+       ldf(T, 26, M, $(M##B26), V)
+#define ldf4(T, M, V)                                          \
+       ldf(T, 2, M, $(M##B2), V)                               \
+       ldf8(T, M, V)
+
+#define ldst_c()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_c, a, 0x5a)                                       \
+       ldx2(_c, a, 0x5a)                                       \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_c, b, 0x5a)                                       \
+       ldx2(_c, b, 0x5a)
+#define ldst_uc()                                              \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_c, a, 0x5a)                                       \
+       ldx2(_uc, a, 0x5a)                                      \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_c, b, 0x5a)                                       \
+       ldx2(_uc, b, 0x5a)
+#define ldst_s()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_s, a, 0x5a5a)                                     \
+       ldx2(_s, a, 0x5a5a)                                     \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_s, b, 0x5a5a)                                     \
+       ldx2(_s, b, 0x5a5a)
+#define ldst_us()                                              \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx2(_s, a, 0x5a5a)                                     \
+       ldx2(_us, a, 0x5a5a)                                    \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx2(_s, b, 0x5a5a)                                     \
+       ldx2(_us, b, 0x5a5a)
+#define ldst_i()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx4(_i, a, 0x5a5a5a5a)                                 \
+       ldx4(_i, a, 0x5a5a5a5a)                                 \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx4(_i, b, 0x5a5a5a5a)                                 \
+       ldx4(_i, b, 0x5a5a5a5a)
+#define ldst_ui()                                              \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx4(_i, a, 0x5a5a5a5a)                                 \
+       ldx4(_ui, a, 0x5a5a5a5a)                                \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx4(_i, b, 0x5a5a5a5a)                                 \
+       ldx4(_ui, b, 0x5a5a5a5a)
+#define ldst_l()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stx8(_l, a, 0x5a5a5a5a5a5a5a5a)                         \
+       ldx8(_l, a, 0x5a5a5a5a5a5a5a5a)                         \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stx8(_l, b, 0x5a5a5a5a5a5a5a5a)                         \
+       ldx8(_l, b, 0x5a5a5a5a5a5a5a5a)
+#define ldst_f()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stf4(_f, a, 0.5)                                        \
+       ldf4(_f, a, 0.5)                                        \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stf4(_f, b, 0.5)                                        \
+       ldf4(_f, b, 0.5)
+#define ldst_d()                                               \
+       reset(0xa5)                                             \
+       movi %v0 buf                                            \
+       stf8(_d, a, 0.5)                                        \
+       ldf8(_d, a, 0.5)                                        \
+       reset(0xa5)                                             \
+       movi %v0 $(buf + M64)                                   \
+       stf8(_d, b, 0.5)                                        \
+       ldf8(_d, b, 0.5)
+
+.data          67112960
+buf:
+.size          M64
+.size          8
+ok:
+.c             "ok"
+
+.code
+       prolog
+
+       alu(add)
+       alu(sub)
+       alu(rsb)
+       alu(mul)
+       alu(div)
+       alu(rem)
+       lsh()
+       rsh()
+       alu(and)
+       alu(or)
+       alu(xor)
+       ldst_c()
+       ldst_uc()
+       ldst_s()
+       ldst_us()
+       ldst_i()
+#if __WORDSIZE == 64
+       ldst_ui()
+       ldst_l()
+#endif
+       ldst_f()
+       ldst_d()
+
+       prepare
+               pushargi ok
+       finishi @puts
+       ret
+       epilog
diff --git a/lib/jit_x86-cpu.c b/lib/jit_x86-cpu.c
index 50ab0e3..fa4eade 100644
--- a/lib/jit_x86-cpu.c
+++ b/lib/jit_x86-cpu.c
@@ -2466,10 +2466,15 @@ _ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    addr(r0, r1, r2);
+    ldr_c(r0, r0);
+#else
     rex(0, WIDE, r0, r1, r2);
     ic(0x0f);
     ic(0xbe);
     rx(r0, 0, r2, r1, _SCL1);
+#endif
 }
 
 static void
@@ -2493,10 +2498,15 @@ _ldxi_c(jit_state_t *_jit, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    addr(r0, r1, r2);
+    ldr_uc(r0, r0);
+#else
     rex(0, WIDE, r0, r1, r2);
     ic(0x0f);
     ic(0xb6);
     rx(r0, 0, r2, r1, _SCL1);
+#endif
 }
 
 static void
@@ -2520,10 +2530,15 @@ _ldxi_uc(jit_state_t *_jit, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    addr(r0, r1, r2);
+    ldr_s(r0, r0);
+#else
     rex(0, WIDE, r0, r1, r2);
     ic(0x0f);
     ic(0xbf);
     rx(r0, 0, r2, r1, _SCL1);
+#endif
 }
 
 static void
@@ -2547,10 +2562,15 @@ _ldxi_s(jit_state_t *_jit, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    addr(r0, r1, r2);
+    ldr_us(r0, r0);
+#else
     rex(0, WIDE, r0, r1, r2);
     ic(0x0f);
     ic(0xb7);
     rx(r0, 0, r2, r1, _SCL1);
+#endif
 }
 
 static void
@@ -2610,9 +2630,15 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r1, jit_word_t i0)
 static void
 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    addr(r0, r1, r2);
+    /* to avoid confusion with macro renames */
+    _ldr_ui(_jit, r0, r0);
+#else
     rex(0, 0, r0, r1, r2);
     ic(0x8b);
     rx(r0, 0, r2, r1, _SCL1);
+#endif
 }
 
 static void
@@ -2789,6 +2815,12 @@ static void
 _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_int32_t                reg;
+#if __X64_32
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_c(rn(reg), r2);
+    jit_unget_reg(reg);
+#else
     if (reg8_p(r2)) {
        rex(0, 0, r2, r1, r0);
        ic(0x88);
@@ -2802,6 +2834,7 @@ _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        rx(rn(reg), 0, r0, r1, _SCL1);
        jit_unget_reg(reg);
     }
+#endif
 }
 
 static void
@@ -2834,10 +2867,18 @@ _stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 static void
 _stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_s(rn(reg), r2);
+    jit_unget_reg(reg);
+#else
     ic(0x66);
     rex(0, 0, r2, r1, r0);
     ic(0x89);
     rx(r2, 0, r0, r1, _SCL1);
+#endif
 }
 
 static void
@@ -2861,9 +2902,17 @@ _stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 static void
 _stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    str_i(rn(reg), r2);
+    jit_unget_reg(reg);
+#else
     rex(0, 0, r2, r1, r0);
     ic(0x89);
     rx(r2, 0, r0, r1, _SCL1);
+#endif
 }
 
 static void
diff --git a/lib/jit_x86-sse.c b/lib/jit_x86-sse.c
index cea1632..d29ee4c 100644
--- a/lib/jit_x86-sse.c
+++ b/lib/jit_x86-sse.c
@@ -232,13 +232,15 @@ static void _sse_unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
 #  define sse_ldr_f(r0, r1)            movssmr(0, r1, _NOREG, _SCL1, r0)
 #  define sse_ldi_f(r0, i0)            _sse_ldi_f(_jit, r0, i0)
 static void _sse_ldi_f(jit_state_t*, jit_int32_t, jit_word_t);
-#  define sse_ldxr_f(r0, r1, r2)       movssmr(0, r1, r2, _SCL1, r0)
+#  define sse_ldxr_f(r0, r1, r2)       _sse_ldxr_f(_jit, r0, r1, r2)
+static void _sse_ldxr_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define sse_ldxi_f(r0, r1, i0)       _sse_ldxi_f(_jit, r0, r1, i0)
 static void _sse_ldxi_f(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define sse_str_f(r0, r1)            movssrm(r1, 0, r0, _NOREG, _SCL1)
 #  define sse_sti_f(i0, r0)            _sse_sti_f(_jit, i0, r0)
 static void _sse_sti_f(jit_state_t*, jit_word_t,jit_int32_t);
-#  define sse_stxr_f(r0, r1, r2)       movssrm(r2, 0, r0, r1, _SCL1)
+#  define sse_stxr_f(r0, r1, r2)       _sse_stxr_f(_jit, r0, r1, r2)
+static void _sse_stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define sse_stxi_f(i0, r0, r1)       _sse_stxi_f(_jit, i0, r0, r1)
 static void _sse_stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 #  define sse_bltr_f(i0, r0, r1)       _sse_bltr_f(_jit, i0, r0, r1)
@@ -366,14 +368,16 @@ static void _sse_unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
 #  define sse_ldr_d(r0, r1)            movsdmr(0, r1, _NOREG, _SCL1, r0)
 #  define sse_ldi_d(r0, i0)            _sse_ldi_d(_jit, r0, i0)
 static void _sse_ldi_d(jit_state_t*, jit_int32_t, jit_word_t);
-#  define sse_ldxr_d(r0, r1, r2)       movsdmr(0, r1, r2, _SCL1, r0)
+#  define sse_ldxr_d(r0, r1, r2)       _sse_ldxr_d(_jit, r0, r1, r2)
+static void _sse_ldxr_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_int32_t);
 #  define sse_ldxi_d(r0, r1, i0)       _sse_ldxi_d(_jit, r0, r1, i0)
 static void _sse_ldxi_d(jit_state_t*, jit_int32_t, jit_int32_t, jit_word_t);
 #  define sse_bltr_d(i0, r0, r1)       _sse_bltr_d(_jit, i0, r0, r1)
 #  define sse_str_d(r0, r1)            movsdrm(r1, 0, r0, _NOREG, _SCL1)
 #  define sse_sti_d(i0, r0)            _sse_sti_d(_jit, i0, r0)
 static void _sse_sti_d(jit_state_t*, jit_word_t,jit_int32_t);
-#  define sse_stxr_d(r0, r1, r2)       movsdrm(r2, 0, r0, r1, _SCL1)
+#  define sse_stxr_d(r0, r1, r2)       _sse_stxr_d(_jit, r0, r1, r2)
+static void _sse_stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define sse_stxi_d(i0, r0, r1)       _sse_stxi_d(_jit, i0, r0, r1)
 static void _sse_stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
 static jit_word_t _sse_bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
@@ -935,6 +939,20 @@ _sse_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 }
 
 static void
+_sse_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    sse_ldr_f(r0, rn(reg));
+    jit_unget_reg(reg);
+#else
+    movssmr(0, r1, r2, _SCL1, r0);
+#endif
+}
+
+static void
 _sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
@@ -942,8 +960,13 @@ _sse_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        movssmr(i0, r1, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r1, i0);
+       sse_ldr_f(r0, rn(reg));
+#else
        movi(rn(reg), i0);
        sse_ldxr_f(r0, r1, rn(reg));
+#endif
        jit_unget_reg(reg);
     }
 }
@@ -963,6 +986,20 @@ _sse_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 }
 
 static void
+_sse_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    sse_str_f(rn(reg), r2);
+    jit_unget_reg(reg);
+#else
+    movssrm(r2, 0, r0, r1, _SCL1);
+#endif
+}
+
+static void
 _sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
@@ -970,8 +1007,13 @@ _sse_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
        movssrm(r1, i0, r0, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r0, i0);
+       sse_str_f(rn(reg), r1);
+#else
        movi(rn(reg), i0);
        sse_stxr_f(rn(reg), r0, r1);
+#endif
        jit_unget_reg(reg);
     }
 }
@@ -1290,6 +1332,20 @@ _sse_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 }
 
 static void
+_sse_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    sse_ldr_d(r0, rn(reg));
+    jit_unget_reg(reg);
+#else
+    movsdmr(0, r1, r2, _SCL1, r0);
+#endif
+}
+
+static void
 _sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
@@ -1297,8 +1353,13 @@ _sse_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        movsdmr(i0, r1, _NOREG, _SCL1, r0);
     else {
        reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r1, i0);
+       sse_ldr_d(r0, rn(reg));
+#else
        movi(rn(reg), i0);
        sse_ldxr_d(r0, r1, rn(reg));
+#endif
        jit_unget_reg(reg);
     }
 }
@@ -1318,6 +1379,20 @@ _sse_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 }
 
 static void
+_sse_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    sse_str_d(rn(reg), r2);
+    jit_unget_reg(reg);
+#else
+    movsdrm(r2, 0, r0, r1, _SCL1);
+#endif
+}
+
+static void
 _sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                reg;
@@ -1325,8 +1400,13 @@ _sse_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
        movsdrm(r1, i0, r0, _NOREG, _SCL1);
     else {
        reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r0, i0);
+       sse_str_d(rn(reg), r1);
+#else
        movi(rn(reg), i0);
        sse_stxr_f(rn(reg), r0, r1);
+#endif
        jit_unget_reg(reg);
     }
 }
diff --git a/lib/jit_x86-x87.c b/lib/jit_x86-x87.c
index 75bde5c..5b45c26 100644
--- a/lib/jit_x86-x87.c
+++ b/lib/jit_x86-x87.c
@@ -887,8 +887,15 @@ _x87_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _x87_ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    x87_ldr_f(r0, rn(reg));
+#else
     fldsm(0, r1, r2, _SCL1);
     fstpr(r0 + 1);
+#endif
 }
 
 static void
@@ -901,8 +908,13 @@ _x87_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
     else {
        reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r1, i0);
+       x87_ldr_f(r0, rn(reg));
+#else
        movi(rn(reg), i0);
        x87_ldxr_f(r0, r1, rn(reg));
+#endif
        jit_unget_reg(reg);
     }
 }
@@ -941,6 +953,12 @@ _x87_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 static void
 _x87_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    x87_str_f(rn(reg), r2);
+#else
     if (r2 == _ST0_REGNO)
        fstsm(0, r0, r1, _SCL1);
     else {
@@ -948,6 +966,7 @@ _x87_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        fstsm(0, r0, r1, _SCL1);
        fxchr(r2);
     }
+#endif
 }
 
 static void
@@ -956,8 +975,13 @@ _x87_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     jit_int32_t                reg;
     if (!can_sign_extend_int_p(i0)) {
        reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r0, i0);
+       x87_str_f(rn(reg), r1);
+#else
        movi(rn(reg), i0);
        x87_stxr_f(rn(reg), r0, r1);
+#endif
        jit_unget_reg(reg);
     }
     else if (r1 == _ST0_REGNO)
@@ -1144,8 +1168,15 @@ _x87_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 static void
 _x87_ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r1, r2);
+    x87_ldr_d(r0, rn(reg));
+#else
     fldlm(0, r1, r2, _SCL1);
     fstpr(r0 + 1);
+#endif
 }
 
 static void
@@ -1158,8 +1189,13 @@ _x87_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     }
     else {
        reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r1, i0);
+       x87_ldr_d(r0, rn(reg));
+#else
        movi(rn(reg), i0);
        x87_ldxr_d(r0, r1, rn(reg));
+#endif
        jit_unget_reg(reg);
     }
 }
@@ -1198,6 +1234,12 @@ _x87_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 static void
 _x87_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
+#if __X64_32
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    addr(rn(reg), r0, r1);
+    x87_str_d(rn(reg), r2);
+#else
     if (r2 == _ST0_REGNO)
        fstlm(0, r0, r1, _SCL1);
     else {
@@ -1205,6 +1247,7 @@ _x87_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        fstlm(0, r0, r1, _SCL1);
        fxchr(r2);
     }
+#endif
 }
 
 static void
@@ -1213,8 +1256,13 @@ _x87_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     jit_int32_t                reg;
     if (!can_sign_extend_int_p(i0)) {
        reg = jit_get_reg(jit_class_gpr);
+#if __X64_32
+       addi(rn(reg), r0, i0);
+       x87_str_d(rn(reg), r1);
+#else
        movi(rn(reg), i0);
        x87_stxr_d(rn(reg), r0, r1);
+#endif
        jit_unget_reg(reg);
     }
     else if (r1 == _ST0_REGNO)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]