From: Andy Wingo
Subject: [Guile-commits] 209/437: Correct float division and LX with stop code generation.
Date: Mon, 2 Jul 2018 05:14:22 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 2475ae88d4cfe1ff266d19647c295ab73314772a
Author: pcpa <address@hidden>
Date:   Sat Apr 27 04:37:57 2013 -0300

    Correct float division and LX with stop code generation.
    
        * lib/jit_ia64-cpu.c: Correct X2 pattern matching by preventing
        it from attempting to insert a stop between the L and the X
        instruction; that is, check the registers and predicates
        before emitting the L instruction, not after.
    
        * lib/jit_ia64-fpu.c: Slightly simplify and correct the
        divr_f and divr_d implementations (see the plain C sketch
        at the end of this log message).
    
        * check/lightning.c: Add __ia64__ preprocessor define
        on Itanium.
    
        * check/alu.inc, check/clobber.tst, check/float.tst: Define
        several macros conditionally on __ia64__. This is required
        because __ia64__ jit generation can use far too much memory:
        instruction reordering to avoid "stops" as much as possible
        is not implemented, which causes far too many nops to be
        generated; also, division and remainder require function
        calls, and float division requires significant code to
        implement.
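
    For reference, divr_f and divr_d compute the quotient by
    Newton-Raphson refinement of the frcpa reciprocal approximation.
    A plain C sketch of the same arithmetic follows; soft_divf() and
    frcpa() are illustrative stand-ins, not code from this patch,
    and each step below corresponds to one predicated fma in
    lib/jit_ia64-fpu.c:

        /* Illustrative sketch only. The real frcpa instruction
         * returns a rough approximation of 1/b (plus a predicate);
         * the stand-in below just models its result. */
        static float frcpa(float b) { return 1.0f / b; }

        static float soft_divf(float a, float b)
        {
            float y = frcpa(b);     /* initial estimate of 1/b       */
            float e = 1.0f - b * y; /* error of the estimate         */
            y = y + y * e;          /* Newton-Raphson refinement:    */
            e = e * e;              /* error shrinks quadratically   */
            y = y + y * e;
            e = e * e;
            y = y + y * e;
            float q = a * y;        /* quotient estimate             */
            float r = a - b * q;    /* residual of that estimate     */
            return q + r * y;       /* final correction and rounding */
        }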
---
 ChangeLog          |  22 +++
 check/alu.inc      |  54 +++++++-
 check/clobber.tst  | 384 ++++++++++++++++++++++++++++++++++-------------------
 check/float.tst    |  32 ++++-
 check/lightning.c  |   5 +
 lib/jit_ia64-cpu.c |   2 +-
 lib/jit_ia64-fpu.c |  24 ++--
 7 files changed, 358 insertions(+), 165 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 0f51dea..4053b9f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,27 @@
 2013-04-27 Paulo Andrade <address@hidden>
 
+       * lib/jit_ia64-cpu.c: Correct X2 pattern matching by preventing
+       it from attempting to insert a stop between the L and the X
+       instruction; that is, check the registers and predicates
+       before emitting the L instruction, not after.
+
+       * lib/jit_ia64-fpu.c: Slightly simplify and correct the
+       divr_f and divr_d implementations.
+
+       * check/lightning.c: Add __ia64__ preprocessor define
+       on Itanium. 
+
+       * check/alu.inc, check/clobber.tst, check/float.tst: Define
+       several macros conditionally on __ia64__. This is required
+       because __ia64__ jit generation can use far too much memory:
+       instruction reordering to avoid "stops" as much as possible
+       is not implemented, which causes far too many nops to be
+       generated; also, division and remainder require function
+       calls, and float division requires significant code to
+       implement.
+
+2013-04-27 Paulo Andrade <address@hidden>
+
        * include/lightning.h: Add new backend specific movr_w_d,
        movr_d_w and movi_d_w codes as helpers to ia64 varargs
        functions arguments.
diff --git a/check/alu.inc b/check/alu.inc
index 571f4c5..0c259ea 100644
--- a/check/alu.inc
+++ b/check/alu.inc
@@ -2,6 +2,15 @@
 ok:
 .c     "ok\n"
 
+/* ia64 code generation is not optimized for size, and some operations
+ * expand to quite long sequences: stops that match no code template
+ * force extra nops to be emitted, division and remainder need
+ * function calls, and float division requires a long inline
+ * sequence.
+ * (the brute force tests of all register combinations can easily
+ *  generate several GB of jit).
+ */
+
 /* 3 operand */
 
 /* reg0 = reg1 op reg2 */
@@ -67,7 +76,12 @@ OP##T##N##i_0##R0##R1##R2:
        ALU2(N, T, OP, I0, I1, V, R1, R0, R2)           \
        ALU2(N, T, OP, I0, I1, V, R2, R1, R0)
 
-#define ALU(N, T, OP, I0, I1, V)                       \
+#if __ia64__
+#  define ALU(N, T, OP, I0, I1, V)                     \
+       ALU3(N, T, OP, I0, I1, V, r0, r1, r2)           \
+       ALU3(N, T, OP, I0, I1, V, v0, v1, v2)
+#else
+#  define ALU(N, T, OP, I0, I1, V)                     \
        ALU1(N, T, OP, I0, I1, V, v0, v1, v2)           \
        ALU1(N, T, OP, I0, I1, V, v0, v1, r0)           \
        ALU1(N, T, OP, I0, I1, V, v0, v1, r1)           \
@@ -77,6 +91,7 @@ OP##T##N##i_0##R0##R1##R2:
        ALU1(N, T, OP, I0, I1, V, v2, r0, r1)           \
        ALU1(N, T, OP, I0, I1, V, v2, r0, r2)           \
        ALU1(N, T, OP, I0, I1, V, r0, r1, r2)
+#endif
 
 /* 3 carry set/propagate */
 
@@ -140,7 +155,12 @@ OP##N##rr##R0##R1##R2:
        ALUX1(N, OP, I0, I1, V, R1, R0, R2)             \
        ALUX1(N, OP, I0, I1, V, R2, R1, R0)
 
-#define ALUX(N, OP, I0, I1, V)                         \
+#if __ia64__
+#  define ALUX(N, OP, I0, I1, V)                       \
+       ALUX2(N, OP, I0, I1, V, r0, r1, r2)             \
+       ALUX2(N, OP, I0, I1, V, v0, v1, v2)
+#else
+#  define ALUX(N, OP, I0, I1, V)                       \
        ALUX0(N, OP, I0, I1, V, v0, v1, v2)             \
        ALUX0(N, OP, I0, I1, V, v0, v1, r0)             \
        ALUX0(N, OP, I0, I1, V, v0, v1, r1)             \
@@ -151,6 +171,7 @@ OP##N##rr##R0##R1##R2:
        ALUX0(N, OP, I0, I1, V, v2, r0, r1)             \
        ALUX0(N, OP, I0, I1, V, v2, r0, r2)             \
        ALUX0(N, OP, I0, I1, V, r0, r1, r2)
+#endif
 
 /* unary int */
 
@@ -176,7 +197,12 @@ OP##N##c##R0##R1:
        UN2(N, OP, I, V, R0, R1)                        \
        UN2(N, OP, I, V, R1, R0)
 
-#define UN(N, OP, I, V)                                        \
+#if __ia64__
+#  define UN(N, OP, I, V)                              \
+       UN2(N, OP, I, V, r0, r1)                        \
+       UN2(N, OP, I, V, v0, v1)
+#else
+#  define UN(N, OP, I, V)                              \
        UN1(N, OP, I, V, v0, v1)                        \
        UN1(N, OP, I, V, v0, v2)                        \
        UN1(N, OP, I, V, v0, r0)                        \
@@ -192,6 +218,7 @@ OP##N##c##R0##R1:
        UN1(N, OP, I, V, r0, r1)                        \
        UN1(N, OP, I, V, r0, r2)                        \
        UN1(N, OP, I, V, r1, r2)
+#endif
 
 /* reg0 = reg1 op reg2 */
 #define FOPR(N, T, OP, I0, I1, V, F0, F1, F2)          \
@@ -245,11 +272,16 @@ OP##T##N##i0##F0##F1##F2:
        FOPI(N, T, OP, I0, I1, V, F0, F1, F2)           \
        FOPI0(N, T, OP, I0, I1, V, F0, F1, F2)
 
-#define  FOP(N, T, OP, I0, I1, V)                      \
+#if __ia64__
+#  define  FOP(N, T, OP, I0, I1, V)                    \
+       FOP1(N, T, OP, I0, I1, V, f0, f1, f2)
+#else
+#  define  FOP(N, T, OP, I0, I1, V)                    \
        FOP1(N, T, OP, I0, I1, V, f0, f1, f2)           \
        FOP1(N, T, OP, I0, I1, V, f0, f2, f3)           \
        FOP1(N, T, OP, I0, I1, V, f0, f3, f4)           \
        FOP1(N, T, OP, I0, I1, V, f0, f5, f1)
+#endif
 
 /* unary float */
 
@@ -275,12 +307,17 @@ OP##N##T##c##R0##R1:
        FUN2(N, T, OP, I, V, R0, R1)                    \
        FUN2(N, T, OP, I, V, R1, R0)
 
-#define FUN(N, T, OP, I, V)                            \
+#if __ia64__
+#  define FUN(N, T, OP, I, V)                          \
+       FUN2(N, T, OP, I, V, f0, f1)
+#else
+#  define FUN(N, T, OP, I, V)                          \
        FUN1(N, T, OP, I, V, f0, f1)                    \
        FUN1(N, T, OP, I, V, f0, f2)                    \
        FUN1(N, T, OP, I, V, f0, f3)                    \
        FUN1(N, T, OP, I, V, f0, f4)                    \
        FUN1(N, T, OP, I, V, f0, f5)
+#endif
 
 /* unordered comparison unary float */
 
@@ -306,12 +343,17 @@ OP##N##T##uc##R0##R1:
        UFUN2(N, T, OP, I, V, R0, R1)                   \
        UFUN2(N, T, OP, I, V, R1, R0)
 
-#define UFUN(N, T, OP, I, V)                           \
+#if __ia64__
+#  define UFUN(N, T, OP, I, V)                         \
+       UFUN2(N, T, OP, I, V, f0, f1)
+#else
+#  define UFUN(N, T, OP, I, V)                         \
        UFUN1(N, T, OP, I, V, f0, f1)                   \
        UFUN1(N, T, OP, I, V, f0, f2)                   \
        UFUN1(N, T, OP, I, V, f0, f3)                   \
        UFUN1(N, T, OP, I, V, f0, f4)                   \
        UFUN1(N, T, OP, I, V, f0, f5)
+#endif
 
 .      $( $NaN =  0.0/0.0)
 .      $( $Inf =  1.0/0.0)
diff --git a/check/clobber.tst b/check/clobber.tst
index 0f8fc4a..7530842 100644
--- a/check/clobber.tst
+++ b/check/clobber.tst
@@ -152,13 +152,19 @@ label:
        alurc0(l, op, i0, i1,   i2, i3, i4, i5)                 \
        alurc1(l, op, i0, i1,   i2, i3, i4, i5)                 \
        alurc2(l, op, i0,               i1, i2, i3, i4, i5)
-#define  alu(l, op)                                            \
-       xalu(l, op, 0, 1, 2, 3, 4, 5)                           \
-       xalu(l, op, 1, 2, 3, 4, 5, 0)                           \
-       xalu(l, op, 2, 3, 4, 5, 0, 1)                           \
-       xalu(l, op, 3, 4, 5, 0, 1, 2)                           \
-       xalu(l, op, 4, 5, 0, 1, 2, 3)                           \
-       xalu(l, op, 5, 0, 1, 2, 3, 4)
+
+#if __ia64__
+#  define alu(l, op)                                           \
+        xalu(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define alu(l, op)                                           \
+        xalu(l, op, 0, 1, 2, 3, 4, 5)                          \
+        xalu(l, op, 1, 2, 3, 4, 5, 0)                          \
+        xalu(l, op, 2, 3, 4, 5, 0, 1)                          \
+        xalu(l, op, 3, 4, 5, 0, 1, 2)                          \
+        xalu(l, op, 4, 5, 0, 1, 2, 3)                          \
+        xalu(l, op, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define fopi(f, l, op, f0, f1, f2, f3, f4, f5)                 \
        setup##f()                                              \
@@ -200,16 +206,26 @@ label:
        foprc0(f, l, op, f0, f1,                f2, f3, f4, f5) \
        foprc1(f, l, op, f0, f1,                f2, f3, f4, f5) \
        foprc2(f, l, op, f0, f1,                f2, f3, f4, f5)
-#define xxfop(l, op, f, f0, f1, f2, f3, f4, f5)                        \
-        xfop(_f, l, op, f0, f1, f2, f3, f4, f5)                \
-        xfop(_d, l, op, f0, f1, f2, f3, f4, f5)
-#define   fop(l, op)                                           \
+#if __ia64__
+#  define xxfop(l, op, f, f0, f1, f2, f3, f4, f5)              \
+          xfop(_f, l, op, f0, f1, f2, f3, f4, f5)
+#else
+#  define xxfop(l, op, f, f0, f1, f2, f3, f4, f5)              \
+          xfop(_f, l, op, f0, f1, f2, f3, f4, f5)              \
+          xfop(_d, l, op, f0, f1, f2, f3, f4, f5)
+#endif
+#if __ia64__
+#  define fop(l, op)                                           \
+       xxfop(l, op, f, 0, 1, 2, 3, 4, 5)
+#else
+#  define fop(l, op)                                           \
        xxfop(l, op, f, 0, 1, 2, 3, 4, 5)                       \
        xxfop(l, op, f, 1, 2, 3, 4, 5, 0)                       \
        xxfop(l, op, f, 2, 3, 4, 5, 0, 1)                       \
        xxfop(l, op, f, 3, 4, 5, 0, 1, 2)                       \
        xxfop(l, op, f, 4, 5, 0, 1, 2, 3)                       \
        xxfop(l, op, f, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define aluxii(l, op, i0, i1, i2, i3, i4, i5)                  \
        setup()                                                 \
@@ -242,13 +258,18 @@ label:
        aluxir(l, op, i0, i1, i2,       i3, i4, i5)             \
        aluxri(l, op, i0, i1, i2,       i3, i4, i5)             \
        aluxrr(l, op, i0, i1, i2,       i3, i4, i5)
-#define  alux(l, op)                                           \
-       xalux(l, op, 0, 1, 2, 3, 4, 5)                          \
-       xalux(l, op, 1, 2, 3, 4, 5, 0)                          \
-       xalux(l, op, 2, 3, 4, 5, 0, 1)                          \
-       xalux(l, op, 3, 4, 5, 0, 1, 2)                          \
-       xalux(l, op, 4, 5, 0, 1, 2, 3)                          \
-       xalux(l, op, 5, 0, 1, 2, 3, 4)
+#if __ia64__
+#  define alux(l, op)                                          \
+        xalux(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define alux(l, op)                                          \
+        xalux(l, op, 0, 1, 2, 3, 4, 5)                         \
+        xalux(l, op, 1, 2, 3, 4, 5, 0)                         \
+        xalux(l, op, 2, 3, 4, 5, 0, 1)                         \
+        xalux(l, op, 3, 4, 5, 0, 1, 2)                         \
+        xalux(l, op, 4, 5, 0, 1, 2, 3)                         \
+        xalux(l, op, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define alui_u(l, op, i0, i1, i2, i3, i4, i5)                  \
        setup()                                                 \
@@ -290,13 +311,18 @@ label:
        alurc0_u(l, op, i0, i1, i2, i3, i4, i5)                 \
        alurc1_u(l, op, i0, i1, i2, i3, i4, i5)                 \
        alurc2_u(l, op, i0,             i1, i2, i3, i4, i5)
-#define  alu_u(l, op)                                          \
-       xalu_u(l, op, 0, 1, 2, 3, 4, 5)                         \
-       xalu_u(l, op, 1, 2, 3, 4, 5, 0)                         \
-       xalu_u(l, op, 2, 3, 4, 5, 0, 1)                         \
-       xalu_u(l, op, 3, 4, 5, 0, 1, 2)                         \
-       xalu_u(l, op, 4, 5, 0, 1, 2, 3)                         \
-       xalu_u(l, op, 5, 0, 1, 2, 3, 4)
+#if __ia64__
+#  define alu_u(l, op)                                         \
+        xalu_u(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define alu_u(l, op)                                         \
+        xalu_u(l, op, 0, 1, 2, 3, 4, 5)                        \
+        xalu_u(l, op, 1, 2, 3, 4, 5, 0)                        \
+        xalu_u(l, op, 2, 3, 4, 5, 0, 1)                        \
+        xalu_u(l, op, 3, 4, 5, 0, 1, 2)                        \
+        xalu_u(l, op, 4, 5, 0, 1, 2, 3)                        \
+        xalu_u(l, op, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define unir(l, op, i0, i1, i2, i3, i4, i5)                    \
        setup()                                                 \
@@ -311,13 +337,18 @@ label:
 #define   xuni(l, op, i0, i1, i2, i3, i4, i5)                  \
          unir(l, op, i0, i1,   i2, i3, i4, i5)                 \
         unirc(l, op, i0,               i1, i2, i3, i4, i5)
-#define   uni(l, op)                                           \
+#if __ia64__
+#  define uni(l, op)                                           \
+        xuni(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define uni(l, op)                                           \
         xuni(l, op, 0, 1, 2, 3, 4, 5)                          \
         xuni(l, op, 1, 2, 3, 4, 5, 0)                          \
         xuni(l, op, 2, 3, 4, 5, 0, 1)                          \
         xuni(l, op, 3, 4, 5, 0, 1, 2)                          \
         xuni(l, op, 4, 5, 0, 1, 2, 3)                          \
         xuni(l, op, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define unfr(f, l, op, f0, f1, f2, f3, f4, f5)                 \
        setup##f()                                              \
@@ -335,13 +366,18 @@ label:
 #define xxunf(l, op, f0, f1, f2, f3, f4, f5)                   \
         xunf(_f, l, op, f0, f1, f2, f3, f4, f5)                \
         xunf(_d, l, op, f0, f1, f2, f3, f4, f5)
-#define   unf(l, op)                                           \
+#if __ia64__
+#  define unf(l, op)                                           \
+       xxunf(l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define unf(l, op)                                           \
        xxunf(l, op, 0, 1, 2, 3, 4, 5)                          \
        xxunf(l, op, 1, 2, 3, 4, 5, 0)                          \
        xxunf(l, op, 2, 3, 4, 5, 0, 1)                          \
        xxunf(l, op, 3, 4, 5, 0, 1, 2)                          \
        xxunf(l, op, 4, 5, 0, 1, 2, 3)                          \
        xxunf(l, op, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
        setup()                                                 \
@@ -365,35 +401,50 @@ label:
        op##r##f %IR##r0 %FR##f0 %FR##f0                        \
        check5(rc##f##f0, l, r1, r2, r3, r4, r5)                \
        checkf5(f, rc##r0, l, f1, f2, f3, f4, f5)
-#define  ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
-        fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
-        fcpr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
-       fcprc(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
-        fcpi(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)   \
-        fcpr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)   \
-       fcprc(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)   \
-        fcpi(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)   \
-        fcpr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)   \
-       fcprc(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)   \
-        fcpi(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)   \
-        fcpr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)   \
-       fcprc(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)   \
-        fcpi(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)   \
-        fcpr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)   \
-       fcprc(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)   \
-        fcpi(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)   \
-        fcpr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)   \
-       fcprc(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
-#define  xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
-        ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
-        ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)   \
-        ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)   \
-        ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)   \
-        ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)   \
-        ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
-#define  fcmp(l, op)                                           \
-        xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)    \
-        xfcp(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+#if __ia64__
+#  define ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         fcpi(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+         fcpi(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)  \
+         fcpr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)  \
+        fcprc(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
+#if __ia64__
+#  define xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define xfcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)  \
+         ifcp(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
+#if __ia64__
+#  define fcmp(l, op)                                          \
+         xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+#else
+#  define fcmp(l, op)                                          \
+         xfcp(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)   \
+         xfcp(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
+#endif
 
 #define imvi(l, i0, i1, i2, i3, i4, i5)                                \
        setup()                                                 \
@@ -407,13 +458,18 @@ label:
 #define xmvi(l, i0, i1, i2, i3, i4, i5)                                \
        imvi(l, i0,     i1, i2, i3, i4, i5)                     \
        imvr(l, i0, i1, i2, i3, i4, i5)
-#define  mvi(l)                                                        \
-       xmvi(l, 0, 1, 2, 3, 4, 5)                               \
-       xmvi(l, 1, 2, 3, 4, 5, 0)                               \
-       xmvi(l, 2, 3, 4, 5, 0, 1)                               \
-       xmvi(l, 3, 4, 5, 0, 1, 2)                               \
-       xmvi(l, 4, 5, 0, 1, 2, 3)                               \
-       xmvi(l, 5, 0, 1, 2, 3, 4)
+#if __ia64__
+#  define mvi(l)                                               \
+        xmvi(l, 0, 1, 2, 3, 4, 5)
+#else
+#  define mvi(l)                                               \
+        xmvi(l, 0, 1, 2, 3, 4, 5)                              \
+        xmvi(l, 1, 2, 3, 4, 5, 0)                              \
+        xmvi(l, 2, 3, 4, 5, 0, 1)                              \
+        xmvi(l, 3, 4, 5, 0, 1, 2)                              \
+        xmvi(l, 4, 5, 0, 1, 2, 3)                              \
+        xmvi(l, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define fmvi(f, l, f0, f1, f2, f3, f4, f5)                     \
        setup##f()                                              \
@@ -427,13 +483,18 @@ label:
 #define xmvf(f, l, f0, f1, f2, f3, f4, f5)                     \
        fmvi(f, l, f0, f1, f2, f3, f4, f5)                      \
        fmvr(f, l, f0, f1,      f2, f3, f4, f5)
-#define xxmvf(f, l)                                            \
-        xmvf(f, l, 0, 1, 2, 3, 4, 5)                           \
-        xmvf(f, l, 1, 2, 3, 4, 5, 0)                           \
-        xmvf(f, l, 2, 3, 4, 5, 0, 1)                           \
-        xmvf(f, l, 3, 4, 5, 0, 1, 2)                           \
-        xmvf(f, l, 4, 5, 0, 1, 2, 3)                           \
-        xmvf(f, l, 5, 0, 1, 2, 3, 4)
+#if __ia64__
+#  define xxmvf(f, l)                                          \
+         xmvf(f, l, 0, 1, 2, 3, 4, 5)
+#else
+#  define xxmvf(f, l)                                          \
+          xmvf(f, l, 0, 1, 2, 3, 4, 5)                         \
+          xmvf(f, l, 1, 2, 3, 4, 5, 0)                         \
+          xmvf(f, l, 2, 3, 4, 5, 0, 1)                         \
+          xmvf(f, l, 3, 4, 5, 0, 1, 2)                         \
+          xmvf(f, l, 4, 5, 0, 1, 2, 3)                         \
+          xmvf(f, l, 5, 0, 1, 2, 3, 4)
+#endif
 #define   mvf(l)                                               \
        xxmvf(_f, l)                                            \
        xxmvf(_d, l)
@@ -451,13 +512,18 @@ label:
 #define  xf2f(f, l, op, f0, f1, f2, f3, f4, f5)                        \
         f2fr(f, l, op, f0, f1, f2, f3, f4, f5)                 \
        f2frc(f, l, op, f0,             f1, f2, f3, f4, f5)
-#define   f2f(l, f, op)                                                \
+#if __ia64__
+#  define f2f(l, f, op)                                                \
+        xf2f(f, l, op, 0, 1, 2, 3, 4, 5)
+#else
+#  define f2f(l, f, op)                                                \
         xf2f(f, l, op, 0, 1, 2, 3, 4, 5)                       \
         xf2f(f, l, op, 1, 2, 3, 4, 5, 0)                       \
         xf2f(f, l, op, 2, 3, 4, 5, 0, 1)                       \
         xf2f(f, l, op, 3, 4, 5, 0, 1, 2)                       \
         xf2f(f, l, op, 4, 5, 0, 1, 2, 3)                       \
         xf2f(f, l, op, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
        setup()                                                 \
@@ -466,20 +532,27 @@ label:
        op##f %IR##r0 %FR##f0                                   \
        check5(r##f##f0, l, r1, r2, r3, r4, r5)                 \
        checkf5(f, i##r0, l, f1, f2, f3, f4, f5)
-#define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
-       f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)    \
-       f2ir(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)    \
-       f2ir(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)    \
-       f2ir(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)    \
-       f2ir(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)    \
-       f2ir(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
-#define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
-       if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)    \
-       if2i(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)    \
-       if2i(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)    \
-       if2i(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)    \
-       if2i(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)    \
-       if2i(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#if __ia64__
+#  define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#  define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         f2ir(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+         f2ir(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#  define xf2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)  \
+         if2i(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
 #define f2i(l, op)                                             \
        xf2i(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)     \
        xf2i(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
@@ -491,20 +564,27 @@ label:
        op##f %FR##f0 %IR##r0                                   \
        check5(r##f##f0, l, r1, r2, r3, r4, r5)                 \
        checkf5(f, i##r0, l, f1, f2, f3, f4, f5)
-#define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
-       i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)    \
-       i2fr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)    \
-       i2fr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)    \
-       i2fr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)    \
-       i2fr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)    \
-       i2fr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
-#define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)   \
-       ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)    \
-       ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)    \
-       ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)    \
-       ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)    \
-       ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)    \
-       ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#if __ia64__
+#  define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#  define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         i2fr(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)  \
+         i2fr(f, l, op, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#  define xi2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5) \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)  \
+         ii2f(f, l, op, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#endif
 #define i2f(l, op)                                             \
        xi2f(_f, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)     \
        xi2f(_d, l, op, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5)
@@ -578,13 +658,18 @@ label:
         xxldi(_us, l, r0, r1, r2, r3, r4, r5)                  \
         xxldi( _i, l, r0, r1, r2, r3, r4, r5)                  \
        xxxldi(l, r0, r1, r2, r3, r4, r5)
-#define   ldi(l)                                               \
+#if __ia64__
+#  define ldi(l)                                               \
+        xldi(l, 0, 1, 2, 3, 4, 5)
+#else
+#  define ldi(l)                                               \
         xldi(l, 0, 1, 2, 3, 4, 5)                              \
         xldi(l, 1, 2, 3, 4, 5, 0)                              \
         xldi(l, 2, 3, 4, 5, 0, 1)                              \
         xldi(l, 3, 4, 5, 0, 1, 2)                              \
         xldi(l, 4, 5, 0, 1, 2, 3)                              \
         xldi(l, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define fldi(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
        setup()                                                 \
@@ -622,20 +707,25 @@ label:
 #define         xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)         \
          xldf(_f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)     \
          xldf(_d, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
-#define  fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)                \
-        xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)         \
-        xxldf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)         \
-        xxldf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)         \
-        xxldf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)         \
-        xxldf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)         \
-        xxldf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
-#define  ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)                \
-        fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)         \
-        fxldf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)         \
-        fxldf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)         \
-        fxldf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)         \
-        fxldf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)         \
-        fxldf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#if __ia64__
+#  define ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)        \
+         xxldf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#  define ixldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         fxldf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)        \
+         fxldf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
 #define   ldf(l)                                               \
        ixldf(l, 0,1,2,3,4,5, 0,1,2,3,4,5)
 
@@ -695,13 +785,18 @@ label:
         xxsti( _s, l, r0, r1, r2, r3, r4, r5)                  \
         xxsti( _i, l, r0, r1, r2, r3, r4, r5)                  \
        xxxsti(l, r0, r1, r2, r3, r4, r5)
-#define   sti(l)                                               \
+#if __ia64__
+#  define sti(l)                                               \
+        xsti(l, 0, 1, 2, 3, 4, 5)
+#else
+#  define sti(l)                                               \
         xsti(l, 0, 1, 2, 3, 4, 5)                              \
         xsti(l, 1, 2, 3, 4, 5, 0)                              \
         xsti(l, 2, 3, 4, 5, 0, 1)                              \
         xsti(l, 3, 4, 5, 0, 1, 2)                              \
         xsti(l, 4, 5, 0, 1, 2, 3)                              \
         xsti(l, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define fsti(f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
        setup()                                                 \
@@ -739,20 +834,25 @@ label:
 #define         xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)         \
          xstf(_f, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)     \
          xstf(_d, l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
-#define  fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)                \
-        xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)         \
-        xxstf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)         \
-        xxstf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)         \
-        xxstf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)         \
-        xxstf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)         \
-        xxstf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
-#define  ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)                \
-        fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)         \
-        fxstf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)         \
-        fxstf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)         \
-        fxstf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)         \
-        fxstf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)         \
-        fxstf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#if __ia64__
+#  define ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)
+#else
+#  define fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f1,f2,f3,f4,f5,f0)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f2,f3,f4,f5,f0,f1)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f3,f4,f5,f0,f1,f2)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f4,f5,f0,f1,f2,f3)        \
+         xxstf(l, r0,r1,r2,r3,r4,r5, f5,f0,f1,f2,f3,f4)
+#  define ixstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)       \
+         fxstf(l, r0,r1,r2,r3,r4,r5, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r1,r2,r3,r4,r5,r0, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r2,r3,r4,r5,r0,r1, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r3,r4,r5,r0,r1,r2, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r4,r5,r0,r1,r2,r3, f0,f1,f2,f3,f4,f5)        \
+         fxstf(l, r5,r0,r1,r2,r3,r4, f0,f1,f2,f3,f4,f5)
+#endif
 #define   stf(l)                                               \
        ixstf(l, 0,1,2,3,4,5, 0,1,2,3,4,5)
 
@@ -775,13 +875,18 @@ r##l##op##r0:                                  \
 #define  xjmpi(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)       \
           bri(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)        \
           brr(l, op, u, il, ir, r0, r1, r2, r3, r4, r5)
-#define  jmpi(l, op, u, il, ir)                                        \
-       xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5)               \
-       xjmpi(l, op, u, il, ir, 1, 2, 3, 4, 5, 0)               \
-       xjmpi(l, op, u, il, ir, 2, 3, 4, 5, 0, 1)               \
-       xjmpi(l, op, u, il, ir, 3, 4, 5, 0, 1, 2)               \
-       xjmpi(l, op, u, il, ir, 4, 5, 0, 1, 2, 3)               \
-       xjmpi(l, op, u, il, ir, 5, 0, 1, 2, 3, 4)
+#if __ia64__
+#  define jmpi(l, op, u, il, ir)                               \
+        xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5)
+#else
+#  define jmpi(l, op, u, il, ir)                               \
+        xjmpi(l, op, u, il, ir, 0, 1, 2, 3, 4, 5)              \
+        xjmpi(l, op, u, il, ir, 1, 2, 3, 4, 5, 0)              \
+        xjmpi(l, op, u, il, ir, 2, 3, 4, 5, 0, 1)              \
+        xjmpi(l, op, u, il, ir, 3, 4, 5, 0, 1, 2)              \
+        xjmpi(l, op, u, il, ir, 4, 5, 0, 1, 2, 3)              \
+        xjmpi(l, op, u, il, ir, 5, 0, 1, 2, 3, 4)
+#endif
 
 #define bfi(f, l, op, il, ir, f0, f1, f2, f3, f4, f5)          \
        setup##f()                                              \
@@ -804,13 +909,18 @@ r##l##op##f##f0:                               \
 #define xxjmpf(l, op, il, ir, f0, f1, f2, f3, f4, f5)          \
         xjmpf(_f, l, op, il, ir, f0, f1, f2, f3, f4, f5)       \
         xjmpf(_d, l, op, il, ir, f0, f1, f2, f3, f4, f5)
-#define   jmpf(l, op, il, ir)                                  \
+#if __ia64__
+#  define jmpf(l, op, il, ir)                                  \
+       xxjmpf(l, op, il, ir, 0, 1, 2, 3, 4, 5)
+#else
+#  define jmpf(l, op, il, ir)                                  \
        xxjmpf(l, op, il, ir, 0, 1, 2, 3, 4, 5)                 \
        xxjmpf(l, op, il, ir, 1, 2, 3, 4, 5, 0)                 \
        xxjmpf(l, op, il, ir, 2, 3, 4, 5, 0, 1)                 \
        xxjmpf(l, op, il, ir, 3, 4, 5, 0, 1, 2)                 \
        xxjmpf(l, op, il, ir, 4, 5, 0, 1, 2, 3)                 \
        xxjmpf(l, op, il, ir, 5, 0, 1, 2, 3, 4)
+#endif
 
 .data  32
 buff:
diff --git a/check/float.tst b/check/float.tst
index 9c8d039..65ab3c6 100644
--- a/check/float.tst
+++ b/check/float.tst
@@ -74,9 +74,15 @@ F##op##i##t##r0##f0##f1##l:
        tcmp1(l, t, op, r0, li, ri)                     \
        tcmp1(l, t, op, r1, li, ri)                     \
        tcmp1(l, t, op, r2, li, ri)
-#define tcmp(l, op, li, ri)                            \
-       tcmp0(l, _f, op, li, ri)                        \
-       tcmp0(l, _d, op, li, ri)
+#if __ia64__
+#  define tcmp(l, op, li, ri)                          \
+        xtcmp(l, _f, op, r0, f0, f1, li, ri)           \
+        xtcmp(l, _d, op, r0, f0, f1, li, ri)
+#else
+#  define tcmp(l, op, li, ri)                          \
+        tcmp0(l, _f, op, li, ri)                       \
+        tcmp0(l, _d, op, li, ri)
+#endif
 
 #define xfcmp(l, t, op, r0, f0, f1, li, ri)            \
        movi##t %f0 li                                  \
@@ -114,9 +120,15 @@ F##op##i##t##r0##f0##f1##l:
        fcmp1(l, t, op, r0, li, ri)                     \
        fcmp1(l, t, op, r1, li, ri)                     \
        fcmp1(l, t, op, r2, li, ri)
-#define fcmp(l, op, li, ri)                            \
-       fcmp0(l, _f, op, li, ri)                        \
-       fcmp0(l, _d, op, li, ri)
+#if __ia64__
+#  define fcmp(l, op, li, ri)                          \
+       xfcmp(l, _f, op, r0, f0, f1, li, ri)            \
+       xfcmp(l, _d, op, r0, f0, f1, li, ri)
+#else
+#  define fcmp(l, op, li, ri)                          \
+        fcmp0(l, _f, op, li, ri)                       \
+        fcmp0(l, _d, op, li, ri)
+#endif
 
 #define xf2w(l, f, r0, f0, iv, fv)                     \
        movi##f %f0 fv                                  \
@@ -138,9 +150,15 @@ W##f##r0##f0##l:
        f2w1(l, t, r0, iv, fv)                          \
        f2w1(l, t, r1, iv, fv)                          \
        f2w1(l, t, r2, iv, fv)
-#define f2w(l, iv, fv)                                 \
+#if __ia64__
+#  define f2w(l, iv, fv)                               \
+       xf2w(l, _f, r0, f0, iv, fv)                     \
+       xf2w(l, _d, r0, f0, iv, fv)
+#else
+#  define f2w(l, iv, fv)                               \
        f2w0(l, _f, iv, fv)                             \
        f2w0(l, _d, iv, fv)
+#endif
 
 .code
        prolog
diff --git a/check/lightning.c b/check/lightning.c
index 0c7e914..b38a568 100644
--- a/check/lightning.c
+++ b/check/lightning.c
@@ -3977,6 +3977,11 @@ main(int argc, char *argv[])
                          sizeof(cmdline) - opt_short,
                          " -D__sparc__=1");
 #endif
+#if defined(__ia64__)
+    opt_short += snprintf(cmdline + opt_short,
+                         sizeof(cmdline) - opt_short,
+                         " -D__ia64__=1");
+#endif
     if ((parser.fp = popen(cmdline, "r")) == NULL)
        error("cannot execute %s", cmdline);
 
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
index 0182d95..d817fbf 100644
--- a/lib/jit_ia64-cpu.c
+++ b/lib/jit_ia64-cpu.c
@@ -3297,9 +3297,9 @@ _X2(jit_state_t *_jit, jit_word_t _p,
     i5  = (im >> 16) &          0x1fL;
     i9  = (im >>  7) &         0x1ffL;
     i7  =  im        &          0x7fL;
-    inst(i41, INST_L);
     TSTPRED(_p);
     TSTREG1(r1);
+    inst(i41, INST_L);
     inst((6L<<37)|(i1<<36)|(i9<<27)|(i5<<22)|
         (ic<<21)|(i7<<13)|(r1<<6)|_p, INST_X);
     SETREG(r1);
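
The reordering above matters because TSTPRED() and TSTREG1() may
themselves emit a stop when the predicate or source register was
written earlier in the current instruction group, and the L and X
slots of a movl must share one bundle with no stop between them.
A minimal model of the corrected ordering, with hypothetical helpers
standing in for the real macros:

    #include <stdio.h>

    /* Illustrative only: these stand in for TSTREG1/TSTPRED/inst. */
    static int group_wrote_src;        /* source register written   */
                                       /* earlier in this group?    */
    static void emit_stop(void) { puts(";;"); group_wrote_src = 0; }
    static void emit_slot(const char *s) { puts(s); }

    static void emit_l_x(void)
    {
        if (group_wrote_src)  /* the check TSTREG1 performs must    */
            emit_stop();      /* run first, so any stop lands       */
                              /* before the L+X pair                */
        emit_slot("L");       /* L slot                             */
        emit_slot("X");       /* X slot: no stop may appear between */
                              /* the L and X slots of one bundle    */
    }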
diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c
index eb12ab5..907afa9 100644
--- a/lib/jit_ia64-fpu.c
+++ b/lib/jit_ia64-fpu.c
@@ -1065,22 +1065,20 @@ dopi(div)
 static void
 _divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    jit_int32_t                t0, t1, t2, t3;
+    jit_int32_t                t0, t1, t2;
     t0 = jit_get_reg(jit_class_fpr);
     t1 = jit_get_reg(jit_class_fpr);
     t2 = jit_get_reg(jit_class_fpr);
-    t3 = jit_get_reg(jit_class_fpr);
     FRCPA(rn(t0), PR_6, r1, r2);
-    FNMA_p(rn(t1), r2, rn(t0), 1, SF_S1, PR_6);
+    FNMA_p(rn(t1), r2, rn(t0), GR_1, SF_S1, PR_6);
     FMA_p(rn(t2), rn(t0), rn(t1), rn(t0), SF_S1, PR_6);
     FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
     FMA_p(rn(t2), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);
     FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
     FMA_p(rn(t1), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);
-    FMPY_S_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
-    FNMA_p(rn(t3), r2, rn(t2), r1, SF_S1, PR_6);
-    FMA_S_p(r0, rn(t3), rn(t1), 1, SF_S0, PR_6);
-    jit_unget_reg(t3);
+    FMPY_S_p(rn(t2), r1, rn(t1), SF_S1, PR_6);
+    FNMA_p(rn(t0), r2, rn(t2), r1, SF_S1, PR_6);
+    FMA_S_p(r0, rn(t0), rn(t1), rn(t2), SF_S0, PR_6);
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);
@@ -1089,22 +1087,20 @@ _divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 static void
 _divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
-    jit_int32_t                t0, t1, t2, t3;
+    jit_int32_t                t0, t1, t2;
     t0 = jit_get_reg(jit_class_fpr);
     t1 = jit_get_reg(jit_class_fpr);
     t2 = jit_get_reg(jit_class_fpr);
-    t3 = jit_get_reg(jit_class_fpr);
     FRCPA(rn(t0), PR_6, r1, r2);
-    FNMA_p(rn(t1), r2, rn(t0), 1, SF_S1, PR_6);
+    FNMA_p(rn(t1), r2, rn(t0), GR_1, SF_S1, PR_6);
     FMA_p(rn(t2), rn(t0), rn(t1), rn(t0), SF_S1, PR_6);
     FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
     FMA_p(rn(t2), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);
     FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
     FMA_p(rn(t1), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);
-    FMPY_D_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);
-    FNMA_p(rn(t3), r2, rn(t2), r1, SF_S1, PR_6);
-    FMA_D_p(r0, rn(t3), rn(t1), 1, SF_S0, PR_6);
-    jit_unget_reg(t3);
+    FMPY_D_p(rn(t2), r1, rn(t1), SF_S1, PR_6);
+    FNMA_p(rn(t0), r2, rn(t2), r1, SF_S1, PR_6);
+    FMA_D_p(r0, rn(t0), rn(t1), rn(t2), SF_S0, PR_6);
     jit_unget_reg(t2);
     jit_unget_reg(t1);
     jit_unget_reg(t0);


