[Guile-commits] 45/437: add floating-point for x86-64
From: Andy Wingo
Subject: [Guile-commits] 45/437: add floating-point for x86-64
Date: Mon, 2 Jul 2018 05:13:42 -0400 (EDT)
wingo pushed a commit to branch lightning
in repository guile.
commit 58c4dcea4396193ec4ac18b54ff84dba0c12719c
Author: Paolo Bonzini <address@hidden>
Date: Thu Nov 23 09:01:19 2006 +0000
add floating-point for x86-64
git-archimport-id: address@hidden/lightning--stable--1.2--patch-49
---
ChangeLog | 14 +++
NEWS | 5 +
lightning/core-common.h | 24 ++--
lightning/i386/asm-32.h | 7 +-
lightning/i386/asm-64.h | 36 ++++++
lightning/i386/asm-i386.h | 279 ++++++++++++++++++++++++++++++++++++++++++++-
lightning/i386/core-64.h | 46 +++++---
lightning/i386/core-i386.h | 5 -
lightning/i386/fp-32.h | 5 +
lightning/i386/fp-64.h | 260 +++++++++++++++++++++++++++++++++++++++++-
lightning/sparc/fp.h | 4 +-
11 files changed, 640 insertions(+), 45 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index a749ceb..79dbe91 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,17 @@
+2006-11-23 Paolo Bonzini <address@hidden>
+
+ * lightning/core-common.h: Add casts in "*i_p" variants.
+ * lightning/i386/asm-32.h: Add _r1.
+ * lightning/i386/asm-64.h: Likewise, and add SSE instructions.
+ * lightning/i386/asm-i386.h: Merge SSE instructions from Gwenole.
+ Use short form for 16-bit AX instructions. Remove _r1
+ * lightning/i386/core-64.h: Add FP ABI support in its infancy.
+ * lightning/i386/core-i386.h: Move jit_arg_f and jit_arg_d...
+ * lightning/i386/core-32.h: ... and jit_prepare_f and jit_prepare_d...
+ * lightning/i386/fp-32.h: ... here.
+ * lightning/i386/fp-64.h: Write the code.
+ * lightning/sparc/fp.h: Fix jit_extr_{f_d,d_f} register order.
+
2006-11-22 Paolo Bonzini <address@hidden>
* lightning/i386/asm-i386.h: Move x86-64 instructions...
diff --git a/NEWS b/NEWS
index 5869867..be75675 100644
--- a/NEWS
+++ b/NEWS
@@ -2,6 +2,11 @@ NEWS FROM VERSION 1.2 TO 1.3
o Initial support for x86-64 back-end (mostly untested).
+o lightning is more strict on casts from integer to pointer.
+ Be sure to use the _p variants when your immediates are
+ of pointer type. This was done to ease 64-bit cleanliness
+ tests.
+
o Many bug fixes.
o JIT_FPRET is used as JIT_RET to move return values.
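To make the NEWS note above concrete: client code that compares against a pointer-valued immediate should go through the _p macros, which (per the core-common.h hunk below) cast the immediate to long before handing it to the unsigned-long variants. The following is purely an illustrative sketch, not part of the patch, assuming the usual lightning 1.x client macros (jit_movi_p, jit_beqi_p, jit_forward, JIT_R0) and a hypothetical sentinel pointer:

#include <lightning.h>

static void *sentinel;   /* hypothetical pointer-typed immediate */

/* Branch to a forward reference when JIT_R0 holds `sentinel'.  Using the
   _p variant keeps the integer/pointer cast inside lightning, so the
   emitted comparison stays 64-bit clean. */
static jit_insn *
emit_sentinel_check (void)
{
  jit_movi_p (JIT_R0, sentinel);                        /* load a pointer immediate */
  return jit_beqi_p (jit_forward (), JIT_R0, sentinel); /* compare it as a pointer */
}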
diff --git a/lightning/core-common.h b/lightning/core-common.h
index d9edaab..1a90c57 100644
--- a/lightning/core-common.h
+++ b/lightning/core-common.h
@@ -396,30 +396,30 @@ typedef union jit_code {
#define jit_bmsi_ul(label, rs, is) jit_bmsi_l((label), (rs), (is))
#define jit_ltr_p(d, s1, s2) jit_ltr_ul((d), (s1), (s2))
-#define jit_lti_p(d, rs, is) jit_lti_ul((d), (rs), (is))
+#define jit_lti_p(d, rs, is) jit_lti_ul((d), (rs), (long)(is))
#define jit_ler_p(d, s1, s2) jit_ler_ul((d), (s1), (s2))
-#define jit_lei_p(d, rs, is) jit_lei_ul((d), (rs), (is))
+#define jit_lei_p(d, rs, is) jit_lei_ul((d), (rs), (long)(is))
#define jit_gtr_p(d, s1, s2) jit_gtr_ul((d), (s1), (s2))
-#define jit_gti_p(d, rs, is) jit_gti_ul((d), (rs), (is))
+#define jit_gti_p(d, rs, is) jit_gti_ul((d), (rs), (long)(is))
#define jit_ger_p(d, s1, s2) jit_ger_ul((d), (s1), (s2))
-#define jit_gei_p(d, rs, is) jit_gei_ul((d), (rs), (is))
+#define jit_gei_p(d, rs, is) jit_gei_ul((d), (rs), (long)(is))
#define jit_eqr_p(d, s1, s2) jit_eqr_ul((d), (s1), (s2))
-#define jit_eqi_p(d, rs, is) jit_eqi_ul((d), (rs), (is))
+#define jit_eqi_p(d, rs, is) jit_eqi_ul((d), (rs), (long)(is))
#define jit_ner_p(d, s1, s2) jit_ner_ul((d), (s1), (s2))
-#define jit_nei_p(d, rs, is) jit_nei_ul((d), (rs), (is))
+#define jit_nei_p(d, rs, is) jit_nei_ul((d), (rs), (long)(is))
#define jit_bltr_p(label, s1, s2) jit_bltr_ul((label), (s1), (s2))
-#define jit_blti_p(label, rs, is) jit_blti_ul((label), (rs), (is))
+#define jit_blti_p(label, rs, is) jit_blti_ul((label), (rs), (long)(is))
#define jit_bler_p(label, s1, s2) jit_bler_ul((label), (s1), (s2))
-#define jit_blei_p(label, rs, is) jit_blei_ul((label), (rs), (is))
+#define jit_blei_p(label, rs, is) jit_blei_ul((label), (rs), (long)(is))
#define jit_bgtr_p(label, s1, s2) jit_bgtr_ul((label), (s1), (s2))
-#define jit_bgti_p(label, rs, is) jit_bgti_ul((label), (rs), (is))
+#define jit_bgti_p(label, rs, is) jit_bgti_ul((label), (rs), (long)(is))
#define jit_bger_p(label, s1, s2) jit_bger_ul((label), (s1), (s2))
-#define jit_bgei_p(label, rs, is) jit_bgei_ul((label), (rs), (is))
+#define jit_bgei_p(label, rs, is) jit_bgei_ul((label), (rs), (long)(is))
#define jit_beqr_p(label, s1, s2) jit_beqr_ul((label), (s1), (s2))
-#define jit_beqi_p(label, rs, is) jit_beqi_ul((label), (rs), (is))
+#define jit_beqi_p(label, rs, is) jit_beqi_ul((label), (rs), (long)(is))
#define jit_bner_p(label, s1, s2) jit_bner_ul((label), (s1), (s2))
-#define jit_bnei_p(label, rs, is) jit_bnei_ul((label), (rs), (is))
+#define jit_bnei_p(label, rs, is) jit_bnei_ul((label), (rs), (long)(is))
#define jit_retval_ui(rd) jit_retval_i((rd))
#define jit_retval_uc(rd) jit_retval_i((rd))
diff --git a/lightning/i386/asm-32.h b/lightning/i386/asm-32.h
index 1945a49..d336cb2 100644
--- a/lightning/i386/asm-32.h
+++ b/lightning/i386/asm-32.h
@@ -45,6 +45,7 @@
#include "asm-i386.h"
+#define _r1(R) ( ((R) & ~3) == _AL || ((R) & ~3) == _AH ? _rN(R) : JITFAIL( "8-bit register required"))
#define _rA(R) _r4(R)
/* Use RIP-addressing in 64-bit mode, if possible */
@@ -58,14 +59,14 @@
#define _m64only(X) JITFAIL("invalid instruction in 32-bit mode")
#define _m64(X) ((void)0)
-#define CALLsr(R) CALLLsr(R)
-#define JMPsr(R) JMPLsr(R)
-
#define _AH 0x24
#define _CH 0x25
#define _DH 0x26
#define _BH 0x27
+#define CALLsr(R) CALLLsr(R)
+#define JMPsr(R) JMPLsr(R)
+
#define DECWr(RD) (_d16(), _Or (0x48,_r2(RD) ))
#define DECLr(RD) _Or (0x48,_r4(RD) )
#define INCWr(RD) (_d16(), _Or (0x40,_r2(RD) ))
diff --git a/lightning/i386/asm-64.h b/lightning/i386/asm-64.h
index 2280c83..9f5431c 100644
--- a/lightning/i386/asm-64.h
+++ b/lightning/i386/asm-64.h
@@ -127,6 +127,8 @@
#define _R15 0x4F
#define _RIP -2
+#define _r1(R) ( ((unsigned) _rC((R) - 16)) < (0x30 - 16) ? _rN(R) : JITFAIL( "8-bit register required"))
+
#if 0
#define _r8(R) ( (_rC(R) == 0x50) ? _rN(R) : JITFAIL("64-bit register required"))
#else
@@ -335,6 +337,40 @@
#define BSWAPQr(R) (_REXQrr(0, R), _OOr (0x0fc8,_r8(R) ))
+
+
+#define __SSEQrr(OP,RS,RSA,RD,RDA) (_REXQrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __SSEQmr(OP,MD,MB,MI,MS,RD,RDA) (_REXQmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __SSEQrm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+#define __SSEQ1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXQrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+
+#define _SSEQrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSEQrr(OP, RS, RSA, RD, RDA))
+#define _SSEQmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSEQmr(OP, MD, MB, MI, MS, RD, RDA))
+#define _SSEQrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQrm(OP, RS, RSA, MD, MB, MI, MS))
+#define _SSEQ1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEQ1rm(OP, RS, RSA, MD, MB, MI, MS))
+
+#define CVTSS2SIQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r8)
+#define CVTSS2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8)
+#define CVTSD2SIQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r8)
+#define CVTSD2SIQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r8)
+
+#define CVTSI2SSQrr(RS, RD) _SSEQrr(0xf3, X86_SSE_CVTIS, RS,_r8, RD,_rX)
+#define CVTSI2SSQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SDQrr(RS, RD) _SSEQrr(0xf2, X86_SSE_CVTIS, RS,_r8, RD,_rX)
+#define CVTSI2SDQmr(MD, MB, MI, MS, RD) _SSEQmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+
+#define MOVDQXrr(RS, RD) _SSEQrr(0x66, 0x6e, RS,_r8, RD,_rX)
+#define MOVDQXmr(MD, MB, MI, MS, RD) _SSEQmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
+
+#define MOVDXQrr(RS, RD) _SSEQrr(0x66, 0x7e, RS,_rX, RD,_r8)
+#define MOVDXQrm(RS, MD, MB, MI, MS) _SSEQrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
+#define MOVDQMrr(RS, RD) __SSEQrr( 0x6e, RS,_r8, RD,_rM)
+#define MOVDQMmr(MD, MB, MI, MS, RD) __SSEQmr( 0x6e, MD, MB, MI, MS, RD,_rM)
+#define MOVDMQrr(RS, RD) __SSEQrr( 0x7e, RS,_rM, RD,_r8)
+#define MOVDMQrm(RS, MD, MB, MI, MS) __SSEQrm( 0x7e, RS,_rM, MD, MB, MI, MS)
+
+
+
#define CALLsr(R) CALLQsr(R)
#define JMPsr(R) JMPQsr(R)
diff --git a/lightning/i386/asm-i386.h b/lightning/i386/asm-i386.h
index ffe870e..94d944c 100644
--- a/lightning/i386/asm-i386.h
+++ b/lightning/i386/asm-i386.h
@@ -129,7 +129,6 @@ typedef _uc jit_insn;
#define _rM(R) _rN(R)
#define _rX(R) _rN(R)
#else
-#define _r1(R) ( ((unsigned) _rC((R) - 16)) < (0x30 - 16) ? _rN(R) : JITFAIL( "8-bit register required"))
#define _r2(R) ( (_rC(R) == 0x30) ? _rN(R) : JITFAIL("16-bit register required"))
#define _r4(R) ( (_rC(R) == 0x40) ? _rN(R) : JITFAIL("32-bit register required"))
#define _rM(R) ( (_rC(R) == 0x60) ? _rN(R) : JITFAIL("MMX register required"))
@@ -314,7 +313,7 @@ enum {
#define _ALUWrr(OP, RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (((OP) << 3) + 1,_b11,_r2(RS),_r2(RD) ))
#define _ALUWmr(OP, MD, MB, MI, MS, RD) (_d16(), _REXLmr(MB, MI, RD), _O_r_X (((OP) << 3) + 3 ,_r2(RD) ,MD,MB,MI,MS ))
#define _ALUWrm(OP, RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (((OP) << 3) + 1 ,_r2(RS) ,MD,MB,MI,MS ))
-#define _ALUWir(OP, IM, RD) (!_s8P(IM) && (RD) == _AX ? \
+#define _ALUWir(OP, IM, RD) ((RD) == _AX ? \
 (_d16(), _REXLrr(0, RD), _O_W (((OP) << 3) + 5 ,_su16(IM))) : \
 (_d16(), _REXLrr(0, RD), _Os_Mrm_sW (0x81 ,_b11,OP ,_r2(RD) ,_su16(IM))) )
#define _ALUWim(OP, IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _Os_r_X_sW (0x81 ,OP ,MD,MB,MI,MS ,_su16(IM)))
@@ -1084,7 +1083,7 @@ enum {
#define TESTWrr(RS, RD) (_d16(), _REXLrr(RS, RD), _O_Mrm (0x85 ,_b11,_r2(RS),_r2(RD) ))
#define TESTWrm(RS, MD, MB, MI, MS) (_d16(), _REXLrm(RS, MB, MI), _O_r_X (0x85 ,_r2(RS) ,MD,MB,MI,MS ))
-#define TESTWir(IM, RD) (!_s8P(IM) && (RD) == _AX ? \
+#define TESTWir(IM, RD) ((RD) == _AX ? \
 (_d16(), _REXLrr(0, RD), _O_W (0xa9 ,_u16(IM))) : \
 (_d16(), _REXLrr(0, RD), _O_Mrm_W (0xf7 ,_b11,_b000 ,_r2(RD) ,_u16(IM))) )
#define TESTWim(IM, MD, MB, MI, MS) (_d16(), _REXLrm(0, MB, MI), _O_r_X_W (0xf7 ,_b000 ,MD,MB,MI,MS ,_u16(IM)))
@@ -1302,6 +1301,280 @@ enum {
JITFAIL(".align argument too large")))
+/* --- Media 128-bit instructions ------------------------------------------ */
+
+enum {
+ X86_SSE_MOV = 0x10,
+ X86_SSE_MOVLP = 0x12,
+ X86_SSE_MOVHP = 0x16,
+ X86_SSE_MOVA = 0x28,
+ X86_SSE_CVTIS = 0x2a,
+ X86_SSE_CVTSI = 0x2d,
+ X86_SSE_UCOMI = 0x2e,
+ X86_SSE_COMI = 0x2f,
+ X86_SSE_SQRT = 0x51,
+ X86_SSE_RSQRT = 0x52,
+ X86_SSE_RCP = 0x53,
+ X86_SSE_AND = 0x54,
+ X86_SSE_ANDN = 0x55,
+ X86_SSE_OR = 0x56,
+ X86_SSE_XOR = 0x57,
+ X86_SSE_ADD = 0x58,
+ X86_SSE_MUL = 0x59,
+ X86_SSE_CVTSD = 0x5a,
+ X86_SSE_CVTDT = 0x5b,
+ X86_SSE_SUB = 0x5c,
+ X86_SSE_MIN = 0x5d,
+ X86_SSE_DIV = 0x5e,
+ X86_SSE_MAX = 0x5f,
+ X86_SSE_MOV2 = 0xd6
+};
+
+/* _format Opcd ,Mod ,r ,m ,mem=dsp+sib ,imm... */
+
+#define __SSELrr(OP,RS,RSA,RD,RDA) (_REXLrr(RD, RS), _OO_Mrm (0x0f00|(OP) ,_b11,RDA(RD),RSA(RS) ))
+#define __SSELmr(OP,MD,MB,MI,MS,RD,RDA) (_REXLmr(MB, MI, RD), _OO_r_X (0x0f00|(OP) ,RDA(RD) ,MD,MB,MI,MS ))
+#define __SSELrm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f00|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+#define __SSEL1rm(OP,RS,RSA,MD,MB,MI,MS) (_REXLrm(RS, MB, MI), _OO_r_X (0x0f01|(OP) ,RSA(RS) ,MD,MB,MI,MS ))
+
+#define _SSELrr(PX,OP,RS,RSA,RD,RDA) (_jit_B(PX), __SSELrr(OP, RS, RSA, RD, RDA))
+#define _SSELmr(PX,OP,MD,MB,MI,MS,RD,RDA) (_jit_B(PX), __SSELmr(OP, MD, MB, MI, MS, RD, RDA))
+#define _SSELrm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSELrm(OP, RS, RSA, MD, MB, MI, MS))
+#define _SSEL1rm(PX,OP,RS,RSA,MD,MB,MI,MS) (_jit_B(PX), __SSEL1rm(OP, RS, RSA, MD, MB, MI, MS))
+
+#define _SSEPSrr(OP,RS,RD) __SSELrr ( OP, RS,_rX, RD,_rX)
+#define _SSEPSmr(OP,MD,MB,MI,MS,RD) __SSELmr ( OP, MD, MB, MI, MS, RD,_rX)
+#define _SSEPSrm(OP,RS,MD,MB,MI,MS) __SSELrm ( OP, RS,_rX, MD, MB, MI, MS)
+#define _SSEPS1rm(OP,RS,MD,MB,MI,MS) __SSEL1rm( OP, RS,_rX, MD, MB, MI, MS)
+
+#define _SSEPDrr(OP,RS,RD) _SSELrr (0x66, OP, RS,_rX, RD,_rX)
+#define _SSEPDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0x66, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSEPDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0x66, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSEPD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0x66, OP, RS,_rX, MD, MB, MI, MS)
+
+#define _SSESSrr(OP,RS,RD) _SSELrr (0xf3, OP, RS,_rX, RD,_rX)
+#define _SSESSmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf3, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSESSrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf3, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSESS1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf3, OP, RS,_rX, MD, MB, MI, MS)
+
+#define _SSESDrr(OP,RS,RD) _SSELrr (0xf2, OP, RS,_rX, RD,_rX)
+#define _SSESDmr(OP,MD,MB,MI,MS,RD) _SSELmr (0xf2, OP, MD, MB, MI, MS, RD,_rX)
+#define _SSESDrm(OP,RS,MD,MB,MI,MS) _SSELrm (0xf2, OP, RS,_rX, MD, MB, MI, MS)
+#define _SSESD1rm(OP,RS,MD,MB,MI,MS) _SSEL1rm(0xf2, OP, RS,_rX, MD, MB, MI, MS)
+
+#define ADDPSrr(RS, RD) _SSEPSrr(X86_SSE_ADD, RS, RD)
+#define ADDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+#define ADDPDrr(RS, RD) _SSEPDrr(X86_SSE_ADD, RS, RD)
+#define ADDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+
+#define ADDSSrr(RS, RD) _SSESSrr(X86_SSE_ADD, RS, RD)
+#define ADDSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+#define ADDSDrr(RS, RD) _SSESDrr(X86_SSE_ADD, RS, RD)
+#define ADDSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_ADD, MD, MB, MI, MS, RD)
+
+#define ANDNPSrr(RS, RD) _SSEPSrr(X86_SSE_ANDN, RS, RD)
+#define ANDNPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
+#define ANDNPDrr(RS, RD) _SSEPDrr(X86_SSE_ANDN, RS, RD)
+#define ANDNPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_ANDN, MD, MB, MI, MS, RD)
+
+#define ANDNSSrr ANDNPSrr
+#define ANDNSSmr ANDNPSrr
+#define ANDNSDrr ANDNPDrr
+#define ANDNSDmr ANDNPDrr
+
+#define ANDPSrr(RS, RD) _SSEPSrr(X86_SSE_AND, RS, RD)
+#define ANDPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_AND, MD, MB, MI, MS, RD)
+#define ANDPDrr(RS, RD) _SSEPDrr(X86_SSE_AND, RS, RD)
+#define ANDPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_AND, MD, MB, MI, MS, RD)
+
+#define ANDSSrr ANDPSrr
+#define ANDSSmr ANDPSrr
+#define ANDSDrr ANDPDrr
+#define ANDSDmr ANDPDrr
+
+#define DIVPSrr(RS, RD) _SSEPSrr(X86_SSE_DIV, RS, RD)
+#define DIVPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+#define DIVPDrr(RS, RD) _SSEPDrr(X86_SSE_DIV, RS, RD)
+#define DIVPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+
+#define DIVSSrr(RS, RD) _SSESSrr(X86_SSE_DIV, RS, RD)
+#define DIVSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+#define DIVSDrr(RS, RD) _SSESDrr(X86_SSE_DIV, RS, RD)
+#define DIVSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_DIV, MD, MB, MI, MS, RD)
+
+#define MAXPSrr(RS, RD) _SSEPSrr(X86_SSE_MAX, RS, RD)
+#define MAXPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+#define MAXPDrr(RS, RD) _SSEPDrr(X86_SSE_MAX, RS, RD)
+#define MAXPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+
+#define MAXSSrr(RS, RD) _SSESSrr(X86_SSE_MAX, RS, RD)
+#define MAXSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+#define MAXSDrr(RS, RD) _SSESDrr(X86_SSE_MAX, RS, RD)
+#define MAXSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MAX, MD, MB, MI, MS, RD)
+
+#define MINPSrr(RS, RD) _SSEPSrr(X86_SSE_MIN, RS, RD)
+#define MINPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+#define MINPDrr(RS, RD) _SSEPDrr(X86_SSE_MIN, RS, RD)
+#define MINPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+
+#define MINSSrr(RS, RD) _SSESSrr(X86_SSE_MIN, RS, RD)
+#define MINSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+#define MINSDrr(RS, RD) _SSESDrr(X86_SSE_MIN, RS, RD)
+#define MINSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MIN, MD, MB, MI, MS, RD)
+
+#define MULPSrr(RS, RD) _SSEPSrr(X86_SSE_MUL, RS, RD)
+#define MULPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+#define MULPDrr(RS, RD) _SSEPDrr(X86_SSE_MUL, RS, RD)
+#define MULPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+
+#define MULSSrr(RS, RD) _SSESSrr(X86_SSE_MUL, RS, RD)
+#define MULSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+#define MULSDrr(RS, RD) _SSESDrr(X86_SSE_MUL, RS, RD)
+#define MULSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_MUL, MD, MB, MI, MS, RD)
+
+#define ORPSrr(RS, RD) _SSEPSrr(X86_SSE_OR, RS, RD)
+#define ORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_OR, MD, MB, MI, MS, RD)
+#define ORPDrr(RS, RD) _SSEPDrr(X86_SSE_OR, RS, RD)
+#define ORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_OR, MD, MB, MI, MS, RD)
+
+#define ORSSrr ORPSrr
+#define ORSSmr ORPSrr
+#define ORSDrr ORPDrr
+#define ORSDmr ORPDrr
+
+#define RCPPSrr(RS, RD) _SSEPSrr(X86_SSE_RCP, RS, RD)
+#define RCPPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
+#define RCPSSrr(RS, RD) _SSESSrr(X86_SSE_RCP, RS, RD)
+#define RCPSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RCP, MD, MB, MI, MS, RD)
+
+#define RSQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_RSQRT, RS, RD)
+#define RSQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
+#define RSQRTSSrr(RS, RD) _SSESSrr(X86_SSE_RSQRT, RS, RD)
+#define RSQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_RSQRT, MD, MB, MI, MS, RD)
+
+#define SQRTPSrr(RS, RD) _SSEPSrr(X86_SSE_SQRT, RS, RD)
+#define SQRTPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+#define SQRTPDrr(RS, RD) _SSEPDrr(X86_SSE_SQRT, RS, RD)
+#define SQRTPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+
+#define SQRTSSrr(RS, RD) _SSESSrr(X86_SSE_SQRT, RS, RD)
+#define SQRTSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+#define SQRTSDrr(RS, RD) _SSESDrr(X86_SSE_SQRT, RS, RD)
+#define SQRTSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SQRT, MD, MB, MI, MS, RD)
+
+#define SUBPSrr(RS, RD) _SSEPSrr(X86_SSE_SUB, RS, RD)
+#define SUBPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+#define SUBPDrr(RS, RD) _SSEPDrr(X86_SSE_SUB, RS, RD)
+#define SUBPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+
+#define SUBSSrr(RS, RD) _SSESSrr(X86_SSE_SUB, RS, RD)
+#define SUBSSmr(MD, MB, MI, MS, RD) _SSESSmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+#define SUBSDrr(RS, RD) _SSESDrr(X86_SSE_SUB, RS, RD)
+#define SUBSDmr(MD, MB, MI, MS, RD) _SSESDmr(X86_SSE_SUB, MD, MB, MI, MS, RD)
+
+#define XORPSrr(RS, RD) _SSEPSrr(X86_SSE_XOR, RS, RD)
+#define XORPSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
+#define XORPDrr(RS, RD) _SSEPDrr(X86_SSE_XOR, RS, RD)
+#define XORPDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_XOR, MD, MB, MI, MS, RD)
+
+#define XORSSrr XORPSrr
+#define XORSSmr XORPSrr
+#define XORSDrr XORPDrr
+#define XORSDmr XORPDrr
+
+/* No prefixes here. */
+#define COMISSrr(RS, RD) _SSEPSrr(X86_SSE_COMI, RS, RD)
+#define COMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
+#define COMISDrr(RS, RD) _SSEPDrr(X86_SSE_COMI, RS, RD)
+#define COMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_COMI, MD, MB, MI, MS, RD)
+
+/* No prefixes here. */
+#define UCOMISSrr(RS, RD) _SSEPSrr(X86_SSE_UCOMI, RS, RD)
+#define UCOMISSmr(MD, MB, MI, MS, RD) _SSEPSmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
+#define UCOMISDrr(RS, RD) _SSEPDrr(X86_SSE_UCOMI, RS, RD)
+#define UCOMISDmr(MD, MB, MI, MS, RD) _SSEPDmr(X86_SSE_UCOMI, MD, MB, MI, MS, RD)
+
+#define MOVSSrr(RS, RD) _SSESSrr (X86_SSE_MOV, RS, RD)
+#define MOVSSmr(MD, MB, MI, MS, RD) _SSESSmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
+#define MOVSSrm(RS, MD, MB, MI, MS) _SSESS1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
+
+#define MOVSDrr(RS, RD) _SSESDrr (X86_SSE_MOV, RS, RD)
+#define MOVSDmr(MD, MB, MI, MS, RD) _SSESDmr (X86_SSE_MOV, MD, MB, MI, MS, RD)
+#define MOVSDrm(RS, MD, MB, MI, MS) _SSESD1rm(X86_SSE_MOV, RS, MD, MB, MI, MS)
+
+#define MOVAPSrr(RS, RD) _SSEPSrr (X86_SSE_MOVA, RS, RD)
+#define MOVAPSmr(MD, MB, MI, MS, RD) _SSEPSmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
+#define MOVAPSrm(RS, MD, MB, MI, MS) _SSEPS1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
+
+#define MOVAPDrr(RS, RD) _SSEPDrr (X86_SSE_MOVA, RS, RD)
+#define MOVAPDmr(MD, MB, MI, MS, RD) _SSEPDmr (X86_SSE_MOVA, MD, MB, MI, MS, RD)
+#define MOVAPDrm(RS, MD, MB, MI, MS) _SSEPD1rm(X86_SSE_MOVA, RS, MD, MB, MI, MS)
+
+#define CVTPS2PIrr(RS, RD) __SSELrr( X86_SSE_CVTSI, RS,_rX, RD,_rM)
+#define CVTPS2PImr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM)
+#define CVTPD2PIrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSI, RS,_rX, RD,_rM)
+#define CVTPD2PImr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_rM)
+
+#define CVTPI2PSrr(RS, RD) __SSELrr( X86_SSE_CVTIS, RS,_rM, RD,_rX)
+#define CVTPI2PSmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+#define CVTPI2PDrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTIS, RS,_rM, RD,_rX)
+#define CVTPI2PDmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+
+#define CVTPS2PDrr(RS, RD) __SSELrr( X86_SSE_CVTSD, RS,_rX, RD,_rX)
+#define CVTPS2PDmr(MD, MB, MI, MS, RD) __SSELmr( X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
+#define CVTPD2PSrr(RS, RD) _SSELrr(0x66, X86_SSE_CVTSD, RS,_rX, RD,_rX)
+#define CVTPD2PSmr(MD, MB, MI, MS, RD) _SSELmr(0x66, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
+
+#define CVTSS2SDrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSD, RS,_rX, RD,_rX)
+#define CVTSS2SDmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
+#define CVTSD2SSrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSD, RS,_rX, RD,_rX)
+#define CVTSD2SSmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSD, MD, MB, MI, MS, RD,_rX)
+
+#define CVTSS2SILrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTSI, RS,_rX, RD,_r4)
+#define CVTSS2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4)
+#define CVTSD2SILrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTSI, RS,_rX, RD,_r4)
+#define CVTSD2SILmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTSI, MD, MB, MI, MS, RD,_r4)
+
+#define CVTSI2SSLrr(RS, RD) _SSELrr(0xf3, X86_SSE_CVTIS, RS,_r4, RD,_rX)
+#define CVTSI2SSLmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+#define CVTSI2SDLrr(RS, RD) _SSELrr(0xf2, X86_SSE_CVTIS, RS,_r4, RD,_rX)
+#define CVTSI2SDLmr(MD, MB, MI, MS, RD) _SSELmr(0xf2, X86_SSE_CVTIS, MD, MB, MI, MS, RD,_rX)
+
+#define MOVDLXrr(RS, RD) _SSELrr(0x66, 0x6e, RS,_r4, RD,_rX)
+#define MOVDLXmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6e, MD, MB, MI, MS, RD,_rX)
+
+#define MOVDXLrr(RS, RD) _SSELrr(0x66, 0x7e, RS,_rX, RD,_r4)
+#define MOVDXLrm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7e, RS,_rX, MD, MB, MI, MS)
+
+#define MOVDLMrr(RS, RD) __SSELrr( 0x6e, RS,_r4, RD,_rM)
+#define MOVDLMmr(MD, MB, MI, MS, RD) __SSELmr( 0x6e, MD, MB, MI, MS, RD,_rM)
+
+#define MOVDMLrr(RS, RD) __SSELrr( 0x7e, RS,_rM, RD,_r4)
+#define MOVDMLrm(RS, MD, MB, MI, MS) __SSELrm( 0x7e, RS,_rM, MD, MB, MI, MS)
+
+#define MOVDQ2Qrr(RS, RD) _SSELrr(0xf2, X86_SSE_MOV2, RS,_rX, RD,_rM)
+#define MOVQ2DQrr(RS, RD) _SSELrr(0xf3, X86_SSE_MOV2, RS,_rM, RD,_rX)
+#define MOVHLPSrr(RS, RD) __SSELrr( X86_SSE_MOVLP, RS,_rX, RD,_rX)
+#define MOVLHPSrr(RS, RD) __SSELrr( X86_SSE_MOVHP, RS,_rX, RD,_rX)
+
+#define MOVDQArr(RS, RD) _SSELrr(0x66, 0x6f, RS,_rX, RD,_rX)
+#define MOVDQAmr(MD, MB, MI, MS, RD) _SSELmr(0x66, 0x6f, MD, MB, MI, MS, RD,_rX)
+#define MOVDQArm(RS, MD, MB, MI, MS) _SSELrm(0x66, 0x7f, RS,_rX, MD, MB, MI, MS)
+
+#define MOVDQUrr(RS, RD) _SSELrr(0xf3, 0x6f, RS,_rX, RD,_rX)
+#define MOVDQUmr(MD, MB, MI, MS, RD) _SSELmr(0xf3, 0x6f, MD, MB, MI, MS, RD,_rX)
+#define MOVDQUrm(RS, MD, MB, MI, MS) _SSELrm(0xf3, 0x7f, RS,_rX, MD, MB, MI, MS)
+
+#define MOVHPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX)
+#define MOVHPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS)
+#define MOVHPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVHP, MD, MB, MI, MS, RD,_rX)
+#define MOVHPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVHP, RS,_rX, MD, MB, MI, MS)
+
+#define MOVLPDmr(MD, MB, MI, MS, RD) _SSELmr (0x66, X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX)
+#define MOVLPDrm(RS, MD, MB, MI, MS) _SSEL1rm(0x66, X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS)
+#define MOVLPSmr(MD, MB, MI, MS, RD) __SSELmr ( X86_SSE_MOVLP, MD, MB, MI, MS, RD,_rX)
+#define MOVLPSrm(RS, MD, MB, MI, MS) __SSEL1rm( X86_SSE_MOVLP, RS,_rX, MD, MB, MI, MS)
+
/*** References: */
/* */
/* [1] "Intel Architecture Software Developer's Manual Volume 1: Basic Architecture", */
diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h
index e095091..922cd26 100644
--- a/lightning/i386/core-64.h
+++ b/lightning/i386/core-64.h
@@ -43,6 +43,8 @@
struct jit_local_state {
int long_jumps;
+ int nextarg_getfp;
+ int nextarg_putfp;
int nextarg_geti;
int argssize;
int alloca_offset;
@@ -63,13 +65,19 @@ struct jit_local_state {
jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
/* 3-parameter operation */
-#define jit_qopr_(d, s1, s2, op1d, op2d) \
- ( (s2 == d) ? op1d : \
- ( ((s1 == d) ? (void)0 : (void)MOVQrr(s1, d)), op2d ) \
+#define jit_qopr_(d, s1, s2, op1d, op2d) \
+ ( ((s2) == (d)) ? op1d : \
+ ( (((s1) == (d)) ? (void)0 : (void)MOVQrr((s1), (d))), op2d ) \
 )
-/* 3-parameter operation, with immediate */
-#define jit_qop_(d, s1, op2d) \
+/* 3-parameter operation, with immediate. TODO: fix the case where immediate
+ does not fit! */
+#define jit_qop_small(d, s1, op2d) \
+ (((s1) == (d)) ? op2d : (MOVQrr((s1), (d)), op2d))
+#define jit_qop_(d, s1, is, op2d, op2i) \
+ (_s32P((long)(is)) \
+ ? jit_qop_small ((d), (s1), (op2d)) \
+ : (MOVQrr ((is), JIT_REXTMP), jit_qop_small ((d), (s1), (op2i))))
#define jit_bra_qr(s1, s2, op) (CMPQrr(s2, s1), op, _jit.x.pc)
#define _jit_bra_l(rs, is, op) (CMPQir(is, rs), op, _jit.x.pc)
@@ -88,21 +96,19 @@ struct jit_local_state {
#define jit_addi_l(d, rs, is) jit_opi_((d), (rs), ADDQir((is), (d)), LEAQmr((is), (rs), 0, 0, (d)) )
#define jit_addr_l(d, s1, s2) jit_opo_((d), (s1), (s2), ADDQrr((s2), (d)), ADDQrr((s1), (d)), LEAQmr(0, (s1), (s2), 1, (d)) )
-#define jit_andi_l(d, rs, is) jit_qop_ ((d), (rs), ANDQir((is), (d)) )
+#define jit_andi_l(d, rs, is) jit_qop_ ((d), (rs), (is), ANDQir((is), (d)), ANDQrr(JIT_REXTMP, (d)))
#define jit_andr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ANDQrr((s1), (d)), ANDQrr((s2), (d)) )
#define jit_orr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), ORQrr((s1), (d)), ORQrr((s2), (d)) )
#define jit_subr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), (SUBQrr((s1), (d)), NEGQr(d)), SUBQrr((s2), (d)) )
#define jit_xorr_l(d, s1, s2) jit_qopr_((d), (s1), (s2), XORQrr((s1), (d)), XORQrr((s2), (d)) )
/* These can sometimes use byte or word versions! */
-#define jit_ori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(OR, (is), (d)) )
-#define jit_xori_i(d, rs, is) jit_op_ ((d), (rs), jit_reduce(XOR, (is), (d)) )
-#define jit_ori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(OR, (is), (d)) )
-#define jit_xori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(XOR, (is), (d)) )
-
-#define jit_lshi_l(d, rs, is) ((is) <= 3 ? LEAQmr(0, 0, (rs), 1 << (is), (d)) : jit_qop_ ((d), (rs), SHLQir((is), (d)) ))
-#define jit_rshi_l(d, rs, is) jit_qop_ ((d), (rs), SARQir((is), (d)) )
-#define jit_rshi_ul(d, rs, is) jit_qop_ ((d), (rs), SHRQir((is), (d)) )
+#define jit_ori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(OR, (is), (d)), ORQrr(JIT_REXTMP, (d)) )
+#define jit_xori_l(d, rs, is) jit_qop_ ((d), (rs), jit_reduceQ(XOR, (is), (d)), ORQrr(JIT_REXTMP, (d)) )
+
+#define jit_lshi_l(d, rs, is) ((is) <= 3 ? LEAQmr(0, 0, (rs), 1 << (is), (d)) : jit_qop_small ((d), (rs), SHLQir((is), (d)) ))
+#define jit_rshi_l(d, rs, is) jit_qop_small ((d), (rs), SARQir((is), (d)) )
+#define jit_rshi_ul(d, rs, is) jit_qop_small ((d), (rs), SHRQir((is), (d)) )
#define jit_lshr_l(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHLQrr(_CL, (d)) ))
#define jit_rshr_l(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SARQrr(_CL, (d)) ))
#define jit_rshr_ul(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_qop_ ((d), (r1), SHRQrr(_CL, (d)) ))
@@ -112,7 +118,7 @@ struct jit_local_state {
#define jit_popr_i(rs) POPQr(rs)
#define jit_base_prolog() (PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13), PUSHQr(_EBP), MOVQrr(_ESP, _EBP))
-#define jit_prolog(n) (_jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, jit_base_prolog())
+#define jit_prolog(n) (_jitl.nextarg_getfp = _jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, jit_base_prolog())
/* Stack isn't used for arguments: */
#define jit_prepare_i(ni) (_jitl.argssize = 0)
@@ -181,17 +187,19 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX };
#define jit_ret() ((_jitl.alloca_offset < 0 ? LEAVE_() : POPQr(_EBP)), POPQr(_R13), POPQr(_R12), POPQr(_EBX), RET_())
#define _jit_ldi_l(d, is) MOVQmr((is), 0, 0, 0, (d))
+#define _jit_ldxi_l(d, rs, is) MOVQmr((is), (rs), 0, 0, (d))
#define jit_ldr_l(d, rs) MOVQmr(0, (rs), 0, 0, (d))
#define jit_ldxr_l(d, s1, s2) MOVQmr(0, (s1), (s2), 1, (d))
-#define jit_ldxi_l(d, rs, is) MOVQmr((is), (rs), 0, 0, (d))
#define _jit_sti_l(id, rs) MOVQrm((rs), (id), 0, 0, 0)
+#define _jit_stxi_l(id, rd, rs) MOVQrm((rs), (id), (rd), 0, 0)
#define jit_str_l(rd, rs) MOVQrm((rs), 0, (rd), 0, 0)
#define jit_stxr_l(d1, d2, rs) MOVQrm((rs), 0, (d1), (d2), 1)
-#define jit_stxi_l(id, rd, rs) MOVQrm((rs), (id), (rd), 0, 0)
-#define jit_ldi_l(d, is) (_u32P((long)(is)) ? _jit_ldi_l((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_l(JIT_REXTMP)))
-#define jit_sti_l(id, rs) (_u32P((long)(id)) ? _jit_sti_l(id, rs) : (jit_movi_l(JIT_REXTMP, id), jit_str_l (JIT_REXTMP, (rs))))
+#define jit_ldi_l(d, is) (_u32P((long)(is)) ? _jit_ldi_l((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_l((d), JIT_REXTMP)))
+#define jit_sti_l(id, rs) (_u32P((long)(id)) ? _jit_sti_l((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_l (JIT_REXTMP, (rs))))
+#define jit_ldxi_l(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_l((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_l((d), (rs), JIT_REXTMP)))
+#define jit_stxi_l(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_l((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_l (JIT_REXTMP, (rd), (rs))))
/* Memory */
diff --git a/lightning/i386/core-i386.h b/lightning/i386/core-i386.h
index 2a9a8c2..24d12b5 100644
--- a/lightning/i386/core-i386.h
+++ b/lightning/i386/core-i386.h
@@ -244,13 +244,8 @@
#define jit_rshr_ui(d, r1, r2) jit_replace((r1), (r2), _ECX, jit_op_ ((d), (r1), SHRLrr(_CL, (d)) ))
/* Stack */
-#define jit_prepare_f(nf) (_jitl.argssize += (nf))
-#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd))
#define jit_retval_i(rd) ((void)jit_movr_i ((rd), _EAX))
-#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float))
-#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double))
-
/* Unary */
#define jit_negr_i(d, rs) jit_opi_((d), (rs), NEGLr(d), (XORLrr((d), (d)), SUBLrr((rs), (d))) )
diff --git a/lightning/i386/fp-32.h b/lightning/i386/fp-32.h
index 31a1d3d..1ee56db 100644
--- a/lightning/i386/fp-32.h
+++ b/lightning/i386/fp-32.h
@@ -346,4 +346,9 @@ union jit_double_imm {
_OO(0xd9f1)) /* fyl2x */
#endif
+#define jit_prepare_f(nf) (_jitl.argssize += (nf))
+#define jit_prepare_d(nd) (_jitl.argssize += 2 * (nd))
+#define jit_arg_f() ((_jitl.framesize += sizeof(float)) - sizeof(float))
+#define jit_arg_d() ((_jitl.framesize += sizeof(double)) - sizeof(double))
+
#endif /* __lightning_asm_h */
diff --git a/lightning/i386/fp-64.h b/lightning/i386/fp-64.h
index 19e73dc..74cdfec 100644
--- a/lightning/i386/fp-64.h
+++ b/lightning/i386/fp-64.h
@@ -33,6 +33,264 @@
#ifndef __lightning_fp_h
#define __lightning_fp_h
-#warning SSE math not yet supported
+#include <float.h>
+
+#define JIT_FPR_NUM 9
+#define JIT_FPRET _XMM0
+#define JIT_FPR(i) (_XMM7 + (i))
+#define JIT_FPTMP _XMM6
+
+/* Either use a temporary register that is finally AND/OR/XORed with RS = RD,
+ or use RD as the temporary register and do the AND/OR/XOR with RS. */
+#define jit_unop_tmp(rd, rs, op) \
+ ( (rs) == (rd) \
+ ? op((rd), JIT_FPTMP, JIT_FPTMP) \
+ : op((rd), (rd), (rs)))
+
+#define jit_unop_f(rd, rs, op) \
+ ((rs) == (rd) ? op((rd)) : (MOVSSrr ((rs), (rd)), op((rd))))
+
+#define jit_unop_d(rd, rs, op) \
+ ((rs) == (rd) ? op((rd)) : (MOVSDrr ((rs), (rd)), op((rd))))
+
+#define jit_3opc_f(rd, s1, s2, op) \
+ ( (s1) == (rd) ? op((s2), (rd)) \
+ : ((s2) == (rd) ? op((s1), (rd)) \
+ : (MOVSSrr ((s1), (rd)), op((s2), (rd)))))
+
+#define jit_3opc_d(rd, s1, s2, op) \
+ ( (s1) == (rd) ? op((s2), (rd)) \
+ : ((s2) == (rd) ? op((s1), (rd)) \
+ : (MOVSDrr ((s1), (rd)), op((s2), (rd)))))
+
+#define jit_3op_f(rd, s1, s2, op) \
+ ( (s1) == (rd) ? op((s2), (rd)) \
+ : ((s2) == (rd) \
+ ? (MOVSSrr ((rd), JIT_FPTMP), MOVSSrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \
+ : (MOVSSrr ((s1), (rd)), op((s2), (rd)))))
+
+#define jit_3op_d(rd, s1, s2, op) \
+ ( (s1) == (rd) ? op((s2), (rd)) \
+ : ((s2) == (rd) \
+ ? (MOVSDrr ((rd), JIT_FPTMP), MOVSDrr ((s1), (rd)), op(JIT_FPTMP, (rd))) \
+ : (MOVSDrr ((s1), (rd)), op((s2), (rd)))))
+
+#define jit_addr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), ADDSSrr)
+#define jit_subr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), SUBSSrr)
+#define jit_mulr_f(rd,s1,s2) jit_3opc_f((rd), (s1), (s2), MULSSrr)
+#define jit_divr_f(rd,s1,s2) jit_3op_f((rd), (s1), (s2), DIVSSrr)
+
+#define jit_addr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), ADDSDrr)
+#define jit_subr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), SUBSDrr)
+#define jit_mulr_d(rd,s1,s2) jit_3opc_d((rd), (s1), (s2), MULSDrr)
+#define jit_divr_d(rd,s1,s2) jit_3op_d((rd), (s1), (s2), DIVSDrr)
+
+#define jit_movr_f(rd,rs) MOVSSrr((rs), (rd))
+#define jit_movr_d(rd,rs) MOVSDrr((rs), (rd))
+
+/* either pcmpeqd %xmm7, %xmm7 / psrld $1, %xmm7 / andps %xmm7, %RD (if RS = RD)
+ or pcmpeqd %RD, %RD / psrld $1, %RD / andps %RS, %RD (if RS != RD) */
+#define _jit_abs_f(rd,cnst,rs) \
+ (PCMPEQDrr((cnst), (cnst)), PSRLDir (1, (cnst)), ANDPSrr ((rs), (rd)))
+#define _jit_neg_f(rd,cnst,rs) \
+ (PCMPEQDrr((cnst), (cnst)), PSLLDir (31, (cnst)), XORPSrr ((rs), (rd)))
+#define jit_abs_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_f)
+#define jit_neg_f(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_f)
+
+#define _jit_abs_d(rd,cnst,rs) \
+ (PCMPEQDrr((cnst), (cnst)), PSRLQir (1, (cnst)), ANDPDrr ((rs), (rd)))
+#define _jit_neg_d(rd,cnst,rs) \
+ (PCMPEQDrr((cnst), (cnst)), PSLLQir (63, (cnst)), XORPDrr ((rs), (rd)))
+#define jit_abs_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_abs_d)
+#define jit_neg_d(rd,rs) jit_unop_tmp ((rd), (rs), _jit_neg_d)
+
+#define jit_sqrt_d(rd,rs) SQRTSSrr((rs), (rd))
+#define jit_sqrt_f(rd,rs) SQRTSDrr((rs), (rd))
+
+#define _jit_ldi_f(d, is) MOVSSmr((is), 0, 0, 0, (d))
+#define _jit_ldxi_f(d, rs, is) MOVSSmr((is), (rs), 0, 0, (d))
+#define jit_ldr_f(d, rs) MOVSSmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_f(d, s1, s2) MOVSSmr(0, (s1), (s2), 1, (d))
+
+#define _jit_sti_f(id, rs) MOVSSrm((rs), (id), 0, 0, 0)
+#define _jit_stxi_f(id, rd, rs) MOVSSrm((rs), (id), (rd), 0, 0)
+#define jit_str_f(rd, rs) MOVSSrm((rs), 0, (rd), 0, 0)
+#define jit_stxr_f(d1, d2, rs) MOVSSrm((rs), 0, (d1), (d2), 1)
+
+#define jit_ldi_f(d, is) (_u32P((long)(is)) ? _jit_ldi_f((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_f((d), JIT_REXTMP)))
+#define jit_sti_f(id, rs) (_u32P((long)(id)) ? _jit_sti_f((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_f (JIT_REXTMP, (rs))))
+#define jit_ldxi_f(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_f((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_f((d), (rs), JIT_REXTMP)))
+#define jit_stxi_f(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_f((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_f (JIT_REXTMP, (rd), (rs))))
+
+#define _jit_ldi_d(d, is) MOVSDmr((is), 0, 0, 0, (d))
+#define _jit_ldxi_d(d, rs, is) MOVSDmr((is), (rs), 0, 0, (d))
+#define jit_ldr_d(d, rs) MOVSDmr(0, (rs), 0, 0, (d))
+#define jit_ldxr_d(d, s1, s2) MOVSDmr(0, (s1), (s2), 1, (d))
+
+#define _jit_sti_d(id, rs) MOVSDrm((rs), (id), 0, 0, 0)
+#define _jit_stxi_d(id, rd, rs) MOVSDrm((rs), (id), (rd), 0, 0)
+#define jit_str_d(rd, rs) MOVSDrm((rs), 0, (rd), 0, 0)
+#define jit_stxr_d(d1, d2, rs) MOVSDrm((rs), 0, (d1), (d2), 1)
+
+#define jit_ldi_d(d, is) (_u32P((long)(is)) ? _jit_ldi_d((d), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldr_d((d), JIT_REXTMP)))
+#define jit_sti_d(id, rs) (_u32P((long)(id)) ? _jit_sti_d((id), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_str_d (JIT_REXTMP, (rs))))
+#define jit_ldxi_d(d, rs, is) (_u32P((long)(is)) ? _jit_ldxi_d((d), (rs), (is)) : (jit_movi_l(JIT_REXTMP, (is)), jit_ldxr_d((d), (rs), JIT_REXTMP)))
+#define jit_stxi_d(id, rd, rs) (_u32P((long)(id)) ? _jit_stxi_d((id), (rd), (rs)) : (jit_movi_l(JIT_REXTMP, (id)), jit_stxr_d (JIT_REXTMP, (rd), (rs))))
+
+
+#define jit_movi_f(rd,immf) \
+ ((immf) == 0.0 ? XORSSrr ((rd), (rd)) : \
+ (_O (0x50), \
+ MOVLim (0x12345678L, 0, _ESP, 0, 0), \
+ *((float *) (_jit.x.uc_pc - 4)) = (float) immf, \
+ jit_ldr_f((rd), _ESP), \
+ ADDLir(4, _ESP)))
+
+union jit_double_imm {
+ double d;
+ long l;
+};
+
+#define jit_movi_d(rd,immd) \
+ ((immd) == 0.0 ? XORSDrr ((rd), (rd)) : \
+ (_O (0x50), \
+ MOVQir (0x123456789abcdef0L, _EAX), \
+ ((union jit_double_imm *) (_jit.x.uc_pc - 8))->d = (double) immd, \
+ _O (0x50), jit_ldr_d((rd), _ESP), \
+ _O (0x58), _O (0x58)))
+
+#define jit_extr_i_d(rd, rs) CVTSI2SDLrr((rs), (rd))
+#define jit_extr_i_f(rd, rs) CVTSI2SSLrr((rs), (rd))
+#define jit_extr_l_d(rd, rs) CVTSI2SDQrr((rs), (rd))
+#define jit_extr_l_f(rd, rs) CVTSI2SSQrr((rs), (rd))
+#define jit_roundr_d_i(rd, rs) CVTSD2SILrr((rs), (rd))
+#define jit_roundr_f_i(rd, rs) CVTSS2SILrr((rs), (rd))
+#define jit_roundr_d_l(rd, rs) CVTSD2SIQrr((rs), (rd))
+#define jit_roundr_f_l(rd, rs) CVTSS2SIQrr((rs), (rd))
+
+
+#define jit_ceilr_f_i(rd, rs) do { \
+ jit_roundr_f_i ((rd), (rs)); \
+ jit_extr_i_f (JIT_FPTMP, (rd)); \
+ UCOMISSrr ((rs), JIT_FPTMP); \
+ ADCLir (0, (rd)); \
+ } while (0)
+
+#define jit_ceilr_d_i(rd, rs) do { \
+ jit_roundr_d_i ((rd), (rs)); \
+ jit_extr_i_d (JIT_FPTMP, (rd)); \
+ UCOMISDrr ((rs), JIT_FPTMP); \
+ ADCLir (0, (rd)); \
+ } while (0)
+
+#define jit_truncr_f_i(rd, rs) do { \
+ jit_roundr_f_i ((rd), (rs)); \
+ jit_extr_i_f (JIT_FPTMP, (rd)); \
+ TESTLrr ((rd), (rd)); \
+ JSm (_jit.x.pc + 9); \
+ UCOMISSrr (JIT_FPTMP, (rs)); \
+ SBBLir (0, (rd)); \
+ JMPSm (_jit.x.pc + 7); \
+ UCOMISSrr ((rs), JIT_FPTMP); \
+ ADCLir (0, (rd)); \
+ } while (0)
+
+#define jit_truncr_d_i(rd, rs) do { \
+ jit_roundr_d_i ((rd), (rs)); \
+ jit_extr_i_d (JIT_FPTMP, (rd)); \
+ TESTLrr ((rd), (rd)); \
+ JSm (_jit.x.pc + 9); \
+ UCOMISDrr (JIT_FPTMP, (rs)); \
+ SBBLir (0, (rd)); \
+ JMPSm (_jit.x.pc + 7); \
+ UCOMISDrr ((rs), JIT_FPTMP); \
+ ADCLir (0, (rd)); \
+ } while (0)
+
+#define jit_floorr_f_i(rd, rs) do { \
+ jit_roundr_f_i ((rd), (rs)); \
+ jit_extr_i_f (JIT_FPTMP, (rd)); \
+ UCOMISSrr (JIT_FPTMP, (rs)); \
+ SBBLir (0, (rd)); \
+ } while (0)
+
+#define jit_floorr_d_i(rd, rs) do { \
+ jit_roundr_d_i ((rd), (rs)); \
+ jit_extr_i_d (JIT_FPTMP, (rd)); \
+ UCOMISDrr (JIT_FPTMP, (rs)); \
+ SBBLir (0, (rd)); \
+ } while (0)
+
+#define jit_bltr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAm ((d)))
+#define jit_bler_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JAEm ((d)))
+#define jit_beqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a06), JEm ((d)))
+#define jit_bner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), _OO (0x7a03), _OO (0x7405), JMPm (((d)))) /* JP to JMP, JZ past JMP */
+#define jit_bger_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAEm ((d)))
+#define jit_bgtr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JAm ((d)))
+#define jit_bunltr_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAEm ((d)))
+#define jit_bunler_f(d, s1, s2) (UCOMISSrr ((s2), (s1)), JNAm ((d)))
+#define jit_buneqr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JEm ((d)))
+#define jit_bltgtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNEm ((d)))
+#define jit_bunger_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAm ((d)))
+#define jit_bungtr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNAEm ((d)))
+#define jit_bordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JNPm ((d)))
+#define jit_bunordr_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), JPm ((d)))
+
+#define jit_bltr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAm ((d)))
+#define jit_bler_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JAEm ((d)))
+#define jit_beqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a06), JEm ((d)))
+#define jit_bner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), _OO (0x7a03), _OO (0x7405), JMPm (((d)))) /* JP to JMP, JZ past JMP */
+#define jit_bger_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAEm ((d)))
+#define jit_bgtr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JAm ((d)))
+#define jit_bunltr_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAEm ((d)))
+#define jit_bunler_d(d, s1, s2) (UCOMISDrr ((s2), (s1)), JNAm ((d)))
+#define jit_buneqr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JEm ((d)))
+#define jit_bltgtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNEm ((d)))
+#define jit_bunger_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAm ((d)))
+#define jit_bungtr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNAEm ((d)))
+#define jit_bordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JNPm ((d)))
+#define jit_bunordr_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), JPm ((d)))
+
+#define jit_ltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAr (jit_reg8((d))))
+#define jit_ler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETAEr (jit_reg8((d))))
+#define jit_eqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d))))
+#define jit_ner_f(d, s1, s2) (UCOMISSrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d))))
+#define jit_ger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAEr (jit_reg8((d))))
+#define jit_gtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETAr (jit_reg8((d))))
+#define jit_unltr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAEr (jit_reg8((d))))
+#define jit_unler_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s2), (s1)), SETNAr (jit_reg8((d))))
+#define jit_uneqr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETEr (jit_reg8((d))))
+#define jit_ltgtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNEr (jit_reg8((d))))
+#define jit_unger_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAr (jit_reg8((d))))
+#define jit_ungtr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNAEr (jit_reg8((d))))
+#define jit_ordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETNPr (jit_reg8((d))))
+#define jit_unordr_f(d, s1, s2) (XORLrr ((d), (d)), UCOMISSrr ((s1), (s2)), SETPr (jit_reg8((d))))
+
+#define jit_ltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAr (jit_reg8((d))))
+#define jit_ler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETAEr (jit_reg8((d))))
+#define jit_eqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), _OO(0x7a03), SETEr (jit_reg8((d))))
+#define jit_ner_d(d, s1, s2) (UCOMISDrr ((s1), (s2)), MOVLir (1, (d)), _OO(0x7a03), SETNEr (jit_reg8((d))))
+#define jit_ger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAEr (jit_reg8((d))))
+#define jit_gtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETAr (jit_reg8((d))))
+#define jit_unltr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAEr (jit_reg8((d))))
+#define jit_unler_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s2), (s1)), SETNAr (jit_reg8((d))))
+#define jit_uneqr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETEr (jit_reg8((d))))
+#define jit_ltgtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNEr (jit_reg8((d))))
+#define jit_unger_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAr (jit_reg8((d))))
+#define jit_ungtr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNAEr (jit_reg8((d))))
+#define jit_ordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNPr (jit_reg8((d))))
+#define jit_unordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETPr (jit_reg8((d))))
+
+#define jit_prepare_f(num) (_jitl.nextarg_putfp = _XMM0 + (num))
+#define jit_prepare_d(num) (_jitl.nextarg_putfp = _XMM0 + (num))
+
+#define jit_arg_f() (_XMM0 + _jitl.nextarg_getfp++)
+#define jit_arg_d() (_XMM0 + _jitl.nextarg_getfp++)
+
+#define jit_getarg_f(rd, ofs) (jit_movr_f ((rd), (ofs)))
+#define jit_getarg_d(rd, ofs) (jit_movr_d ((rd), (ofs)))
+
+#define jit_pusharg_f(rs) (--_jitl.nextarg_putfp, jit_movr_f (_jitl.nextarg_putfp, (rs)))
+#define jit_pusharg_d(rs) (--_jitl.nextarg_putfp, jit_movr_d (_jitl.nextarg_putfp, (rs)))
#endif /* __lightning_fp_h */
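For orientation, the new x86-64 FP argument macros above pass arguments directly in XMM registers instead of on the stack. The fragment below is a hedged sketch of how client code might drive them; it is not part of the patch, and it assumes the usual lightning 1.x call macros (jit_finish, jit_movr_d) alongside the jit_prepare_d/jit_pusharg_d/JIT_FPR/JIT_FPRET names defined in the hunk above:

#include <math.h>
#include <lightning.h>

/* Illustrative sketch only: emit a call that passes one double argument
   through the XMM-based FP ABI and fetches the double result. */
static void
emit_call_sin (void)
{
  jit_prepare_d (1);                    /* one FP argument: put-slot starts past _XMM0 */
  jit_pusharg_d (JIT_FPR (0));          /* decrement the slot and move JIT_FPR(0) into _XMM0 */
  jit_finish (sin);                     /* call; the result lands in JIT_FPRET (_XMM0) */
  jit_movr_d (JIT_FPR (0), JIT_FPRET);  /* copy the returned double back out */
}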
diff --git a/lightning/sparc/fp.h b/lightning/sparc/fp.h
index 5a34e7d..a11f2eb 100644
--- a/lightning/sparc/fp.h
+++ b/lightning/sparc/fp.h
@@ -60,8 +60,8 @@
#define jit_abs_f(rd,rs) FABSDrr((rs), (rd))
#define jit_negr_f(rd,rs) FNEGDrr((rs), (rd))
#define jit_sqrt_f(rd,rs) FSQRTDrr((rs), (rd))
-#define jit_extr_f_d(rs, rd) FSTODrr((rs), (rd))
-#define jit_extr_d_f(rs, rd) FDTOSrr((rs), (rd))
+#define jit_extr_f_d(rd, rs) FSTODrr((rs), (rd))
+#define jit_extr_d_f(rd, rs) FDTOSrr((rs), (rd))
#define jit_movi_f(rd,immf) \
do { \