guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 03/437: big merge


From: Andy Wingo
Subject: [Guile-commits] 03/437: big merge
Date: Mon, 2 Jul 2018 05:13:33 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit ba5044a6684e1af0fc20fc42da8cc5de609c9692
Author: Paolo Bonzini <address@hidden>
Date:   Thu Oct 14 16:10:07 2004 +0000

    big merge
    
    git-archimport-id: address@hidden/lightning--stable--1.2--patch-1
    git-archimport-id: address@hidden/lightning--stable--1.2--patch-2
---
 .cvsignore               |   1 +
 AUTHORS                  |   3 +-
 ChangeLog                | 214 +++++++++++++++++++++++++++++
 NEWS                     |  11 +-
 THANKS                   |   8 ++
 config/config.guess      |   1 +
 config/config.sub        |   1 +
 config/depcomp           |   1 +
 config/mdate-sh          |   1 +
 config/missing           |   1 +
 config/texi2dvi          |   4 +-
 config/texinfo.tex       |   1 +
 doc/.cvsignore           |   3 +
 doc/Makefile.am          |   2 -
 doc/body.texi            |   8 +-
 doc/lightning.texi       |   5 -
 doc/porting.texi         | 313 ++++++++++++++++++++++++++++++++++---------
 doc/toc.texi             |   1 -
 doc/using.texi           | 301 ++++++++++++++++++++++++++++-------------
 lightning-inst.h         |   5 +-
 lightning.h.in           |   5 +-
 lightning/Makefile.am    |   2 +-
 lightning/asm-common.h   |  12 +-
 lightning/core-common.h  |  98 +++++++++++---
 lightning/fp-common.h    | 270 +++++++------------------------------
 lightning/funcs-common.h |   6 +
 lightning/i386/asm.h     |  50 +++----
 lightning/i386/core.h    |  39 +++---
 lightning/i386/fp.h      | 341 ++++++++++++++++++++++++++++++-----------------
 lightning/i386/funcs.h   |  53 +++++++-
 lightning/ppc/asm.h      |  82 +++++++++---
 lightning/ppc/core.h     | 122 ++++++++++-------
 lightning/ppc/fp.h       | 233 +++++++++++++++++++++++---------
 lightning/ppc/funcs.h    | 129 +++++++++---------
 lightning/sparc/asm.h    |  80 +++++++++++
 lightning/sparc/core.h   |  60 ++++++---
 lightning/sparc/fp.h     | 232 ++++++++++++++++----------------
 opcode/Makefile.am       |   2 +
 tests/Makefile.am        |   8 +-
 tests/{fib.c => bp.c}    |  20 ++-
 tests/bp.ok              |   1 +
 tests/fib.c              |   8 +-
 tests/fibit.c            |   2 +-
 tests/funcfp.c           | 208 ++++++++++++++---------------
 tests/funcfp.ok          |   2 +-
 tests/printf.c           |   2 +-
 tests/rpnfp.c            |  18 +--
 tests/testfp.c           | 101 +++++++-------
 tests/testfp.ok          |   3 +-
 49 files changed, 1980 insertions(+), 1094 deletions(-)

diff --git a/.cvsignore b/.cvsignore
new file mode 100644
index 0000000..d899218
--- /dev/null
+++ b/.cvsignore
@@ -0,0 +1 @@
+autom4te.cache
diff --git a/AUTHORS b/AUTHORS
index d19bf6b..bda81b4 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -1,2 +1,3 @@
 Paolo Bonzini <address@hidden>
-i386 and PPC assemblers by Ian Piumarta <address@hidden>
\ No newline at end of file
+i386 and PPC assemblers by Ian Piumarta <address@hidden>
+Major PPC contributions by Laurent Michel <address@hidden>
diff --git a/ChangeLog b/ChangeLog
index 5fbd089..4172498 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,217 @@
+2004-10-12  Paolo Bonzini  <address@hidden>
+
+       * lightning/i386/fp.h: Fix bugs in conditional branches.
+
+2004-10-10  Paolo Bonzini  <address@hidden>
+
+       * lightning/i386/funcs.h: Fix pasto in jit_flush_code.
+
+2004-10-08  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/fp.h: Optimized conditional branches.
+
+2004-09-20  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/asm.h: Fix more typos.
+
+2004-09-20  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/asm.h: Fix typos, replace `26' with JIT_AUX.
+
+2004-09-20  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/fp.h: Added conditional branches.
+
+2004-09-18  Laurent Michel  <address@hidden>
+
+       * lightning/ppc/fp.h (jit_unler_d, jit_unltr_d, jit_unger_d,
+       jit_ungtr_d, jit_ltgt_d, jit_uneq_d): Implemented missing tests
+       to fully support testfp.
+       (jit_floorr_d_i, jit_ceilr_d_i, jit_roundr_d_i, jit_truncr_d_i):
+       New macros.
+       * lightning/ppc/asm.h: Added missing opcodes FCTIWZ and MTFSFI.
+       * lightning/ppc/funcs.h (_jit_prolog): Fixed minor mistake in
+       the initialization of _jitl.nextarg_geti, relying on the
+       JIT_AUX macro as well to get the register offset.
+
+2004-09-07  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/funcs.h: Fix typo.
+
+2004-09-06  Paolo Bonzini  <address@hidden>
+
+       * tests/funcfp.c: Use %g.  Remove C99 variable declarations.
+       * tests/testfp.c: Don't use __builtin_nan.
+
+       * lightning/ppc/core.h: Add three V registers.
+       * lightning/ppc/funcs.h: Adjust.
+
+       * lightning/sparc/core.h: Some fixes related to FP argument passing.
+       Move R0 to %g2, use %o7 for JIT_BIG2.
+       * lightning/sparc/fp.h: Some fixes related to FP argument passing.
+
+2004-09-02  Paolo Bonzini  <address@hidden>
+
+       * lightning/sparc/core.h: Add another V register,
+       move R0 to %o7.
+
+2004-07-15  Paolo Bonzini  <address@hidden>
+
+       * lightning/i386/funcs.h: Implement jit_flush_code,
+       in order to support Fedora's exec-shield.
+
+2004-07-14  Paolo Bonzini  <address@hidden>
+
+       * lightning/core-common.h: Add more jit_extr_*_* macros.
+       * lightning/doc/using.texi: Be clearer about the order
+       of arguments in jit_extr_*_*.
+       * lightning/doc/porting.texi: Add more jit_extr_*_* macros.
+       * lightning/i386/fp.h: Fix typo in jit_extr_i_d.
+
+2004-07-14  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/funcs.h: Adjust offset of LR into
+       stack frame if running under the Darwin ABI.
+
+2004-07-13  Paolo Bonzini  <address@hidden>
+
+       * lightning/i386/fp.h: Rename jit_exti_d to jit_extr_i_d.
+
+2004-07-13  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/core.h: Fix thinko.
+
+       * lightning/i386/core.h: Fix jit_lti_ui.
+       * lightning/core-common.h: Add missing macros.
+
+       * lightning/ppc/fp.h: Rename jit_neg_* to jit_negr_*.
+       * lightning/i386/fp.h: Rename jit_neg_* to jit_negr_*.
+       * lightning/sparc/fp.h: Rename jit_neg_* to jit_negr_*.
+       * lightning/fp-common.h: Rename jit_neg_* to jit_negr_*.
+       * doc/porting.texi: Add undocumented macros.
+
+2004-07-12  Paolo Bonzini  <address@hidden>
+
+       * doc/porting.texi: Add missing macros.
+
+2004-07-12  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/funcs.h: Don't generate trampolines.
+       Separate prolog and epilog generation.
+       * lightning/ppc/core.h: Generate epilog explicitly.
+       Don't reserve r31 anymore.
+       * lightning/core-common.h: Remove call to jit_setup_code.
+
+2004-07-09  Paolo Bonzini  <address@hidden>
+
+       * lightning/lightning.h.in: Avoid preprocessor warnings.
+       * lightning/lightning-inst.h: Likewise.
+
+       * lightning/i386/core.h: Define JIT_R, JIT_R_NUM, JIT_V,
+       JIT_V_NUM.
+       * lightning/ppc/core.h: Likewise.
+       * lightning/sparc/core.h: Likewise.
+       * lightning/i386/fp.h: Define JIT_FPR, JIT_FPR_NUM.
+       * lightning/ppc/fp.h: Likewise.
+       * lightning/sparc/fp.h: Likewise.
+       * lightning/core-common.h: Define fixed register names.
+       * lightning/fp-common.h: Likewise for FP regs.
+
+2004-07-09  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/funcs.h: Fix location where return address
+       is stored.
+       * lightning/i386/asm.h: Add a trailing _ to opcodes without
+       any parameter.
+       * lightning/i386/core.h: Adjust for the above.
+
+2004-04-15  Paolo Bonzini  <address@hidden>
+
+       * lightning/i386/fp.h: Change "and" to "_and"
+       to satisfy C++ compilers.
+
+2004-04-14  Paolo Bonzini  <address@hidden>
+
+       * lightning/sparc/fp.h: Use memcpy to implement jit_movi.
+       * lightning/ppc/fp.h: Use memcpy to implement jit_movi.
+       Move floating-point opcodes...
+       * lightning/ppc/asm.h: ... here.
+
+2004-04-14  Paolo Bonzini  <address@hidden>
+
+       * lightning/core-common.h: Add jit_finishr.
+       * lightning/ppc/core.h: Add jit_callr and jit_finishr.
+       * lightning/i386/core.h: Add jit_callr.
+       * lightning/sparc/core.h: Add jit_callr.  Fix typo.
+
+2004-04-14  Paolo Bonzini  <address@hidden>
+
+       * lightning/i386/core.h: Fix pasto in jit_b*_ui.
+
+2004-03-30  Laurent Michel
+
+       * lightning/ppc: Implement PowerPC floating point
+       (ChangeLog entry missing).
+
+2004-03-12  Paolo Bonzini  <address@hidden>
+
+       * lightning/fp-common.h: Load/store macros are not the
+       same for floats and doubles anywhere, but jit_retval may be.
+       * lightning/i386/asm.h: Fix = mistaken for == in ESCrri.
+       * lightning/i386/core.h: Fix typo in jit_prepare_[fd].
+       * lightning/i386/fp.h: Rewritten.
+       * tests/testfp.c: Add tests for unordered comparisons.
+       * tests/testfp.ok: Add results.
+
+2004-03-15  Paolo Bonzini  <address@hidden>
+
+       Merge changes from Laurent Michel.
+
+       * lightning/asm-common.h: Add _jit_I_noinc.
+       * lightning/core-common.h: Support jit_init,
+       jit_setup_code, jit_patch_at.  Return patchable IP from
+       jit_movi_p.
+       * lightning/funcs-common.h: Provide defaults
+       for jit_setup_code, jit_start_pfx, jit_end_pfx
+       * lightning/i386/core.h: Add jit_patch_at, jit_patch_movi.
+       * lightning/ppc/core.h: Likewise.
+       * lightning/sparc/core.h: Likewise.
+       * lightning/ppc/asm.h: Fix generation of branch destination
+       displacements in _FB and _BB
+       * lightning/ppc/core.h: Generate trampolines in the user
+       area.
+       * lightning/ppc/funcs.h: Add a few casts.
+       * tests/bc.c: New testcase.
+
+       * lightning/i386/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+       * lightning/ppc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+       * lightning/sparc/asm.h: Wrap into #ifndef LIGHTNING_DEBUG.
+
+
+2004-03-09  Paolo Bonzini  <address@hidden>
+
+       * lightning/sparc/fp.h: Rewrite.  Move macros for
+       FP code generation...
+       * lightning/sparc/asm.h: ... here.
+       * lightning/sparc/core.h: Rename jit_prepare to
+       jit_prepare_i, jit_retval to jit_retval_i.
+       * lightning/ppc/core.h: Rename jit_prepare to
+       jit_prepare_i, jit_retval to jit_retval_i.
+       * lightning/i386/core.h: Rename jit_prepare to
+       jit_prepare_i, jit_retval to jit_retval_i.
+       * lightning/core-common.h: Provide backwards
+       compatible synonyms for the above.
+       * lightning/fp-common.h: Rewrite.
+       * lightning-inst.h: Include fp unconditionally.
+       * lightning.h.in: Include fp unconditionally.
+       * tests/Makefile.am: Enable fp tests.
+       * tests/fib.c: Use jit_retval_i.
+       * tests/fibit.c: Cast codeBuffer to char *.
+       * tests/funcfp.c: Use new fp macros.
+       * tests/printf.c: Use jit_retval_i.
+       * tests/rpnfp.c: Use new fp macros.
+       * tests/testfp.c: Use new fp macros.
+
 2004-03-02  Paolo Bonzini  <address@hidden>
 
        * lightning/i386/core.h: generate correct code when
diff --git a/NEWS b/NEWS
index bae18f3..a8b42b1 100644
--- a/NEWS
+++ b/NEWS
@@ -1,10 +1,17 @@
 NEWS FROM VERSION 1.1.2 TO 1.2
 
-o   Floating-point interface rewritten, uses a common register
-    file architecture rather than a stack.
+o   Floating-point interface rewritten, uses a register file
+    architecture rather than a stack.
 
 o   Many bug fixes.
 
+o   jit_prepare and jit_retval are now jit_prepare_i and
+    jit_retval_i.
+
+o   Support for Fedora Core 1's exec-shield feature.
+
+o   PPC supports both SysV and Darwin ABIs.
+
 o   More (and more complete) examples provided.
 
 ---
diff --git a/THANKS b/THANKS
new file mode 100644
index 0000000..7671541
--- /dev/null
+++ b/THANKS
@@ -0,0 +1,8 @@
+Thanks to all the following people for their help in
+improving GNU lightning:
+
+Tom Tromey                      <address@hidden>
+Laurent Michel                  <address@hidden>
+Eli Barzilay                    <address@hidden>
+Jens Troeger                    <address@hidden>
+Basile Starynkevitch            <address@hidden>
diff --git a/config/config.guess b/config/config.guess
new file mode 120000
index 0000000..9c6e8c2
--- /dev/null
+++ b/config/config.guess
@@ -0,0 +1 @@
+/sw/share/automake-1.9/config.guess
\ No newline at end of file
diff --git a/config/config.sub b/config/config.sub
new file mode 120000
index 0000000..9db5449
--- /dev/null
+++ b/config/config.sub
@@ -0,0 +1 @@
+/sw/share/automake-1.9/config.sub
\ No newline at end of file
diff --git a/config/depcomp b/config/depcomp
new file mode 120000
index 0000000..20f0b61
--- /dev/null
+++ b/config/depcomp
@@ -0,0 +1 @@
+/sw/share/automake-1.9/depcomp
\ No newline at end of file
diff --git a/config/mdate-sh b/config/mdate-sh
new file mode 120000
index 0000000..a3d6cb9
--- /dev/null
+++ b/config/mdate-sh
@@ -0,0 +1 @@
+/sw/share/automake-1.9/mdate-sh
\ No newline at end of file
diff --git a/config/missing b/config/missing
new file mode 120000
index 0000000..4db5c1b
--- /dev/null
+++ b/config/missing
@@ -0,0 +1 @@
+/sw/share/automake-1.9/missing
\ No newline at end of file
diff --git a/config/texi2dvi b/config/texi2dvi
index 010b586..fa4d4e0 100755
--- a/config/texi2dvi
+++ b/config/texi2dvi
@@ -1,6 +1,6 @@
 #! /bin/sh
 # texi2dvi --- produce DVI (or PDF) files from Texinfo (or LaTeX) sources.
-# $Id: texi2dvi,v 1.14 2003/02/05 00:42:33 karl Exp $
+# $Id: texi2dvi,v 1.1.1.1 2004/03/03 12:51:44 bonzini Exp $
 #
 # Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2001,
 # 2002, 2003 Free Software Foundation, Inc.
@@ -27,7 +27,7 @@
 # the `--debug' option when making a bug report.
 
 # This string is expanded by rcs automatically when this file is checked out.
-rcs_revision='$Revision: 1.14 $'
+rcs_revision='$Revision: 1.1.1.1 $'
 rcs_version=`set - $rcs_revision; echo $2`
 program=`echo $0 | sed -e 's!.*/!!'`
 version="texi2dvi (GNU Texinfo 4.5) $rcs_version
diff --git a/config/texinfo.tex b/config/texinfo.tex
new file mode 120000
index 0000000..02f1111
--- /dev/null
+++ b/config/texinfo.tex
@@ -0,0 +1 @@
+/sw/share/automake-1.9/texinfo.tex
\ No newline at end of file
diff --git a/doc/.cvsignore b/doc/.cvsignore
new file mode 100644
index 0000000..01e2da8
--- /dev/null
+++ b/doc/.cvsignore
@@ -0,0 +1,3 @@
+*.info*
+stamp-*
+version.texi
diff --git a/doc/Makefile.am b/doc/Makefile.am
index 968812f..10b4a46 100644
--- a/doc/Makefile.am
+++ b/doc/Makefile.am
@@ -1,5 +1,3 @@
-EXTRA_DIST=lightning.info lightning.info-1 lightning.info-2 lightning.info-3
-
 TEXI2DVI=$(top_srcdir)/config/texi2dvi
 HELP2MAN = $(top_srcdir)/config/help2man
 
diff --git a/doc/body.texi b/doc/body.texi
index cccadd0..b385aed 100644
--- a/doc/body.texi
+++ b/doc/body.texi
@@ -51,7 +51,7 @@ There are no Secondary Sections, no Cover Texts and no Invariant Sections
 Info documentation, constitutes the Title Page.
 @end titlepage
 
-@ifclear ISTEX
+@ifnottex
 @node Top
 @top @lightning{}
 
@@ -61,17 +61,17 @@ which are usually either inefficient or non-portable, @lightning{} is
 both retargetable and very fast.
 
 @include toc.texi
-@end ifclear
+@end ifnottex
 
 @node Overview
 @chapter Introduction to @lightning{}
 
-@ifset ISTEX
+@iftex
 This document describes @value{TOPIC} the @lightning{} library for
 dynamic code generation.  Unlike other dynamic code generation systems,
 which are usually either inefficient or non-portable, @lightning{} is
 both retargetable and very fast.
-@end ifset
+@end iftex
 
 @ifclear USING
 This manual assumes that you are pretty comfortable with the usage of
diff --git a/doc/lightning.texi b/doc/lightning.texi
index dee770e..a336a3d 100644
--- a/doc/lightning.texi
+++ b/doc/lightning.texi
@@ -36,11 +36,6 @@
 @c Macros for Texinfo 3.1/4.0 compatibility
 @c ---------------------------------------------------------------------
 
-@c Emulate the `@ifnottex' command which is found in Texinfo 4.0
-@iftex
-@set ISTEX
-@end iftex
-
 @c @hlink (macro), @url and @email are used instead of @uref for Texinfo 3.1
 @c compatibility
 @macro hlink{url, link}
diff --git a/doc/porting.texi b/doc/porting.texi
index 7423397..9f68d86 100644
--- a/doc/porting.texi
+++ b/doc/porting.texi
@@ -353,16 +353,20 @@ that make up the platform-independent interface provided by
 Implementation of forward references takes place in:
 
 @itemize @bullet
-@item The branch macros
-@item The @code{jit_patch} macros
+@item
+The branch macros
+
+@item
+The @code{jit_patch_at} macros
 @end itemize
 
 Roughly speaking, the branch macros, as seen in @usingref{GNU lightning
 macros, Generating code at run-time}, return a value that later calls
-to @code{jit_patch} use to complete the assembly of the forward
-reference.  This value is usually the contents of the program counter
-after the branch instruction is compiled (which is accessible in the
-@code{_jit.pc} variable).  Let's see an example from the x86 back-end:
+to @code{jit_patch} or @code{jit_patch_at} use to complete the assembly
+of the forward reference.  This value is usually the contents of the
+program counter after the branch instruction is compiled (which is
+accessible in the @code{_jit.pc} variable).  Let's see an example from
+the x86 back-end:
 
 @example
 #define jit_bmsr_i(label, s1, s2)                            \
@@ -374,7 +378,7 @@ the combination of a @code{TEST} instruction (bit-wise @sc{and} between
 the two operands) and a @code{JNZ} instruction (jump if non-zero).  The
 macro then returns the final value of the program counter.
 
-@code{jit_patch} is one of the few macros that need to possess a
+@code{jit_patch_at} is one of the few macros that need to possess a
 knowledge of the machine's instruction formats.  Its purpose is to
 patch a branch instruction (identified by the value returned at the
 moment the branch was compiled) to jump to the current position (that
@@ -382,11 +386,11 @@ is, to the address identified by @code{_jit.pc}).
 
 On the x86, the displacement between the jump and the landing point is
 expressed as a 32-bit signed integer lying in the last four bytes of the
-jump instruction.  The definition of @code{_jit_patch} is:
+jump instruction.  The definition of @code{_jit_patch_at} is:
 
 @example
-#define jit_patch(jump_pc)      (*_PSL((jump_pc) - 4) = \
-                                 _jit.pc - (jump_pc))
+#define jit_patch(jump_pc, pv)    (*_PSL((jump_pc) - 4) = \
+                                  (pv) - (jump_pc))
 @end example
 
 The @code{_PSL} macro is nothing more than a cast to @code{long *},
@@ -394,42 +398,69 @@ and is used here to shorten the definition and avoid cluttering it with
 excessive parentheses.  These type-cast macros are:
 
 @itemize @bullet
-@item @code{_PUC(X)} to cast to a @code{unsigned char *}.
-@item @code{_PUS(X)} to cast to a @code{unsigned short *}.
-@item @code{_PUI(X)} to cast to a @code{unsigned int *}.
-@item @code{_PSL(X)} to cast to a @code{long *}.
-@item @code{_PUL(X)} to cast to a @code{unsigned long *}.
+@item
+@code{_PUC(X)} to cast to a @code{unsigned char *}.
+
+@item
+@code{_PUS(X)} to cast to a @code{unsigned short *}.
+
+@item
+@code{_PUI(X)} to cast to a @code{unsigned int *}.
+
+@item
+@code{_PSL(X)} to cast to a @code{long *}.
+
+@item
+@code{_PUL(X)} to cast to a @code{unsigned long *}.
 @end itemize
 
 On other platforms, notably RISC ones, the displacement is embedded into
-the instruction itself.  In this case, @code{jit_patch} must first zero
+the instruction itself.  In this case, @code{jit_patch_at} must first zero
 out the field, and then @sc{or} in the correct displacement.  The SPARC,
 for example, encodes the displacement in the bottom 22 bits; in addition
 the right-most two bits are suppressed, which are always zero because
 instruction have to be word-aligned.
 
 @example
-#define jit_patch(delay_pc)   jit_patch_ ( ((delay_pc) - 1) )
+#define jit_patch_at(delay_pc, pv)   jit_patch_ (((delay_pc) - 1), (pv))
 
 @rem{/* branch instructions return the address of the @emph{delay}
  * instruction---this is just a helper macro that makes the code more
  * readable.
  */}
-#define jit_patch_(jump_pc)   (*jump_pc =                  \
+#define jit_patch_(jump_pc, pv)   (*jump_pc =              \
         (*jump_pc & ~_MASK(22)) |                          \
-         ((_UL(_jit.pc) - _UL(jump_pc)) >> 2) & _MASK(22))
+         ((_UL(pv) - _UL(jump_pc)) >> 2) & _MASK(22))
 @end example
 
 This introduces more predefined shortcut macros:
 @itemize @bullet
-@item @code{_UC(X)} to cast to a @code{unsigned char}.
-@item @code{_US(X)} to cast to a @code{unsigned short}.
-@item @code{_UI(X)} to cast to a @code{unsigned int}.
-@item @code{_SL(X)} to cast to a @code{long}.
-@item @code{_UL(X)} to cast to a @code{unsigned long}.
-@item @code{_MASK(N)} gives a binary number made of N ones.
+@item
+@code{_UC(X)} to cast to a @code{unsigned char}.
+
+@item
+@code{_US(X)} to cast to a @code{unsigned short}.
+
+@item
+@code{_UI(X)} to cast to a @code{unsigned int}.
+
+@item
+@code{_SL(X)} to cast to a @code{long}.
+
+@item
+@code{_UL(X)} to cast to a @code{unsigned long}.
+
+@item
+@code{_MASK(N)} gives a binary number made of N ones.
 @end itemize
 
+Dual to branches and @code{jit_patch_at} are @code{jit_movi_p}
+and @code{jit_patch_movi}, since they can also be used to implement
+forward references.  @code{jit_movi_p} should be carefully implemented
+to use an encoding that is as long as possible, and it should return
+an address which is then passed to @code{jit_patch_movi}.  The
+implementation of @code{jit_patch_movi} is similar to
+@code{jit_patch_at}.
 
 @node Common features
 @section Common features supported by @file{core-common.h}
@@ -448,14 +479,16 @@ avoids compiler warnings about redefined macros, but there should be
 no need to define them.  They are:
 @example
 #define jit_extr_c_ui(d, rs)
-#define jit_extr_i_ul(d, rs)
 #define jit_extr_s_ui(d, rs)
+#define jit_extr_c_ul(d, rs)
+#define jit_extr_s_ul(d, rs)
+#define jit_extr_i_ul(d, rs)
 #define jit_negr_i(d, rs)
 #define jit_negr_l(d, rs)
 @end example
 
 @item Support for the @sc{abi}
-Both @code{jit_prolog}, @code{jit_leaf} and @code{jit_finish} are not
+All of @code{jit_prolog}, @code{jit_leaf} and @code{jit_finish} are not
 mandatory.  If not defined, they will be defined respectively as an
 empty macro, as a synonym for @code{jit_prolog}, and as a synonym for
 @code{jit_calli}.  Whether to define them or not in the port-specific
@@ -471,8 +504,12 @@ and ``reverse subtraction'' (that is, 
address@hidden@math{-}REG1):
 @example
 #define jit_extr_c_i(d, rs)
 #define jit_extr_s_i(d, rs)
+#define jit_extr_c_l(d, rs)
+#define jit_extr_s_l(d, rs)
+#define jit_extr_i_l(d, rs)
 #define jit_rsbi_i(d, rs, is)
 #define jit_rsbi_l(d, rs, is)
+#define jit_rsbi_p(d, rs, is)
 @end example
 
 @item Conversion between network and host byte ordering
@@ -510,7 +547,7 @@ unsigned integers is exactly the same as adding two signed integers
 @lightning{} provides both @code{jit_addr_i} and @code{jit_addr_ui}
 macros.  Similarly, pointers and unsigned long integers behave in the
 same way, but @lightning{} has separate instruction for the two data
-types---those that operate on pointers usually comprise a typecast
+types---those that operate on pointers usually include a typecast
 that makes programs clearer.
 
 @item Shortcuts
@@ -553,7 +590,7 @@ instruction to be scheduled in the delay slot with the branch
 instruction.  The only parameter accepted by the macro is a call
 to a branch macro, which must be expanded @strong{exactly once} by
 @code{jit_fill_delay_after}.  The client must be able to pass the
-return value of @code{jit_fill_delay_after} to @code{jit_patch}.
+return value of @code{jit_fill_delay_after} to @code{jit_patch_at}.
 
 There are two possible approaches that can be used in
 @code{jit_fill_delay_after}.  They are summarized in the following
@@ -701,9 +738,9 @@ in @file{core-common.h} (@pxref{Common features, , Common features
 supported by @file{core-common.h}}).
 
 @example
-#define jit_prepare(numargs)  (_jitl.pusharg = _Ro(numargs))
-#define jit_pusharg_i(rs)     (--_jitl.pusharg,         \
-                               MOVrr((rs), _jitl.pusharg))
+#define jit_prepare_i(numargs)  (_jitl.pusharg = _Ro(numargs))
+#define jit_pusharg_i(rs)       (--_jitl.pusharg,         \
+                                 MOVrr((rs), _jitl.pusharg))
 @end example
 
 Remember that arguments pushing takes place in reverse order, thus
@@ -759,18 +796,18 @@ epilog code.
 @code{jit_pusharg} uses a hardware push operation, which is commonly
 available on CISC machines (where this approach is most likely
 followed).  Since the stack has to be cleaned up after the call,
-@code{jit_prepare} remembers how many parameters have been put there,
+@code{jit_prepare_i} remembers how many parameters have been put there,
 and @code{jit_finish} adjusts the stack pointer after the call.
 
 @example
-#define jit_prepare(numargs) (_jitl.args += (numargs))
-#define jit_pusharg_i(rs)    PUSHLr(rs)
-#define jit_finish(sub)      (jit_calli((sub)),              \
-                             ADDLir(4 * _jitl.args, JIT_SP), \
-                             _jitl.numargs = 0)
+#define jit_prepare_i(numargs) (_jitl.args += (numargs))
+#define jit_pusharg_i(rs)      PUSHLr(rs)
+#define jit_finish(sub)        (jit_calli((sub)),              \
+                               ADDLir(4 * _jitl.args, JIT_SP), \
+                               _jitl.numargs = 0)
 @end example
 
-Note the usage of @code{+=} in @code{jit_prepare}.  This is done
+Note the usage of @code{+=} in @code{jit_prepare_i}.  This is done
 so that one can defer the popping of the arguments that were saved
 on the stack (@dfn{stack pollution}).  To do so, it is sufficient to
 use @code{jit_calli} instead of @code{jit_finish} in all but the
@@ -823,12 +860,12 @@ operations:
 @table @b
 @item Register names (all mandatory but the last two)
 @example
-#define JIT_R0
-#define JIT_R1
-#define JIT_R2
-#define JIT_V0
-#define JIT_V1
-#define JIT_V2
+#define JIT_R
+#define JIT_R_NUM
+#define JIT_V
+#define JIT_V_NUM
+#define JIT_FPR
+#define JIT_FPR_NUM
 #define JIT_SP
 #define JIT_FP
 #define JIT_RZERO
@@ -850,57 +887,81 @@ operations:
 #define jit_arg_ui()
 #define jit_arg_ul()
 #define jit_arg_us()
+#define jit_abs_d(rd,rs)
 #define jit_addi_i(d, rs, is)
+#define jit_addr_d(rd,s1,s2)
 #define jit_addr_i(d, s1, s2)
 #define jit_addxi_i(d, rs, is)
 #define jit_addxr_i(d, s1, s2)
 #define jit_andi_i(d, rs, is)
 #define jit_andr_i(d, s1, s2)
 #define jit_beqi_i(label, rs, is)
+#define jit_beqr_d(label, s1, s2)
 #define jit_beqr_i(label, s1, s2)
 #define jit_bgei_i(label, rs, is)
 #define jit_bgei_ui(label, rs, is)
+#define jit_bger_d(label, s1, s2)
 #define jit_bger_i(label, s1, s2)
 #define jit_bger_ui(label, s1, s2)
 #define jit_bgti_i(label, rs, is)
 #define jit_bgti_ui(label, rs, is)
+#define jit_bgtr_d(label, s1, s2)
 #define jit_bgtr_i(label, s1, s2)
 #define jit_bgtr_ui(label, s1, s2)
 #define jit_blei_i(label, rs, is)
 #define jit_blei_ui(label, rs, is)
+#define jit_bler_d(label, s1, s2)
 #define jit_bler_i(label, s1, s2)
 #define jit_bler_ui(label, s1, s2)
+#define jit_bltgtr_d(label, s1, s2)
 #define jit_blti_i(label, rs, is)
 #define jit_blti_ui(label, rs, is)
+#define jit_bltr_d(label, s1, s2)
 #define jit_bltr_i(label, s1, s2)
 #define jit_bltr_ui(label, s1, s2)
+#define jit_bmci_i(label, rs, is)
+#define jit_bmcr_i(label, s1, s2)
+#define jit_bmsi_i(label, rs, is)
+#define jit_bmsr_i(label, s1, s2)
+#define jit_bnei_i(label, rs, is)
+#define jit_bner_d(label, s1, s2)
+#define jit_bner_i(label, s1, s2)
 #define jit_boaddi_i(label, rs, is)
 #define jit_boaddi_ui(label, rs, is)
 #define jit_boaddr_i(label, s1, s2)
 #define jit_boaddr_ui(label, s1, s2)
+#define jit_bordr_d(label, s1, s2)
 #define jit_bosubi_i(label, rs, is)
 #define jit_bosubi_ui(label, rs, is)
 #define jit_bosubr_i(label, s1, s2)
 #define jit_bosubr_ui(label, s1, s2)
-#define jit_bmci_i(label, rs, is)
-#define jit_bmcr_i(label, s1, s2)
-#define jit_bmsi_i(label, rs, is)
-#define jit_bmsr_i(label, s1, s2)
-#define jit_bnei_i(label, rs, is)
-#define jit_bner_i(label, s1, s2)
+#define jit_buneqr_d(label, s1, s2)
+#define jit_bunger_d(label, s1, s2)
+#define jit_bungtr_d(label, s1, s2)
+#define jit_bunler_d(label, s1, s2)
+#define jit_bunltr_d(label, s1, s2)
+#define jit_bunordr_d(label, s1, s2)
 #define jit_calli(label)
+#define jit_callr(label)
+#define jit_ceilr_d_i(rd, rs)
 #define jit_divi_i(d, rs, is)
 #define jit_divi_ui(d, rs, is)
+#define jit_divr_d(rd,s1,s2)
 #define jit_divr_i(d, s1, s2)
 #define jit_divr_ui(d, s1, s2)
 #define jit_eqi_i(d, rs, is)
+#define jit_eqr_d(d, s1, s2)
 #define jit_eqr_i(d, s1, s2)
+#define jit_extr_i_d(rd, rs)
+#define jit_floorr_d_i(rd, rs)
 #define jit_gei_i(d, rs, is)
 #define jit_gei_ui(d, s1, s2)
+#define jit_ger_d(d, s1, s2)
 #define jit_ger_i(d, s1, s2)
 #define jit_ger_ui(d, s1, s2)
 #define jit_gti_i(d, rs, is)
 #define jit_gti_ui(d, s1, s2)
+#define jit_gtr_d(d, s1, s2)
 #define jit_gtr_i(d, s1, s2)
 #define jit_gtr_ui(d, s1, s2)
 #define jit_hmuli_i(d, rs, is)
@@ -909,61 +970,93 @@ operations:
 #define jit_hmulr_ui(d, s1, s2)
 #define jit_jmpi(label)
 #define jit_jmpr(reg)
+#define jit_ldxi_f(rd, rs, is)
+#define jit_ldxr_f(rd, s1, s2)
 #define jit_ldxi_c(d, rs, is)
+#define jit_ldxi_d(rd, rs, is)
 #define jit_ldxi_i(d, rs, is)
 #define jit_ldxi_s(d, rs, is)
 #define jit_ldxi_uc(d, rs, is)
 #define jit_ldxi_us(d, rs, is)
 #define jit_ldxr_c(d, s1, s2)
+#define jit_ldxr_d(rd, s1, s2)
 #define jit_ldxr_i(d, s1, s2)
 #define jit_ldxr_s(d, s1, s2)
 #define jit_ldxr_uc(d, s1, s2)
 #define jit_ldxr_us(d, s1, s2)
 #define jit_lei_i(d, rs, is)
 #define jit_lei_ui(d, s1, s2)
+#define jit_ler_d(d, s1, s2)
 #define jit_ler_i(d, s1, s2)
 #define jit_ler_ui(d, s1, s2)
 #define jit_lshi_i(d, rs, is)
 #define jit_lshr_i(d, r1, r2)
+#define jit_ltgtr_d(d, s1, s2)
 #define jit_lti_i(d, rs, is)
 #define jit_lti_ui(d, s1, s2)
+#define jit_ltr_d(d, s1, s2)
 #define jit_ltr_i(d, s1, s2)
 #define jit_ltr_ui(d, s1, s2)
 #define jit_modi_i(d, rs, is)
 #define jit_modi_ui(d, rs, is)
 #define jit_modr_i(d, s1, s2)
 #define jit_modr_ui(d, s1, s2)
+#define jit_movi_d(rd,immd)
+#define jit_movi_f(rd,immf)
 #define jit_movi_i(d, is)
+#define jit_movi_p(d, is)
+#define jit_movr_d(rd,rs)
 #define jit_movr_i(d, rs)
 #define jit_muli_i(d, rs, is)
 #define jit_muli_ui(d, rs, is)
+#define jit_mulr_d(rd,s1,s2)
 #define jit_mulr_i(d, s1, s2)
 #define jit_mulr_ui(d, s1, s2)
+#define jit_negr_d(rd,rs)
 #define jit_nei_i(d, rs, is)
+#define jit_ner_d(d, s1, s2)
 #define jit_ner_i(d, s1, s2)
 #define jit_nop()
+#define jit_ordr_d(d, s1, s2)
 #define jit_ori_i(d, rs, is)
 #define jit_orr_i(d, s1, s2)
-#define jit_patch(jump_pc)
+#define jit_patch_at(jump_pc, value)
+#define jit_patch_movi(jump_pc, value)
 #define jit_pop_i(rs)
-#define jit_prepare(numargs)
+#define jit_prepare_d(numargs)
+#define jit_prepare_f(numargs)
+#define jit_prepare_i(numargs)
 #define jit_push_i(rs)
 #define jit_pusharg_i(rs)
 #define jit_ret()
 #define jit_retval_i(rd)
+#define jit_roundr_d_i(rd, rs)
 #define jit_rshi_i(d, rs, is)
 #define jit_rshi_ui(d, rs, is)
 #define jit_rshr_i(d, r1, r2)
 #define jit_rshr_ui(d, r1, r2)
+#define jit_sqrt_d(rd,rs)
 #define jit_stxi_c(rd, id, rs)
+#define jit_stxi_d(id, rd, rs)
+#define jit_stxi_f(id, rd, rs)
 #define jit_stxi_i(rd, id, rs)
 #define jit_stxi_s(rd, id, rs)
 #define jit_stxr_c(d1, d2, rs)
+#define jit_stxr_d(d1, d2, rs)
+#define jit_stxr_f(d1, d2, rs)
 #define jit_stxr_i(d1, d2, rs)
 #define jit_stxr_s(d1, d2, rs)
+#define jit_subr_d(rd,s1,s2)
 #define jit_subr_i(d, s1, s2)
 #define jit_subxi_i(d, rs, is)
 #define jit_subxr_i(d, s1, s2)
+#define jit_truncr_d_i(rd, rs)
+#define jit_uneqr_d(d, s1, s2)
+#define jit_unger_d(d, s1, s2)
+#define jit_ungtr_d(d, s1, s2)
+#define jit_unler_d(d, s1, s2)
+#define jit_unltr_d(d, s1, s2)
+#define jit_unordr_d(d, s1, s2)
 #define jit_xori_i(d, rs, is)
 #define jit_xorr_i(d, s1, s2)
 @end example
@@ -971,17 +1064,20 @@ operations:
 @item Non mandatory---there should be no need to define them:
 @example
 #define jit_extr_c_ui(d, rs)
-#define jit_extr_i_ul(d, rs)
 #define jit_extr_s_ui(d, rs)
+#define jit_extr_c_ul(d, rs)
+#define jit_extr_s_ul(d, rs)
+#define jit_extr_i_ul(d, rs)
 #define jit_negr_i(d, rs)
 #define jit_negr_l(d, rs)
 @end example
 
 @item Non mandatory---whether to define them depends on the @sc{abi}:
 @example
-#define jit_prolog()
-#define jit_finish()
-#define jit_leaf()
+#define jit_prolog(n)
+#define jit_finish(sub)
+#define jit_finishr(reg)
+#define jit_leaf(n)
 #define jit_getarg_c(reg, ofs)
 #define jit_getarg_i(reg, ofs)
 #define jit_getarg_l(reg, ofs)
@@ -991,12 +1087,17 @@ operations:
 #define jit_getarg_ui(reg, ofs)
 #define jit_getarg_ul(reg, ofs)
 #define jit_getarg_us(reg, ofs)
+#define jit_getarg_f(reg, ofs)
+#define jit_getarg_d(reg, ofs)
 @end example
 
 @item Non mandatory---define them if instructions that do this exist:
 @example
 #define jit_extr_c_i(d, rs)
 #define jit_extr_s_i(d, rs)
+#define jit_extr_c_l(d, rs)
+#define jit_extr_s_l(d, rs)
+#define jit_extr_i_l(d, rs)
 #define jit_rsbi_i(d, rs, is)
 #define jit_rsbi_l(d, rs, is)
 @end example
@@ -1037,6 +1138,14 @@ operations:
 #define jit_str_c(rd, rs)
 #define jit_str_i(rd, rs)
 #define jit_str_s(rd, rs)
+#define jit_ldi_f(rd, is)
+#define jit_sti_f(id, rs)
+#define jit_ldi_d(rd, is)
+#define jit_sti_d(id, rs)
+#define jit_ldr_f(rd, rs)
+#define jit_str_f(rd, rs)
+#define jit_ldr_d(rd, rs)
+#define jit_str_d(rd, rs)
 @end example
 
 @item Synonyms---don't define them:
@@ -1085,14 +1194,20 @@ operations:
 #define jit_eqr_p(d, s1, s2)
 #define jit_eqr_ui(d, s1, s2)
 #define jit_eqr_ul(d, s1, s2)
+#define jit_extr_c_s(d, rs)
+#define jit_extr_c_us(d, rs)
+#define jit_extr_uc_s(d, rs)
+#define jit_extr_uc_us(d, rs)
 #define jit_extr_uc_i(d, rs)
 #define jit_extr_uc_ui(d, rs)
-#define jit_extr_ui_l(d, rs)
-#define jit_extr_ui_l(d, rs)
-#define jit_extr_ui_ul(d, rs)
-#define jit_extr_ui_ul(d, rs)
 #define jit_extr_us_i(d, rs)
 #define jit_extr_us_ui(d, rs)
+#define jit_extr_uc_l(d, rs)
+#define jit_extr_uc_ul(d, rs)
+#define jit_extr_us_l(d, rs)
+#define jit_extr_us_ul(d, rs)
+#define jit_extr_ui_l(d, rs)
+#define jit_extr_ui_ul(d, rs)
 #define jit_gei_p(d, rs, is)
 #define jit_ger_p(d, s1, s2)
 #define jit_gti_p(d, rs, is)
@@ -1145,8 +1260,10 @@ operations:
 #define jit_retval_ui(rd)
 #define jit_retval_ul(rd)
 #define jit_retval_us(rd)
+#define jit_rsbi_p(d, rs, is)
 #define jit_rsbi_ui(d, rs, is)
 #define jit_rsbi_ul(d, rs, is)
+#define jit_rsbr_p(d, s1, s2)
 #define jit_rsbr_ui(d, s1, s2)
 #define jit_rsbr_ul(d, s1, s2)
 #define jit_sti_p(d, is)
@@ -1175,6 +1292,12 @@ operations:
 #define jit_subr_p(d, s1, s2)
 #define jit_subr_ui(d, s1, s2)
 #define jit_subr_ul(d, s1, s2)
+#define jit_subxi_p(d, rs, is)
+#define jit_subxi_ui(d, rs, is)
+#define jit_subxi_ul(d, rs, is)
+#define jit_subxr_p(d, s1, s2)
+#define jit_subxr_ui(d, s1, s2)
+#define jit_subxr_ul(d, s1, s2)
 #define jit_xori_ui(d, rs, is)
 #define jit_xori_ul(d, rs, is)
 #define jit_xorr_ui(d, s1, s2)
@@ -1183,6 +1306,19 @@ operations:
 
 @item Shortcuts---don't define them:
 @example
+#define JIT_R0
+#define JIT_R1
+#define JIT_R2
+#define JIT_V0
+#define JIT_V1
+#define JIT_V2
+#define JIT_FPR0
+#define JIT_FPR1
+#define JIT_FPR2
+#define JIT_FPR3
+#define JIT_FPR4
+#define JIT_FPR5
+#define jit_patch(jump_pc)
 #define jit_notr_c(d, rs)
 #define jit_notr_i(d, rs)
 #define jit_notr_l(d, rs)
@@ -1191,12 +1327,61 @@ operations:
 #define jit_notr_ui(d, rs)
 #define jit_notr_ul(d, rs)
 #define jit_notr_us(d, rs)
+#define jit_rsbr_d(d, s1, s2)
 #define jit_rsbr_i(d, s1, s2)
 #define jit_rsbr_l(d, s1, s2)
 #define jit_subi_i(d, rs, is)
 #define jit_subi_l(d, rs, is)
 @end example
 
+@item Mandatory unless target arithmetic is always done in the same 
precision:
+@example
+#define jit_abs_f(rd,rs)
+#define jit_addr_f(rd,s1,s2)
+#define jit_beqr_f(label, s1, s2)
+#define jit_bger_f(label, s1, s2)
+#define jit_bgtr_f(label, s1, s2)
+#define jit_bler_f(label, s1, s2)
+#define jit_bltgtr_f(label, s1, s2)
+#define jit_bltr_f(label, s1, s2)
+#define jit_bner_f(label, s1, s2)
+#define jit_bordr_f(label, s1, s2)
+#define jit_buneqr_f(label, s1, s2)
+#define jit_bunger_f(label, s1, s2)
+#define jit_bungtr_f(label, s1, s2)
+#define jit_bunler_f(label, s1, s2)
+#define jit_bunltr_f(label, s1, s2)
+#define jit_bunordr_f(label, s1, s2)
+#define jit_ceilr_f_i(rd, rs)
+#define jit_divr_f(rd,s1,s2)
+#define jit_eqr_f(d, s1, s2)
+#define jit_extr_d_f(rs, rd)
+#define jit_extr_f_d(rs, rd)
+#define jit_extr_i_f(rd, rs)
+#define jit_floorr_f_i(rd, rs)
+#define jit_ger_f(d, s1, s2)
+#define jit_gtr_f(d, s1, s2)
+#define jit_ler_f(d, s1, s2)
+#define jit_ltgtr_f(d, s1, s2)
+#define jit_ltr_f(d, s1, s2)
+#define jit_movr_f(rd,rs)
+#define jit_mulr_f(rd,s1,s2)
+#define jit_negr_f(rd,rs)
+#define jit_ner_f(d, s1, s2)
+#define jit_ordr_f(d, s1, s2)
+#define jit_roundr_f_i(rd, rs)
+#define jit_rsbr_f(d, s1, s2)
+#define jit_sqrt_f(rd,rs)
+#define jit_subr_f(rd,s1,s2)
+#define jit_truncr_f_i(rd, rs)
+#define jit_uneqr_f(d, s1, s2)
+#define jit_unger_f(d, s1, s2)
+#define jit_ungtr_f(d, s1, s2)
+#define jit_unler_f(d, s1, s2)
+#define jit_unltr_f(d, s1, s2)
+#define jit_unordr_f(d, s1, s2)
+@end example
+
 @item Mandatory if sizeof(long) != sizeof(int)---don't define them on other 
systems:
 @example
 #define jit_addi_l(d, rs, is)
@@ -1241,6 +1426,12 @@ operations:
 #define jit_divr_ul(d, s1, s2)
 #define jit_eqi_l(d, rs, is)
 #define jit_eqr_l(d, s1, s2)
+#define jit_extr_c_l(d, rs)
+#define jit_extr_c_ul(d, rs)
+#define jit_extr_s_l(d, rs)
+#define jit_extr_s_ul(d, rs)
+#define jit_extr_i_l(d, rs)
+#define jit_extr_i_ul(d, rs)
 #define jit_gei_l(d, rs, is)
 #define jit_gei_ul(d, rs, is)
 #define jit_ger_l(d, s1, s2)
diff --git a/doc/toc.texi b/doc/toc.texi
index 8a37022..56b2e10 100644
--- a/doc/toc.texi
+++ b/doc/toc.texi
@@ -7,7 +7,6 @@
 * Installation::          Configuring and installing GNU lightning
 * The instruction set::   The RISC instruction set used i GNU lightning
 * GNU lightning macros::  GNU lightning's macros
-* Floating-point::        Doing floating point computations.
 * Reentrancy::            Re-entrant usage of GNU lightning
 * Autoconf support::      Using @code{autoconf} with GNU lightning
 @end ifset
diff --git a/doc/using.texi b/doc/using.texi
index a65a048..9a0a7c0 100644
--- a/doc/using.texi
+++ b/doc/using.texi
@@ -49,9 +49,14 @@ that closely match those of most existing RISC 
architectures, or
 that can be easily syntesized if absent.  Each instruction is composed
 of:
 @itemize @bullet
-@item an operation (like @code{sub} or @code{mul})
-@item sometimes, an register/immediate flag (@code{r} or @code{i})
-@item a type identifier (occasionally, two)
+@item
+an operation, like @code{sub} or @code{mul}
+
+@item
+sometimes, a register/immediate flag (@code{r} or @code{i})
+
+@item
+a type identifier or, occasionally, two
 @end itemize
 
 The second and third field are separated by an underscore; thus,
@@ -75,6 +80,8 @@ following table together with the C types they represent:
      ui         @r{unsigned int}
      l          @r{long}
      ul         @r{unsigned long}
+     f          @r{float}
+     d          @r{double}
      p          @r{void *}
 @end example
 
@@ -82,27 +89,31 @@ Some of these types may not be distinct: for example, 
(e.g., @code{l}
 is equivalent to @code{i} on 32-bit machines, and @code{p} is
 substantially equivalent to @code{ul}).
 
-There are seven registers, of which six are general-purpose, while
-the last is used to contain the stack pointer (@code{SP}).  The
-stack pointer can be used to allocate and access local variables
-on the stack (which is supposed to grow downwards in memory on all 
-architectures).
+There are at least seven integer registers, of which six are
+general-purpose, while the last is used to contain the stack pointer
+(@code{SP}).  The stack pointer can be used to allocate and access local
+variables on the stack (which is supposed to grow downwards in memory
+on all architectures).
 
-Of the six general-purpose registers, three are guaranteed to be
+Of the general-purpose registers, at least three are guaranteed to be
 preserved across function calls (@code{V0}, @code{V1} and
address@hidden) and three are not (@code{R0}, @code{R1} and
address@hidden)address@hidden registers are not very much, but this
+@code{V2}) and at least three are not (@code{R0}, @code{R1} and
+@code{R2}).  Six registers are not very much, but this
 restriction was forced by the need to target CISC architectures
-which, like the x86, are poor of registers.  Anyway, consider
-that even on a RISC architecture you don't have many more registers
-which are not devoted to function calls: on the SPARC, you have nine
-(@code{%g1} and the eight registers @code{%l0} through @code{%l7}).}
+which, like the x86, are poor of registers; anyway, backends can
+specify the actual number of available caller- and callee-save
+registers.
 
 In addition, there is a special @code{RET} register which contains
 the return value.  You should always remember, however, that writing
 this register could overwrite either a general-purpose register or
 an incoming parameter, depending on the architecture.
 
+There are at least six floating-point registers, named @code{FPR0} to
+@code{FPR5}.  These are separate from the integer registers on
+all the supported architectures; on Intel architectures, the
+register stack is mapped to a flat register file.
+
 The complete instruction set follows; as you can see, most non-memory
 operations only take integers, long integers (either signed or
 unsigned) and pointers as operands; this was done in order to reduce
@@ -113,61 +124,117 @@ signed and in an unsigned way.
 
 @table @b
 @item Binary ALU operations
-These accept three operands, of which the last can be an immediate
-value.  @code{addx} operations must directly follow @code{addc}, and
+These accept three operands; the last one can be an immediate
+value for integer operands, or a register for all operand types.
+@code{addx} operations must directly follow @code{addc}, and
 @code{subx} must follow @code{subc}; otherwise, results are undefined.
 @example
-addr/addi    i  ui l  ul p  O1 = O2 + O3
-addxr/addxi  i  ui l  ul    O1 = O2 + (O3 + carry)
-addcr/addci  i  ui l  ul    O1 = O2 + O3, set carry
-subr/subi    i  ui l  ul p  O1 = O2 - O3
-subxr/subxi  i  ui l  ul    O1 = O2 - (O3 + carry)
-subcr/subci  i  ui l  ul    O1 = O2 - O3, set carry
-rsbr/rsbi    i  ui l  ul p  O1 = O3 - O2
-mulr/muli    i  ui l  ul    O1 = O2 * O3
-hmulr/hmuli  i  ui l  ul    O1 = @r{high bits of} O2 * O3
-divr/divi    i  ui l  ul    O1 = O2 / O3
-modr/modi    i  ui l  ul    O1 = O2 % O3
-andr/andi    i  ui l  ul    O1 = O2 & O3
-orr/ori      i  ui l  ul    O1 = O2 | O3
-xorr/xori    i  ui l  ul    O1 = O2 ^ O3
-lshr/lshi    i  ui l  ul    O1 = O2 << O3
-rshr/rshi    i  ui l  ul    O1 = O2 >> address@hidden sign bit is propagated 
for signed types.}
+addr     i  ui  l  ul  p  f  d  O1 = O2 + O3
+addi     i  ui  l  ul  p        O1 = O2 + O3
+addxr    i  ui  l  ul           O1 = O2 + (O3 + carry)
+addxi    i  ui  l  ul           O1 = O2 + (O3 + carry)
+addcr    i  ui  l  ul           O1 = O2 + O3, set carry
+addci    i  ui  l  ul           O1 = O2 + O3, set carry
+subr     i  ui  l  ul  p  f  d  O1 = O2 - O3
+subi     i  ui  l  ul  p        O1 = O2 - O3
+subxr    i  ui  l  ul           O1 = O2 - (O3 + carry)
+subxi    i  ui  l  ul           O1 = O2 - (O3 + carry)
+subcr    i  ui  l  ul           O1 = O2 - O3, set carry
+subci    i  ui  l  ul           O1 = O2 - O3, set carry
+rsbr     i  ui  l  ul  p  f  d  O1 = O3 - O2
+rsbi     i  ui  l  ul  p        O1 = O3 - O2
+mulr     i  ui  l  ul     f  d  O1 = O2 * O3
+muli     i  ui  l  ul           O1 = O2 * O3
+hmulr    i  ui  l  ul           O1 = @r{high bits of} O2 * O3
+hmuli    i  ui  l  ul           O1 = @r{high bits of} O2 * O3
+divr     i  ui  l  ul     f  d  O1 = O2 / O3
+divi     i  ui  l  ul           O1 = O2 / O3
+modr     i  ui  l  ul           O1 = O2 % O3
+modi     i  ui  l  ul           O1 = O2 % O3
+andr     i  ui  l  ul           O1 = O2 & O3
+andi     i  ui  l  ul           O1 = O2 & O3
+orr      i  ui  l  ul           O1 = O2 | O3
+ori      i  ui  l  ul           O1 = O2 | O3
+xorr     i  ui  l  ul           O1 = O2 ^ O3
+xori     i  ui  l  ul           O1 = O2 ^ O3
+lshr     i  ui  l  ul           O1 = O2 << O3
+lshi     i  ui  l  ul           O1 = O2 << O3
+rshr     i  ui  l  ul           O1 = O2 >> O3@footnote{The sign bit is 
propagated for signed types.}
+rshi     i  ui  l  ul           O1 = O2 >> O3@footnote{The sign bit is 
propagated for signed types.}
 @end example
 
 @item Unary ALU operations
 These accept two operands, both of which must be registers.
 @example
-negr        i     l         O1 = -O2
-notr        i  ui l  ul     O1 = ~O2
+negr     i     l         f  d  O1 = -O2
+notr     i  ui l  ul           O1 = ~O2
 @end example
 
 @item Compare instructions
-These accept three operands, of which the last can be an immediate
-value.  The last two operands are compared, and the first operand is
-set to either 0 or 1, according to whether the given condition was
-met or not.
+These accept three operands; again, the last can be an immediate
+value for integer data types.  The last two operands are compared,
+and the first operand is set to either 0 or 1, according to
+whether the given condition was met or not.
+
+The conditions given below are for the standard behavior of C,
+where the ``unordered'' comparison result is mapped to false.
 
 @example
-ltr/lti     i ui l  ul p     O1 = (O2 <  O3)
-ler/lei     i ui l  ul p     O1 = (O2 <= O3)
-gtr/gti     i ui l  ul p     O1 = (O2 >  O3)
-ger/gei     i ui l  ul p     O1 = (O2 >= O3)
-eqr/eqi     i ui l  ul p     O1 = (O2 == O3)
-ner/nei     i ui l  ul p     O1 = (O2 != O3)
+ltr      i  ui  l  ul  p  f  d  O1 = (O2 <  O3)
+lti      i  ui  l  ul  p        O1 = (O2 <  O3)
+ler      i  ui  l  ul  p  f  d  O1 = (O2 <= O3)
+lei      i  ui  l  ul  p        O1 = (O2 <= O3)
+gtr      i  ui  l  ul  p  f  d  O1 = (O2 >  O3)
+gti      i  ui  l  ul  p        O1 = (O2 >  O3)
+ger      i  ui  l  ul  p  f  d  O1 = (O2 >= O3)
+gei      i  ui  l  ul  p        O1 = (O2 >= O3)
+eqr      i  ui  l  ul  p  f  d  O1 = (O2 == O3)
+eqi      i  ui  l  ul  p        O1 = (O2 == O3)
+ner      i  ui  l  ul  p  f  d  O1 = (O2 != O3)
+nei      i  ui  l  ul  p        O1 = (O2 != O3)
+unltr                     f  d  O1 = !(O2 >= O3)
+unler                     f  d  O1 = !(O2 >  O3)
+ungtr                     f  d  O1 = !(O2 <= O3)
+unger                     f  d  O1 = !(O2 <  O3)
+uneqr                     f  d  O1 = !(O2 <  O3) && !(O2 >  O3)
+ltgtr                     f  d  O1 = !(O2 >= O3) || !(O2 <= O3)
+ordr                      f  d  O1 =  (O2 == O2) &&  (O3 == O3)
+unordr                    f  d  O1 =  (O2 != O2) ||  (O3 != O3)
 @end example
 
 @item Transfer operations
 These accept two operands; for @code{ext} both of them must be
 registers, while @code{mov} accepts an immediate value as the second
-operand. @code{ext} needs @strong{two} data type specifications, of
-which the first must be smaller in size than the second; for example
address@hidden is correct while @code{extr_ul_us} is not.
+operand.
+
+Unlike @code{movr} and @code{movi}, the other instructions are applied
+between operands of different data types, and they need @strong{two}
+data type specifications.  You can use @code{extr} to convert between
+integer data types, in which case the first must be smaller in size
+than the second; for example @code{extr_c_ui} is correct while
+@code{extr_ul_us} is not.  You can also use @code{extr} to convert
+an integer to a floating point value: the only available possibilities
+are @code{extr_i_f} and @code{extr_i_d}.  The other instructions
+convert a floating point value to an integer, so the possible
+suffixes are @code{_f_i} and @code{_d_i}.
+
 @example
-movr/movi               i  ui l  ul p   O1 = O2
-extr        c  uc s  us i  ui l  ul     O1 = address@hidden @code{movr} and 
@code{movi}, @code{extr} is applied between operands of different sizes.}
+movr                      i  ui  l  ul  p  f  d  O1 = O2
+movi                      i  ui  l  ul  p  f  d  O1 = O2
+extr        c  uc  s  us  i  ui  l  ul     f  d  O1 = O2
+roundr                    i                f  d  O1 = round(O2)
+truncr                    i                f  d  O1 = trunc(O2)
+floorr                    i                f  d  O1 = floor(O2)
+ceilr                     i                f  d  O1 = ceil(O2)
 @end example
 
+Note that the order of the arguments is @emph{destination first,
+source second} as for all other @lightning{} instructions, but
+the order of the types is always reversed with respect to that
+of the arguments: @address@hidden,
address@hidden  This happens for historical
+reasons.
+
 @item Network extensions
 These accept two operands, both of which must be registers; these
 two instructions actually perform the same task, yet they are
@@ -185,8 +252,10 @@ in both cases, the last can be either a register or an 
immediate
 value. Values are extended (with or without sign, according to
 the data type specification) to fit a whole register.
 @example
-ldr/ldi     c  uc s  us i  ui l  ul p   O1 = *O2
-ldxr/ldxi   c  uc s  us i  ui l  ul p   O1 = *(O2+O3)
+ldr     c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *O2
+ldi     c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *O2
+ldxr    c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *(O2+O3)
+ldxi    c  uc  s  us  i  ui  l  ul  p  f  d  O1 = *(O2+O3)
 @end example
 
 @item Store operations
@@ -194,8 +263,10 @@ ldxr/ldxi   c  uc s  us i  ui l  ul p   O1 = *(O2+O3)
 both cases, the first can be either a register or an immediate
 value. Values are sign-extended to fit a whole register.
 @example
-str/sti     c  uc s  us i  ui l  ul p   *O1 = O2
-stxr/stxi   c  uc s  us i  ui l  ul p   *(O1+O2) = O3
+str     c  uc  s  us  i  ui  l  ul  p  f  d  *O1 = O2
+sti     c  uc  s  us  i  ui  l  ul  p  f  d  *O1 = O2
+stxr    c  uc  s  us  i  ui  l  ul  p  f  d  *(O1+O2) = O3
+stxi    c  uc  s  us  i  ui  l  ul  p  f  d  *(O1+O2) = O3
 @end example
 
 @item Stack management
@@ -203,26 +274,27 @@ These accept a single register parameter.  These 
operations are not
 guaranteed to be efficient on all architectures.
 
 @example
-pushr                   i  ui l  ul p   @r{push address@hidden on the stack}
-popr                    i  ui l  ul p   @r{pop address@hidden off the stack}
+pushr                     i  ui  l  ul  p   @r{push }O1@r{ on the 
stack}
+popr                      i  ui  l  ul  p   @r{pop }O1@r{ off the 
stack}
 @end example
 
 @item Argument management
 These are:
 @example
-prepare     (not specified)
-pusharg     c  uc s  us i  ui l  ul p
-getarg      c  uc s  us i  ui l  ul p
-arg         c  uc s  us i  ui l  ul p
+prepare                   i                f  d
+pusharg     c  uc  s  us  i  ui  l  ul  p  f  d
+getarg      c  uc  s  us  i  ui  l  ul  p  f  d
+arg         c  uc  s  us  i  ui  l  ul  p  f  d
 @end example
 
 Of these, the first two are used by the caller, while the last two
 are used by the callee.  A code snippet that wants to call another
 procedure and has to pass registers must, in order: use the
 @code{prepare} instruction, giving the number of arguments to
-be passed to the procedure; use @code{pusharg} to push the arguments
address@hidden reverse order}; and use @code{calli} or @code{finish}
-(explained below) to perform the actual call.
+be passed to the procedure (once for each data type); use
+@code{pusharg} to push the arguments @strong{in reverse order};
+and use @code{calli} or @code{finish} (explained below) to
+perform the actual call.
 
 @code{arg} and @code{getarg} are used by the callee.
 @code{arg} is different from other instruction in that it does not
@@ -269,18 +341,36 @@ is to be used to compile forward branches as explained in
 destination of the branch and two operands to be compared; of these,
 the last can be either a register or an immediate.  They are:
 @example
-bltr/blti     i ui l  ul p    @r{if }O2 <  address@hidden goto }O1
-bler/blei     i ui l  ul p    @r{if }O2 <= address@hidden goto }O1
-bgtr/bgti     i ui l  ul p    @r{if }O2 >  address@hidden goto }O1
-bger/bgei     i ui l  ul p    @r{if }O2 >= address@hidden goto }O1
-beqr/beqi     i ui l  ul p    @r{if }O2 == address@hidden goto }O1
-bner/bnei     i ui l  ul p    @r{if }O2 != address@hidden goto }O1
-
-bmsr/bmsi     i ui l  ul      @r{if }O2 &  address@hidden goto }O1
-bmcr/bmci     i ui l  ul      @r{if }!(O2 & O3)@r{ goto address@hidden two 
mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask 
cleared}.}
-
-boaddr/boaddi i ui l  ul      O2 += address@hidden, goto address@hidden on 
overflow}
-bosubr/bosubi i ui l  ul      O2 -= address@hidden, goto address@hidden on 
overflow}
+bltr      i  ui  l  ul  p  f  d  @r{if }(O2 <  O3)@r{ goto }O1
+blti      i  ui  l  ul  p        @r{if }(O2 <  O3)@r{ goto }O1
+bler      i  ui  l  ul  p  f  d  @r{if }(O2 <= O3)@r{ goto }O1
+blei      i  ui  l  ul  p        @r{if }(O2 <= O3)@r{ goto }O1
+bgtr      i  ui  l  ul  p  f  d  @r{if }(O2 >  O3)@r{ goto }O1
+bgti      i  ui  l  ul  p        @r{if }(O2 >  O3)@r{ goto }O1
+bger      i  ui  l  ul  p  f  d  @r{if }(O2 >= O3)@r{ goto }O1
+bgei      i  ui  l  ul  p        @r{if }(O2 >= O3)@r{ goto }O1
+beqr      i  ui  l  ul  p  f  d  @r{if }(O2 == O3)@r{ goto }O1
+beqi      i  ui  l  ul  p        @r{if }(O2 == O3)@r{ goto }O1
+bner      i  ui  l  ul  p  f  d  @r{if }(O2 != O3)@r{ goto }O1
+bnei      i  ui  l  ul  p        @r{if }(O2 != O3)@r{ goto }O1
+
+bunltr                     f  d  @r{if }!(O2 >= O3)@r{ goto }O1
+bunler                     f  d  @r{if }!(O2 >  O3)@r{ goto }O1
+bungtr                     f  d  @r{if }!(O2 <= O3)@r{ goto }O1
+bunger                     f  d  @r{if }!(O2 <  O3)@r{ goto }O1
+buneqr                     f  d  @r{if }!(O2 <  O3) && !(O2 >  O3)@r{ goto }O1
+bltgtr                     f  d  @r{if }!(O2 >= O3) || !(O2 <= O3)@r{ goto }O1
+bordr                      f  d  @r{if } (O2 == O2) &&  (O3 == O3)@r{ goto }O1
+bunordr                    f  d  @r{if } (O2 != O2) ||  (O3 != O3)@r{ goto }O1
+
+bmsr      i ui l  ul             @r{if }O2 &  O3@r{ goto }O1
+bmsi      i ui l  ul             @r{if }O2 &  O3@r{ goto }O1
+bmcr      i ui l  ul             @r{if }!(O2 & O3)@r{ goto }O1
+bmci      i ui l  ul             @r{if }!(O2 & O3)@r{ goto }O1@footnote{These 
mnemonics mean, respectively, @dfn{branch if mask set} and @dfn{branch if mask 
cleared}.}
+boaddr    i ui l  ul             O2 += O3@r{, goto }O1@r{ on 
overflow}
+boaddi    i ui l  ul             O2 += O3@r{, goto }O1@r{ on 
overflow}
+bosubr    i ui l  ul             O2 -= O3@r{, goto }O1@r{ on 
overflow}
+bosubi    i ui l  ul             O2 -= O3@r{, goto }O1@r{ on 
overflow}
 @end example
 
 @item Jump and return operations
@@ -291,14 +381,16 @@ and the former must @strong{always} follow a 
@code{prepare}
 instruction.  Results are undefined when using function calls
 in a leaf function.
 @example
-calli     (not specified)            @r{function call to O1}
-finish    (not specified)            @r{function call to O1}
-jmpi/jmpr (not specified)            @r{unconditional jump to O1}
-prolog    (not specified)            @r{function prolog for O1 args}
-leaf      (not specified)            @r{the same for leaf functions}
-ret       (not specified)            @r{return from subroutine}
-retval    c  uc s  us i  ui l  ul p  @r{move return value}
-                                     @r{to register}
+calli     (not specified)                  @r{function call to O1}
+callr     (not specified)                  @r{function call to a register}
+finish    (not specified)                  @r{function call to O1}
+finishr   (not specified)                  @r{function call to a register}
+jmpi/jmpr (not specified)                  @r{unconditional jump to O1}
+prolog    (not specified)                  @r{function prolog for O1 args}
+leaf      (not specified)                  @r{the same for leaf functions}
+ret       (not specified)                  @r{return from subroutine}
+retval    c  uc s  us i  ui l  ul p  f  d  @r{move return value}
+                                           @r{to register}
 @end example
 
 Like branch instruction, @code{jmpi} also returns a value which is to
@@ -353,7 +445,7 @@ between parentheses, just like with every other @sc{cpp} 
macro.
 
 This small tutorial presents three examples:
 
-@ifset ISTEX
+@iftex
 @itemize @bullet
 @item
 The @code{incr} function found in @ref{The instruction set, ,
@@ -368,15 +460,15 @@ An RPN calculator.
 @item
 Fibonacci numbers
 @end itemize
-@end ifset
-@ifclear ISTEX
+@end iftex
+@ifnottex
 @menu
 * incr::             A function which increments a number by one
 * printf::           A simple function call to printf
 * RPN calculator::   A more complex example, an RPN calculator
 * Fibonacci::        Calculating Fibonacci numbers
 @end menu
-@end ifclear
+@end ifnottex
 
 @node incr
 @section A function which increments a number by one
@@ -931,8 +1023,23 @@ instruction; otherwise, it emits the delay instruction 
before the branch
 instruction.  The delay instruction must not depend on being executed
 before or after the branch.
 
address@hidden Floating-point
address@hidden Doing floating point computations
+Instead of @code{jit_patch}, you can use @code{jit_patch_at}, which
+takes two arguments: the first is the same as for @code{jit_patch}, and
+the second is the value to be patched in.  In other words, these two
+invocations have the same effect:
+
+@example
+  jit_patch (jump_pc);
+  jit_patch_at (jump_pc, jit_get_ip ());
+@end example
+
+Dual to branches and @code{jit_patch_at} are @code{jit_movi_p}
+and @code{jit_patch_movi}, which can also be used to implement
+forward references.  @code{jit_movi_p} is carefully implemented
+to use an encoding that is as long as possible, so that it can
+always be patched; in addition, like branches, it will return
+an address which is then passed to @code{jit_patch_movi}.  The
+usage of @code{jit_patch_movi} is similar to @code{jit_patch_at}.
 
 @node Reentrancy
 @chapter Re-entrant usage of @lightning{}
@@ -1040,6 +1147,22 @@ extern void _opt_muli_i(struct jit_state *, int, int, 
int);
 @end example
 
 
+@node Registers
+@section Accessing the whole register file
+
+As mentioned earlier in this chapter, all @lightning{} back-ends
+are guaranteed to have at least six integer registers and six
+floating-point registers, but many back-ends will have more.
+
+To access the entire register files, you can use the
+@code{JIT_R}, @code{JIT_V} and @code{JIT_FPR} macros.  They
+accept a parameter that identifies the register number, which
+must be strictly less than @code{JIT_R_NUM}, @code{JIT_V_NUM}
+and @code{JIT_FPR_NUM} respectively; the number need not be
+constant.  Of course, expressions like @code{JIT_R0} and
+@code{JIT_R(0)} denote the same register, and likewise for
+integer callee-saved, or floating-point, registers.
+
 @node Autoconf support
 @chapter Using @code{autoconf} with @lightning{}
 
diff --git a/lightning-inst.h b/lightning-inst.h
index 5cf1af2..96f41a4 100644
--- a/lightning-inst.h
+++ b/lightning-inst.h
@@ -39,7 +39,6 @@ extern "C" {
 #endif
 
 #include <lightning/asm-common.h>
-#include <lightning/funcs-common.h>
 
 #ifndef LIGHTNING_DEBUG
 #include <lightning/asm.h>
@@ -48,11 +47,9 @@ extern "C" {
 #include <lightning/core.h>
 #include <lightning/core-common.h>
 #include <lightning/funcs.h>
+#include <lightning/funcs-common.h>
 #include <lightning/fp.h>
-
-#ifdef jit_cmp
 #include <lightning/fp-common.h>
-#endif
 
 #ifndef JIT_R0
 #error GNU lightning does not support the current target
diff --git a/lightning.h.in b/lightning.h.in
index fd1a4d7..78367f9 100644
--- a/lightning.h.in
+++ b/lightning.h.in
@@ -62,7 +62,6 @@ extern "C" {
 #endif
 
 #include <lightning/asm-common.h>
-#include <lightning/funcs-common.h>
 
 #ifndef LIGHTNING_DEBUG
 #include <lightning/asm.h>
@@ -71,11 +70,9 @@ extern "C" {
 #include <lightning/core.h>
 #include <lightning/core-common.h>
 #include <lightning/funcs.h>
+#include <lightning/funcs-common.h>
 #include <lightning/fp.h>
-
-#ifdef jit_cmp
 #include <lightning/fp-common.h>
-#endif
 
 #ifdef LIGHTNING_DISASSEMBLE
 extern void disassemble(FILE *stream, char *from, char *to);
diff --git a/lightning/Makefile.am b/lightning/Makefile.am
index d032e2a..d02dd29 100644
--- a/lightning/Makefile.am
+++ b/lightning/Makefile.am
@@ -12,5 +12,5 @@ dist_pkgdata_DATA = Makefile.am
 nobase_dist_lightning_HEADERS = $(LIGHTNING_FILES)
 nodist_lightning_HEADERS = asm.h core.h funcs.h fp.h 
 else
-dist_noinst_HEADERS = $(LIGHTNING_FILES) lightning.h
+dist_noinst_HEADERS = $(LIGHTNING_FILES)
 endif
diff --git a/lightning/asm-common.h b/lightning/asm-common.h
index b0a1194..42c8814 100644
--- a/lightning/asm-common.h
+++ b/lightning/asm-common.h
@@ -88,12 +88,11 @@ typedef unsigned int        _ui;
 typedef long           _sl;
 typedef unsigned long  _ul;
 
-#define _jit_UC(X)             ((_uc  )(X))
-#define _jit_US(X)             ((_us  )(X))
-#define _jit_UI(X)             ((_ui  )(X))
-#define _jit_SL(X)             ((_sl  )(X))
-#define _jit_UL(X)             ((_ul  )(X))
-
+#define _jit_UC(X)     ((_uc  )(X))
+#define _jit_US(X)     ((_us  )(X))
+#define _jit_UI(X)     ((_ui  )(X))
+#define _jit_SL(X)     ((_sl  )(X))
+#define _jit_UL(X)     ((_ul  )(X))
 # define _PUC(X)       ((_uc *)(X))
 # define _PUS(X)       ((_us *)(X))
 # define _PUI(X)       ((_ui *)(X))
@@ -104,6 +103,7 @@ typedef unsigned long       _ul;
 #define _jit_W(W)         _jit_UL(((*_jit.x.us_pc++)= _jit_US((W)&0xffff)))
 #define _jit_I(I)         _jit_UL(((*_jit.x.ui_pc++)= _jit_UI((I)       )))
 #define _jit_L(L)         _jit_UL(((*_jit.x.ul_pc++)= _jit_UL((L)       )))
+#define _jit_I_noinc(I)   _jit_UL(((*_jit.x.ui_pc)=   _jit_UI((I)       )))
 
 #define _MASK(N)       ((unsigned)((1<<(N)))-1)
 #define _siP(N,I)      (!((((unsigned)(I))^(((unsigned)(I))<<1))&~_MASK(N)))
diff --git a/lightning/core-common.h b/lightning/core-common.h
index 23b4747..9310ee2 100644
--- a/lightning/core-common.h
+++ b/lightning/core-common.h
@@ -45,14 +45,24 @@ typedef struct {
   struct jit_local_state jitl;
 } jit_state;
 
+#ifdef jit_init
+static jit_state                       _jit = jit_init ();
+#else
 static jit_state                       _jit;
+#endif
 
 #define JIT_NOREG                      (-1)
+#define JIT_R0                         JIT_R(0)
+#define JIT_R1                         JIT_R(1)
+#define JIT_R2                         JIT_R(2)
+#define JIT_V0                         JIT_V(0)
+#define JIT_V1                         JIT_V(1)
+#define JIT_V2                         JIT_V(2)
 
 #define _jitl                          _jit.jitl
 
 #define        jit_get_ip()                    (*(jit_code *) &_jit.x.pc)
-#define        jit_set_ip(ptr)                 (_jit.x.pc = (jit_insn *) ptr, 
jit_get_ip())
+#define        jit_set_ip(ptr)                 (_jit.x.pc = (ptr), jit_get_ip 
())
 #define        jit_get_label()                 (_jit.x.pc)
 #define        jit_forward()                   (_jit.x.pc)
 
@@ -138,16 +148,24 @@ typedef union jit_code {
 #define jit_subci_ul(d, rs, is)                jit_subci_l((d), (rs), (is))    
 #define jit_subcr_ul(d, s1, s2)                jit_subcr_l((d), (s1), (s2))
 #define jit_subxi_ui(d, rs, is)                jit_subxi_i((d), (rs), (is))    
+#define jit_subxi_ul(d, rs, is)                jit_subxi_l((d), (rs), (is))    
 #define jit_subxr_ui(d, s1, s2)                jit_subxr_i((d), (s1), (s2))
+#define jit_subxr_ul(d, s1, s2)                jit_subxr_l((d), (s1), (s2))    /* forwards to the _l variant, matching jit_subxi_ul */
 #define jit_xori_ul(d, rs, is)         jit_xori_l((d), (rs), (is))     
 #define jit_xorr_ul(d, s1, s2)         jit_xorr_l((d), (s1), (s2))
 
 #define jit_addr_p(d, s1, s2)          jit_addr_ul((d), (s1),        (s2))
 #define jit_addi_p(d, rs, is)          jit_addi_ul((d), (rs), (long) (is))
 #define jit_movr_p(d, rs)              jit_movr_ul((d),              (rs))
-#define jit_movi_p(d, is)              jit_movi_ul((d),       (long) (is))
 #define jit_subr_p(d, s1, s2)          jit_subr_ul((d), (s1),        (s2))
 #define jit_subi_p(d, rs, is)          jit_subi_ul((d), (rs), (long) (is))
+#define jit_rsbi_p(d, rs, is)          jit_rsbi_ul((d), (rs), (long) (is))
+
+#ifndef jit_movi_p
+#define jit_movi_p(d, is)              (jit_movi_ul((d),       (long) (is)), 
_jit.x.pc)
+#endif
+
+#define jit_patch(pv)                  jit_patch_at ((pv), (_jit.x.pc))
 
 #ifndef jit_addci_i
 #define jit_addci_i(d, rs, is)         jit_addi_i((d), (rs), (is))     
@@ -190,8 +208,11 @@ typedef union jit_code {
 #define jit_subi_l(d, rs, is)          jit_addi_l((d), (rs), -(is))
 #define jit_subci_i(d, rs, is)         jit_addci_i((d), (rs), -(is))
 #define jit_subci_l(d, rs, is)         jit_addci_l((d), (rs), -(is))
+#define jit_rsbr_f(d, s1, s2)          jit_subr_f((d), (s2), (s1))
+#define jit_rsbr_d(d, s1, s2)          jit_subr_d((d), (s2), (s1))
 #define jit_rsbr_i(d, s1, s2)          jit_subr_i((d), (s2), (s1))
 #define jit_rsbr_l(d, s1, s2)          jit_subr_l((d), (s2), (s1))
+#define jit_rsbr_p(d, s1, s2)          jit_subr_p((d), (s2), (s1))
 
 /* Unary */
 #define jit_notr_c(d, rs)              jit_xori_c((d), (rs), 255)
@@ -216,23 +237,43 @@ typedef union jit_code {
 #define jit_extr_s_i(d, rs)            (jit_lshi_i((d), (rs), 16), 
jit_rshi_i((d), (d), 16))
 #endif
 
+#ifdef jit_addi_l /* sizeof(long) != sizeof(int) */
+#ifndef jit_extr_c_l
+#define jit_extr_c_l(d, rs)            (jit_lshi_l((d), (rs), 56), 
jit_rshi_l((d), (d), 56))
+#endif
+#ifndef jit_extr_s_l
+#define jit_extr_s_l(d, rs)            (jit_lshi_l((d), (rs), 48), 
jit_rshi_l((d), (d), 48))
+#endif
+#ifndef jit_extr_i_l
+#define jit_extr_i_l(d, rs)            (jit_lshi_l((d), (rs), 32), 
jit_rshi_l((d), (d), 32))
+#endif
+#ifndef jit_extr_c_ul
+#define jit_extr_c_ul(d, rs)           jit_andi_l((d), (rs), 0xFF)
+#endif
+#ifndef jit_extr_s_ul
+#define jit_extr_s_ul(d, rs)           jit_andi_l((d), (rs), 0xFFFF)
+#endif
+#ifndef jit_extr_i_ul
+#define jit_extr_i_ul(d, rs)           jit_andi_l((d), (rs), 0xFFFFFFFFUL)
+#endif
+#endif
 
+#define jit_extr_c_s(d, rs)            jit_extr_c_i((d), (rs))
+#define jit_extr_c_us(d, rs)           jit_extr_c_ui((d), (rs))
+#define jit_extr_uc_s(d, rs)           jit_extr_uc_i((d), (rs))
+#define jit_extr_uc_us(d, rs)          jit_extr_uc_ui((d), (rs))
 #define jit_extr_uc_i(d, rs)           jit_extr_c_ui((d), (rs))
 #define jit_extr_uc_ui(d, rs)          jit_extr_c_ui((d), (rs))
 #define jit_extr_us_i(d, rs)           jit_extr_s_ui((d), (rs))
 #define jit_extr_us_ui(d, rs)          jit_extr_s_ui((d), (rs))
-
-#ifndef jit_extr_i_ul
-#ifdef jit_addi_l /* sizeof(long) != sizeof(int) */
-#define jit_extr_i_ul(d, rs)           jit_andi_ui((d), (rs), 0xFF)
-#else /* sizeof(long) == sizeof(int) */
-#define jit_extr_i_ul(d, rs)           jit_movr_i(d, rs)
-#endif /* sizeof(long) == sizeof(int) */
-#endif
-
+#define jit_extr_uc_l(d, rs)           jit_extr_c_ul((d), (rs))
+#define jit_extr_uc_ul(d, rs)          jit_extr_c_ul((d), (rs))
+#define jit_extr_us_l(d, rs)           jit_extr_s_ul((d), (rs))
+#define jit_extr_us_ul(d, rs)          jit_extr_s_ul((d), (rs))
 #define jit_extr_ui_l(d, rs)           jit_extr_i_ul((d), (rs))
 #define jit_extr_ui_ul(d, rs)          jit_extr_i_ul((d), (rs))
 
+
 /* NTOH/HTON is not mandatory for big endian architectures */
 #ifndef jit_ntoh_ui /* big endian */
 #define jit_ntoh_ui(d, rs)             ((d) == (rs) ? (void)0 : 
jit_movr_i((d), (rs)))
@@ -251,7 +292,7 @@ typedef union jit_code {
 #define jit_pushr_p(rs)                        jit_pushr_ul(rs)
 #define jit_popr_p(rs)                 jit_popr_ul(rs)         
 
-#define jit_prepare(nint)              jitfp_prepare((nint), 0, 0)
+#define jit_prepare(nint)              jit_prepare_i((nint))
 #define jit_pusharg_c(rs)              jit_pusharg_i(rs)
 #define jit_pusharg_s(rs)              jit_pusharg_i(rs)
 #define jit_pusharg_uc(rs)             jit_pusharg_i(rs)
@@ -388,10 +429,17 @@ typedef union jit_code {
 #define jit_retval_c(rd)               jit_retval_i((rd))
 #define jit_retval_s(rd)               jit_retval_i((rd))
 
+/* This was a bug, but we keep it.  */
+#define jit_retval(rd)                 jit_retval_i ((rd))
+
 #ifndef jit_finish
 #define jit_finish(sub)                        jit_calli(sub)
 #endif
 
+#ifndef jit_finishr
+#define jit_finishr(reg)               jit_callr(reg)
+#endif
+
 #ifndef jit_prolog
 #define jit_prolog(numargs)
 #endif
@@ -412,15 +460,15 @@ typedef union jit_code {
 #define jit_getarg_ul(reg, ofs)                jit_extr_uc_ul((reg), (ofs))
 #define jit_getarg_us(reg, ofs)                jit_extr_us_ul((reg), (ofs))
 #else
-#define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_FP, (ofs));
-#define jit_getarg_uc(reg, ofs)        jit_ldxi_uc((reg), JIT_FP, (ofs));
-#define jit_getarg_s(reg, ofs) jit_ldxi_s((reg), JIT_FP, (ofs));
-#define jit_getarg_us(reg, ofs)        jit_ldxi_us((reg), JIT_FP, (ofs));
-#define jit_getarg_i(reg, ofs) jit_ldxi_i((reg), JIT_FP, (ofs));
-#define jit_getarg_ui(reg, ofs)        jit_ldxi_ui((reg), JIT_FP, (ofs));
-#define jit_getarg_l(reg, ofs) jit_ldxi_l((reg), JIT_FP, (ofs));
-#define jit_getarg_ul(reg, ofs)        jit_ldxi_ul((reg), JIT_FP, (ofs));
-#define jit_getarg_p(reg, ofs) jit_ldxi_p((reg), JIT_FP, (ofs));
+#define jit_getarg_c(reg, ofs)         jit_ldxi_c((reg), JIT_FP, (ofs))        /* no trailing ';' in these macros: the caller supplies it */
+#define jit_getarg_uc(reg, ofs)                jit_ldxi_uc((reg), JIT_FP, (ofs))
+#define jit_getarg_s(reg, ofs)         jit_ldxi_s((reg), JIT_FP, (ofs))
+#define jit_getarg_us(reg, ofs)                jit_ldxi_us((reg), JIT_FP, (ofs))
+#define jit_getarg_i(reg, ofs)         jit_ldxi_i((reg), JIT_FP, (ofs))
+#define jit_getarg_ui(reg, ofs)                jit_ldxi_ui((reg), JIT_FP, (ofs))
+#define jit_getarg_l(reg, ofs)         jit_ldxi_l((reg), JIT_FP, (ofs))
+#define jit_getarg_ul(reg, ofs)                jit_ldxi_ul((reg), JIT_FP, (ofs))
+#define jit_getarg_p(reg, ofs)         jit_ldxi_p((reg), JIT_FP, (ofs))
 #endif
 #endif
 
@@ -474,6 +522,14 @@ typedef union jit_code {
 #define jit_rshi_ul(d, rs, is)         jit_rshi_ui((d), (rs), (is))    
 #define jit_rshr_ul(d, s1, s2)         jit_rshr_ui((d), (s1), (s2))
 
+/* Sign/Zero extension */
+#define jit_extr_c_l(d, rs)            jit_extr_c_i(d, rs)
+#define jit_extr_c_ul(d, rs)           jit_extr_c_ui(d, rs)
+#define jit_extr_s_l(d, rs)            jit_extr_s_i(d, rs)
+#define jit_extr_s_ul(d, rs)           jit_extr_s_ui(d, rs)
+#define jit_extr_i_l(d, rs)            jit_movr_i(d, rs)
+#define jit_extr_i_ul(d, rs)           jit_movr_i(d, rs)
+
 /* Unary */
 #define jit_movi_l(d, rs)              jit_movi_i((d), (rs))
 #define jit_movr_l(d, rs)              jit_movr_i((d), (rs))
diff --git a/lightning/fp-common.h b/lightning/fp-common.h
index de25fbb..907fdc4 100644
--- a/lightning/fp-common.h
+++ b/lightning/fp-common.h
@@ -29,232 +29,58 @@
  *
  ***********************************************************************/
 
-struct jit_fp {
-  char kind;
-  char  subkind;
-  union {
-    struct {
-      int   displ;
-      char  reg1;
-      char  reg2;
-    } addr;
-    union {
-      double number;
-      long   split[sizeof(double) / sizeof(long)];
-    } imm;
-    struct {
-      struct jit_fp *lhs, *rhs;
-    } ops;
-  } d;
-};
+#define JIT_FPR0                       JIT_FPR(0)
+#define JIT_FPR1                       JIT_FPR(1)
+#define JIT_FPR2                       JIT_FPR(2)
+#define JIT_FPR3                       JIT_FPR(3)
+#define JIT_FPR4                       JIT_FPR(4)
+#define JIT_FPR5                       JIT_FPR(5)
 
-#ifdef jit_trunc
-
-enum { JIT_NULL,                                               /* unused */
-        
-        JIT_CMP, JIT_FLOOR, JIT_CEIL, JIT_ROUND, JIT_TRUNC,    /* integer */
-       
-       JIT_XI, JIT_ADD, JIT_XR, JIT_SUB,                       /* subkinds */
-       JIT_I,  JIT_MUL, JIT_R,  JIT_DIV,
-       JIT_INT,
-       
-       JIT_ABS, JIT_SIN, JIT_COS, JIT_TAN, JIT_ATN,            /* functions */
-       JIT_EXP, JIT_LOG, JIT_NEG, JIT_SQRT,
-       
-       JIT_OP,  JIT_FN,  JIT_LD,  JIT_IMM };                   /* kinds */
-
-/* Declarations */
-
-static void _jit_emit(jit_state *, struct jit_fp *,
-                     int, int, int, int) JIT_UNUSED;
-static struct jit_fp *_jit_op(struct jit_fp *, int, 
-                             struct jit_fp *, struct jit_fp *) JIT_UNUSED;
-static struct jit_fp *_jit_ld(struct jit_fp *, int, 
-                             int, int) JIT_UNUSED;
-static struct jit_fp *_jit_fn(struct jit_fp *, int, 
-                             struct jit_fp *) JIT_UNUSED;
-static struct jit_fp *_jit_imm(struct jit_fp *, double) JIT_UNUSED;
-
-/* Internal function to walk the tree */
-
-void
-_jit_emit(jit_state *jit, struct jit_fp *head,
-         int store_kind, int store1, int store2, int reg0)
-{
-#define _jit (*jit)
-  switch (head->kind) {
-    case JIT_OP:
-      _jit_emit(jit, head->d.ops.lhs, JIT_NULL, 0, 0, reg0);
-      _jit_emit(jit, head->d.ops.rhs, JIT_NULL, 0, 0, reg0 + 1);
-      switch (head->subkind) {
-        case JIT_ADD: jit_add_two(reg0); break;
-        case JIT_SUB: jit_sub_two(reg0); break;
-        case JIT_MUL: jit_mul_two(reg0); break;
-        case JIT_DIV: jit_div_two(reg0); break;
-      }
-      break;
-
-    case JIT_IMM:
-#ifdef JIT_LONG_IS_INT
-      jit_fpimm(reg0, head->d.imm.split[0], head->d.imm.split[1]);
-#else
-      jit_fpimm(reg0, head->d.imm.split[0]);
-#endif
-      break;
-
-    case JIT_FN:
-      _jit_emit(jit, head->d.ops.lhs, JIT_NULL, 0, 0, reg0);
-      switch (head->subkind) {
-       case JIT_ABS: jit_abs(reg0); break;
-       case JIT_NEG: jit_neg(reg0); break;
-#ifdef JIT_TRANSCENDENTAL
-       case JIT_SIN: jit_sin(reg0); break;
-       case JIT_SQRT: jit_sqrt(reg0); break;
-       case JIT_COS: jit_cos(reg0); break;
-       case JIT_TAN: jit_tan(reg0); break;
-       case JIT_ATN: jit_atn(reg0); break;
-       case JIT_EXP: jit_exp(reg0); break;
-       case JIT_LOG: jit_log(reg0); break;
-#endif
-      }
-      break;
-
-    case JIT_LD:
-      switch (head->subkind) {
-        case JIT_INT:    jit_exti_d(reg0, head->d.addr.reg1); break;
-        case JIT_XI:     jit_ldxi_f(reg0, head->d.addr.reg1, 
head->d.addr.displ); break;
-        case JIT_XR:     jit_ldxr_f(reg0, head->d.addr.reg1, 
head->d.addr.reg2);  break;
-        case JIT_XI | 1: jit_ldxi_d(reg0, head->d.addr.reg1, 
head->d.addr.displ); break;
-        case JIT_XR | 1: jit_ldxr_d(reg0, head->d.addr.reg1, 
head->d.addr.reg2);  break;
-#ifndef JIT_RZERO
-        case JIT_I:      jit_ldi_f(reg0, head->d.addr.displ); break;
-        case JIT_R:      jit_ldr_f(reg0, head->d.addr.reg1);  break;
-        case JIT_I | 1:  jit_ldi_d(reg0, head->d.addr.displ); break;
-        case JIT_R | 1:  jit_ldr_d(reg0, head->d.addr.reg1);  break;
-#endif
-      }
-      break;
-  }
-
-  switch (store_kind) {
-    case JIT_FLOOR:  jit_floor(store1, reg0);           break;
-    case JIT_CEIL:   jit_ceil(store1, reg0);            break;
-    case JIT_TRUNC:  jit_trunc(store1, reg0);           break;
-    case JIT_ROUND:  jit_round(store1, reg0);           break;
-    case JIT_CMP:    jit_cmp(store1, store2, reg0);     break;
-    case JIT_XI:     jit_stxi_f(store2, store1, reg0);   break;
-    case JIT_XR:     jit_stxr_f(store2, store1, reg0);   break;
-    case JIT_XI | 1: jit_stxi_d(store2, store1, reg0);   break;
-    case JIT_XR | 1: jit_stxr_d(store2, store1, reg0);   break;
-#ifndef JIT_RZERO
-    case JIT_I:      jit_sti_f(store2, reg0);           break;
-    case JIT_R:      jit_str_f(store2, reg0);           break;
-    case JIT_I | 1:  jit_sti_d(store2, reg0);           break;
-    case JIT_R | 1:  jit_str_d(store2, reg0);           break;
+#ifdef JIT_RZERO
+#ifndef jit_ldi_f
+#define jit_ldi_f(rd, is)               jit_ldxi_f((rd), JIT_RZERO, (is))
+#define jit_sti_f(id, rs)               jit_stxi_f((id), JIT_RZERO, (rs))
+#define jit_ldi_d(rd, is)               jit_ldxi_d((rd), JIT_RZERO, (is))
+#define jit_sti_d(id, rs)               jit_stxi_d((id), JIT_RZERO, (rs))
 #endif
-    case JIT_NULL:   break;
-  }
-#undef _jit
-}
-
-/* Internal functions to build the tree */
-
-struct jit_fp *
-_jit_op(struct jit_fp *where, int which, 
-       struct jit_fp *op1, struct jit_fp *op2)
-{
-  where->kind = JIT_OP;
-  where->subkind = which;
-  where->d.ops.lhs = op1;
-  where->d.ops.rhs = op2;
-  return (where);
-}
-
-struct jit_fp *
-_jit_ld(struct jit_fp *where, int which, int op1, int op2)
-{
-  where->kind = JIT_LD;
-  where->subkind = which;
-  switch (which & ~1) {
-    case JIT_XI: where->d.addr.reg1 = op1;
-    case JIT_I:  where->d.addr.displ = op2;    break;
-    case JIT_XR: where->d.addr.reg2  = op2;
-    case JIT_INT:
-    case JIT_R:  where->d.addr.reg1  = op1;    break;
-  }
-  return (where);
-}
-
-struct jit_fp *
-_jit_fn(struct jit_fp *where, int which, struct jit_fp *op1)
-{
-  where->kind = JIT_FN;
-  where->subkind = which;
-  where->d.ops.lhs = op1;
-  return (where);
-}
-
-struct jit_fp *
-_jit_imm(struct jit_fp *where, double number)
-{
-  where->kind = JIT_IMM;
-  where->d.imm.number = number;
-  return (where);
-}
 
-#define jitfp_begin(buf)               (_jit.fp = (buf), --_jit.fp)
-#define jitfp_add(op1, op2)            _jit_op(++_jit.fp, JIT_ADD, (op1), 
(op2))
-#define jitfp_sub(op1, op2)            _jit_op(++_jit.fp, JIT_SUB, (op1), 
(op2))
-#define jitfp_mul(op1, op2)            _jit_op(++_jit.fp, JIT_MUL, (op1), 
(op2))
-#define jitfp_div(op1, op2)            _jit_op(++_jit.fp, JIT_DIV, (op1), 
(op2))
-#define jitfp_imm(imm)                 _jit_imm(++_jit.fp, (imm))
-#define jitfp_exti_d(reg1)             _jit_ld(++_jit.fp, JIT_INT, (reg1), 0)
-#define jitfp_ldxi_f(reg1, imm)                _jit_ld(++_jit.fp, JIT_XI, 
(reg1), (long)(imm))
-#define jitfp_ldxr_f(reg1, reg2)       _jit_ld(++_jit.fp, JIT_XR, (reg1), 
(reg2))
-#define jitfp_ldxi_d(reg1, imm)                _jit_ld(++_jit.fp, JIT_XI | 1, 
(reg1), (long)(imm))
-#define jitfp_ldxr_d(reg1, reg2)       _jit_ld(++_jit.fp, JIT_XR | 1, (reg1), 
(reg2))
-#define jitfp_abs(op1)                 _jit_fn(++_jit.fp, JIT_ABS, (op1))
-#define jitfp_sqrt(op1)                        _jit_fn(++_jit.fp, JIT_SQRT, 
(op1))
-#define jitfp_neg(op1)                 _jit_fn(++_jit.fp, JIT_NEG, (op1))
-#define jitfp_stxi_f(imm, reg1, op1)   _jit_emit(&_jit, (op1), JIT_XI, (reg1), 
(long)(imm), 0)
-#define jitfp_stxr_f(reg1, reg2, op1)  _jit_emit(&_jit, (op1), JIT_XR, (reg1), 
(reg2), 0)
-#define jitfp_stxi_d(imm, reg1, op1)   _jit_emit(&_jit, (op1), JIT_XI | 1, 
(reg1), (long)(imm), 0)
-#define jitfp_stxr_d(reg1, reg2, op1)  _jit_emit(&_jit, (op1), JIT_XR | 1, 
(reg1), (reg2), 0)
-#define jitfp_cmp(regle, regge, op1)   _jit_emit(&_jit, (op1), JIT_CMP, regle, 
regge, 0)
-#define jitfp_floor(reg1, op1)         _jit_emit(&_jit, (op1), JIT_FLOOR, 
reg1, 0, 0)
-#define jitfp_ceil(reg1, op1)          _jit_emit(&_jit, (op1), JIT_CEIL, reg1, 
0, 0)
-#define jitfp_trunc(reg1, op1)         _jit_emit(&_jit, (op1), JIT_TRUNC, 
reg1, 0, 0)
-#define jitfp_round(reg1, op1)         _jit_emit(&_jit, (op1), JIT_ROUND, 
reg1, 0, 0)
-
-
-#ifdef JIT_TRANSCENDENTAL
-#define jitfp_sin(op1)                 _jit_fn(++_jit.fp, JIT_SIN, (op1))
-#define jitfp_cos(op1)                 _jit_fn(++_jit.fp, JIT_COS, (op1))
-#define jitfp_tan(op1)                 _jit_fn(++_jit.fp, JIT_TAN, (op1))
-#define jitfp_atn(op1)                 _jit_fn(++_jit.fp, JIT_ATN, (op1))
-#define jitfp_exp(op1)                 _jit_fn(++_jit.fp, JIT_EXP, (op1))
-#define jitfp_log(op1)                 _jit_fn(++_jit.fp, JIT_LOG, (op1))
+#ifndef jit_ldr_f
+#define jit_ldr_f(rd, rs)               jit_ldxr_f((rd), JIT_RZERO, (rs))
+#define jit_str_f(rd, rs)               jit_stxr_f((rd), JIT_RZERO, (rs))
+#define jit_ldr_d(rd, rs)               jit_ldxr_d((rd), JIT_RZERO, (rs))
+#define jit_str_d(rd, rs)               jit_stxr_d((rd), JIT_RZERO, (rs))
 #endif
-
-#ifdef JIT_RZERO
-#define jitfp_ldi_f(imm)               _jit_ld(++_jit.fp, JIT_XI, JIT_RZERO, 
(long)(imm))
-#define jitfp_ldr_f(reg1)              _jit_ld(++_jit.fp, JIT_XR, JIT_RZERO, 
(reg1))
-#define jitfp_ldi_d(imm)               _jit_ld(++_jit.fp, JIT_XI | 1, 
JIT_RZERO, (long)(imm))
-#define jitfp_ldr_d(reg1)              _jit_ld(++_jit.fp, JIT_XR | 1, 
JIT_RZERO, (reg1))
-#define jitfp_sti_f(imm, op1)          _jit_emit(&_jit, (op1), JIT_XI, 
JIT_RZERO, (long)(imm), 0)
-#define jitfp_str_f(reg1, op1)         _jit_emit(&_jit, (op1), JIT_XR, 
JIT_RZERO, (reg1), 0)
-#define jitfp_sti_d(imm, op1)          _jit_emit(&_jit, (op1), JIT_XI | 1, 
JIT_RZERO, (long)(imm), 0)
-#define jitfp_str_d(reg1, op1)         _jit_emit(&_jit, (op1), JIT_XR | 1, 
JIT_RZERO, (reg1), 0)
-#else
-#define jitfp_ldi_f(imm)               _jit_ld(++_jit.fp, JIT_I, 0, 
(long)(imm))
-#define jitfp_ldr_f(reg1)              _jit_ld(++_jit.fp, JIT_R, (reg1), 0)
-#define jitfp_ldi_d(imm)               _jit_ld(++_jit.fp, JIT_I | 1, 0, 
(long)(imm))
-#define jitfp_ldr_d(reg1)              _jit_ld(++_jit.fp, JIT_R | 1, (reg1), 0)
-#define jitfp_sti_f(imm, op1)          _jit_emit(&_jit, (op1), JIT_I, 0, 
(long)(imm), 0)
-#define jitfp_str_f(reg1, op1)         _jit_emit(&_jit, (op1), JIT_R, 0, 
(reg1), 0)
-#define jitfp_sti_d(imm, op1)          _jit_emit(&_jit, (op1), JIT_I | 1, 0, 
(long)(imm), 0)
-#define jitfp_str_d(reg1, op1)         _jit_emit(&_jit, (op1), JIT_R | 1, 0, 
(reg1), 0)
 #endif
 
-
+#ifndef jit_addr_f
+#define jit_addr_f(rd,s1,s2)           jit_addr_d(rd,s1,s2)
+#define jit_subr_f(rd,s1,s2)           jit_subr_d(rd,s1,s2)
+#define jit_mulr_f(rd,s1,s2)           jit_mulr_d(rd,s1,s2)
+#define jit_divr_f(rd,s1,s2)           jit_divr_d(rd,s1,s2)
+#define jit_movr_f(rd,rs)              jit_movr_d(rd,rs)
+#define jit_abs_f(rd,rs)               jit_abs_d(rd,rs)
+#define jit_negr_f(rd,rs)              jit_negr_d(rd,rs)
+#define jit_sqrt_f(rd,rs)              jit_sqrt_d(rd,rs)
+#define jit_extr_f_d(rs, rd)
+#define jit_extr_d_f(rs, rd)
+#define jit_extr_i_f(rd, rs)           jit_extr_i_d(rd, rs)
+#define jit_roundr_f_i(rd, rs)         jit_roundr_d_i(rd, rs)
+#define jit_floorr_f_i(rd, rs)         jit_floorr_d_i(rd, rs)
+#define jit_ceilr_f_i(rd, rs)          jit_ceilr_d_i(rd, rs)
+#define jit_truncr_f_i(rd, rs)         jit_truncr_d_i(rd, rs)
+#define jit_ltr_f(d, s1, s2)           jit_ltr_d(d, s1, s2)
+#define jit_ler_f(d, s1, s2)           jit_ler_d(d, s1, s2)
+#define jit_eqr_f(d, s1, s2)           jit_eqr_d(d, s1, s2)
+#define jit_ner_f(d, s1, s2)           jit_ner_d(d, s1, s2)
+#define jit_ger_f(d, s1, s2)           jit_ger_d(d, s1, s2)
+#define jit_gtr_f(d, s1, s2)           jit_gtr_d(d, s1, s2)
+#define jit_unltr_f(d, s1, s2)         jit_unltr_d(d, s1, s2)
+#define jit_unler_f(d, s1, s2)         jit_unler_d(d, s1, s2)
+#define jit_uneqr_f(d, s1, s2)         jit_uneqr_d(d, s1, s2)
+#define jit_ltgtr_f(d, s1, s2)         jit_ltgtr_d(d, s1, s2)
+#define jit_unger_f(d, s1, s2)         jit_unger_d(d, s1, s2)
+#define jit_ungtr_f(d, s1, s2)         jit_ungtr_d(d, s1, s2)
+#define jit_ordr_f(d, s1, s2)          jit_ordr_d(d, s1, s2)
+#define jit_unordr_f(d, s1, s2)                jit_unordr_d(d, s1, s2)
+#define jit_retval_f(rs)               jit_retval_d(rs)
 #endif
diff --git a/lightning/funcs-common.h b/lightning/funcs-common.h
index dafae1f..278c544 100644
--- a/lightning/funcs-common.h
+++ b/lightning/funcs-common.h
@@ -45,4 +45,10 @@ jit_fail(const char *msg, const char *file, int line, const 
char *function)
   abort();
 }
 
+
+#ifndef jit_start_pfx
+#define jit_start_pfx()                 ( (jit_insn*)0x4)
+#define jit_end_pfx()                   ( (jit_insn*)0x0)
+#endif
+
 #endif /* __lightning_funcs_common_h */
diff --git a/lightning/i386/asm.h b/lightning/i386/asm.h
index d805716..fcc364c 100644
--- a/lightning/i386/asm.h
+++ b/lightning/i386/asm.h
@@ -43,6 +43,7 @@
 
 typedef _uc            jit_insn;
 
+#ifndef LIGHTNING_DEBUG
 #define _b00           0
 #define _b01           1
 #define _b10           2
@@ -340,10 +341,10 @@ typedef _uc               jit_insn;
 
 #define CALLsm(D,B,I,S)                        _O_r_X  (0xff        ,_b010     
,(int)(D),B,I,S         )
 
-#define CBW()                          _O              (0x98                   
                                        )
-#define CLC()                          _O              (0xf8                   
                                        )
-#define CLTD()                         _O              (0x99                   
                                        )
-#define CMC()                          _O              (0xf5                   
                                        )
+#define CBW_()                         _O              (0x98                   
                                        )
+#define CLC_()                         _O              (0xf8                   
                                        )
+#define CLTD_()                                _O              (0x99           
                                                )
+#define CMC_()                         _O              (0xf5                   
                                        )
 
 
 #define CMPBrr(RS, RD)                 _O_Mrm          (0x38           
,_b11,_r1(RS),_r1(RD)                           )
@@ -365,7 +366,7 @@ typedef _uc         jit_insn;
 #define CMPLim(IM, MD, MB, MI, MS)     _O_r_X_L        (0x81                
,_b111             ,MD,MB,MI,MS    ,IM     )
 
 
-#define CWD()                          _O              (0x99                   
                                        )
+#define CWD_()                         _O              (0x99                   
                                        )
 
 
 #define CMPXCHGBrr(RS,RD)              _OO_Mrm         (0x0fb0         
,_b11,_r1(RS),_r1(RD)                           )
@@ -399,7 +400,7 @@ typedef _uc         jit_insn;
 
 
 #define ENTERii(W, B)                  _O_W_B          (0xc8                   
                          ,_su16(W),_su8(B))
-#define HLT()                          _O              (0xf4                   
                                        )
+#define HLT_()                         _O              (0xf4                   
                                        )
 
 
 #define IDIVBr(RS)                     _O_Mrm          (0xf6           
,_b11,_b111  ,_r1(RS)                           )
@@ -443,7 +444,7 @@ typedef _uc         jit_insn;
 #define INCLm(MD,MB,MI,MS)             _O_r_X          (0xff                
,_b000             ,MD,MB,MI,MS            )
 
 
-#define INVD()                         _OO             (0x0f08                 
                                        )
+#define INVD_()                                _OO             (0x0f08         
                                                )
 #define INVLPGm(MD, MB, MI, MS)                _OO_r_X         (0x0f01         
     ,_b111             ,MD,MB,MI,MS            )
 
 
@@ -523,9 +524,9 @@ typedef _uc         jit_insn;
 #define JMPsm(D,B,I,S)                 _O_r_X  (0xff        ,_b100     
,(int)(D),B,I,S         )
 
 
-#define LAHF()                         _O              (0x9f                   
                                        )
+#define LAHF_()                                _O              (0x9f           
                                                )
 #define LEALmr(MD, MB, MI, MS, RD)     _O_r_X          (0x8d                
,_r4(RD)           ,MD,MB,MI,MS            )
-#define LEAVE()                                _O              (0xc9           
                                                )
+#define LEAVE_()                       _O              (0xc9                   
                                        )
 
 
 #define LMSWr(RS)                      _OO_Mrm         (0x0f01         
,_b11,_b110,_r4(RS)                             )
@@ -600,7 +601,7 @@ typedef _uc         jit_insn;
 #define NEGLm(MD,MB,MI,MS)             _O_r_X          (0xf7                
,_b011             ,MD,MB,MI,MS            )
 
 
-#define NOP()                          _O              (0x90                   
                                        )
+#define NOP_()                         _O              (0x90                   
                                        )
 
 
 #define NOTBr(RD)                      _O_Mrm          (0xf6           
,_b11,_b010  ,_r1(RD)                           )
@@ -639,11 +640,11 @@ typedef _uc               jit_insn;
 #define POPLm(MD,MB,MI,MS)             _O_r_X          (0x8f                
,_b000             ,MD,MB,MI,MS            )
 
 
-#define POPA()                         _wO             (0x61                   
                                        )
-#define POPAD()                                _O              (0x61           
                                                )
+#define POPA_()                                _wO             (0x61           
                                                )
+#define POPAD_()                       _O              (0x61                   
                                        )
 
-#define POPF()                         _wO             (0x9d                   
                                        )
-#define POPFD()                                _O              (0x9d           
                                                )
+#define POPF_()                                _wO             (0x9d           
                                                )
+#define POPFD_()                       _O              (0x9d                   
                                        )
 
 
 #define PUSHWr(R)                      _wOr            (0x50,_r2(R)            
                                        )
@@ -655,13 +656,13 @@ typedef _uc               jit_insn;
 #define PUSHLi(IM)                     _Os_sL          (0x68                   
                                ,IM     )
 
 
-#define PUSHA()                                _wO             (0x60           
                                                )
-#define PUSHAD()                       _O              (0x60                   
                                        )
+#define PUSHA_()                       _wO             (0x60                   
                                        )
+#define PUSHAD_()                      _O              (0x60                   
                                        )
 
-#define PUSHF()                                _O              (0x9c           
                                                )
-#define PUSHFD()                       _wO             (0x9c                   
                                        )
+#define PUSHF_()                       _wO             (0x9c                                                           )       /* _wO form, like POPF_ and PUSHA_ */
+#define PUSHFD_()                      _O              (0x9c                                                           )       /* _O form, like POPFD_ and PUSHAD_ */
 
-#define RET()                          _O              (0xc3                   
                                        )
+#define RET_()                         _O              (0xc3                   
                                        )
 #define RETi(IM)                       _O_W            (0xc2                   
                                ,_su16(IM))
 
 
@@ -721,7 +722,7 @@ typedef _uc         jit_insn;
                                                JITFAIL         ("source 
register must be CL"                           ) )
 
 
-#define SAHF()                                 _O      (0x9e                   
                                        )
+#define SAHF_()                                        _O      (0x9e           
                                                )
 
 
 #define SALBir SHLBir
@@ -904,7 +905,7 @@ typedef _uc         jit_insn;
                                                JITFAIL         ("source 
register must be CL"                           ) )
 
 
-#define STC()                          _O              (0xf9                   
                                        )
+#define STC_()                         _O              (0xf9                   
                                        )
 
 
 #define SUBBrr(RS, RD)                 _O_Mrm          (0x28           
,_b11,_r1(RS),_r1(RD)                           )
@@ -985,8 +986,8 @@ typedef _uc         jit_insn;
 #define ESCmi(D,B,I,S,OP)      _O_r_X(0xd8|(OP >> 3), (OP & 7), D,B,I,S)
 #define ESCri(RD,OP)           _O_Mrm(0xd8|(OP >> 3), _b11, (OP & 7), RD)
 
-#define ESCrri(RS,RD,OP)       ((RS) = _ST0 ? ESCri(RD,(OP|040))               
        \
-                                : (RD) = _ST0 ? ESCri(RS,OP)                   
        \
+#define ESCrri(RS,RD,OP)       ((RS) == _ST0 ? ESCri(RD,(OP|040))              
        \
+                                : (RD) == _ST0 ? ESCri(RS,OP)                  
        \
                                 : JITFAIL ("coprocessor instruction without 
st0"))
 
 #define FLDSm(D,B,I,S)         ESCmi(D,B,I,S,010)     /* fld m32real  */
@@ -1036,7 +1037,7 @@ typedef _uc               jit_insn;
 #define FNSTSWr(RD)            ((RD == _AX || RD == _EAX) ? _OO (0xdfe0)       
        \
                                 : JITFAIL ("AX or EAX expected"))
 /* N byte NOPs */
-#define _NOPi(N)       (((  (N)    >= 8) ? 
(_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00),_jit_B(0x90)) : (void) 0), 
\
+#define NOPi(N)                (((  (N)    >= 8) ? 
(_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00),_jit_B(0x90)) : (void) 0), 
\
                         (( ((N)&7) == 7) ? 
(_jit_B(0x8d),_jit_B(0xb4),_jit_B(0x26),_jit_L(0x00)) : \
                          ( ((N)&7) == 6) ? 
(_jit_B(0x8d),_jit_B(0xb6),_jit_L(0x00)) : \
                          ( ((N)&7) == 5) ? 
(_jit_B(0x90),_jit_B(0x8d),_jit_B(0x74),_jit_B(0x26),_jit_B(0x00)) : \
@@ -1056,5 +1057,6 @@ typedef _uc               jit_insn;
 /* [2] "Intel Architecture Software Developer's Manual Volume 2: Instruction 
Set Reference",   */
 /*     Intel Corporation 1997.                                                 
                */
 
+#endif
 #endif /* __lightning_asm_h */
 
diff --git a/lightning/i386/core.h b/lightning/i386/core.h
index 79f4d64..dd9d58a 100644
--- a/lightning/i386/core.h
+++ b/lightning/i386/core.h
@@ -34,16 +34,15 @@
 #ifndef __lightning_core_h
 #define __lightning_core_h
 
-#define JIT_R0                 _EAX
-#define JIT_R1                 _ECX
-#define JIT_R2                 _EDX
-#define JIT_V0                 _EBX
-#define JIT_V1                 _ESI
-#define JIT_V2                 _EDI
 #define JIT_FP                 _EBP
 #define JIT_SP                 _ESP
 #define JIT_RET                        _EAX
 
+#define JIT_R_NUM              3
+#define JIT_V_NUM              3
+#define JIT_R(i)               (_EAX + (i))
+#define JIT_V(i)               ((i) == 0 ? _EBX : _ESI + (i) - 1)
+
 struct jit_local_state {
   int  framesize;
   int  argssize;
@@ -265,10 +264,13 @@ struct jit_local_state {
 
 /* The += allows for stack pollution */
 
-#define jitfp_prepare(ni,nf,nd) ((void) (_jitl.argssize += (ni) + (nf) + 
2*(nd)))
+#define jit_prepare_i(ni)      (_jitl.argssize += (ni))
+#define jit_prepare_f(nf)      (_jitl.argssize += (nf))
+#define jit_prepare_d(nd)      (_jitl.argssize += 2 * (nd))
 #define jit_pusharg_i(rs)      PUSHLr(rs)
 #define jit_finish(sub)                (jit_calli((sub)), ADDLir(4 * 
_jitl.argssize, JIT_SP), _jitl.argssize = 0)
-#define jit_retval(rd)         jit_movr_i ((rd), _EAX)
+#define jit_finishr(reg)       (jit_callr((reg)), ADDLir(4 * _jitl.argssize, 
JIT_SP), _jitl.argssize = 0)
+#define jit_retval_i(rd)       jit_movr_i ((rd), _EAX)
 
 #define        jit_arg_c()             ((_jitl.framesize += sizeof(int)) - 
sizeof(int))
 #define        jit_arg_uc()            ((_jitl.framesize += sizeof(int)) - 
sizeof(int))
@@ -289,6 +291,8 @@ struct jit_local_state {
 
 #define jit_movr_i(d, rs)      ((rs) == (d) ? 0 : MOVLrr((rs), (d)))
 #define jit_movi_i(d, is)      ((is) ? MOVLir((is), (d)) : XORLrr ((d), (d)) )
+#define jit_movi_p(d, is)      (MOVLir((is), (d)), _jit.x.pc)
+#define jit_patch_movi(pa,pv)   (*_PSL((pa) - 4) = _jit_SL((pv)))
 
 #define jit_ntoh_ui(d, rs)     jit_op_((d), (rs), BSWAPLr(d))
 #define jit_ntoh_us(d, rs)     jit_op_((d), (rs), RORWir(8, d))
@@ -311,7 +315,7 @@ struct jit_local_state {
 #define jit_gei_i(d, rs, is)   jit_bool_i0((d), (rs), (is), SETGEr, SETNSr )
 #define jit_eqi_i(d, rs, is)   jit_bool_i0((d), (rs), (is), SETEr,  SETEr  )
 #define jit_nei_i(d, rs, is)   jit_bool_i0((d), (rs), (is), SETNEr, SETNEr )
-#define jit_lti_ui(d, rs, is)  jit_bool_i ((d), (rs), (is), SETB           )
+#define jit_lti_ui(d, rs, is)  jit_bool_i ((d), (rs), (is), SETBr          )
 #define jit_lei_ui(d, rs, is)  jit_bool_i0((d), (rs), (is), SETBEr, SETEr  )
 #define jit_gti_ui(d, rs, is)  jit_bool_i0((d), (rs), (is), SETAr,  SETNEr )
 #define jit_gei_ui(d, rs, is)  jit_bool_i0((d), (rs), (is), SETAEr, INCLr  )
@@ -340,10 +344,10 @@ struct jit_local_state {
 #define jit_bgei_i(label, rs, is)      jit_bra_i0((rs), (is), 
JGEm(label,0,0,0), JNSm(label,0,0,0) )
 #define jit_beqi_i(label, rs, is)      jit_bra_i0((rs), (is), JEm(label, 
0,0,0), JEm(label, 0,0,0) )
 #define jit_bnei_i(label, rs, is)      jit_bra_i0((rs), (is), 
JNEm(label,0,0,0), JNEm(label,0,0,0) )
-#define jit_blti_ui(label, rs, is)     jit_bra_i ((rs), (is), JLm(label, 
0,0,0)                    )
-#define jit_blei_ui(label, rs, is)     jit_bra_i0((rs), (is), 
JLEm(label,0,0,0), JEm(label, 0,0,0) )
-#define jit_bgti_ui(label, rs, is)     jit_bra_i0((rs), (is), JGm(label, 
0,0,0), JNEm(label,0,0,0) )
-#define jit_bgei_ui(label, rs, is)     jit_bra_i ((rs), (is), 
JGEm(label,0,0,0)                    )
+#define jit_blti_ui(label, rs, is)     jit_bra_i ((rs), (is), JBm(label, 
0,0,0)                    )
+#define jit_blei_ui(label, rs, is)     jit_bra_i0((rs), (is), 
JBEm(label,0,0,0), JEm(label, 0,0,0) )
+#define jit_bgti_ui(label, rs, is)     jit_bra_i0((rs), (is), JAm(label, 
0,0,0), JNEm(label,0,0,0) )
+#define jit_bgei_ui(label, rs, is)     jit_bra_i ((rs), (is), 
JAEm(label,0,0,0)                    )
 #define jit_boaddi_i(label, rs, is)    (ADDLir((is), (rs)), JOm(label,0,0,0), 
_jit.x.pc)
 #define jit_bosubi_i(label, rs, is)    (SUBLir((is), (rs)), JOm(label,0,0,0), 
_jit.x.pc)
 #define jit_boaddi_ui(label, rs, is)   (ADDLir((is), (rs)), JCm(label,0,0,0), 
_jit.x.pc)
@@ -354,9 +358,10 @@ struct jit_local_state {
 
 #define jit_jmpi(label)                (JMPm( ((unsigned long) (label)),       
0, 0, 0), _jit.x.pc)
 #define jit_calli(label)       (CALLm( ((unsigned long) (label)),      0, 0, 
0), _jit.x.pc)
+#define jit_callr(reg)         (CALLsr(reg))
 #define jit_jmpr(reg)          JMPsr(reg)
-#define jit_patch(jump_pc)     (*_PSL((jump_pc) - 4) = _jit_SL(_jit.x.pc - 
(jump_pc)))
-#define jit_ret()              (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), 
POPLr(_EBP), RET())
+#define jit_patch_at(jump_pc,v)        (*_PSL((jump_pc) - 4) = _jit_SL((v) - 
(jump_pc)))
+#define jit_ret()              (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), 
POPLr(_EBP), RET_())
 
 /* Memory */
 #define jit_ldi_c(d, is)               MOVSBLmr((is), 0,    0,    0, (d))
@@ -400,9 +405,9 @@ struct jit_local_state {
 #define jit_stxi_i(id, rd, rs)         MOVLrm((rs), (id), (rd), 0,    0)
 
 /* Extra */
-#define jit_nop()                      NOP()
+#define jit_nop()                      NOP_()
 
 #define _jit_alignment(pc, n)          (((pc ^ _MASK(4)) + 1) & _MASK(n))
-#define jit_align(n)                   
_NOPi(_jit_alignment(_jit_UL(_jit.x.pc), (n)))
+#define jit_align(n)                   NOPi(_jit_alignment(_jit_UL(_jit.x.pc), 
(n)))
 
 #endif /* __lightning_core_h */
diff --git a/lightning/i386/fp.h b/lightning/i386/fp.h
index c347e3d..0d27255 100644
--- a/lightning/i386/fp.h
+++ b/lightning/i386/fp.h
@@ -33,49 +33,48 @@
 #ifndef __lightning_asm_fp_h
 #define __lightning_asm_fp_h
 
-/* Actually, we should redesign the jitfp interface.  As a first step, I have
-   defined the macros for many x87 instructions, and I am using them here.
+/* We really must map the x87 stack onto a flat register file.  In practice,
+   we can provide something sensible and make it work on the x86 using the
+   stack like a file of eight registers.
 
-   In practice, we can provide something sensible and make it work on the x86
-   using the stack like a file of eight registers.  Then this awful stuff goes
-   away, and everything is "beautiful" as the rest of GNU lightning---and we'll
-   document it, promised.
-
-   Well, let's use six or seven registers so as to have some freedom
-   for floor, ceil, round, log, tan, atn and exp.
+   We use only six of them (see JIT_FPR_NUM) so as to have some freedom
+   for floor, ceil, round (and log, tan, atn and exp).
 
    Not hard at all, basically play with FXCH.  FXCH is mostly free,
    so the generated code is not bad.  Of course we special case when one
    of the operands turns out to be ST0.
 
-   - binary ops:
-
-       add FRR3 to FPR0
-               FADD ST0,ST3
-
-       add FPR0 to FPR3
-               FADD ST3,ST0
-
-       add FPR3 to FPR7 (I'm using nasm syntax here)
-               FXCH ST3
-               FADD ST7,ST0
-               FXCH ST3
+   Here are the macros that actually do the trick.  */
 
-   - stores:
+#define JIT_FPR_NUM           6
+#define JIT_FPR(i)            (i)
 
-       store FPR3
+#define jit_fxch(rs, op)       (((rs) != 0 ? FXCHr(rs) : 0),   \
+                                op, ((rs) != 0 ? FXCHr(rs) : 0))
 
-               FXCH ST3
-               FST [FUBAR]
-               FXCH ST3
+#define jit_fp_unary(rd, s1, op)                       \
+       ((rd) == (s1) ? jit_fxch ((rd), op)             \
+        : (rd) == 0 ? (FSTPr (0), FLDr ((s1)-1), op)   \
+        : (FLDr ((s1)), op, FSTPr ((rd))))
 
-       store FPR0
+#define jit_fp_binary(rd, s1, s2, op, opr)             \
+       ((rd) == (s1) ?                                 \
+          ((s2) == 0 ? opr(0, (rd))                    \
+           : (s2) == (s1) ? jit_fxch((rd), op(0, 0))   \
+           : jit_fxch((rd), op((s2), 0)))              \
+        : (rd) == (s2) ? jit_fxch((s1), opr(0, (rd) == 0 ? (s1) : (rd)))       
\
+        : (FLDr (s1), op(0, (s2)+1), FSTPr((rd)+1)))
 
-               FST [FUBAR]
+#define jit_addr_d(rd,s1,s2)    jit_fp_binary((rd),(s1),(s2),FADDrr,FADDrr)
+#define jit_subr_d(rd,s1,s2)    jit_fp_binary((rd),(s1),(s2),FSUBrr,FSUBRrr)
+#define jit_mulr_d(rd,s1,s2)    jit_fp_binary((rd),(s1),(s2),FMULrr,FMULrr)
+#define jit_divr_d(rd,s1,s2)    jit_fp_binary((rd),(s1),(s2),FDIVrr,FDIVRrr)
 
-       (and similarly for other unary ops like FCHS or FABS)
+#define jit_abs_d(rd,rs)       jit_fp_unary ((rd), (rs), _OO (0xd9e1))
+#define jit_negr_d(rd,rs)      jit_fp_unary ((rd), (rs), _OO (0xd9e0))
+#define jit_sqrt_d(rd,rs)      jit_fp_unary ((rd), (rs), _OO (0xd9fa))
 
-   - moves:
+/* - moves:
 
        move FPR0 to FPR3
                FST  ST3
@@ -85,11 +84,16 @@
                FST  ST3
 
        move FPR3 to FPR1
-               FSTP ST1   Save old st0 into destination register
-               FLD  ST2   Stack is rotated, so FPRn becomes STn-1
-               FXCH ST1   Get back old st0
+                FLD  ST3
+                FST  ST2   Stack is rotated, so FPRn becomes STn+1 */
 
-   - loads:
+#define jit_movr_d(rd,s1)                              \
+       ((s1) == (rd) ? 0                               \
+        : (s1) == 0 ? FSTr ((rd))                      \
+        : (rd) == 0 ? (FXCHr ((s1)), FSTr ((s1)))      \
+        : (FLDr ((s1)), FSTr ((rd)+1)))
+
+/* - loads:
 
        load into FPR0
                FSTP ST0
@@ -102,53 +106,82 @@
 
    (and similarly for immediates, using the stack) */
 
-#define jit_add_two(reg0)      FADDPr(1)
-#define jit_sub_two(reg0)      FSUBRPr(1)
-#define jit_mul_two(reg0)      FMULPr(1)
-#define jit_div_two(reg0)      FDIVRPr(1)
-
-#define jit_abs(reg0)          _OO(0xd9e1)                     /* fabs */
-#define jit_sqr(reg0)          FMULrr(0,0)
-#define jit_sqrt(reg0)         _OO(0xd9fa)                     /* fsqrt */
-
-#define jit_exti_d(reg0, rs)   (PUSHLr((rs)), FILDLm(0, _ESP, 0, 0), 
POPLr((rs)))
-
-#define jit_neg(reg0)          _OO(0xd9e0)                     /* fchs */
-
-#define jit_ldxr_f(reg0, s1, s2)       FLDSm(0, (s1), (s2), 1)
-#define jit_ldxi_f(reg0, rs, is)       FLDSm((is), (rs), 0, 0)
-#define jit_ldxr_f(reg0, s1, s2)       FLDSm(0, (s1), (s2), 1)
-#define jit_ldxi_d(reg0, rs, is)       FLDLm((is), (rs), 0, 0)
-#define jit_ldxr_d(reg0, s1, s2)       FLDLm(0, (s1), (s2), 1)
-#define jit_ldi_f(reg0, is)            FLDSm((is), 0,    0, 0)
-#define jit_ldr_f(reg0, rs)            FLDSm(0,    (rs), 0, 0)
-#define jit_ldi_d(reg0, is)            FLDLm((is), 0,    0, 0)
-#define jit_ldr_d(reg0, rs)            FLDLm(0,    (rs), 0, 0)
-#define jit_stxi_f(id, rd, reg0)       FSTPSm((id), (rd), 0, 0)
-#define jit_stxr_f(d1, d2, reg0)       FSTPSm(0, (d1), (d2), 1)
-#define jit_stxi_d(id, rd, reg0)       FSTPLm((id), (rd), 0, 0)
-#define jit_stxr_d(d1, d2, reg0)       FSTPLm(0, (d1), (d2), 1)
-#define jit_sti_f(id, reg0)            FSTPSm((id), 0,    0, 0)
-#define jit_str_f(rd, reg0)            FSTPSm(0,    (rd), 0, 0)
-#define jit_sti_d(id, reg0)            FSTPLm((id), 0,    0, 0)
-#define jit_str_d(rd, reg0)            FSTPLm(0,    (rd), 0, 0)
-
-#define jit_fpimm(reg0, first, second) \
-       (PUSHLi(second),                \
-       PUSHLi(first),                  \
-       FLDLm(0, _ESP, 0, 0),           \
-       ADDLir(8, _ESP))
-
+#define jit_movi_f(rd,immf)                     \
+        (_O (0x68),                            \
+         *((float *) _jit.x.pc) = (float) immf, \
+         _jit.x.uc_pc += sizeof (float),       \
+        jit_ldr_f((rd), _ESP),                 \
+        ADDLir(4, _ESP))
+
+union jit_double_imm {
+  double d;
+  int i[2];
+};
+
+#define jit_movi_d(rd,immd)                                                    
        \
+        (_O (0x68),                                                            
        \
+         _jit.x.uc_pc[4] = 0x68,                                               
        \
+         ((union jit_double_imm *) (_jit.x.uc_pc + 5))->d = (double) immd,     
        \
+         *((int *) _jit.x.uc_pc) = ((union jit_double_imm *) (_jit.x.uc_pc + 
5))->i[1],        \
+         _jit.x.uc_pc += 9,                                                    
        \
+        jit_ldr_d((rd), _ESP),                                                 
        \
+        ADDLir(8, _ESP))
+
+#define jit_ldi_f(rd, is)                              \
+  ((rd) == 0 ? (FSTPr (0), FLDSm((is), 0, 0, 0))       \
+   : (FLDSm((is), 0, 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldi_d(rd, is)                              \
+  ((rd) == 0 ? (FSTPr (0), FLDLm((is), 0, 0, 0))       \
+   : (FLDLm((is), 0, 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldr_f(rd, rs)                              \
+  ((rd) == 0 ? (FSTPr (0), FLDSm(0, (rs), 0, 0))       \
+   : (FLDSm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldr_d(rd, rs)                              \
+  ((rd) == 0 ? (FSTPr (0), FLDLm(0, (rs), 0, 0))       \
+   : (FLDLm(0, (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxi_f(rd, rs, is)                         \
+  ((rd) == 0 ? (FSTPr (0), FLDSm((is), (rs), 0, 0))    \
+   : (FLDSm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxi_d(rd, rs, is)                         \
+  ((rd) == 0 ? (FSTPr (0), FLDLm((is), (rs), 0, 0))    \
+   : (FLDLm((is), (rs), 0, 0), FSTPr ((rd) + 1)))
+
+#define jit_ldxr_f(rd, s1, s2)                         \
+  ((rd) == 0 ? (FSTPr (0), FLDSm(0, (s1), (s2), 1))    \
+   : (FLDSm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
+
+#define jit_ldxr_d(rd, s1, s2)                         \
+  ((rd) == 0 ? (FSTPr (0), FLDLm(0, (s1), (s2), 1))    \
+   : (FLDLm(0, (s1), (s2), 1), FSTPr ((rd) + 1)))
+
+#define jit_extr_i_d(rd, rs)   (PUSHLr((rs)),          \
+  ((rd) == 0 ? (FSTPr (0), FILDLm(0, _ESP, 0, 0))      \
+   : (FILDLm(0, _ESP, 0, 0), FSTPr ((rd) + 1))),       \
+  POPLr((rs)))
+
+#define jit_stxi_f(id, rd, rs) jit_fxch ((rs), FSTSm((id), (rd), 0, 0))
+#define jit_stxr_f(d1, d2, rs) jit_fxch ((rs), FSTSm(0, (d1), (d2), 1))
+#define jit_stxi_d(id, rd, rs) jit_fxch ((rs), FSTLm((id), (rd), 0, 0))
+#define jit_stxr_d(d1, d2, rs) jit_fxch ((rs), FSTLm(0, (d1), (d2), 1))
+#define jit_sti_f(id, rs)      jit_fxch ((rs), FSTSm((id), 0,    0, 0))
+#define jit_str_f(rd, rs)      jit_fxch ((rs), FSTSm(0,    (rd), 0, 0))
+#define jit_sti_d(id, rs)      jit_fxch ((rs), FSTLm((id), 0,    0, 0))
+#define jit_str_d(rd, rs)      jit_fxch ((rs), FSTLm(0,    (rd), 0, 0))
 
 /* Assume round to near mode */
-#define jit_floor(rd, reg0)    \
-       jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX))
+#define jit_floorr_d_i(rd, rs) \
+       (FLDr (rs), jit_floor2((rd), ((rd) == _EDX ? _EAX : _EDX)))
 
-#define jit_ceil(rd, reg0)     \
-       jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX))
+#define jit_ceilr_d_i(rd, rs)  \
+       (FLDr (rs), jit_ceil2((rd), ((rd) == _EDX ? _EAX : _EDX)))
 
-#define jit_trunc(rd, reg0)    \
-       jit_trunc2((rd), ((rd) == _EDX ? _EAX : _EDX))
+#define jit_truncr_d_i(rd, rs) \
+       (FLDr (rs), jit_trunc2((rd), ((rd) == _EDX ? _EAX : _EDX)))
 
 #define jit_calc_diff(ofs)             \
        FISTLm(ofs, _ESP, 0, 0),        \
@@ -200,53 +233,115 @@
        POPLr(aux))
 
 /* the easy one */
-#define jit_round(rd, reg0)            \
-       (PUSHLr(_EAX),                  \
-       FISTPLm(0, _ESP, 0, 0),         \
+#define jit_roundr_d_i(rd, rs)                         \
+        (PUSHLr(_EAX),                                 \
+        jit_fxch ((rs), FISTPLm(0, _ESP, 0, 0)),       \
        POPLr((rd)))
 
-#define jit_cmp(le, ge, reg0) (                                        \
-       ((le) == _EAX || (ge) == _EAX ? 0 : PUSHLr(_EAX)),      \
-       FCOMr(0),                                               \
-       FNSTSWr(_AX),                                           \
-       TESTBir(0x40, _AH),                                     \
-       MOVLir(0, (le)),                                        \
-       MOVLrr((le), (ge)),                                     \
-       JZSm(_jit.x.pc + 11, 0, 0, 0),                          \
-       _OO(0xd9e4),                    /* ftst */      /* 2 */ \
-       FNSTSWr(_AX),                                   /* 2 */ \
-       SAHF(),                                         /* 1 */ \
-       SETLEr( ((le) & 15) | 0x10),                    /* 3 */ \
-       SETGEr( ((ge) & 15) | 0x10),                    /* 3 */ \
-       ((le) == _EAX || (ge) == _EAX ? ANDLir (1, _EAX) : POPLr(_EAX)) )
-
-#define jitfp_getarg_f(ofs)             jitfp_ldxi_f(JIT_FP,(ofs))
-#define jitfp_getarg_d(ofs)             jitfp_ldxi_d(JIT_FP,(ofs))
-#define jitfp_pusharg_d(op1)            
(jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jitfp_str_d(JIT_SP,(op1)))
-#define jitfp_pusharg_f(op1)            
(jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), jitfp_str_f(JIT_SP,(op1)))
-#define jitfp_retval(op1)               _jit_emit(&_jit, (op1), JIT_NULL, 0, 
0, 0)
-
-#define JIT_TRANSCENDENTAL
-
-#define jit_sin(reg0)          _OO(0xd9fe)                     /* fsin */
-#define jit_cos(reg0)          _OO(0xd9ff)                     /* fcos */
-#define jit_tan(reg0)          (_OO(0xd9f2),                   /* fptan */ \
-                                FSTPr(0))                      /* fstp st */
-#define jit_atn(reg0)          (_OO(0xd9e8),                   /* fld1 */ \
-                                _OO(0xd9f3))                   /* fpatan */
-#define jit_exp(reg0)          (_OO(0xd9ea),                   /* fldl2e */ \
-                                FMULPr(1),                     /* fmulp */ \
-                                _OO(0xd9c0),                   /* fld st */ \
-                                _OO(0xd9fc),                   /* frndint */ \
-                                _OO(0xdce9),                   /* fsubr */ \
-                                FXCHr(1),                      /* fxch st(1) 
*/ \
-                                _OO(0xd9f0),                   /* f2xm1 */ \
-                                _OO(0xd9e8),                   /* fld1 */ \
-                                _OO(0xdec1),                   /* faddp */ \
-                                _OO(0xd9fd),                   /* fscale */ \
-                                FSTPr(1))                      /* fstp st(1) */
-#define jit_log(reg0)          (_OO(0xd9ed),                   /* fldln2 */ \
-                                FXCHr(1),                      /* fxch st(1) 
*/ \
-                                _OO(0xd9f1))                   /* fyl2x */
+#define jit_fp_test(d, s1, s2, n, _and, res)           \
+       (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))),     \
+        ((d) != _EAX ? MOVLrr(_EAX, (d)) : 0),                 \
+        FNSTSWr(_EAX),                                         \
+        SHRLir(n, _EAX),                                       \
+        ((_and) ? ANDLir((_and), _EAX) : MOVLir(0, _EAX)),     \
+        res,                                                   \
+        ((d) != _EAX ? _O (0x90 + ((d) & 7)) : 0))     /* xchg */
+
+#define jit_fp_btest(d, s1, s2, n, _and, cmp, res)             \
+       (((s1) == 0 ? FUCOMr((s2)) : (FLDr((s1)), FUCOMPr((s2) + 1))),    \
+        PUSHLr(_EAX),                                          \
+        FNSTSWr(_EAX),                                         \
+        SHRLir(n, _EAX),                                       \
+        ((_and) ? ANDLir ((_and), _EAX) : 0),                  \
+        ((cmp) ? CMPLir ((cmp), _AL) : 0),                     \
+        POPLr(_EAX),                                           \
+        res ((d), 0, 0, 0))
+
+#define jit_nothing_needed(x)
+
+/* After FNSTSW we have 1 if <, 40 if =, 0 if >, 45 if unordered (all
+   values here are hexadecimal).  Here is how to map the values of the
+   status word's high byte to the conditions.
+
+         <     =     >     unord    valid values    condition
+  gt     no    no    yes   no       0               STSW & 45 == 0
+  lt     yes   no    no    no       1               STSW & 45 == 1
+  eq     no    yes   no    no       40              STSW & 45 == 40
+  unord  no    no    no    yes      45              bit 2 == 1
+
+  ge     no    yes   yes   no       0, 40           bit 0 == 0
+  unlt   yes   no    no    yes      1, 45           bit 0 == 1
+  ltgt   yes   no    yes   no       0, 1            bit 6 == 0
+  uneq   no    yes   no    yes      40, 45          bit 6 == 1
+  le     yes   yes   no    no       1, 40           odd parity for STSW & 41
+  ungt   no    no    yes   yes      0, 45           even parity for STSW & 41
+
+  unle   yes   yes   no    yes      1, 40, 45       STSW & 45 != 0
+  unge   no    yes   yes   yes      0, 40, 45       STSW & 45 != 1
+  ne     yes   no    yes   yes      0, 1, 45        STSW & 45 != 40
+  ord    yes   yes   yes   no       0, 1, 40        bit 2 == 0
+
+  lt, le, ungt, unge are actually computed as gt, ge, unlt, unle with
+  the operands swapped; it is more efficient this way.  */
+
+#define jit_gtr_d(d, s1, s2)            jit_fp_test((d), (s1), (s2), 8, 0x45, 
SETZr (_AL))
+#define jit_ger_d(d, s1, s2)            jit_fp_test((d), (s1), (s2), 9, 0, 
SBBBir (-1, _AL))
+#define jit_unler_d(d, s1, s2)          jit_fp_test((d), (s1), (s2), 8, 0x45, 
SETNZr (_AL))
+#define jit_unltr_d(d, s1, s2)          jit_fp_test((d), (s1), (s2), 9, 0, 
ADCBir (0, _AL))
+#define jit_ltr_d(d, s1, s2)            jit_fp_test((d), (s2), (s1), 8, 0x45, 
SETZr (_AL))
+#define jit_ler_d(d, s1, s2)            jit_fp_test((d), (s2), (s1), 9, 0, 
SBBBir (-1, _AL))
+#define jit_unger_d(d, s1, s2)          jit_fp_test((d), (s2), (s1), 8, 0x45, 
SETNZr (_AL))
+#define jit_ungtr_d(d, s1, s2)          jit_fp_test((d), (s2), (s1), 9, 0, 
ADCBir (0, _AL))
+#define jit_eqr_d(d, s1, s2)            jit_fp_test((d), (s1), (s2), 8, 0x45, 
(CMPBir (0x40, _AL), SETEr (_AL)))
+#define jit_ner_d(d, s1, s2)            jit_fp_test((d), (s1), (s2), 8, 0x45, 
(CMPBir (0x40, _AL), SETNEr (_AL)))
+#define jit_ltgtr_d(d, s1, s2)          jit_fp_test((d), (s1), (s2), 15, 0, 
SBBBir (-1, _AL))
+#define jit_uneqr_d(d, s1, s2)          jit_fp_test((d), (s1), (s2), 15, 0, 
ADCBir (0, _AL))
+#define jit_ordr_d(d, s1, s2)           jit_fp_test((d), (s1), (s2), 11, 0, 
SBBBir (-1, _AL))
+#define jit_unordr_d(d, s1, s2)         jit_fp_test((d), (s1), (s2), 11, 0, 
ADCBir (0, _AL))
+
+#define jit_bgtr_d(d, s1, s2)           jit_fp_btest((d), (s1), (s2), 8, 0x45, 
0, JZm)
+#define jit_bger_d(d, s1, s2)           jit_fp_btest((d), (s1), (s2), 9, 0, 0, 
JNCm)
+#define jit_bunler_d(d, s1, s2)         jit_fp_btest((d), (s1), (s2), 8, 0x45, 
0, JNZm)
+#define jit_bunltr_d(d, s1, s2)         jit_fp_btest((d), (s1), (s2), 9, 0, 0, 
JCm)
+#define jit_bltr_d(d, s1, s2)           jit_fp_btest((d), (s2), (s1), 8, 0x45, 
0, JZm)
+#define jit_bler_d(d, s1, s2)           jit_fp_btest((d), (s2), (s1), 9, 0, 0, 
JNCm)
+#define jit_bunger_d(d, s1, s2)         jit_fp_btest((d), (s2), (s1), 8, 0x45, 
0, JNZm)
+#define jit_bungtr_d(d, s1, s2)         jit_fp_btest((d), (s2), (s1), 9, 0, 0, 
JCm)
+#define jit_beqr_d(d, s1, s2)           jit_fp_btest((d), (s1), (s2), 8, 0x45, 
0x40, JZm)
+#define jit_bner_d(d, s1, s2)           jit_fp_btest((d), (s1), (s2), 8, 0x45, 
0x40, JNZm)
+#define jit_bltgtr_d(d, s1, s2)         jit_fp_btest((d), (s1), (s2), 15, 0, 
0, JNCm)
+#define jit_buneqr_d(d, s1, s2)         jit_fp_btest((d), (s1), (s2), 15, 0, 
0, JCm)
+#define jit_bordr_d(d, s1, s2)          jit_fp_btest((d), (s1), (s2), 11, 0, 
0, JNCm)
+#define jit_bunordr_d(d, s1, s2)        jit_fp_btest((d), (s1), (s2), 11, 0, 
0, JCm)
+
+#define jit_getarg_f(rd, ofs)        jit_ldxi_f((rd), JIT_FP,(ofs))
+#define jit_getarg_d(rd, ofs)        jit_ldxi_d((rd), JIT_FP,(ofs))
+#define jit_pusharg_d(rs)            
(jit_subi_i(JIT_SP,JIT_SP,sizeof(double)), jit_str_d(JIT_SP,(rs)))
+#define jit_pusharg_f(rs)            (jit_subi_i(JIT_SP,JIT_SP,sizeof(float)), 
jit_str_f(JIT_SP,(rs)))
+#define jit_retval_d(op1)            jit_movr_d(0, (op1))
+
+
+#if 0
+#define jit_sin()      _OO(0xd9fe)                     /* fsin */
+#define jit_cos()      _OO(0xd9ff)                     /* fcos */
+#define jit_tan()      (_OO(0xd9f2),                   /* fptan */ \
+                        FSTPr(0))                      /* fstp st */
+#define jit_atn()      (_OO(0xd9e8),                   /* fld1 */ \
+                        _OO(0xd9f3))                   /* fpatan */
+#define jit_exp()      (_OO(0xd9ea),                   /* fldl2e */ \
+                        FMULPr(1),                     /* fmulp */ \
+                        _OO(0xd9c0),                   /* fld st */ \
+                        _OO(0xd9fc),                   /* frndint */ \
+                        _OO(0xdce9),                   /* fsubr */ \
+                        FXCHr(1),                      /* fxch st(1) */ \
+                        _OO(0xd9f0),                   /* f2xm1 */ \
+                        _OO(0xd9e8),                   /* fld1 */ \
+                        _OO(0xdec1),                   /* faddp */ \
+                        _OO(0xd9fd),                   /* fscale */ \
+                        FSTPr(1))                      /* fstp st(1) */
+#define jit_log()      (_OO(0xd9ed),                   /* fldln2 */ \
+                        FXCHr(1),                      /* fxch st(1) */ \
+                        _OO(0xd9f1))                   /* fyl2x */
+#endif
 
 #endif /* __lightning_asm_h */
diff --git a/lightning/i386/funcs.h b/lightning/i386/funcs.h
index a618a85..1ae48e7 100644
--- a/lightning/i386/funcs.h
+++ b/lightning/i386/funcs.h
@@ -34,6 +34,57 @@
 #ifndef __lightning_funcs_h
 #define __lightning_funcs_h
 
-#define jit_flush_code(dest, end)
+#ifdef __linux__
+#include <unistd.h>
+#endif
+
+static void
+jit_flush_code(void *dest, void *end)
+{
+  /* Make the code between DEST and END executable.  On the x86 the
+     PROT_EXEC bits are not handled by the MMU, but the kernel can
+     emulate them by setting the code segment's limit to the end
+     address of the highest page whose PROT_EXEC bit is set.
+
+     Linux kernels that do so and that disable by default the
+     execution of the data and stack segment are becoming more
+     and more common (Fedora, for example), so we implement our
+     jit_flush_code as an mprotect.  */
+#ifdef __linux__
+  static unsigned long prev_page = 0, prev_length = 0;
+  unsigned long page, length;	/* unsigned, like prev_page/prev_length */
+#ifdef PAGESIZE
+  const int page_size = PAGESIZE;
+#else
+  static int page_size = -1;
+  if (page_size == -1)
+    page_size = sysconf (_SC_PAGESIZE);
+#endif
+
+  page = (unsigned long) dest & ~(page_size - 1);
+  length = ((char *) end - (char *) page + page_size - 1) & ~(page_size - 1);
+
+  /* Simple-minded attempt at optimizing the common case where a single
+     chunk of memory is used to compile multiple functions.  */
+  if (page >= prev_page && page + length <= prev_page + prev_length)
+    return;
+
+  mprotect ((void *) page, length, PROT_READ | PROT_WRITE | PROT_EXEC);
+
+  /* See if we can extend the previously mprotect'ed memory area towards
+     higher addresses: the starting address remains the same as before.  */
+  if (page >= prev_page && page <= prev_page + prev_length)
+    prev_length = page + length - prev_page;
+
+  /* See if we can extend the previously mprotect'ed memory area towards
+     lower addresses: the highest address remains the same as before.  */
+  else if (page < prev_page && page + length <= prev_page + prev_length)
+    prev_length += prev_page - page, prev_page = page;
+
+  /* Otherwise just remember the new area in place of the old one.  */
+  else
+    prev_page = page, prev_length = length;
+#endif
+}
 
 #endif /* __lightning_funcs_h */
diff --git a/lightning/ppc/asm.h b/lightning/ppc/asm.h
index d102283..9f3c71d 100644
--- a/lightning/ppc/asm.h
+++ b/lightning/ppc/asm.h
@@ -61,6 +61,7 @@
 
 typedef unsigned int jit_insn;
 
+#ifndef LIGHTNING_DEBUG
 #define _cr0   0
 #define _cr1   1
 #define _cr2   2
@@ -81,9 +82,9 @@ typedef unsigned int jit_insn;
 
 /* primitive instruction forms [1, Section A.4] */
 
-#define _FB(  OP,         BD,AA,LK )   _jit_I((_u6(OP)<<26)|                   
                         _d26(BD)|     (_u1(AA)<<1)|_u1(LK))
+#define _FB(  OP,         BD,AA,LK )    (_jit_I_noinc((_u6(OP)<<26)|           
                                 _d26(BD)|     (_u1(AA)<<1)|_u1(LK)), 
_jit.x.pc++, 0)
 #define _FBA( OP,         BD,AA,LK )   _jit_I((_u6(OP)<<26)|                   
                        (_u26(BD)&~3)| (_u1(AA)<<1)|_u1(LK))
-#define _BB(   OP,BO,BI,   BD,AA,LK )          
_jit_I((_u6(OP)<<26)|(_u5(BO)<<21)|(_u5(BI)<<16)|                _d16(BD)|     
(_u1(AA)<<1)|_u1(LK))
+#define _BB(   OP,BO,BI,   BD,AA,LK )   
(_jit_I_noinc((_u6(OP)<<26)|(_u5(BO)<<21)|(_u5(BI)<<16)|                
_d16(BD)|     (_u1(AA)<<1)|_u1(LK)), _jit.x.pc++, 0)
 #define _D(   OP,RD,RA,         DD )   
_jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)|                _s16(DD)       
                   )
 #define _Du(  OP,RD,RA,         DD )   
_jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)|                _u16(DD)       
                   )
 #define _Ds(  OP,RD,RA,         DD )   
_jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)|                _su16(DD)      
                   )
@@ -93,6 +94,7 @@ typedef unsigned int jit_insn;
 #define _XO(  OP,RD,RA,RB,OE,XO,RC )   
_jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)|( _u5(RB)<<11)|(_u1(OE)<<10)|( 
_u9(XO)<<1)|_u1(RC))
 #define _M(   OP,RS,RA,SH,MB,ME,RC )   
_jit_I((_u6(OP)<<26)|(_u5(RS)<<21)|(_u5(RA)<<16)|( _u5(SH)<<11)|(_u5(MB)<< 6)|( 
_u5(ME)<<1)|_u1(RC))
 
+
 /* special purpose registers (form XFX) [1, Section 8.2, page 8-138] */
 
 #define SPR_LR         ((8<<5)|(0))
@@ -121,7 +123,7 @@ typedef unsigned int jit_insn;
 
 #define Bi(BD)                         _FB     (18, BD, 0, 0)
 #define BAi(BD)                                _FBA    (18, BD, 1, 0)
-#define BLi(BD)                                _FB     (18, BD, 0, 1)
+#define BLi(BD)                                _FB     (18, BD, 0, 1)
 #define BLAi(BD)                       _FBA    (18, BD, 1, 1)
 
 #define BCiii(BO,BI,BD)                        _BB     (16, BO, BI, BD, 0, 0)
@@ -313,10 +315,10 @@ typedef unsigned int jit_insn;
 #define MOVEIri(R,I)                   (_siP(16,I) ? LIri(R,I) :       \
                                        MOVEIri2(R, _HI(I), _LO(I)) )
 
-#define SUBIrri(RD,RA,IM)              ADDIrri(RD,RA,-_jit_L((IM)))    /* [1, 
Section F.2.1] */
-#define SUBISrri(RD,RA,IM)             ADDISrri(RD,RA,-_jit_L((IM)))
-#define SUBICrri(RD,RA,IM)             ADDICrri(RD,RA,-_jit_L((IM)))
-#define SUBIC_rri(RD,RA,IM)            ADDIC_rri(RD,RA,-_jit_L((IM)))
+#define SUBIrri(RD,RA,IM)              ADDIrri(RD,RA,-_LO((IM)))       /* [1, 
Section F.2.1] */
+#define SUBISrri(RD,RA,IM)             ADDISrri(RD,RA,-_LO((IM)))
+#define SUBICrri(RD,RA,IM)             ADDICrri(RD,RA,-_LO((IM)))
+#define SUBIC_rri(RD,RA,IM)            ADDIC_rri(RD,RA,-_LO((IM)))
 
 #define SUBrrr(RD,RA,RB)               SUBFrrr(RD,RB,RA)       /* [1, Section 
F.2.2] */
 #define SUBOrrr(RD,RA,RB)              SUBFOrrr(RD,RB,RA)
@@ -350,17 +352,21 @@ typedef unsigned int jit_insn;
 #define CLRRWIrri(RA,RS,N)             RLWINMrriii(RA, RS,            0,       
0,    31-(N))
 #define CLRLSLWIrrii(RA,RS,B,N)                RLWINMrriii(RA, RS,            
N, (B)-(N),    31-(N))
 
+
 /* 9 below inverts the branch condition and the branch prediction.
- * This has an incestuous knowledge of the fact that register 26
- * is used as auxiliary!!! */
+ * This has an incestuous knowledge of JIT_AUX */
 #define BC_EXT(A, C, D)  (_siP(16, _jit_UL(D)-_jit_UL(_jit.x.pc)) \
   ? BCiii((A), (C), (D)) \
-  : (BCiii((A)^9, (C), _jit.x.pc+5), LISri(26,_HI(D)), ORIrri(26,26,_LO(D)), \
-     MTLRr(26), BLR() ))
+  : (BCiii((A)^9, (C), _jit.x.pc+5), \
+     LISri(JIT_AUX,_HI(D)), \
+     ORIrri(JIT_AUX,JIT_AUX,_LO(D)), \
+     MTLRr(JIT_AUX), BLR() ))
 
 #define B_EXT(D)         (_siP(16, _jit_UL(D)-_jit_UL(_jit.x.pc)) \
   ? Bi((D)) \
-  : (LISri(26,_HI(D)), ORIrri(26,26,_LO(D)), MTLRr(26), BLR()) )
+  : (LISri(JIT_AUX,_HI(D)), \
+     ORIrri(JIT_AUX,JIT_AUX,_LO(D)), \
+     MTLRr(JIT_AUX), BLR()) )
 
 #define BTii(C,D)                      BC_EXT(12, C, D)                /* [1, 
Table F-5] */
 #define BFii(C,D)                      BC_EXT( 4, C, D)
@@ -379,7 +385,7 @@ typedef unsigned int jit_insn;
                
 
 #define BLTLRi(CR)                     BCLRii(12, ((CR)<<2)+0) /* [1, Table 
F-10] */
-#define BLELRi(CR)                     BCLRii( 4  ((CR)<<2)+1)
+#define BLELRi(CR)                     BCLRii( 4, ((CR)<<2)+1)
 #define BEQLRi(CR)                     BCLRii(12, ((CR)<<2)+2)
 #define BGELRi(CR)                     BCLRii( 4, ((CR)<<2)+0)
 #define BGTLRi(CR)                     BCLRii(12, ((CR)<<2)+1)
@@ -405,7 +411,7 @@ typedef unsigned int jit_insn;
 #define BNULRLi(CR)                    BCLRLii( 4, ((CR)<<2)+3)
                
 #define BLTCTRi(CR)                    BCCTRii(12, ((CR)<<2)+0)        /* [1, 
Table F-10] */
-#define BLECTRi(CR)                    BCCTRii( 4  ((CR)<<2)+1)
+#define BLECTRi(CR)                    BCCTRii( 4, ((CR)<<2)+1)
 #define BEQCTRi(CR)                    BCCTRii(12, ((CR)<<2)+2)
 #define BGECTRi(CR)                    BCCTRii( 4, ((CR)<<2)+0)
 #define BGTCTRi(CR)                    BCCTRii(12, ((CR)<<2)+1)
@@ -511,7 +517,7 @@ typedef unsigned int jit_insn;
 #define BNUi(D)                                BNUii(0,D)
 
 #define BLTLii(C,D)                    BCLiii(12, ((C)<<2)+0, D)       /* [1, 
Table F-??] */
-#define BLELii(C,D)                    BCLiii( 4  ((C)<<2)+1, D)
+#define BLELii(C,D)                    BCLiii( 4, ((C)<<2)+1, D)
 #define BEQLii(C,D)                    BCLiii(12, ((C)<<2)+2, D)
 #define BGELii(C,D)                    BCLiii( 4, ((C)<<2)+0, D)
 #define BGTLii(C,D)                    BCLiii(12, ((C)<<2)+1, D)
@@ -586,7 +592,50 @@ typedef unsigned int jit_insn;
 #define _LO(I)          (_jit_UL(I) & _MASK(16))
 #define _HI(I)          (_jit_UL(I) >>     (16))
 
-
+#define _A(OP,RD,RA,RB,RC,XO,RCx)    
_jit_I((_u6(OP)<<26)|(_u5(RD)<<21)|(_u5(RA)<<16)|( 
_u5(RB)<<11)|_u5(RC)<<6|(_u5(XO)<<1)|_u1(RCx))
+
+#define LFDrri(RD,RA,imm)       _D(50,RD,RA,imm)
+#define LFDUrri(RD,RA,imm)      _D(51,RD,RA,imm)
+#define LFDUxrrr(RD,RA,RB)      _X(31,RD,RA,RB,631,0)
+#define LFDxrrr(RD,RA,RB)       _X(31,RD,RA,RB,599,0)
+
+#define LFSrri(RD,RA,imm)       _D(48,RD,RA,imm)
+#define LFSUrri(RD,RA,imm)      _D(49,RD,RA,imm)
+#define LFSUxrrr(RD,RA,RB)      _X(31,RD,RA,RB,567,0)
+#define LFSxrrr(RD,RA,RB)       _X(31,RD,RA,RB,535,0)
+
+#define STFDrri(RS,RA,imm)      _D(54,RS,RA,imm)
+#define STFDUrri(RS,RA,imm)     _D(55,RS,RA,imm)
+#define STFDUxrrr(RS,RA,RB)     _X(31,RS,RA,RB,759,0)
+#define STFDxrrr(RS,RA,RB)      _X(31,RS,RA,RB,727,0)
+
+#define STFSrri(RS,RA,imm)      _D(52,RS,RA,imm)
+#define STFSUrri(RS,RA,imm)     _D(53,RS,RA,imm)
+#define STFSUxrrr(RS,RA,RB)     _X(31,RS,RA,RB,695,0)
+#define STFSxrrr(RS,RA,RB)      _X(31,RS,RA,RB,663,0)
+#define STFIWXrrr(RS,RA,RB)     _X(31,RS,RA,RB,983,0)
+
+#define FADDDrrr(RD,RA,RB)       _A(63,RD,RA,RB,0,21,0)
+#define FADDSrrr(RD,RA,RB)       _A(59,RD,RA,RB,0,21,0)
+#define FSUBDrrr(RD,RA,RB)       _A(63,RD,RA,RB,0,20,0)
+#define FSUBSrrr(RD,RA,RB)       _A(59,RD,RA,RB,0,20,0)
+#define FMULDrrr(RD,RA,RC)       _A(63,RD,RA,0,RC,25,0)
+#define FMULSrrr(RD,RA,RC)       _A(59,RD,RA,0,RC,25,0)
+#define FDIVDrrr(RD,RA,RB)       _A(63,RD,RA,RB,0,18,0)
+#define FDIVSrrr(RD,RA,RB)       _A(59,RD,RA,RB,0,18,0)  /* fdivs: XO 18, as in FDIVDrrr (25 encodes fmuls) */
+#define FSQRTDrr(RD,RB)          _A(63,RD,0,RB,0,22,0)
+#define FSQRTSrr(RD,RB)          _A(59,RD,0,RB,0,22,0)
+#define FSELrrrr(RD,RA,RB,RC)    _A(63,RD,RA,RB,RC,23,0)
+#define FCTIWrr(RD,RB)           _X(63,RD,0,RB,14,0)
+#define FCTIWZrr(RD,RB)          _X(63,RD,0,RB,15,0)
+#define FRSPrr(RD,RB)            _X(63,RD,0,RB,12,0)
+#define FABSrr(RD,RB)            _X(63,RD,0,RB,264,0)
+#define FNABSrr(RD,RB)           _X(63,RD,0,RB,136,0)
+#define FNEGrr(RD,RB)            _X(63,RD,0,RB,40,0)
+#define FMOVErr(RD,RB)           _X(63,RD,0,RB,72,0)
+#define FCMPOrrr(CR,RA,RB)       _X(63,(_u3(CR)<<2),RA,RB,32,0)  /* range-check the 3-bit CR field, then shift past the two reserved bits */
+#define FCMPUrrr(CR,RA,RB)       _X(63,(_u3(CR)<<2),RA,RB,0,0)   /* same field layout as FCMPOrrr */
+#define MTFSFIri(CR,IMM)          _X(63,_u5((CR)<<2),0,_u5((IMM)<<1),134,0)
 
 /*** References:
  *
@@ -594,4 +643,5 @@ typedef unsigned int jit_insn;
  */
 
 
+#endif
 #endif /* __ccg_asm_ppc_h */
diff --git a/lightning/ppc/core.h b/lightning/ppc/core.h
index 1377e51..14eaae3 100644
--- a/lightning/ppc/core.h
+++ b/lightning/ppc/core.h
@@ -36,20 +36,24 @@
 #define __lightning_core_h
 
 struct jit_local_state {
-  int  nextarg_put;   /* Next r3-r8 reg. to be written */
-  int  nextarg_putfp; /* Next r3-r8 reg. to be written */
-  int  nextarg_get;   /* Next r20-r25 reg. to be read */
+   int nextarg_puti;  /* number of integer args */
+   int nextarg_putf;  /* number of float args   */
+   int nextarg_putd;  /* number of double args  */
+   int nextarg_geti;  /* Next r20-r25 reg. to be read */
+   int nextarg_getd;  /* The FP args are picked up from FPR1 -> FPR10 */
+   int  nbArgs;        /* Number of arguments for the prolog */
 };
 
 #define JIT_SP                 1
 #define JIT_RET                        3
-#define JIT_R0                 9
-#define JIT_R1                 10
-#define JIT_R2                 30  /* using r8 would limit argument passing */
-#define JIT_V0                 29
-#define JIT_V1                 28
-#define JIT_V2                 27
-#define JIT_AUX                        26  /* for 32-bit operands & shift 
counts */
+#define JIT_R_NUM              3
+#define JIT_V_NUM              7
+#define JIT_R(i)               (9+(i))
+#define JIT_V(i)               (31-(i))
+#define JIT_AUX                        JIT_V(JIT_V_NUM)  /* for 32-bit 
operands & shift counts */
+
+#define jit_pfx_start()   (_jit.jitl.trampolines)
+#define jit_pfx_end()     (_jit.jitl.free)
 
 /* If possible, use the `small' instruction (rd, rs, imm)
  * else load imm into r26 and use the `big' instruction (rd, rs, r26)
@@ -58,6 +62,9 @@ struct jit_local_state {
 #define jit_chk_imu(imm, small, big)           (_uiP(16,(imm)) ? (small) : 
(MOVEIri(JIT_AUX, imm),  (big)) )
 #define jit_chk_imu15(imm, small, big)         (_uiP(15,(imm)) ? (small) : 
(MOVEIri(JIT_AUX, imm),  (big)) )
 
+#define jit_big_ims(imm, big)                 (MOVEIri(JIT_AUX, imm),  (big))
+#define jit_big_imu(imm, big)                 (MOVEIri(JIT_AUX, imm),  (big))
+
 /* Helper macros for branches */
 #define jit_s_brai(rs, is, jmp)                        (jit_chk_ims (is, 
CMPWIri(rs, is), CMPWrr(rs, JIT_AUX)),   jmp, _jit.x.pc)
 #define jit_s_brar(s1, s2, jmp)                        (                 
CMPWrr(s1, s2),                          jmp, _jit.x.pc)
@@ -87,38 +94,48 @@ struct jit_local_state {
                                                MULLWrrr(31, 31, JIT_AUX), 
SUBrrr((rs), (rs), JIT_AUX), \
                                                MFLRr(31))
 
-/* Emit a 2-instruction MOVEI, even if a 1-instruction one is possible
- * (it is a rare case for branches, and a fixed sequence of instructions
- * is easier to patch). */
-#define jit_movei(reg, imm)                    (LISri(reg,_HI(imm)), 
ORIrri((reg),(reg),_LO(imm)))
-
 /* Patch a movei instruction made of a LIS at lis_pc and an ORI at ori_pc. */
-#define jit_patch_movei(lis_pc, ori_pc)                                        
\
-       (*(lis_pc) &= ~_MASK(16), *lis_pc |= _HI(_jit.x.pc),            \
-        *(ori_pc) &= ~_MASK(16), *ori_pc |= _LO(_jit.x.pc))            \
+#define jit_patch_movei(lis_pc, ori_pc, dest)                  \
+       (*(lis_pc) &= ~_MASK(16), *(lis_pc) |= _HI(dest),               \
+        *(ori_pc) &= ~_MASK(16), *(ori_pc) |= _LO(dest))               \
 
 /* Patch a branch instruction */
-#define jit_patch_branch(jump_pc)                              \
+#define jit_patch_branch(jump_pc,pv)                           \
        (*(jump_pc) &= ~_MASK(16) | 3,                          \
-        *(jump_pc) |= (_jit_UL(_jit.x.pc) - _jit_UL(jump_pc)) & _MASK(16))
+        *(jump_pc) |= (_jit_UL(pv) - _jit_UL(jump_pc)) & _MASK(16))
+
+#define jit_patch_ucbranch(jump_pc,pv)                          \
+         (*(jump_pc) &= ~_MASK(26) | 3,                         \
+         (*(jump_pc) |= (_jit_UL((pv)) - _jit_UL(jump_pc)) & _MASK(26)))
 
+#define _jit_b_encoding                (18 << 26)
 #define _jit_blr_encoding      ((19 << 26) | (20 << 21) | (00 << 16) | (00 << 
11) | (16 << 1))
+#define _jit_is_ucbranch(a)     (((*(a) & (63<<26)) == _jit_b_encoding))
 
-#define jit_patch(jump_pc) (                                   \
+#define jit_patch_at(jump_pc, value) (                         \
        ((*(jump_pc - 1) & ~1) == _jit_blr_encoding)            \
-       ? jit_patch_movei(((jump_pc) - 4), ((jump_pc) - 3))     \
-       : jit_patch_branch((jump_pc) - 1))
+         ? jit_patch_movei(((jump_pc) - 4), ((jump_pc) - 3), (value))  \
+         : ( _jit_is_ucbranch((jump_pc) - 1)                   \
+             ? jit_patch_ucbranch((jump_pc) - 1, (value))       \
+             : jit_patch_branch((jump_pc) - 1, (value))))
 
+#define jit_patch_movi(movi_pc, val)                                   \
+       jit_patch_movei((movi_pc) - 2, (movi_pc) - 1, (val))
+
+#define        jit_arg_c()                     (_jitl.nextarg_geti--)
+#define        jit_arg_i()                     (_jitl.nextarg_geti--)
+#define        jit_arg_l()                     (_jitl.nextarg_geti--)
+#define        jit_arg_p()                     (_jitl.nextarg_geti--)
+#define        jit_arg_s()                     (_jitl.nextarg_geti--)
+#define        jit_arg_uc()                    (_jitl.nextarg_geti--)
+#define        jit_arg_ui()                    (_jitl.nextarg_geti--)
+#define        jit_arg_ul()                    (_jitl.nextarg_geti--)
+#define        jit_arg_us()                    (_jitl.nextarg_geti--)
+
+/* Check Mach-O-Runtime documentation: Must skip GPR(s) whenever 
"corresponding" FPR is used */
+#define jit_arg_f()                    (_jitl.nextarg_geti-- 
,_jitl.nextarg_getd++)
+#define jit_arg_d()                    
(_jitl.nextarg_geti-=2,_jitl.nextarg_getd++)
 
-#define        jit_arg_c()                     (_jitl.nextarg_get--)
-#define        jit_arg_i()                     (_jitl.nextarg_get--)
-#define        jit_arg_l()                     (_jitl.nextarg_get--)
-#define        jit_arg_p()                     (_jitl.nextarg_get--)
-#define        jit_arg_s()                     (_jitl.nextarg_get--)
-#define        jit_arg_uc()                    (_jitl.nextarg_get--)
-#define        jit_arg_ui()                    (_jitl.nextarg_get--)
-#define        jit_arg_ul()                    (_jitl.nextarg_get--)
-#define        jit_arg_us()                    (_jitl.nextarg_get--)
 #define jit_addi_i(d, rs, is)          jit_chk_ims((is), ADDICrri((d), (rs), 
(is)), ADDrrr((d), (rs), JIT_AUX))
 #define jit_addr_i(d, s1, s2)                            ADDrrr((d), (s1), 
(s2))
 #define jit_addci_i(d, rs, is)         jit_chk_ims((is), ADDICrri((d), (rs), 
(is)), ADDCrrr((d), (rs), JIT_AUX))
@@ -159,11 +176,12 @@ struct jit_local_state {
 #define jit_bosubi_ui(label, rs, is)   (jit_chk_ims ((is), SUBICri((rs), (rs), 
is), SUBCrr((rs), JIT_AUX)),       MCRXRi(0), BEQi((label)), _jit.x.pc)
 #define jit_boaddr_ui(label, s1, s2)   (                                       
     ADDCrr((s1), (s1), (s2)),     MCRXRi(0), BEQi((label)), _jit.x.pc)
 #define jit_bosubr_ui(label, s1, s2)   (                                       
     SUBCrr((s1), (s1), (s2)),     MCRXRi(0), BEQi((label)), _jit.x.pc)
-#define jit_calli(label)           (jit_movei(JIT_AUX, (label)), 
MTLRr(JIT_AUX), BLRL(), _jit.x.pc)
-#define jit_divi_i(d, rs, is)          jit_chk_ims(1111111, 0, DIVWrrr ((d), 
(rs), JIT_AUX))
-#define jit_divi_ui(d, rs, is)         jit_chk_imu(1111111, 0, DIVWUrrr((d), 
(rs), JIT_AUX))
-#define jit_divr_i(d, s1, s2)                                  DIVWrrr ((d), 
(s1), (s2))
-#define jit_divr_ui(d, s1, s2)                                 DIVWUrrr((d), 
(s1), (s2))
+#define jit_calli(label)               (jit_movi_p(JIT_AUX, (label)), 
MTCTRr(JIT_AUX), BCTRL(), _jitl.nextarg_puti = _jitl.nextarg_putf = 
_jitl.nextarg_putd = 0, _jit.x.pc)
+#define jit_callr(reg)                 (MTCTRr(reg), BCTRL())
+#define jit_divi_i(d, rs, is)          jit_big_ims((is), DIVWrrr ((d), (rs), 
JIT_AUX))
+#define jit_divi_ui(d, rs, is) jit_big_imu((is), DIVWUrrr((d), (rs), JIT_AUX))
+#define jit_divr_i(d, s1, s2)          DIVWrrr ((d), (s1), (s2))
+#define jit_divr_ui(d, s1, s2) DIVWUrrr((d), (s1), (s2))
 #define jit_eqi_i(d, rs, is)           (jit_chk_ims((is), SUBIrri(JIT_AUX, 
(rs), (is)), SUBrrr(JIT_AUX, (rs), JIT_AUX)), SUBFICrri((d), JIT_AUX, 0), 
ADDErrr((d), (d), JIT_AUX))
 #define jit_eqr_i(d, s1, s2)           (SUBrrr(JIT_AUX, (s1), (s2)), 
SUBFICrri((d), JIT_AUX, 0), ADDErrr((d), (d), JIT_AUX))
 #define jit_extr_c_i(d, rs)            EXTSBrr((d), (rs))
@@ -176,8 +194,8 @@ struct jit_local_state {
 #define jit_gti_ui(d, rs, is)          jit_ubooli ((d), (rs), (is), _gt)
 #define jit_gtr_i(d, s1, s2)           jit_sboolr ((d), (s1), (s2), _gt)
 #define jit_gtr_ui(d, s1, s2)          jit_uboolr ((d), (s1), (s2), _gt)
-#define jit_hmuli_i(d, rs, is)         jit_chk_ims(1111111, 0, MULHWrrr ((d), 
(rs), JIT_AUX))
-#define jit_hmuli_ui(d, rs, is)                jit_chk_imu(1111111, 0, 
MULHWUrrr((d), (rs), JIT_AUX))
+#define jit_hmuli_i(d, rs, is)         jit_big_ims((is), MULHWrrr ((d), (rs), 
JIT_AUX))
+#define jit_hmuli_ui(d, rs, is)                jit_big_imu((is), 
MULHWUrrr((d), (rs), JIT_AUX))
 #define jit_hmulr_i(d, s1, s2)                                 MULHWrrr ((d), 
(s1), (s2))
 #define jit_hmulr_ui(d, s1, s2)                                        
MULHWUrrr((d), (s1), (s2))
 #define jit_jmpi(label)                        (B_EXT((label)), _jit.x.pc)
@@ -197,16 +215,18 @@ struct jit_local_state {
 #define jit_ler_i(d, s1, s2)           jit_sboolr2((d), (s1), (s2), _gt )
 #define jit_ler_ui(d, s1, s2)          jit_uboolr2((d), (s1), (s2), _gt )
 #define jit_lshi_i(d, rs, is)                                       
SLWIrri((d), (rs), (is))
-#define jit_lshr_i(d, s1, s2)          (ANDIrri(JIT_AUX, (s2), 31), SLWrrr 
((d), (s1), JIT_AUX))
+#define jit_lshr_i(d, s1, s2)          (ANDI_rri(JIT_AUX, (s2), 31), SLWrrr 
((d), (s1), JIT_AUX))
 #define jit_lti_i(d, rs, is)           jit_sbooli ((d), (rs), (is), _lt )
 #define jit_lti_ui(d, rs, is)          jit_ubooli ((d), (rs), (is), _lt )
 #define jit_ltr_i(d, s1, s2)           jit_sboolr ((d), (s1), (s2), _lt )
 #define jit_ltr_ui(d, s1, s2)          jit_uboolr ((d), (s1), (s2), _lt )
-#define jit_modi_i(d, rs, is)          _jit_mod(jit_divi_i (31, (rs), 
JIT_AUX), (is))
-#define jit_modi_ui(d, rs, is)         _jit_mod(jit_divi_ui(31, (rs), 
JIT_AUX), (irs))
+#define jit_modi_i(d, rs, is)          _jit_mod(jit_divi_i (31, (rs), 
JIT_AUX), (rs), (is))
+#define jit_modi_ui(d, rs, is)         _jit_mod(jit_divi_ui(31, (rs), 
JIT_AUX), (rs), (is))
 #define jit_modr_i(d, s1, s2)          (DIVWrrr(JIT_AUX, (s1), (s2)), 
MULLWrrr(JIT_AUX, JIT_AUX, (s2)), SUBrrr((d), (s1), JIT_AUX))
 #define jit_modr_ui(d, s1, s2)         (DIVWUrrr(JIT_AUX, (s1), (s2)), 
MULLWrrr(JIT_AUX, JIT_AUX, (s2)), SUBrrr((d), (s1), JIT_AUX))
 #define jit_movi_i(d, is)              MOVEIri((d), (is))
+#define jit_movi_p(d, is)              (LISri((d), 
_HI((is))),ORIrri((d),(d),_LO((is))),_jit.x.pc)
+
 #define jit_movr_i(d, rs)              MRrr((d), (rs))
 #define jit_muli_i(d, rs, is)          jit_chk_ims  ((is), MULLIrri((d), (rs), 
(is)), MULLWrrr((d), (rs), JIT_AUX))
 #define jit_muli_ui(d, rs, is)         jit_chk_imu15((is), MULLIrri((d), (rs), 
(is)), MULLWrrr((d), (rs), JIT_AUX))
@@ -218,17 +238,19 @@ struct jit_local_state {
 #define jit_ori_i(d, rs, is)           jit_chk_imu((is), ORIrri((d), (rs), 
(is)), ORrrr((d), (rs), JIT_AUX))
 #define jit_orr_i(d, s1, s2)                             ORrrr((d), (s1), (s2))
 #define jit_popr_i(rs)                 (LWZrm((rs), 0, 1), ADDIrri(1, 1, 4))
-#define jitfp_prepare(numi, numf, numd)        (_jitl.nextarg_put = 3 + (numi) 
+ (numf) + 2*(numd))
+#define jit_prepare_i(numi)            (_jitl.nextarg_puti = numi)
+#define jit_prepare_f(numf)            (_jitl.nextarg_putf = numf)
+#define jit_prepare_d(numd)            (_jitl.nextarg_putd = numd)
 #define jit_prolog(n)                  _jit_prolog(&_jit, (n))
 #define jit_pushr_i(rs)                        STWUrm((rs), -4, 1)
-#define jit_pusharg_i(rs)              (--_jitl.nextarg_put, 
MRrr(_jitl.nextarg_put, (rs)))
-#define jit_ret()                      jit_jmpr(31)
-#define jit_retval(rd)                 MRrr((rd), 3)
+#define jit_pusharg_i(rs)              (--_jitl.nextarg_puti, MRrr((3 + 
_jitl.nextarg_putd * 2 + _jitl.nextarg_putf + _jitl.nextarg_puti), (rs)))
+#define jit_ret()                      _jit_epilog(&_jit)
+#define jit_retval_i(rd)               MRrr((rd), 3)
 #define jit_rsbi_i(d, rs, is)          jit_chk_ims((is), SUBFICrri((d), (rs), 
(is)), SUBFCrrr((d), (rs), JIT_AUX))
 #define jit_rshi_i(d, rs, is)                                       
SRAWIrri((d), (rs), (is))
 #define jit_rshi_ui(d, rs, is)                                      SRWIrri 
((d), (rs), (is))
-#define jit_rshr_i(d, s1, s2)          (ANDIrrr(JIT_AUX, (s2), 31), SRAWrrr 
((d), (s1), JIT_AUX))
-#define jit_rshr_ui(d, s1, s2)         (ANDIrrr(JIT_AUX, (s2), 31), SRWrrr  
((d), (s1), JIT_AUX))
+#define jit_rshr_i(d, s1, s2)          (ANDI_rri(JIT_AUX, (s2), 31), SRAWrrr 
((d), (s1), JIT_AUX))
+#define jit_rshr_ui(d, s1, s2)         (ANDI_rri(JIT_AUX, (s2), 31), SRWrrr  
((d), (s1), JIT_AUX))
 #define jit_stxi_c(id, rd, rs)         jit_chk_ims((id), STBrm((rs), (id), 
(rd)), STBrx((rs), (rd), JIT_AUX))
 #define jit_stxi_i(id, rd, rs)         jit_chk_ims((id), STWrm((rs), (id), 
(rd)), STWrx((rs), (rd), JIT_AUX))
 #define jit_stxi_s(id, rd, rs)         jit_chk_ims((id), STHrm((rs), (id), 
(rd)), STHrx((rs), (rd), JIT_AUX))
@@ -237,8 +259,8 @@ struct jit_local_state {
 #define jit_stxr_s(d1, d2, rs)                           STHrx((rs), (d1), 
(d2))
 #define jit_subr_i(d, s1, s2)                            SUBrrr((d), (s1), 
(s2))
 #define jit_subcr_i(d, s1, s2)                           SUBCrrr((d), (s1), 
(s2))
-#define jit_subxi_i(d, rs, is)         jit_chk_ims(111111111, 0, SUBErrr((d), 
(rs), JIT_AUX))
-#define jit_subxr_i(d, s1, s2)                                   SUBErrr((d), 
(s1), (s2))
+#define jit_subxi_i(d, rs, is)         jit_big_ims((is), SUBErrr((d), (rs), 
JIT_AUX))
+#define jit_subxr_i(d, s1, s2)                           SUBErrr((d), (s1), 
(s2))
 #define jit_xori_i(d, rs, is)          jit_chk_imu((is), XORIrri((d), (rs), 
(is)), XORrrr((d), (rs), JIT_AUX))
 #define jit_xorr_i(d, s1, s2)                            XORrrr((d), (s1), 
(s2))
 
diff --git a/lightning/ppc/fp.h b/lightning/ppc/fp.h
index 1767195..911882d 100644
--- a/lightning/ppc/fp.h
+++ b/lightning/ppc/fp.h
@@ -35,70 +35,177 @@
 #ifndef __lightning_asm_fp_h
 #define __lightning_asm_fp_h
 
-#if 0
-
-/* dummy for now */
-
-#define jit_add_two(reg0)      FADDrrr(13 - (reg0), 13 - (reg0), 12 - (reg0))
-#define jit_sub_two(reg0)      FSUBrrr(13 - (reg0), 13 - (reg0), 12 - (reg0))
-#define jit_mul_two(reg0)      FMULrrr(13 - (reg0), 13 - (reg0), 12 - (reg0))
-#define jit_div_two(reg0)      FDIVrrr(13 - (reg0), 13 - (reg0), 12 - (reg0))
-
-#define jit_abs(reg0)          FABSr(13 - (reg0))
-#define jit_sqrt(reg0)         FSQRTr(13 - (reg0))
-#define jit_neg(reg0)          FNEGr(13 - (reg0))
-
-#define jit_ldxi_f(reg0, rs, is) 0
-#define jit_ldxr_f(reg0, s1, s2) 0
-#define jit_ldxi_d(reg0, rs, is) 0
-#define jit_ldxr_d(reg0, s1, s2) 0
-#define jit_ldi_f(reg0, is) 0
-#define jit_ldr_f(reg0, rs) 0
-#define jit_ldi_d(reg0, is) 0
-#define jit_ldr_d(reg0, rs) 0
-#define jit_stxi_f(id, rd, reg0) 0
-#define jit_stxr_f(d1, d2, reg0) 0
-#define jit_stxi_d(id, rd, reg0) 0
-#define jit_stxr_d(d1, d2, reg0) 0
-#define jit_sti_f(id, reg0) 0
-#define jit_str_f(rd, reg0) 0
-#define jit_sti_d(id, reg0) 0
-#define jit_str_d(rd, reg0) 0
+
+#define JIT_FPR_NUM           6
+#define JIT_FPR(i)            (8+(i))
+
+#define JIT_FPFR              0
 
 /* Make space for 1 or 2 words, store address in REG */
-#define jit_data(REG, D1)      (_FBA   (18, 8, 0, 1), _jit_L(D1), MFLRr(REG))
-#define jit_data2(REG, D1, D2) (_FBA   (18, 12, 0, 1), _jit_L(D1), _jit_L(D2), 
MFLRr(REG))
-
-#define jit_fpimm(reg0, first, second)         \
-       (jit_data2(JIT_AUX, (first), (second)), \
-        jit_ldxi_d((reg0), JIT_AUX, 0))
-
-#define jit_floor(rd, reg0)    jit_call_fp((rd), (reg0), floor)
-#define jit_ceil(rd, reg0)     jit_call_fp((rd), (reg0), ceil)
-
-#define jit_call_fp(rd, reg0, fn)                                              
\
-       jit_fail(#fn " not supported", __FILE__, __LINE__, __FUNCTION__)
-/*     pass reg0 as first parameter of rd
-       bl      fn
-       mr      r3, rd */
-
-#define jit_trunc(rd, reg0)    (jit_data((rd), 0),                             
\
-                               FCTIWZrr(13 - (reg0), 13 - (reg0)),             
\
-                               STFIWXrrr(13 - (reg0), 0, (rd)),                
        \
-                               LWZrm((rd), 0, (rd)))
-
-#define jit_round(rd, reg0)    (jit_data((rd), 0),                             
\
-                               FCTIWrr(13 - (reg0), 13 - (reg0)),              
\
-                               STFIWXrrr(13 - (reg0), 0, (rd)),                
        \
-                               LWZrm((rd), 0, (rd)))
-                               
-#define jit_cmp(le, ge, reg0)  (FCMPOirr(7, 13 - (reg0), 0),              \
-                               CRORiii(28 + _gt, 28 + _gt, 28 + _eq),     \
-                               CRORiii(28 + _lt, 28 + _lt, 28 + _eq),     \
-                               MFCRr((ge)),                               \
-                               EXTRWIrrii((le), (ge), 1, 28 + _lt),       \
-                               EXTRWIrrii((ge), (ge), 1, 28 + _gt))
-
-#endif
+#define jit_data(REG, D1)              (_FBA   (18, 8, 0, 1),  _jit_L(D1), 
MFLRr(REG))
+
+#define jit_addr_d(rd,s1,s2)  FADDDrrr((rd),(s1),(s2))
+#define jit_subr_d(rd,s1,s2)  FSUBDrrr((rd),(s1),(s2))
+#define jit_mulr_d(rd,s1,s2)  FMULDrrr((rd),(s1),(s2))
+#define jit_divr_d(rd,s1,s2)  FDIVDrrr((rd),(s1),(s2))
+
+#define jit_addr_f(rd,s1,s2)  FADDSrrr((rd),(s1),(s2))
+#define jit_subr_f(rd,s1,s2)  FSUBSrrr((rd),(s1),(s2))
+#define jit_mulr_f(rd,s1,s2)  FMULSrrr((rd),(s1),(s2))
+#define jit_divr_f(rd,s1,s2)  FDIVSrrr((rd),(s1),(s2))
+
+#define jit_movr_d(rd,rs)     ( (rd) == (rs) ? 0 : FMOVErr((rd),(rs)))
+#define jit_movi_d(reg0,d) do {                   \
+      double _v = (d);                            \
+      _FBA (18, 12, 0, 1);                       \
+      memcpy(_jit.x.uc_pc, &_v, sizeof (double)); \
+      _jit.x.uc_pc += sizeof (double);            \
+      MFLRr (JIT_AUX);                           \
+      jit_ldxi_d((reg0), JIT_AUX, 0);            \
+   } while(0) 
+
+
+#define jit_movr_f(rd,rs)     ( (rd) == (rs) ? 0 : FMOVErr((rd),(rs)))
+#define jit_movi_f(reg0,f) do {                   \
+      float _v = (f);                             \
+      _FBA (18, 8, 0, 1);                        \
+      memcpy(_jit.x.uc_pc, &_v, sizeof (float));  \
+      _jit.x.uc_pc += sizeof (float);             \
+      MFLRr (JIT_AUX);                           \
+      jit_ldxi_f((reg0), JIT_AUX, 0);            \
+   } while(0) 
+
+
+#define jit_abs_d(rd,rs)       FABSrr((rd),(rs))
+#define jit_negr_d(rd,rs)      FNEGrr((rd),(rs))
+#define jit_sqrt_d(rd,rs)      FSQRTDrr((rd),(rs))
+
+
+#define jit_ldxi_f(reg0, rs, is)    (_siP(16,(is)) ? LFSrri((reg0),(rs),(is)) 
: (MOVEIri(JIT_AUX,(is)),LFSxrrr((reg0),(rs),JIT_AUX))) 
+#define jit_ldxi_d(reg0, rs, is)    (_siP(16,(is)) ? LFDrri((reg0),(rs),(is)) 
: (MOVEIri(JIT_AUX,(is)),LFDxrrr((reg0),(rs),JIT_AUX)))
+#define jit_ldxr_f(reg0, s1, s2)    LFSxrrr((reg0),(s1),(s2))
+#define jit_ldxr_d(reg0, s1, s2)    LFDxrrr((reg0),(s1),(s2))
+#define jit_ldi_f(reg0, is)          (_siP(16,(is)) ? LFSrri((reg0),0,(is)) : 
(MOVEIri(JIT_AUX,(is)),LFSrri((reg0),JIT_AUX,0)))
+#define jit_ldi_d(reg0, is)          (_siP(16,(is)) ? LFDrri((reg0),0,(is)) : 
(MOVEIri(JIT_AUX,(is)),LFDrri((reg0),JIT_AUX,0)))
+#define jit_ldr_f(reg0, rs)          LFSrri((reg0),(rs),0)
+#define jit_ldr_d(reg0, rs)          LFDrri((reg0),(rs),0)
+#define jit_stxi_f(id, rd, reg0)     (_siP(16,(id)) ? 
STFSrri((reg0),(rd),(id)) : 
(MOVEIri(JIT_AUX,(id)),STFSxrrr((reg0),(rd),JIT_AUX))) /* offset too wide for 16 bits: indexed store via JIT_AUX */
+#define jit_stxi_d(id, rd, reg0)     (_siP(16,(id)) ? 
STFDrri((reg0),(rd),(id)) : 
(MOVEIri(JIT_AUX,(id)),STFDxrrr((reg0),(rd),JIT_AUX))) /* offset too wide for 16 bits: indexed store via JIT_AUX */
+#define jit_stxr_f(d1, d2, reg0)     STFSxrrr((reg0),(d1),(d2))
+#define jit_stxr_d(d1, d2, reg0)     STFDxrrr((reg0),(d1),(d2))
+#define jit_sti_f(id, reg0)          (_siP(16,(id)) ? STFSrri((reg0),0,(id)) : 
(MOVEIri(JIT_AUX,(id)),STFSrri((reg0),JIT_AUX,0)))
+#define jit_sti_d(id, reg0)          (_siP(16,(id)) ? STFDrri((reg0),0,(id)) : 
(MOVEIri(JIT_AUX,(id)),STFDrri((reg0),JIT_AUX,0)))
+#define jit_str_f(rd, reg0)          STFSrri((reg0),(rd),0)
+#define jit_str_d(rd, reg0)          STFDrri((reg0),(rd),0)
+
+#define jit_fpboolr(d, s1, s2, rcbit) (                \
+       FCMPOrrr(_cr0,(s1),(s2)),               \
+       MFCRr((d)),                             \
+       EXTRWIrrii((d), (d), 1, (rcbit)))
+
+#define jit_fpboolr_neg(d, s1, s2,rcbit) (     \
+       FCMPOrrr(_cr0,(s1),(s2)),               \
+       MFCRr((d)),                             \
+       EXTRWIrrii((d), (d), 1, (rcbit)),       \
+       XORIrri((d), (d), 1))
+
+#define jit_fpboolur(d, s1, s2, rcbit) (       \
+       FCMPUrrr(_cr0,(s1),(s2)),               \
+       MFCRr((d)),                             \
+       EXTRWIrrii((d), (d), 1, (rcbit)))
+
+#define jit_fpboolur_neg(d, s1, s2,rcbit) (    \
+       FCMPUrrr(_cr0,(s1),(s2)),               \
+       MFCRr((d)),                             \
+       EXTRWIrrii((d), (d), 1, (rcbit)),       \
+       XORIrri((d), (d), 1))
+
+#define jit_fpboolur_or(d, s1, s2, bit1, bit2) (\
+       FCMPUrrr(_cr0,(s1),(s2)),               \
+       CRORiii((bit1), (bit1), (bit2)),        \
+       MFCRr((d)),                             \
+       EXTRWIrrii((d), (d), 1, (bit1)))
+
+#define jit_gtr_d(d, s1, s2)      jit_fpboolr ((d),(s1),(s2),_gt)   
+#define jit_ger_d(d, s1, s2)      jit_fpboolr_neg((d),(s1),(s2),_lt)   
+#define jit_ltr_d(d, s1, s2)      jit_fpboolr ((d),(s1),(s2),_lt)         
+#define jit_ler_d(d, s1, s2)      jit_fpboolr_neg((d),(s1),(s2),_gt)         
+#define jit_eqr_d(d, s1, s2)      jit_fpboolr ((d),(s1),(s2),_eq)         
+#define jit_ner_d(d, s1, s2)      jit_fpboolr_neg((d),(s1),(s2),_eq)
+#define jit_unordr_d(d, s1, s2)   jit_fpboolur ((d),(s1),(s2),_un)
+#define jit_ordr_d(d, s1, s2)     jit_fpboolur_neg((d),(s1),(s2),_un)
+#define jit_unler_d(d, s1, s2)    jit_fpboolur_neg ((d), (s1), (s2), _gt)
+#define jit_unltr_d(d, s1, s2)    jit_fpboolur_or ((d), (s1), (s2), _un, _lt)
+#define jit_unger_d(d, s1, s2)    jit_fpboolur_neg ((d), (s1), (s2), _lt)
+#define jit_ungtr_d(d, s1, s2)    jit_fpboolur_or ((d), (s1), (s2), _un, _gt)
+#define jit_ltgtr_d(d, s1, s2)    jit_fpboolur_or ((d), (s1), (s2), _gt, _lt)
+#define jit_uneqr_d(d, s1, s2)    jit_fpboolur_or ((d), (s1), (s2), _un, _eq)
+
+#define jit_fpbr(d, s1, s2, rcbit) (           \
+       FCMPOrrr(_cr0,(s1),(s2)),               \
+       BTii ((rcbit), (d)))
+
+#define jit_fpbr_neg(d, s1, s2,rcbit) (        \
+       FCMPOrrr(_cr0,(s1),(s2)),               \
+       BFii ((rcbit), (d)))
+
+#define jit_fpbur(d, s1, s2, rcbit) (          \
+       FCMPUrrr(_cr0,(s1),(s2)),               \
+       BTii ((rcbit), (d)))
+
+#define jit_fpbur_neg(d, s1, s2,rcbit) (       \
+       FCMPUrrr(_cr0,(s1),(s2)),               \
+       BFii ((rcbit), (d)))
+
+#define jit_fpbur_or(d, s1, s2, bit1, bit2) (  \
+       FCMPUrrr(_cr0,(s1),(s2)),               \
+       CRORiii((bit1), (bit1), (bit2)),        \
+       BTii ((bit1), (d)))
+
+#define jit_bgtr_d(d, s1, s2)      jit_fpbr ((d),(s1),(s2),_gt)   
+#define jit_bger_d(d, s1, s2)      jit_fpbr_neg((d),(s1),(s2),_lt)   
+#define jit_bltr_d(d, s1, s2)      jit_fpbr ((d),(s1),(s2),_lt)         
+#define jit_bler_d(d, s1, s2)      jit_fpbr_neg((d),(s1),(s2),_gt)         
+#define jit_beqr_d(d, s1, s2)      jit_fpbr ((d),(s1),(s2),_eq)         
+#define jit_bner_d(d, s1, s2)      jit_fpbr_neg((d),(s1),(s2),_eq)
+#define jit_bunordr_d(d, s1, s2)   jit_fpbur ((d),(s1),(s2),_un)
+#define jit_bordr_d(d, s1, s2)     jit_fpbur_neg((d),(s1),(s2),_un)
+#define jit_bunler_d(d, s1, s2)    jit_fpbur_neg ((d), (s1), (s2), _gt)
+#define jit_bunltr_d(d, s1, s2)    jit_fpbur_or ((d), (s1), (s2), _un, _lt)
+#define jit_bunger_d(d, s1, s2)    jit_fpbur_neg ((d), (s1), (s2), _lt)
+#define jit_bungtr_d(d, s1, s2)    jit_fpbur_or ((d), (s1), (s2), _un, _gt)
+#define jit_bltgtr_d(d, s1, s2)    jit_fpbur_or ((d), (s1), (s2), _gt, _lt)
+#define jit_buneqr_d(d, s1, s2)    jit_fpbur_or ((d), (s1), (s2), _un, _eq)
+
+#define jit_getarg_f(rd, ofs)        jit_movr_f((rd),(ofs))
+#define jit_getarg_d(rd, ofs)        jit_movr_d((rd),(ofs))
+#define jit_pusharg_d(rs)           
(_jitl.nextarg_putd--,jit_movr_d((_jitl.nextarg_putf+_jitl.nextarg_putd+1), 
(rs)))
+#define jit_pusharg_f(rs)           
(_jitl.nextarg_putf--,jit_movr_f((_jitl.nextarg_putf+_jitl.nextarg_putd+1), 
(rs)))
+#define jit_retval_d(op1)            jit_movr_d((op1), 1)  /* fetch the FP result from FPR1 into op1, like jit_retval_i reads r3 */
+#define jit_retval_f(op1)            jit_movr_f((op1), 1)  /* fetch the FP result from FPR1 into op1 */
+
+
+#define jit_floorr_d_i(rd,rs)  (MTFSFIri(7,3), \
+                                  FCTIWrr(31,(rs)),    \
+                                  MOVEIri(JIT_AUX,-4), \
+                                  STFIWXrrr(31,JIT_SP,JIT_AUX),   \
+                                  LWZrm((rd),-4,JIT_SP))
+
+#define jit_ceilr_d_i(rd,rs)   (MTFSFIri(7,2), \
+                                  FCTIWrr(31,(rs)),    \
+                                  MOVEIri(JIT_AUX,-4), \
+                                  STFIWXrrr(31,JIT_SP,JIT_AUX),   \
+                                  LWZrm((rd),-4,JIT_SP))
+
+#define jit_roundr_d_i(rd,rs)  (MTFSFIri(7,0), \
+                                  FCTIWrr(31,(rs)),    \
+                                  MOVEIri(JIT_AUX,-4), \
+                                  STFIWXrrr(31,JIT_SP,JIT_AUX),   \
+                                  LWZrm((rd),-4,JIT_SP))
+
+#define jit_truncr_d_i(rd,rs)  (FCTIWZrr(31,(rs)), \
+                                  MOVEIri(JIT_AUX,-4), \
+                                  STFIWXrrr(31,JIT_SP,JIT_AUX),   \
+                                  LWZrm((rd),-4,JIT_SP))
 
 #endif /* __lightning_asm_h */
diff --git a/lightning/ppc/funcs.h b/lightning/ppc/funcs.h
index 38b6a65..a4a94b8 100644
--- a/lightning/ppc/funcs.h
+++ b/lightning/ppc/funcs.h
@@ -7,7 +7,7 @@
 
 /***********************************************************************
  *
- * Copyright 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ * Copyright 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc.
  * Written by Paolo Bonzini.
  *
  * This file is part of GNU lightning.
@@ -69,13 +69,13 @@ jit_flush_code(void *start, void *end)
   end -= ((long) end) & (cache_line_size - 1);
 
   /* Force data cache write-backs */
-  for (ddest = start; ddest <= (char *) end; ddest += cache_line_size) {
+  for (ddest = (char *) start; ddest <= (char *) end; ddest += 
cache_line_size) {
     __asm__ __volatile__ ("dcbst 0,%0" : : "r"(ddest));
   }
   __asm__ __volatile__ ("sync" : : );
 
   /* Now invalidate the instruction cache */
-  for (idest = start; idest <= (char *) end; idest += cache_line_size) {
+  for (idest = (char *) start; idest <= (char *) end; idest += 
cache_line_size) {
     __asm__ __volatile__ ("icbi 0,%0" : : "r"(idest));
   }
   __asm__ __volatile__ ("isync" : : );
@@ -85,75 +85,78 @@ jit_flush_code(void *start, void *end)
 
 #define _jit (*jit)
 
-/* Emit a trampoline for a function.
- * Upon entrance to the trampoline:
- *   - R0      = return address for the function
- *   - LR      = address where the real code for the function lies
- *   - R3-R8   = parameters
- * After jumping to the address pointed to by R10:
- *   - LR      = address where the epilog lies (the function must return there)
- *   - R25-R20 = parameters (order is reversed, 1st argument is R25)
- */
-static jit_insn *
-_jit_trampoline(jit, n)
-     register jit_state *jit;
-     register int      n;
+static void
+_jit_epilog(jit_state *jit)
 {
-  static jit_insn      trampolines[200];
-  static jit_insn      *p_trampolines[6], *free = trampolines;
-  jit_insn             *trampo;
-  int                  i, ofs, frame_size;
-
-  if (!p_trampolines[n]) {
-    _jit.x.pc = trampo = p_trampolines[n] = free;
-
-    frame_size = 24 + (6 + n) * 4;     /* r26..r31 + args                 */
-    frame_size += 15;                  /* the stack must be quad-word     */
-    frame_size &= ~15;                 /* aligned                         */
-
-    STWUrm(1, -frame_size, 1);         /* stwu  r1, -x(r1)                */
-
-    for (ofs = frame_size - (6 + n) * 4, i = 26 - n; i <= 31; ofs += 4, i++) {
-      STWrm(i, ofs, 1);                        /* stw   rI, ofs(r1)            
   */
-    }
-    STWrm(0, ofs+4, 1);                        /* stw   r0, x(r1)              
   */
-    for (i = 0; i < n; i++) {
-      MRrr(25-i, 3+i);                 /* save parameters in r25..r20     */
-    }
-    BLRL();                            /* blrl                            */
-    LWZrm(0, ofs+4, 1);                        /* lwz   r0, x(r1)  (ret.addr.) 
   */
-    MTLRr(0);                          /* mtspr LR, r0                    */
-
-    for (ofs = frame_size - (6 + n) * 4, i = 26 - n; i <= 31; ofs += 4, i++) {
-      LWZrm(i, ofs, 1);                        /* lwz   rI, ofs(r1)            
   */
-    }
-    ADDIrri(1, 1, frame_size);         /* addi  r1, r1, x                 */
-    BLR();                             /* blr                             */
-
-    jit_flush_code(trampo, _jit.x.pc);
-    free = _jit.x.pc;
-  }
+  int n = _jitl.nbArgs;
+  int frame_size, i, ofs;
+  int first_saved_reg = JIT_AUX - n;
+  int num_saved_regs = 32 - first_saved_reg;
 
-  return p_trampolines[n];
+  frame_size = 24 + 32 + num_saved_regs * 4;   /* r24..r31 + args              
   */
+  frame_size += 15;                    /* the stack must be quad-word     */
+  frame_size &= ~15;                   /* aligned                         */
+
+#ifdef _CALL_DARWIN
+  LWZrm(0, frame_size + 8, 1); /* lwz   r0, x+8(r1)  (ret.addr.)  */
+#else
+  LWZrm(0, frame_size + 4, 1); /* lwz   r0, x+4(r1)  (ret.addr.)  */
+#endif
+  MTLRr(0);                            /* mtspr LR, r0                    */
+
+  ofs = frame_size - num_saved_regs * 4;
+  LMWrm(first_saved_reg, ofs, 1);      /* lmw   rI, ofs(r1)               */
+  ADDIrri(1, 1, frame_size);           /* addi  r1, r1, x                 */
+  BLR();                               /* blr                             */
 }
 
+/* Emit a prolog for a function.
+   Upon entrance to the trampoline:
+     - LR      = address where the real code for the function lies
+     - R3-R8   = parameters
+   Upon finishing the trampoline:
+     - R0      = return address for the function
+     - R25-R20 = parameters (order is reversed, 1st argument is R25)
+  
+   The +32 in the frame_size computation is to account for the parameter
+   area of a function frame.
+
+   On PPC the frame must have space to host the arguments of any callee.
+   However, as it currently stands, the argument to jit_trampoline (n) is
+   the number of arguments of the caller we generate. Therefore, the
+   callee can overwrite a part of the stack (saved register area) when it
+   flushes its own parameters on the stack. The addition of a constant
+   offset = 32 is enough to hold eight 4-byte arguments.  This is less
+   than perfect but is a reasonable workaround for now.
+   A better solution must be investigated.  */
 static void
-_jit_prolog(jit, n)
-     register jit_state *jit;
-     register int      n;
+_jit_prolog(jit_state *jit, int n)
 {
-  register jit_insn    *save_pc, *trampo;
+  int frame_size;
+  int ofs, i;
+  int first_saved_reg = JIT_AUX - n;
+  int num_saved_regs = 32 - first_saved_reg;
 
-  save_pc = _jit.x.pc;
-  trampo = _jit_trampoline(jit, n);
-  _jit.x.pc = save_pc;
+  _jitl.nextarg_geti = JIT_AUX - 1;
+  _jitl.nextarg_getd = 1;
+  _jitl.nbArgs = n;
+
+  frame_size = 24 + 32 + num_saved_regs * 4;   /* r27..r31 + args              
   */
+  frame_size += 15;                    /* the stack must be quad-word     */
+  frame_size &= ~15;                   /* aligned                         */
 
-  _jitl.nextarg_get = 25;
   MFLRr(0);
-  MOVEIri(10, trampo);
-  MTLRr(10);
-  BLRL();                              /* blrl                           */
-  MFLRr(31);                           /* mflr  r31                      */
+  STWUrm(1, -frame_size, 1);           /* stwu  r1, -x(r1)                */
+
+  ofs = frame_size - num_saved_regs * 4;
+  STMWrm(first_saved_reg, ofs, 1);             /* stmw  rI, ofs(r1)            
   */
+#ifdef _CALL_DARWIN
+  STWrm(0, frame_size + 8, 1);         /* stw   r0, x+8(r1)               */
+#else
+  STWrm(0, frame_size + 4, 1);         /* stw   r0, x+4(r1)               */
+#endif
+  for (i = 0; i < n; i++)
+    MRrr(JIT_AUX-1-i, 3+i);            /* save parameters below r24       */
 }
 
 #undef _jit
diff --git a/lightning/sparc/asm.h b/lightning/sparc/asm.h
index 808bb4a..a8c544c 100644
--- a/lightning/sparc/asm.h
+++ b/lightning/sparc/asm.h
@@ -49,6 +49,7 @@
 
 typedef unsigned int jit_insn;
 
+#ifndef LIGHTNING_DEBUG
 #define _d30(BD)       ((_jit_UL(BD) - _jit_UL(_jit.x.pc))>>2)
 #define _d22(BD)       _ck_d(22, _d30(BD))
 
@@ -82,6 +83,9 @@ typedef unsigned int jit_insn;
 #define _3( RD, OP3, RS1, I, ASI, RS2) _jit_I((3<<30)|         
(_u5(RD)<<25)|(_u6(OP3)<<19)|(_u5(RS1)<<14)|(_u1(I)<<13)|(_u8(ASI)<<5)|_u5 
(RS2))
 #define _3i(RD, OP3, RS1, I,     IMM)  _jit_I((3<<30)|         
(_u5(RD)<<25)|(_u6(OP3)<<19)|(_u5(RS1)<<14)|(_u1(I)<<13)|              
_s13(IMM))
 
+#define _FP1(RD, RS1, OPF, RS2)        _2f((RD), 52, (RS1), (OPF), (RS2))
+#define _FP2(RD, RS1, OPF, RS2)        _2f((RD), 53, (RS1), (OPF), (RS2))
+
 /* basic instructions  [Section B, page 87] */
 
 #define ADDrrr(RS1, RS2, RD)   _2   ((RD),  0, (RS1), 0, 0, (RS2))
@@ -300,4 +304,80 @@ typedef unsigned int jit_insn;
 #define WRii(IMM, RD)          WRrii(0, (IMM), (RD))
 #define WRri(RS2, RD)          WRrri(0, (RS2), (RD))
 
+#define LDFSRx(RS1, RS2)       _3   (0, 33, (RS1), 0, 0, (RS2))
+#define LDFSRm(RS1, IMM)       _3i  (0, 33, (RS1), 1,    (IMM))
+#define STFSRx(RD1, RD2)       _3   (0, 37, (RD1), 0, 0, (RD2))
+#define STFSRm(RD, IMM)                _3i  (0, 37, (RD),  1,    (IMM))
+
+#define FITODrr(FRS, FRD)              _FP1((FRD),  0, 200, (FRS))
+#define FITOSrr(FRS, FRD)              _FP1((FRD),  0, 196, (FRS))
+#define FDTOIrr(FRS, FRD)              _FP1((FRD),  0, 210, (FRS))
+#define FSTOIrr(FRS, FRD)              _FP1((FRD),  0, 209, (FRS))
+#define FSTODrr(FRS, FRD)              _FP1((FRD),  0, 201, (FRS))
+#define FDTOSrr(FRS, FRD)              _FP1((FRD),  0, 198, (FRS))
+#define FMOVSrr(FRS, FRD)              _FP1((FRD),  0,   1, (FRS))
+#define FNEGSrr(FRS, FRD)              _FP1((FRD),  0,   5, (FRS))
+#define FABSSrr(FRS, FRD)              _FP1((FRD),  0,   9, (FRS))
+#define FMOVDrr(FRS, FRD)              _FP1((FRD),  0,   2, (FRS))
+#define FNEGDrr(FRS, FRD)              _FP1((FRD),  0,   6, (FRS))
+#define FABSDrr(FRS, FRD)              _FP1((FRD),  0,  10, (FRS))
+#define FSQRTDrr(FRS, FRD)             _FP1((FRD),  0,  42, (FRS))
+#define FSQRTSrr(FRS, FRD)             _FP1((FRD),  0,  41, (FRS))
+
+#define FADDSrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  65, (FRS2))
+#define FSUBSrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  69, (FRS2))
+#define FMULSrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  73, (FRS2))
+#define FDIVSrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  77, (FRS2))
+
+#define FADDDrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  66, (FRS2))
+#define FSUBDrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  70, (FRS2))
+#define FMULDrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  74, (FRS2))
+#define FDIVDrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  78, (FRS2))
+
+#define FCMPSrr(FRS1, FRS2)            _FP2(0,      (FRS1),  81, (FRS2))
+#define FCMPDrr(FRS1, FRS2)            _FP2(0,      (FRS1),  82, (FRS2))
+
+#define LDFxr(RS1, RS2, RD)    _3   ((RD), 32, (RS1), 0, 0, (RS2))
+#define LDFmr(RS1, IMM, RD)    _3i  ((RD), 32, (RS1), 1,    (IMM))
+#define LDDFxr(RS1, RS2, RD)   _3   ((RD), 35, (RS1), 0, 0, (RS2))
+#define LDDFmr(RS1, IMM, RD)   _3i  ((RD), 35, (RS1), 1,    (IMM))
+#define STFrx(RS, RD1, RD2)    _3   ((RS), 36, (RD1), 0, 0, (RD2))
+#define STFrm(RS, RD1, IMM)    _3i  ((RS), 36, (RD1), 1,    (IMM))
+#define STDFrx(RS, RD1, RD2)   _3   ((RS), 39, (RD1), 0, 0, (RD2))
+#define STDFrm(RS, RD1, IMM)   _3i  ((RS), 39, (RD1), 1,    (IMM))
+
+#define FBNi(DISP)             _0   (0,  0, 6, (DISP))
+#define FBN_Ai(DISP)           _0   (1,  0, 6, (DISP))
+#define FBNEi(DISP)            _0   (0,  1, 6, (DISP))
+#define FBNE_Ai(DISP)          _0   (1,  1, 6, (DISP))
+#define FBLGi(DISP)            _0   (0,  2, 6, (DISP))
+#define FBLG_Ai(DISP)          _0   (1,  2, 6, (DISP))
+#define FBULi(DISP)            _0   (0,  3, 6, (DISP))
+#define FBUL_Ai(DISP)          _0   (1,  3, 6, (DISP))
+#define FBLi(DISP)             _0   (0,  4, 6, (DISP))
+#define FBL_Ai(DISP)           _0   (1,  4, 6, (DISP))
+#define FBUGi(DISP)            _0   (0,  5, 6, (DISP))
+#define FBUG_Ai(DISP)          _0   (1,  5, 6, (DISP))
+#define FBGi(DISP)             _0   (0,  6, 6, (DISP))
+#define FBG_Ai(DISP)           _0   (1,  6, 6, (DISP))
+#define FBUi(DISP)             _0   (0,  7, 6, (DISP))
+#define FBU_Ai(DISP)           _0   (1,  7, 6, (DISP))
+#define FBAi(DISP)             _0   (0,  8, 6, (DISP))
+#define FBA_Ai(DISP)           _0   (1,  8, 6, (DISP))
+#define FBEi(DISP)             _0   (0,  9, 6, (DISP))
+#define FBE_Ai(DISP)           _0   (1,  9, 6, (DISP))
+#define FBUEi(DISP)            _0   (0, 10, 6, (DISP))
+#define FBUE_Ai(DISP)          _0   (1, 10, 6, (DISP))
+#define FBGEi(DISP)            _0   (0, 11, 6, (DISP))
+#define FBGE_Ai(DISP)          _0   (1, 11, 6, (DISP))
+#define FBUGEi(DISP)           _0   (0, 12, 6, (DISP))
+#define FBUGE_Ai(DISP)         _0   (1, 12, 6, (DISP))
+#define FBLEi(DISP)            _0   (0, 13, 6, (DISP))
+#define FBLE_Ai(DISP)          _0   (1, 13, 6, (DISP))
+#define FBULEi(DISP)           _0   (0, 14, 6, (DISP))
+#define FBULE_Ai(DISP)         _0   (1, 14, 6, (DISP))
+#define FBOi(DISP)             _0   (0, 15, 6, (DISP))
+#define FBO_Ai(DISP)           _0   (1, 15, 6, (DISP))
+
+#endif
 #endif /* __ccg_asm_sparc_h */
diff --git a/lightning/sparc/core.h b/lightning/sparc/core.h
index b70259e..7912a3b 100644
--- a/lightning/sparc/core.h
+++ b/lightning/sparc/core.h
@@ -33,14 +33,13 @@
 #ifndef __lightning_core_h
 #define __lightning_core_h
 
-#define JIT_R0                 _Rl(0)
-#define JIT_R1                 _Rl(1)
-#define JIT_R2                 _Rl(2)
-#define JIT_V0                 _Rl(3)
-#define JIT_V1                 _Rl(4)
-#define JIT_V2                 _Rl(5)
+#define JIT_R_NUM              3
+#define JIT_V_NUM              6
+#define JIT_R(i)               ((i) ? _Rl((i) - 1) : _Rg(2))
+#define JIT_V(i)               _Rl((i)+2)
+
 #define JIT_BIG                        _Rg(1)  /* %g1 used to make 32-bit 
operands */
-#define JIT_BIG2               _Rg(2)  /* %g2 used to make 32-bit compare 
operands */
+#define JIT_BIG2               _Ro(7)  /* %o7 used to make 32-bit compare 
operands */
 #define JIT_SP                 _Ro(6)
 #define JIT_RZERO              _Rg(0)
 #define JIT_RET                        _Ri(0)
@@ -94,10 +93,18 @@ struct jit_local_state {
 #define jit_prepare_y(rs, is)          (SRArir(rs, 31, JIT_BIG), WRri(JIT_BIG, 
_y), NOP(), NOP(), NOP(), _jit.x.pc -= jit_immsize(is))
 #define jit_clr_y(rs, is)              (                         WRri(0,       
_y), NOP(), NOP(), NOP(), _jit.x.pc -= jit_immsize(is))
 
-#define jit_mod(div, mul, d, s1, s2) (                                 \
-       div (JIT_BIG2, s1, s2),                                         \
-       mul (JIT_BIG2, JIT_BIG2, s2),                                   \
-       jit_subr_i (d, s1, JIT_BIG2))
+#define jit_modr(jit_div, jit_mul, d, s1, s2)   \
+        (jit_div (JIT_BIG, s1, s2),             \
+         jit_mul (JIT_BIG, JIT_BIG, s2),        \
+         jit_subr_i (d, s1, JIT_BIG))
+
+#define jit_modi(jit_divi, jit_muli, jit_divr, jit_mulr, d, rs, is)     /* d = rs % is, for an immediate is */ \
+        (_siP(13,(is))   /* test the macro's own immediate (was `imm', not a parameter) */ \
+         ? (jit_divi (JIT_BIG, rs, is),         /* JIT_BIG = rs / is */ \
+            jit_muli (JIT_BIG, JIT_BIG, is),    /* JIT_BIG = (rs / is) * is */ \
+            jit_subr_i (d, rs, JIT_BIG))        /* d = rs - JIT_BIG */ \
+         : (SETir ((is), JIT_BIG2),             /* otherwise put is in JIT_BIG2 ... */ \
+            jit_modr (jit_divr, jit_mulr, d, rs, JIT_BIG2)))    /* ... and use the register form */
 
 /* How many instruction are needed to put imm in a register.  */
 #define jit_immsize(imm)       (!(imm) ? 0 :                   \
@@ -107,10 +114,16 @@ struct jit_local_state {
 /* branch instructions return the address of the *delay* instruction -- this
  * is just a helper macro that makes jit_patch more readable.
  */
-#define jit_patch_(jump_pc)                                            \
+#define jit_patch_(jump_pc,pv)                                         \
        (*jump_pc &= ~_MASK(22),                                        \
-        *jump_pc |= ((_jit_UL(_jit.x.pc) - _jit_UL(jump_pc)) >> 2) & _MASK(22))
+        *jump_pc |= ((_jit_UL((pv)) - _jit_UL(jump_pc)) >> 2) & _MASK(22))
 
+#define jit_patch_set(sethi_pc, or_pc, dest)                   /* patch a two-instruction constant load to produce dest */ \
+       (*(sethi_pc) &= ~_MASK(22), *(sethi_pc) |= _HI(dest),   /* replace the low 22 bits of the first insn with _HI(dest) */ \
+        *(or_pc) &= ~_MASK(13), *(or_pc) |= _LO(dest))         /* replace the low 13 bits of the second insn with _LO(dest); no trailing continuation */
+
+#define jit_patch_movi(movi_pc, val)                           \
+       jit_patch_set((movi_pc) - 2, (movi_pc) - 1, (val))
 
 #define        jit_arg_c()                     (_jitl.nextarg_get++)
 #define        jit_arg_i()                     (_jitl.nextarg_get++)
@@ -162,8 +175,10 @@ struct jit_local_state {
 #define jit_boaddr_ui(label, s1, s2)   (                  ADDCCrrr((s1), (s2), 
(s1)),                           BCSi((label)), NOP(), _jit.x.pc - 1)
 #define jit_bosubr_ui(label, s1, s2)   (                  SUBCCrrr((s1), (s2), 
(s1)),                           BCSi((label)), NOP(), _jit.x.pc - 1)
 #define jit_calli(label)               (CALLi(label), NOP(), _jit.x.pc - 1)
+#define jit_callr(reg)                 (CALLx((reg), 0), NOP())
+
 #define jit_divi_i(d, rs, is)          (jit_prepare_y((rs), 0x12345678), 
SETir((is), JIT_BIG), SDIVrrr((rs), JIT_BIG, (d)) )
-#define jit_divi_ui(d, rs, is)         (jit_clr_y((rs)),    0x12345678), 
SETir((is), JIT_BIG), UDIVrrr((rs), JIT_BIG, (d)) )
+#define jit_divi_ui(d, rs, is)         (jit_clr_y((rs),     0x12345678), 
SETir((is), JIT_BIG), UDIVrrr((rs), JIT_BIG, (d)) )
 #define jit_divr_i(d, s1, s2)          (jit_prepare_y((s1), 0),                
                SDIVrrr((s1), (s2), (d)))
 #define jit_divr_ui(d, s1, s2)         (jit_clr_y((s1),     0),                
                UDIVrrr((s1), (s2), (d)))
 #define jit_eqi_i(d, rs, is)           jit_chk_imm((is), \
@@ -208,11 +223,12 @@ struct jit_local_state {
 #define jit_lti_ui(d, rs, is)          jit_booli ((d), (rs), (is), 
BLUi(_jit.x.pc + 3) )
 #define jit_ltr_i(d, s1, s2)           jit_boolr ((d), (s1), (s2), 
BLi(_jit.x.pc + 3)  )
 #define jit_ltr_ui(d, s1, s2)          jit_boolr ((d), (s1), (s2), 
BLUi(_jit.x.pc + 3) )
-#define jit_modi_i(d, rs, is)          jit_modi(jit_divi_i, jit_muli_i, (d), 
(rs), (is))
-#define jit_modi_ui(d, rs, is)         jit_modi(jit_divi_i, jit_muli_i, (d), 
(rs), (is))
-#define jit_modr_i(d, s1, s2)          jit_modr(jit_divr_i, jit_mulr_i, (d), 
(s1), (s2))
-#define jit_modr_ui(d, s1, s2)         jit_modr(jit_divr_i, jit_mulr_i, (d), 
(s1), (s2))
+#define jit_modi_i(d, rs, is)           jit_modi(jit_divi_i, jit_muli_i, 
jit_divr_i, jit_mulr_i, (d), (rs), (is))
+#define jit_modi_ui(d, rs, is)          jit_modi(jit_divi_ui, jit_muli_ui, 
jit_divr_ui, jit_mulr_ui, (d), (rs), (is))
+#define jit_modr_i(d, s1, s2)           jit_modr(jit_divr_i, jit_mulr_i, (d), 
(s1), (s2))
+#define jit_modr_ui(d, s1, s2)          jit_modr(jit_divr_ui, jit_mulr_ui, 
(d), (s1), (s2))
 #define jit_movi_i(d, is)              SETir((is), (d))
+#define jit_movi_p(d, is)              (SETir2(_HI((is)), _LO((is)), (d)), 
_jit.x.pc)
 #define jit_movr_i(d, rs)              MOVrr((rs), (d))
 #define jit_muli_i(d, rs, is)          jit_chk_imm((is), SMULrir((rs), (is), 
(d)), SMULrrr((rs), JIT_BIG, (d)))
 #define jit_muli_ui(d, rs, is)         jit_chk_imm((is), UMULrir((rs), (is), 
(d)), UMULrrr((rs), JIT_BIG, (d)))
@@ -221,14 +237,14 @@ struct jit_local_state {
 #define jit_nop()                      NOP()
 #define jit_ori_i(d, rs, is)           jit_chk_imm((is), ORrir((rs), (is), 
(d)), ORrrr((rs), JIT_BIG, (d)))
 #define jit_orr_i(d, s1, s2)                             ORrrr((s1), (s2), (d))
-#define jit_patch(delay_pc)            jit_patch_ ( ((delay_pc) - 1) )
+#define jit_patch_at(delay_pc, pv)     jit_patch_ (((delay_pc) - 1) , (pv))
 #define jit_popr_i(rs)                 (LDmr(JIT_SP, 0, (rs)), ADDrir(JIT_SP, 
8, JIT_SP))
-#define jitfp_prepare(numargs, nf, nd) (_jitl.nextarg_put = (numargs))
-#define jit_prolog(numargs)            (SAVErir(JIT_SP, -96, JIT_SP), 
_jitl.nextarg_get = _Ri(0))
+#define jit_prepare_i(num)             (_jitl.nextarg_put += (num))
+#define jit_prolog(numargs)            (SAVErir(JIT_SP, -120, JIT_SP), 
_jitl.nextarg_get = _Ri(0))
 #define jit_pushr_i(rs)                        (STrm((rs), JIT_SP, -8), 
SUBrir(JIT_SP, 8, JIT_SP))
 #define jit_pusharg_i(rs)              (--_jitl.nextarg_put, MOVrr((rs), 
_Ro(_jitl.nextarg_put)))
 #define jit_ret()                      (RET(), RESTORE())
-#define jit_retval(rd)                 MOVrr(_Ro(0), (rd))
+#define jit_retval_i(rd)               MOVrr(_Ro(0), (rd))
 #define jit_rshi_i(d, rs, is)          SRArir((rs), (is), (d))
 #define jit_rshi_ui(d, rs, is)         SRLrir((rs), (is), (d))
 #define jit_rshr_i(d, r1, r2)          SRArrr((r1), (r2), (d))
diff --git a/lightning/sparc/fp.h b/lightning/sparc/fp.h
index 660af3d..5632ac8 100644
--- a/lightning/sparc/fp.h
+++ b/lightning/sparc/fp.h
@@ -7,7 +7,7 @@
 
 /***********************************************************************
  *
- * Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
+ * Copyright 2000, 2001, 2002, 2004 Free Software Foundation, Inc.
  * Written by Paolo Bonzini.
  *
  * This file is part of GNU lightning.
@@ -35,99 +35,61 @@
 #ifndef __lightning_asm_fp_h
 #define __lightning_asm_fp_h
 
-#if 0
-
-/* dummy for now */
-
-#define _FP1(RD, RS1, OPF, RS2)        _2f((RD), 52, (RS1), (OPF), (RS2))
-#define _FP2(RD, RS1, OPF, RS2)        _2f((RD), 53, (RS1), (OPF), (RS2))
-
-#define FITODrr(FRS, FRD)              _FP1((FRD),  0, 200, (FRS))
-#define FDTOIrr(FRS, FRD)              _FP1((FRD),  0, 210, (FRS))
-#define FSTODrr(FRS, FRD)              _FP1((FRD),  0, 201, (FRS))
-#define FDTOSrr(FRS, FRD)              _FP1((FRD),  0, 198, (FRS))
-#define FMOVSrr(FRS, FRD)              _FP1((FRD),  0,   1, (FRS))
-#define FNEGSrr(FRS, FRD)              _FP1((FRD),  0,   5, (FRS))
-#define FABSSrr(FRS, FRD)              _FP1((FRD),  0,   9, (FRS))
-#define FSQRTDrr(FRS, FRD)             _FP1((FRD),  0,  74, (FRS))
-
-#define FADDDrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  66, (FRS2))
-#define FSUBDrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  70, (FRS2))
-#define FMULDrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  82, (FRS2))
-#define FDIVDrrr(FRS1, FRS2, FRD)      _FP1((FRD),  (FRS1),  86, (FRS2))
-
-#define FCMPDrr(FRS1, FRS2)            _FP2(0,      (FRS1),  82, (FRS2))
-
-#define LDFxr(RS1, RS2, RD)    _3   ((RD), 32, (RS1), 0, 0, (RS2))
-#define LDFmr(RS1, IMM, RD)    _3i  ((RD), 32, (RS1), 1,    (IMM))
-#define LDDFxr(RS1, RS2, RD)   _3   ((RD), 35, (RS1), 0, 0, (RS2))
-#define LDDFmr(RS1, IMM, RD)   _3i  ((RD), 35, (RS1), 1,    (IMM))
-#define STFrx(RS, RD1, RD2)    _3   ((RS), 36, (RD1), 0, 0, (RD2))
-#define STFrm(RS, RD1, IMM)    _3i  ((RS), 36, (RD1), 1,    (IMM))
-#define STDFrx(RS, RD1, RD2)   _3   ((RS), 39, (RD1), 0, 0, (RD2))
-#define STDFrm(RS, RD1, IMM)   _3i  ((RS), 39, (RD1), 1,    (IMM))
-
-#define FBNi(DISP)             _0   (0,  0, 6, (DISP))
-#define FBN_Ai(DISP)           _0   (1,  0, 6, (DISP))
-#define FBNEi(DISP)            _0   (0,  1, 6, (DISP))
-#define FBNE_Ai(DISP)          _0   (1,  1, 6, (DISP))
-#define FBLGi(DISP)            _0   (0,  2, 6, (DISP))
-#define FBLG_Ai(DISP)          _0   (1,  2, 6, (DISP))
-#define FBULi(DISP)            _0   (0,  3, 6, (DISP))
-#define FBUL_Ai(DISP)          _0   (1,  3, 6, (DISP))
-#define FBLi(DISP)             _0   (0,  4, 6, (DISP))
-#define FBL_Ai(DISP)           _0   (1,  4, 6, (DISP))
-#define FBUGi(DISP)            _0   (0,  5, 6, (DISP))
-#define FBUG_Ai(DISP)          _0   (1,  5, 6, (DISP))
-#define FBGi(DISP)             _0   (0,  6, 6, (DISP))
-#define FBG_Ai(DISP)           _0   (1,  6, 6, (DISP))
-#define FBUi(DISP)             _0   (0,  7, 6, (DISP))
-#define FBU_Ai(DISP)           _0   (1,  7, 6, (DISP))
-#define FBAi(DISP)             _0   (0,  8, 6, (DISP))
-#define FBA_Ai(DISP)           _0   (1,  8, 6, (DISP))
-#define FBEi(DISP)             _0   (0,  9, 6, (DISP))
-#define FBE_Ai(DISP)           _0   (1,  9, 6, (DISP))
-#define FBUEi(DISP)            _0   (0, 10, 6, (DISP))
-#define FBUE_Ai(DISP)          _0   (1, 10, 6, (DISP))
-#define FBGEi(DISP)            _0   (0, 11, 6, (DISP))
-#define FBGE_Ai(DISP)          _0   (1, 11, 6, (DISP))
-#define FBUGEi(DISP)           _0   (0, 12, 6, (DISP))
-#define FBUGE_Ai(DISP)         _0   (1, 12, 6, (DISP))
-#define FBLEi(DISP)            _0   (0, 13, 6, (DISP))
-#define FBLE_Ai(DISP)          _0   (1, 13, 6, (DISP))
-#define FBULEi(DISP)           _0   (0, 14, 6, (DISP))
-#define FBULE_Ai(DISP)         _0   (1, 14, 6, (DISP))
-#define FBOi(DISP)             _0   (0, 15, 6, (DISP))
-#define FBO_Ai(DISP)           _0   (1, 15, 6, (DISP))
-
-#define FSKIPUG()              _0d  (1, 13, 6, 2)      /* fble,a .+8 */
-#define FSKIPUL()              _0d  (1, 11, 6, 2)      /* fbge,a .+8 */
-
-#define jit_add_two(reg0)      FADDDrrr(30 - (reg0) * 2, 28 - (reg0) * 2, 30 - 
(reg0) * 2)
-#define jit_sub_two(reg0)      FSUBDrrr(30 - (reg0) * 2, 28 - (reg0) * 2, 30 - 
(reg0) * 2)
-#define jit_mul_two(reg0)      FMULDrrr(30 - (reg0) * 2, 28 - (reg0) * 2, 30 - 
(reg0) * 2)
-#define jit_div_two(reg0)      FDIVDrrr(30 - (reg0) * 2, 28 - (reg0) * 2, 30 - 
(reg0) * 2)
-
-#define jit_abs(reg0)          FABSSrr(30 - (reg0) * 2, 30 - (reg0) * 2)
-#define jit_neg(reg0)          FNEGSrr(30 - (reg0) * 2, 30 - (reg0) * 2)
-#define jit_sqrt(reg0)         FSQRTDrr(30 - (reg0) * 2, 30 - (reg0) * 2)
-
-#define jit_fpimm(reg0, first, second)         \
-       (_1(4), NOP(), _jit_L(first), _jit_L(second),   \
-        jit_ldxi_d((reg0), _Ro(7), 8))
-
-#define jit_ldxi_f(reg0, rs, is)       (jit_chk_imm((is), LDFmr((rs), (is), 30 
- (reg0) * 2), LDFxr((rs), JIT_BIG, 30 - (reg0) * 2)), FSTODrr(30 - (reg0) * 2, 
30 - (reg0) * 2))
-#define jit_ldxi_d(reg0, rs, is)       jit_chk_imm((is), LDDFmr((rs), (is), 30 
- (reg0) * 2), LDDFxr((rs), JIT_BIG, 30 - (reg0) * 2))
-#define jit_ldxr_f(reg0, s1, s2)       (LDFxr((s1), (s2), 30 - (reg0) * 2), 
FSTODrr(30 - (reg0) * 2, 30 - (reg0) * 2))
-#define jit_ldxr_d(reg0, s1, s2)       LDDFxr((s1), (s2), 30 - (reg0) * 2)
-#define jit_stxi_f(id, rd, reg0)       (FDTOSrr(30 - (reg0) * 2, 30 - (reg0) * 
2), jit_chk_imm((id), STFrm(30 - (reg0) * 2, (rd), (id)), STFrx(30 - (reg0) * 
2, (rd),  JIT_BIG)))
-#define jit_stxi_d(id, rd, reg0)                                         
jit_chk_imm((id), STDFrm(30 - (reg0) * 2, (rd), (id)), STDFrx(30 - (reg0) * 2, 
(rd),  JIT_BIG))
-#define jit_stxr_f(d1, d2, reg0)       (FDTOSrr(30 - (reg0) * 2, 30 - (reg0) * 
2), STFrx (30 - (reg0) * 2, (d1), (d2)))
-#define jit_stxr_d(d1, d2, reg0)                                         
STDFrx(30 - (reg0) * 2, (d1), (d2))
-
-
-#define jit_do_round(mode, rd, freg)   (                       \
-       _1(3),                                                  \
+#define JIT_FPR_NUM    6
+#define JIT_FPR(i)     (30-(i)*2)
+#define JIT_FPTMP      18
+
+#define jit_addr_f(rd,s1,s2)   FADDSrrr((s1), (s2), (rd))
+#define jit_subr_f(rd,s1,s2)   FSUBSrrr((s1), (s2), (rd))
+#define jit_mulr_f(rd,s1,s2)   FMULSrrr((s1), (s2), (rd))
+#define jit_divr_f(rd,s1,s2)   FDIVSrrr((s1), (s2), (rd))
+
+#define jit_addr_d(rd,s1,s2)   FADDDrrr((s1), (s2), (rd))
+#define jit_subr_d(rd,s1,s2)   FSUBDrrr((s1), (s2), (rd))
+#define jit_mulr_d(rd,s1,s2)   FMULDrrr((s1), (s2), (rd))
+#define jit_divr_d(rd,s1,s2)   FDIVDrrr((s1), (s2), (rd))
+
+#define jit_movr_f(rd,rs)      FMOVSrr((rs), (rd))
+#define jit_abs_d(rd,rs)       FABSDrr((rs), (rd))     /* _d operations use the D (double) opcodes, like jit_movr_d */
+#define jit_negr_d(rd,rs)      FNEGDrr((rs), (rd))
+#define jit_sqrt_d(rd,rs)      FSQRTDrr((rs), (rd))
+#define jit_movr_d(rd,rs)      FMOVDrr((rs), (rd))
+#define jit_abs_f(rd,rs)       FABSSrr((rs), (rd))     /* _f operations use the S (single) opcodes, like jit_movr_f */
+#define jit_negr_f(rd,rs)      FNEGSrr((rs), (rd))
+#define jit_sqrt_f(rd,rs)      FSQRTSrr((rs), (rd))
+#define jit_extr_f_d(rs, rd)   FSTODrr((rs), (rd))     /* NOTE(review): parameters are (rs, rd) here, unlike the (rd, rs) order used above -- confirm against callers */
+#define jit_extr_d_f(rs, rd)   FDTOSrr((rs), (rd))     /* NOTE(review): same (rs, rd) parameter order as jit_extr_f_d */
+
+#define jit_movi_f(rd,immf)                      /* load the float constant immf into FP register rd */ \
+    do {                                         \
+      float _v = (immf);                          /* evaluate immf once, converted to float */ \
+      _1(_jit.x.pc + 3), LDFmr(_Ro(7), 8, (rd));  /* presumably a call to pc+3 that skips the inline constant, followed by a load from %o7+8 -- TODO confirm */ \
+      memcpy(_jit.x.uc_pc, &_v, sizeof (float));  /* write the raw constant bytes at the current emit position */ \
+      _jit.x.uc_pc += sizeof (float);             /* advance the emit pointer past the constant */ \
+    } while(0)
+
+#define jit_movi_d(rd,immd)                            /* load the double constant immd into FP register rd */ \
+    do {                                               \
+      double _v = (immd);                              /* evaluate immd once, converted to double */ \
+      if ((long)_jit.x.pc & 4) NOP();                  /* emit a NOP when pc has bit 2 set; presumably so the constant lands 8-byte aligned */ \
+      _1(_jit.x.pc + 4);                               /* presumably a call to pc+4 that skips the 8-byte inline constant -- TODO confirm */ \
+      LDDFmr(_Ro(7), 8, (rd));                         /* load the double from %o7+8 into rd */ \
+      memcpy(_jit.x.uc_pc, &_v, sizeof (double));      /* write the raw constant bytes at the current emit position */ \
+      _jit.x.uc_pc += sizeof (double);                 /* advance the emit pointer past the constant */ \
+    } while(0)
+
+
+#define jit_ldxi_f(rd, rs, is)         jit_chk_imm((is), LDFmr((rs), (is), 
(rd)), LDFxr((rs), JIT_BIG, (rd)))
+#define jit_ldxi_d(rd, rs, is)         jit_chk_imm((is), LDDFmr((rs), (is), 
(rd)), LDDFxr((rs), JIT_BIG, (rd)))
+#define jit_ldxr_f(rd, s1, s2)         LDFxr((s1), (s2), (rd))
+#define jit_ldxr_d(rd, s1, s2)         LDDFxr((s1), (s2), (rd))
+#define jit_stxi_f(id, rd, rs)         jit_chk_imm((id), STFrm((rs), (rd), 
(id)), STFrx((rs), (rd),  JIT_BIG))
+#define jit_stxi_d(id, rd, rs)         jit_chk_imm((id), STDFrm((rs), (rd), 
(id)), STDFrx((rs), (rd),  JIT_BIG))
+#define jit_stxr_f(d1, d2, rs)         STFrx((rs), (d1), (d2))
+#define jit_stxr_d(d1, d2, rs)         STDFrx((rs), (d1), (d2))
+
+#define jit_do_round(mode, rd, freg, macro)    (               \
+       _1(_jit.x.pc + 3),                                      \
        SETHIir(_HI(mode << 29), JIT_BIG),                      \
        NOP(),                                                  \
        STFSRm(_Ro(7), 8),              /* store fsr */         \
@@ -135,29 +97,71 @@
        XORrrr(rd, JIT_BIG, JIT_BIG),   /* adjust mode */       \
        STrm(JIT_BIG, _Ro(7), 8),                               \
        LDFSRm(_Ro(7), 8),              /* load fsr */          \
-       FDTOIrr(freg, freg),            /* truncate */          \
+       macro,                          /* truncate */          \
        STrm(rd, _Ro(7), 8),            /* load old fsr */      \
        LDFSRm(_Ro(7), 8),                                      \
-       STFrm(freg, _Ro(7), 8),         /* store truncated value */ \
+       STFrm(JIT_FPTMP, _Ro(7), 8),    /* store truncated value */ \
        LDmr(_Ro(7), 8, rd))            /* load it into rd */
 
-
-/*                                      call   delay slot                      
 data   ,--- call lands here */
-#define jit_exti_d(reg0, rs)           (_1(3), NOP(),                          
 NOP(), STrm((rs), _Ro(7), 8), LDFmr(_Ro(7), 8, 30 - (reg0) * 2), FITODrr(30 - 
(reg0) * 2, 30 - (reg0) * 2))
-#define jit_round(rd, reg0)            (_1(3), FDTOIrr(30 - (reg0) * 2, 30 - 
(reg0) * 2), NOP(), STFrm(30 - (reg0) * 2, _Ro(7), 8), LDmr(_Ro(7), 8, (rd)))
-#define jit_floor(rd, reg0)            jit_do_round(3, (rd), (30 - (reg0) * 2))
-#define jit_ceil(rd, reg0)             jit_do_round(2, (rd), (30 - (reg0) * 2))
-#define jit_trunc(rd, reg0)            jit_do_round(1, (rd), (30 - (reg0) * 2))
-
-static double jit_zero = 0.0;
-
-#define jit_cmp(le, ge, reg0)          (SETHIir(_HI(_jit_UL(&jit_zero)), 
(le)),                        \
-                                        LDDFmr((le), _LO(_jit_UL(&jit_zero)), 
28 - (reg0) * 2),        \
-                                        FCMPDrr(30 - (reg0) * 2, 28 - (reg0) * 
2),                     \
-                                        MOVir(0, (le)), MOVir(0, (ge)),        
                \
-                                        FSKIPUL(), MOVir(1, (ge)),             
                \
-                                        FSKIPUG(), MOVir(1, (le)))
-
-#endif
+#define jit_do_round_no_fsr(macro1, macro2) (          \
+       _1(_jit.x.pc + 3),                              \
+       macro1,                                         \
+       NOP(),                                          \
+       macro2)
+
+#define jit_extr_i_d(rd, rs)           jit_do_round_no_fsr (NOP(),             
       (STrm((rs), _Ro(7), 8), LDFmr(_Ro(7), 8, (rd)), FITODrr((rd), (rd))))
+#define jit_extr_i_f(rd, rs)           jit_do_round_no_fsr (NOP(),             
       (STrm((rs), _Ro(7), 8), LDFmr(_Ro(7), 8, (rd)), FITOSrr((rd), (rd))))
+#define jit_roundr_d_i(rd, rs)         jit_do_round_no_fsr (FDTOIrr((rs), 
JIT_FPTMP), (STFrm(JIT_FPTMP, _Ro(7), 8), LDmr(_Ro(7), 8, (rd))))
+#define jit_roundr_f_i(rd, rs)         jit_do_round_no_fsr (FSTOIrr((rs), 
JIT_FPTMP), (STFrm(JIT_FPTMP, _Ro(7), 8), LDmr(_Ro(7), 8, (rd))))
+#define jit_floorr_d_i(rd, rs)         jit_do_round(3, (rd), (rs), 
FDTOIrr((rs), JIT_FPTMP))
+#define jit_ceilr_d_i(rd, rs)          jit_do_round(2, (rd), (rs), 
FDTOIrr((rs), JIT_FPTMP))
+#define jit_truncr_d_i(rd, rs)         jit_do_round(1, (rd), (rs), 
FDTOIrr((rs), JIT_FPTMP))
+#define jit_floorr_f_i(rd, rs)         jit_do_round(3, (rd), (rs), 
FSTOIrr((rs), JIT_FPTMP))
+#define jit_ceilr_f_i(rd, rs)          jit_do_round(2, (rd), (rs), 
FSTOIrr((rs), JIT_FPTMP))
+#define jit_truncr_f_i(rd, rs)         jit_do_round(1, (rd), (rs), 
FSTOIrr((rs), JIT_FPTMP))
+
+#define jit_ltr_d(d, s1, s2)            (FCMPDrr ((s1), (s2)), FBLi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ltr_f(d, s1, s2)            (FCMPSrr ((s1), (s2)), FBLi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ler_d(d, s1, s2)            (FCMPDrr ((s1), (s2)), FBLEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ler_f(d, s1, s2)            (FCMPSrr ((s1), (s2)), FBLEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_eqr_d(d, s1, s2)            (FCMPDrr ((s1), (s2)), FBEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_eqr_f(d, s1, s2)            (FCMPSrr ((s1), (s2)), FBEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ner_d(d, s1, s2)            (FCMPDrr ((s1), (s2)), FBNEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ner_f(d, s1, s2)            (FCMPSrr ((s1), (s2)), FBNEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ger_d(d, s1, s2)            (FCMPDrr ((s1), (s2)), FBGEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ger_f(d, s1, s2)            (FCMPSrr ((s1), (s2)), FBGEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_gtr_d(d, s1, s2)            (FCMPDrr ((s1), (s2)), FBGi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_gtr_f(d, s1, s2)            (FCMPSrr ((s1), (s2)), FBGi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_unltr_d(d, s1, s2)          (FCMPDrr ((s1), (s2)), FBULi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_unltr_f(d, s1, s2)          (FCMPSrr ((s1), (s2)), FBULi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d))) /* same shape as jit_unltr_d; MOVir takes two arguments */
+#define jit_unler_d(d, s1, s2)          (FCMPDrr ((s1), (s2)), 
FBULEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_unler_f(d, s1, s2)          (FCMPSrr ((s1), (s2)), 
FBULEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_uneqr_d(d, s1, s2)          (FCMPDrr ((s1), (s2)), FBUEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_uneqr_f(d, s1, s2)          (FCMPSrr ((s1), (s2)), FBUEi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ltgtr_d(d, s1, s2)          (FCMPDrr ((s1), (s2)), FBLGi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ltgtr_f(d, s1, s2)          (FCMPSrr ((s1), (s2)), FBLGi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_unger_d(d, s1, s2)          (FCMPDrr ((s1), (s2)), 
FBUGEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_unger_f(d, s1, s2)          (FCMPSrr ((s1), (s2)), 
FBUGEi(_jit.x.pc + 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ungtr_d(d, s1, s2)          (FCMPDrr ((s1), (s2)), FBUGi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ungtr_f(d, s1, s2)          (FCMPSrr ((s1), (s2)), FBUGi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ordr_d(d, s1, s2)           (FCMPDrr ((s1), (s2)), FBOi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_ordr_f(d, s1, s2)           (FCMPSrr ((s1), (s2)), FBOi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_unordr_d(d, s1, s2)         (FCMPDrr ((s1), (s2)), FBUi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+#define jit_unordr_f(d, s1, s2)         (FCMPSrr ((s1), (s2)), FBUi(_jit.x.pc 
+ 3), MOVir (1, (d)), MOVir (0, (d)))
+
+#define jit_prepare_f(num)              (_jitl.nextarg_put += (num))
+#define jit_prepare_d(num)              (_jitl.nextarg_put += 2 * (num))
+
+#define jit_arg_f()                     (_jitl.nextarg_get++)
+#define jit_arg_d()                     (_jitl.nextarg_get += 
_jitl.nextarg_get & 1, _jitl.nextarg_get += 2, _jitl.nextarg_get - 2)
+
+#define jit_getarg_f(rd, ofs)           (STrm(ofs, _Ri(6), -24), LDFmr 
(_Ri(6), -24, (rd)))
+#define jit_getarg_d(rd, ofs)           (STDrm(ofs, _Ri(6), -24), LDDFmr 
(_Ri(6), -24, (rd)))
+
+#define jit_pusharg_f(rs)               (STFrm((rs), _Ri(6), -24), 
--_jitl.nextarg_put, LDmr (_Ri(6), -24, _Ro(_jitl.nextarg_put)))
+#define jit_pusharg_d(rs)               (STDFrm((rs), _Ri(6), -24), _jitl.nextarg_put -= 2, LDmr (_Ri(6), -24, _Ro(_jitl.nextarg_put)), LDmr (_Ri(6), -20, _Ro(_jitl.nextarg_put + 1))) /* spill the 8-byte double, then reload both words into the two reserved outgoing registers */
+
+#define jit_retval_f(rs)               jit_movr_f(0, rs)       /* NOTE(review): expands to FMOVSrr(rs, 0), i.e. rs -> FP reg 0; jit_retval_i copies the other way (return register -> rd) -- confirm intent */
+#define jit_retval_d(rs)               jit_movr_d(0, rs)       /* NOTE(review): expands to FMOVDrr(rs, 0), i.e. rs -> FP reg 0; jit_retval_i copies the other way (return register -> rd) -- confirm intent */
 
 #endif /* __lightning_asm_fp_h */
diff --git a/opcode/Makefile.am b/opcode/Makefile.am
index 198480d..ab4e5b0 100644
--- a/opcode/Makefile.am
+++ b/opcode/Makefile.am
@@ -1,6 +1,8 @@
 EXTRA_LIBRARIES = libdisass.a
 noinst_LIBRARIES = @LIBDISASS@
 
+AM_CPPFLAGS = -I$(top_srcdir)
+
 libdisass_a_SOURCES = dis-buf.c i386-dis.c ppc-dis.c ppc-opc.c sparc-dis.c \
        sparc-opc.c disass.c
 
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 04abed3..ba52a1a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -1,8 +1,7 @@
 AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir)
 
-EXTRA_PROGRAMS = testfp funcfp rpnfp
-noinst_PROGRAMS = fibit incr printf printf2 rpn fib fibdelay add
-noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok fib.ok fibdelay.ok 
testfp.ok funcfp.ok rpnfp.ok add.ok
+noinst_PROGRAMS = fibit incr printf printf2 rpn fib fibdelay add bp testfp 
funcfp rpnfp
+noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok fib.ok fibdelay.ok 
testfp.ok funcfp.ok rpnfp.ok add.ok bp.ok
 EXTRA_DIST = $(noinst_DATA) run-test
 
 if DISASS
@@ -10,7 +9,6 @@ LDADD = $(top_builddir)/opcode/libdisass.a
 endif
 
 if REGRESSION_TESTING
-TESTS = fib fibit fibdelay incr printf printf2 rpn add \
-       #testfp funcfp rpnfp
+TESTS = fib fibit fibdelay incr printf printf2 rpn add bp testfp funcfp rpnfp
 TESTS_ENVIRONMENT=$(srcdir)/run-test
 endif
diff --git a/tests/fib.c b/tests/bp.c
similarity index 76%
copy from tests/fib.c
copy to tests/bp.c
index 647ec8d..47e49c2 100644
--- a/tests/fib.c
+++ b/tests/bp.c
@@ -1,14 +1,14 @@
 /******************************** -*- C -*- ****************************
  *
- *     Sample example of recursion and forward references
+ *     Simple example of recursion and forward references
  *
  ***********************************************************************/
 
 
 /***********************************************************************
  *
- * Copyright 2000 Free Software Foundation, Inc.
- * Written by Paolo Bonzini.
+ * Copyright 2000, 2004 Free Software Foundation, Inc.
+ * Written by Paolo Bonzini and Laurent Michel.
  *
  * This file is part of GNU lightning.
  *
@@ -41,10 +41,20 @@ int main()
   pifi      nfibs = (pifi) (jit_set_ip(codeBuffer).iptr);
   int      in;                         /* offset of the argument */
   jit_insn  *ref;                      /* to patch the forward reference */
+  jit_insn  *mref;                     /* ref of move to backpatch */
+  jit_insn  *tp;                       /* location to patch */
 
         jit_prolog   (1);
   in =  jit_arg_ui   ();
         jit_getarg_ui(JIT_V0, in);              /* V0 = n */
+  mref= jit_movi_p(JIT_V2,jit_forward ());      /* Generate a dumb movi */
+        jit_jmpr(JIT_V2);
+        /* generate some dumb filler that will never be executed!*/
+        jit_addi_ui(JIT_V0,JIT_V0,1);
+        jit_addi_ui(JIT_V0,JIT_V0,1);        
+        jit_addi_ui(JIT_V0,JIT_V0,1);        
+        jit_addi_ui(JIT_V0,JIT_V0,1);        
+  tp  = jit_get_label ();
   ref = jit_blti_ui  (jit_forward(), JIT_V0, 2);
         jit_subi_ui  (JIT_V1, JIT_V0, 1);       /* V1 = n-1 */
         jit_subi_ui  (JIT_V2, JIT_V0, 2);       /* V2 = n-2 */
@@ -64,11 +74,13 @@ int main()
         jit_movi_i(JIT_RET, 1);                 /* RET = 1 */
         jit_ret();
 
+  jit_patch_movi(mref,tp);                      /* Ok. Do the back-patching */
+
   /* call the generated code, passing 32 as an argument */
   jit_flush_code(codeBuffer, jit_get_ip().ptr);
 
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
   printf("nfibs(%d) = %d\n", 32, nfibs(32));
diff --git a/tests/bp.ok b/tests/bp.ok
new file mode 100644
index 0000000..ce73f6e
--- /dev/null
+++ b/tests/bp.ok
@@ -0,0 +1 @@
+nfibs(32) = 7049155
diff --git a/tests/fib.c b/tests/fib.c
index 647ec8d..88c50da 100644
--- a/tests/fib.c
+++ b/tests/fib.c
@@ -48,14 +48,14 @@ int main()
   ref = jit_blti_ui  (jit_forward(), JIT_V0, 2);
         jit_subi_ui  (JIT_V1, JIT_V0, 1);       /* V1 = n-1 */
         jit_subi_ui  (JIT_V2, JIT_V0, 2);       /* V2 = n-2 */
-        jit_prepare  (1);
+        jit_prepare_i(1);
           jit_pusharg_ui(JIT_V1);
         jit_finish(nfibs);
-        jit_retval(JIT_V1);                     /* V1 = nfibs(n-1) */
-        jit_prepare(1);
+        jit_retval_i (JIT_V1);                   /* V1 = nfibs(n-1) */
+        jit_prepare_i(1);
           jit_pusharg_ui(JIT_V2);
         jit_finish(nfibs);
-        jit_retval(JIT_V2);                     /* V2 = nfibs(n-2) */
+        jit_retval_i (JIT_V2);                   /* V2 = nfibs(n-2) */
         jit_addi_ui(JIT_V1,  JIT_V1,  1);
         jit_addr_ui(JIT_RET, JIT_V1, JIT_V2);   /* RET = V1 + V2 + 1 */
         jit_ret();
diff --git a/tests/fibit.c b/tests/fibit.c
index 4281b19..4758de0 100644
--- a/tests/fibit.c
+++ b/tests/fibit.c
@@ -65,7 +65,7 @@ int main()
   jit_flush_code(codeBuffer, jit_get_ip().ptr);
 
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *) codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
   /* call the generated code, passing 36 as an argument */
diff --git a/tests/funcfp.c b/tests/funcfp.c
index a95f3f5..01e15b5 100644
--- a/tests/funcfp.c
+++ b/tests/funcfp.c
@@ -7,7 +7,7 @@
 
 /***********************************************************************
  *
- * Copyright 2000 Free Software Foundation, Inc.
+ * Copyright 2000, 2004 Free Software Foundation, Inc.
  * Written by Paolo Bonzini.
  *
  * This file is part of GNU lightning.
@@ -34,140 +34,140 @@
 #include "lightning.h"
 
 static jit_insn codeBuffer[300];
-static struct jit_fp buffer[300];
 
+typedef int (*intFunc) (int, int);
+typedef double (*dblFunc) (double, double);
+typedef float (*floatFunc) (float, float);
 
-typedef int (*intFunc)(int,int);
-typedef double (*dblFunc)(double,double);
-typedef float (*floatFunc)(float,float);
 
-
-dblFunc makeDblFunc()
-     /* Generate a function that computes and returns the sum of 
-        its two double arguments (return an int)
-        i.e., double foo(double x,double y) { return x + y;}
-     */
+/* Generate a function that computes and returns the sum of 
+   its two double arguments (returns a double)
+   i.e., double foo(double x,double y) { return x + y;} */
+dblFunc
+makeDblFunc ()
 {
-   dblFunc retVal;
-   int dbl1,dbl2;
-   jit_set_ip(codeBuffer);
-   retVal = (dblFunc)jit_get_ip().iptr; 
-   jit_prolog(2);
-   jitfp_begin(buffer);   
-   dbl1 = jit_arg_d();
-   dbl2 = jit_arg_d();
-   
-   
-   jitfp_retval(jitfp_add(jitfp_getarg_d(dbl1),
-                          jitfp_getarg_d(dbl2)));
-   
-   jit_ret();
-   jit_flush_code((char*)retVal,jit_get_ip().ptr);  
-   
+  dblFunc retVal;
+  int dbl1, dbl2;
+  retVal = (dblFunc) jit_get_ip ().iptr;
+  jit_prolog (2);
+  dbl1 = jit_arg_d ();
+  dbl2 = jit_arg_d ();
+  jit_getarg_d (JIT_FPR0, dbl1);
+  jit_getarg_d (JIT_FPR1, dbl2);
+  jit_addr_d (JIT_FPR0, JIT_FPR0, JIT_FPR1);
+  jit_retval_d (JIT_FPR0);
+  jit_ret ();
+  jit_flush_code ((char *) retVal, jit_get_ip ().ptr);
+
 #ifdef LIGHTNING_DISASSEMBLE
-   disassemble(stderr, retVal, jit_get_ip().ptr);
+  disassemble (stderr, (char *) retVal, jit_get_ip ().ptr);
 #endif
 
-   return retVal;
+  return retVal;
 }
 
 
-floatFunc makeFloatFunc()
-     /* Generate a function that computes and returns the sum of 
-        its two double arguments (return an int)
-        i.e., double foo(double x,double y) { return x + y;}
-     */
+/* Generate a function that computes and returns the sum of 
+   its two float arguments (returns a float)
+   i.e., float foo(float x,float y) { return x + y;} */
+floatFunc
+makeFloatFunc ()
 {
-   floatFunc retVal;
-   int dbl1,dbl2;
-   //jit_set_ip(codeBuffer);
-   retVal = (floatFunc)jit_get_ip().iptr; 
-   jit_prolog(2);
-   jitfp_begin(buffer);   
-   dbl1 = jit_arg_f();
-   dbl2 = jit_arg_f();
-   
-   
-   jitfp_retval(jitfp_add(jitfp_getarg_f(dbl1),
-                          jitfp_getarg_f(dbl2)));
-   
-   jit_ret();
-   jit_flush_code((char*)retVal,jit_get_ip().ptr);  
-   
+  floatFunc retVal;
+  int dbl1, dbl2;
+  retVal = (floatFunc) jit_get_ip ().iptr;
+  jit_prolog (2);
+  dbl1 = jit_arg_f ();
+  dbl2 = jit_arg_f ();
+  jit_getarg_f (JIT_FPR0, dbl1);
+  jit_getarg_f (JIT_FPR1, dbl2);
+  jit_addr_f (JIT_FPR0, JIT_FPR0, JIT_FPR1);
+  jit_retval_f (JIT_FPR0);
+  jit_ret ();
+  jit_flush_code ((char *) retVal, jit_get_ip ().ptr);
+
 #ifdef LIGHTNING_DISASSEMBLE
-   disassemble(stderr, retVal, jit_get_ip().ptr);
+  disassemble (stderr, (char *) retVal, jit_get_ip ().ptr);
 #endif
 
-   return retVal;
+  return retVal;
 }
 
-dblFunc makeCallFunc(dblFunc theFunc) 
+dblFunc
+makeCallFunc (dblFunc theFunc)
 {
-   dblFunc retVal;
-   int dbl1,dbl2;
-   //jit_set_ip(codeBuffer);
-   retVal = (dblFunc)jit_get_ip().iptr; 
-   jit_prolog(2);
-   jitfp_begin(buffer);   
-   dbl1 = jit_arg_d();
-   dbl2 = jit_arg_d();
-
-   jitfp_prepare(0,0,2);
-   jitfp_pusharg_d(jitfp_mul(jitfp_getarg_d(dbl1),
-                             jitfp_getarg_d(dbl2)));
-   jitfp_pusharg_d(jitfp_getarg_d(dbl1));
-   jit_finish((void*)theFunc);
-   jit_ret();
-   jit_flush_code((char*)retVal,jit_get_ip().ptr);  
-   
+  dblFunc retVal;
+  int dbl1, dbl2;
+  retVal = (dblFunc) jit_get_ip ().iptr;
+  jit_prolog (2);
+  dbl1 = jit_arg_d ();
+  dbl2 = jit_arg_d ();
+
+  jit_prepare_d (2);
+  jit_getarg_d (JIT_FPR0, dbl1);
+  jit_getarg_d (JIT_FPR1, dbl2);
+  jit_mulr_d (JIT_FPR1, JIT_FPR1, JIT_FPR0);
+  jit_pusharg_d (JIT_FPR1);
+  jit_pusharg_d (JIT_FPR0);
+  jit_finish ((void *) theFunc);
+  jit_ret ();
+  jit_flush_code ((char *) retVal, jit_get_ip ().ptr);
+
 #ifdef LIGHTNING_DISASSEMBLE
-   disassemble(stderr, retVal, jit_get_ip().ptr);
+  disassemble (stderr, (char *) retVal, jit_get_ip ().ptr);
 #endif
 
-   return retVal;
+  return retVal;
 }
 
-floatFunc makeCallFloatFunc(floatFunc theFunc) 
+floatFunc
+makeCallFloatFunc (floatFunc theFunc)
 {
-   floatFunc retVal;
-   int dbl1,dbl2;
-   //jit_set_ip(codeBuffer);
-   retVal = (floatFunc)jit_get_ip().iptr; 
-   jit_prolog(2);
-   jitfp_begin(buffer);   
-   dbl1 = jit_arg_f();
-   dbl2 = jit_arg_f();
-
-   jitfp_prepare(0,2,0);
-   jitfp_pusharg_f(jitfp_mul(jitfp_getarg_f(dbl1),
-                             jitfp_getarg_f(dbl2)));
-   jitfp_pusharg_f(jitfp_getarg_f(dbl1));
-   jit_finish((void*)theFunc);
-   jit_ret();
-   jit_flush_code((char*)retVal,jit_get_ip().ptr);  
-   
+  floatFunc retVal;
+  int dbl1, dbl2;
+  retVal = (floatFunc) jit_get_ip ().iptr;
+  jit_prolog (2);
+  dbl1 = jit_arg_f ();
+  dbl2 = jit_arg_f ();
+
+  jit_prepare_f (2);
+  jit_getarg_f (JIT_FPR0, dbl1);
+  jit_getarg_f (JIT_FPR1, dbl2);
+  jit_mulr_f (JIT_FPR1, JIT_FPR1, JIT_FPR0);
+  jit_pusharg_f (JIT_FPR1);
+  jit_pusharg_f (JIT_FPR0);
+  jit_finish ((void *) theFunc);
+  jit_ret ();
+  jit_flush_code ((char *) retVal, jit_get_ip ().ptr);
+
 #ifdef LIGHTNING_DISASSEMBLE
-   disassemble(stderr, retVal, jit_get_ip().ptr);
+  disassemble (stderr, (char *) retVal, jit_get_ip ().ptr);
 #endif
 
-   return retVal;
+  return retVal;
 }
 
 
-int main(int argc,char* argv[])
+int
+main (int argc, char *argv[])
 {
-   dblFunc myFunc2 = makeDblFunc();
-   floatFunc myFunc3 = makeFloatFunc();
-   dblFunc callIt1  = makeCallFunc(myFunc2);
-   floatFunc callIt2  = makeCallFloatFunc(myFunc3);
-
+  dblFunc myFunc2, callIt1;
+  floatFunc myFunc3, callIt2;
+  double y;
+  float a, b, z;
+
+  jit_set_ip (codeBuffer);
+  myFunc2 = makeDblFunc ();
+  myFunc3 = makeFloatFunc ();
+  callIt1 = makeCallFunc (myFunc2);
+  callIt2 = makeCallFloatFunc (myFunc3);
 #ifndef LIGHTNING_CROSS
-   double y = callIt1(10.5,15.3);
-   float a = 1.5;
-   float b = 10.5;
-   float z = callIt2(a,b);
-   printf("result is %f\t %f\n",y,z);
+  y = callIt1 (10.5, 15.3);
+  a = 1.5;
+  b = 10.5;
+  z = callIt2 (a, b);
+  printf ("result is %.5g\t %.5g\n", y, z);
 #endif
 
-   return 0;
+  return 0;
 }
diff --git a/tests/funcfp.ok b/tests/funcfp.ok
index 5077368..6282c30 100644
--- a/tests/funcfp.ok
+++ b/tests/funcfp.ok
@@ -1 +1 @@
-result is 171.150000    17.250000
+result is 171.15        17.25
diff --git a/tests/printf.c b/tests/printf.c
index ec27a2f..e4291e7 100644
--- a/tests/printf.c
+++ b/tests/printf.c
@@ -48,7 +48,7 @@ int main()
   ofs = jit_arg_i();
   jit_movi_p(JIT_R0, "looks like %d bytes sufficed\n");
   jit_getarg_i(JIT_R1, ofs);
-  jit_prepare(2);
+  jit_prepare_i(2);
     jit_pusharg_i(JIT_R1);             /* push in reverse order */
     jit_pusharg_p(JIT_R0);
   jit_finish(printf);
diff --git a/tests/rpnfp.c b/tests/rpnfp.c
index 85b10af..d756fbe 100644
--- a/tests/rpnfp.c
+++ b/tests/rpnfp.c
@@ -38,19 +38,18 @@ static jit_insn codeBuffer[1024];
 
 typedef double (*pdfd) (double);       /* Pointer to Double Function of Double */
 
+static int regs[6] = { JIT_FPR0, JIT_FPR1, JIT_FPR2, JIT_FPR3, JIT_FPR4, 
JIT_FPR5 };
 
 pdfd
 compile_rpn (char *expr)
 {
   pdfd fn;
   int ofs, sp = 1;
-  struct jit_fp buffer[300], *stack[10];
 
-  jitfp_begin (buffer);
   fn = (pdfd) (jit_get_ip ().dptr);
   jit_leaf (1);
   ofs = jit_arg_d ();
-  stack[0] = jitfp_getarg_d (ofs);
+  jit_getarg_d (regs[0], ofs);
 
   while (*expr)
     {
@@ -62,26 +61,27 @@ compile_rpn (char *expr)
        {
          double d = strtod (buf, NULL);
          expr += n - 1;
-         stack[sp++] = jitfp_imm (d);
+         jit_movi_d (regs[sp], d);
+         sp++;
        }
       else if (*expr == '+')
        {
-         stack[sp - 2] = jitfp_add (stack[sp - 2], stack[sp - 1]);
+         jit_addr_d (regs[sp - 2], regs[sp - 2], regs[sp - 1]);
          sp--;
        }
       else if (*expr == '-')
        {
-         stack[sp - 2] = jitfp_sub (stack[sp - 2], stack[sp - 1]);
+         jit_subr_d (regs[sp - 2], regs[sp - 2], regs[sp - 1]);
          sp--;
        }
       else if (*expr == '*')
        {
-         stack[sp - 2] = jitfp_mul (stack[sp - 2], stack[sp - 1]);
+         jit_mulr_d (regs[sp - 2], regs[sp - 2], regs[sp - 1]);
          sp--;
        }
       else if (*expr == '/')
        {
-         stack[sp - 2] = jitfp_div (stack[sp - 2], stack[sp - 1]);
+         jit_divr_d (regs[sp - 2], regs[sp - 2], regs[sp - 1]);
          sp--;
        }
       else
@@ -91,7 +91,7 @@ compile_rpn (char *expr)
        }
       ++expr;
     }
-  jitfp_retval (stack[0]);
+  jit_retval_d (regs[0]);
   jit_ret ();
 
   jit_flush_code ((char *) fn, jit_get_ip ().ptr);
diff --git a/tests/testfp.c b/tests/testfp.c
index 83d3edf..8012b65 100644
--- a/tests/testfp.c
+++ b/tests/testfp.c
@@ -7,7 +7,7 @@
 
 /***********************************************************************
  *
- * Copyright 2000, 2002 Free Software Foundation, Inc.
+ * Copyright 2000, 2002, 2004 Free Software Foundation, Inc.
  * Written by Paolo Bonzini.
  *
  * This file is part of GNU lightning.
@@ -34,104 +34,115 @@
 #include "lightning.h"
 
 static jit_insn codeBuffer[300];
-static struct jit_fp buffer[300];
 static double a;
 
 void
-int_test(what, code)
-     char     *what;
-     jit_code code;
+int_test(char *what, jit_code code, double b, double c, double d, double e, 
double f)
 {
-  a = -2.6; printf("%s\t\t%d ", what, code.iptr());
-  a = -2.4; printf("%d ", code.iptr());
-  a = 0.0; printf("%d ", code.iptr());
-  a = 2.4; printf("%d ", code.iptr());
-  a = 2.6; printf("%d\n", code.iptr());
+  a = b; printf("%s\t\t%d ", what, code.iptr());
+  a = c; printf("%d ", code.iptr());
+  a = d; printf("%d ", code.iptr());
+  a = e; printf("%d ", code.iptr());
+  a = f; printf("%d\n", code.iptr());
 }
 
 int
 main()
 {
   jit_code code;
+  volatile double x = 0.0;
   code.ptr = (char *) codeBuffer;
 
   jit_set_ip(codeBuffer);
   jit_leaf(0);
-  jitfp_begin(buffer);
-  jitfp_cmp(JIT_R1, JIT_R0,
-    jitfp_ldi_d(&a)
-  );
+  jit_ldi_d(JIT_FPR0, &a);
+  jit_movi_d(JIT_FPR1, 0.0);
+  jit_gtr_d(JIT_R0, JIT_FPR0, JIT_FPR1);
+  jit_ltr_d(JIT_R1, JIT_FPR0, JIT_FPR1);
   jit_subr_i(JIT_RET, JIT_R0, JIT_R1); /* [greater] - [less] = -1/0/1 */
   jit_ret();
 
   jit_flush_code(codeBuffer, jit_get_ip().ptr);
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
-  int_test("compare", code);
+  int_test("compare", code, -2.6, -2.4, 0, 2.4, 2.6);
 #endif
 
+#ifdef __GNUC__
   jit_set_ip(codeBuffer);
   jit_leaf(0);
-  jitfp_begin(buffer);
-  jitfp_trunc(JIT_RET,
-    jitfp_ldi_d(&a)
-  );
+  jit_ldi_d(JIT_FPR0, &a);
+  jit_movi_d(JIT_FPR1, 0.0);
+  jit_eqr_d(JIT_R0, JIT_FPR0, JIT_FPR1);
+  jit_ltgtr_d(JIT_R1, JIT_FPR0, JIT_FPR1);
+  jit_lshi_i(JIT_R1, JIT_R1, 1);
+  jit_orr_i(JIT_RET, JIT_R0, JIT_R1);
   jit_ret();
+
+  jit_flush_code(codeBuffer, jit_get_ip().ptr);
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
-  int_test("trunc", code);
+  int_test("nans", code, x / x, 1 / (a - a), -1 / (a - a), 0.0, -2.0);
+#endif
+#else
+  printf ("nans\t\t1 3 3 0 3\n");
 #endif
 
   jit_set_ip(codeBuffer);
   jit_leaf(0);
-  jitfp_begin(buffer);
-  jitfp_ceil(JIT_RET,
-    jitfp_ldi_d(&a)
-  );
+  jit_ldi_d(JIT_FPR0, &a);
+  jit_truncr_d_i(JIT_RET, JIT_FPR0);
   jit_ret();
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
-  int_test("ceil", code);
+  int_test("trunc", code, -2.6, -2.4, 0, 2.4, 2.6);
 #endif
 
   jit_set_ip(codeBuffer);
   jit_leaf(0);
-  jitfp_begin(buffer);
-  jitfp_floor(JIT_RET,
-    jitfp_ldi_d(&a)
-  );
+  jit_ldi_d(JIT_FPR0, &a);
+  jit_ceilr_d_i(JIT_RET, JIT_FPR0);
   jit_ret();
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
-  int_test("floor", code);
+  int_test("ceil", code, -2.6, -2.4, 0, 2.4, 2.6);
 #endif
 
   jit_set_ip(codeBuffer);
   jit_leaf(0);
-  jitfp_begin(buffer);
-  jitfp_round(JIT_RET,
-    jitfp_ldi_d(&a)
-  );
+  jit_ldi_d(JIT_FPR0, &a);
+  jit_floorr_d_i(JIT_RET, JIT_FPR0);
+  jit_ret();
+#ifdef LIGHTNING_DISASSEMBLE
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
+#endif
+#ifndef LIGHTNING_CROSS
+  int_test("floor", code, -2.6, -2.4, 0, 2.4, 2.6);
+#endif
+
+  jit_set_ip(codeBuffer);
+  jit_leaf(0);
+  jit_ldi_d(JIT_FPR0, &a);
+  jit_roundr_d_i(JIT_RET, JIT_FPR0);
   jit_ret();
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
-  int_test("round", code);
+  int_test("round", code, -2.6, -2.4, 0, 2.4, 2.6);
 #endif
 
 #if 0 && defined JIT_TRANSCENDENTAL
   jit_set_ip(codeBuffer);
   jit_leaf(0);
-  jitfp_begin(buffer);
   jitfp_sti_d(&a,
     jitfp_log(
       jitfp_exp(jitfp_imm(1.0))
@@ -140,7 +151,7 @@ main()
   jit_ret();
   code.vptr();
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
   printf("log e = \t%f\n", a);
@@ -148,7 +159,6 @@ main()
 
   jit_set_ip(codeBuffer);
   jit_leaf(0);
-  jitfp_begin(buffer);
   jitfp_sti_d(&a,
     jitfp_atn(
       jitfp_imm(1.732050807657)
@@ -157,7 +167,7 @@ main()
   jit_ret();
   code.vptr();
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
   printf("pi =         \t%f\n", a*3);
@@ -165,7 +175,6 @@ main()
 
   jit_set_ip(codeBuffer);
   jit_leaf(0);
-  jitfp_begin(buffer);
   jitfp_sti_d(&a,
     jitfp_tan(
       jitfp_ldi_d(&a)
@@ -174,7 +183,7 @@ main()
   jit_ret();
   code.vptr();
 #ifdef LIGHTNING_DISASSEMBLE
-  disassemble(stderr, codeBuffer, jit_get_ip().ptr);
+  disassemble(stderr, (char *)codeBuffer, jit_get_ip().ptr);
 #endif
 #ifndef LIGHTNING_CROSS
   printf("tan^2 pi/3 = \t%f\n", a*a);
diff --git a/tests/testfp.ok b/tests/testfp.ok
index 8822dee..e23a31d 100644
--- a/tests/testfp.ok
+++ b/tests/testfp.ok
@@ -1,4 +1,5 @@
-compare                1 1 0 1 1
+compare                -1 -1 0 1 1
+nans           0 2 2 1 2
 trunc          -2 -2 0 2 2
 ceil           -2 -2 0 3 3
 floor          -3 -3 0 2 2



reply via email to

[Prev in Thread] Current Thread [Next in Thread]