guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 204/437: Add basic Itanium port infrastructure.


From: Andy Wingo
Subject: [Guile-commits] 204/437: Add basic Itanium port infrastructure.
Date: Mon, 2 Jul 2018 05:14:21 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit c2e4eb621d7d2e512b34fa5257d9fe4690e330bc
Author: pcpa <address@hidden>
Date:   Thu Apr 25 21:56:32 2013 -0300

    Add basic Itanium port infrastructure.
    
        * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c,
        lib/jit_ia64-fpu.c, lib/jit_ia64.c: New files implementing
        the basic infrastructure of an Itanium port. The code
        compiles and can generate jit for basic hello world like
        functions.
    
        * check/lightning.c, configure.ac, include/lightning.h,
        include/lightning/Makefile.am, include/lightning/jit_private.h,
        lib/Makefile.am, lib/lightning.c: Update for the Itanium
        port.
    
        * lib/jit_mips-cpu.c, lib/jit_mips.c: Correct typo and
        make the jit_carry register local to the jit_state_t.
        This matches code reviewed in the Itanium port, that
        should use the same base logic to handle carry/borrow.
---
 ChangeLog                       |   18 +
 check/lightning.c               |    7 +
 configure.ac                    |    2 +
 include/lightning.h             |    2 +
 include/lightning/Makefile.am   |    4 +
 include/lightning/jit_ia64.h    |  125 +
 include/lightning/jit_private.h |   98 +-
 lib/Makefile.am                 |    3 +
 lib/jit_ia64-cpu.c              | 5030 +++++++++++++++++++++++++++++++++++++++
 lib/jit_ia64-fpu.c              | 1530 ++++++++++++
 lib/{jit_mips.c => jit_ia64.c}  |  828 ++++---
 lib/jit_mips-cpu.c              |    2 +-
 lib/jit_mips.c                  |   14 +-
 lib/lightning.c                 |  182 +-
 14 files changed, 7394 insertions(+), 451 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index f4b40b9..ecd3846 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,21 @@
+2013-04-25 Paulo Andrade <address@hidden>
+
+       * include/lightning/jit_ia64.h, lib/jit_ia64-cpu.c,
+       lib/jit_ia64-fpu.c, lib/jit_ia64.c: New files implementing
+       the basic infrastructure of an Itanium port. The code
+       compiles and can generate jit for basic hello world like
+       functions.
+
+       * check/lightning.c, configure.ac, include/lightning.h,
+       include/lightning/Makefile.am, include/lightning/jit_private.h,
+       lib/Makefile.am, lib/lightning.c: Update for the Itanium
+       port.
+
+       * lib/jit_mips-cpu.c, lib/jit_mips.c: Correct typo and
+       make the jit_carry register local to the jit_state_t.
+       This matches code reviewed in the Itanium port, that
+       should use the same base logic to handle carry/borrow.
+
 2013-04-10 Paulo Andrade <address@hidden>
 
        * include/lightning/jit_private.h, lib/jit_arm.c,
diff --git a/check/lightning.c b/check/lightning.c
index 439aede..0c7e914 100644
--- a/check/lightning.c
+++ b/check/lightning.c
@@ -522,6 +522,9 @@ static void rehash(hash_t *hash);
 /*
  * Initialization
  */
+#if __ia64__
+extern int missing_count;
+#endif
 static jit_state_t      *_jit;
 static int               flag_verbose;
 static int               flag_disasm;
@@ -3566,6 +3569,10 @@ execute(int argc, char *argv[])
     }
 
     function = jit_emit();
+#if __ia64__
+    if (missing_count)
+       flag_disasm = 1;
+#endif
     if (flag_verbose > 1 || flag_disasm) {
        jit_print();
        fprintf(stdout, "  - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n");
diff --git a/configure.ac b/configure.ac
index d4badf5..f32e532 100644
--- a/configure.ac
+++ b/configure.ac
@@ -73,6 +73,7 @@ case "$target_cpu" in
     *mips*)            cpu=mips        ;;
     *powerpc*)         cpu=ppc         ;;
     *sparc*)           cpu=sparc       ;;
+    ia64)              cpu=ia64        ;;
     *)                                 ;;
 esac
 AM_CONDITIONAL(cpu_arm,    [test cpu-$cpu = cpu-arm])
@@ -80,6 +81,7 @@ AM_CONDITIONAL(cpu_mips,   [test cpu-$cpu = cpu-mips])
 AM_CONDITIONAL(cpu_ppc,    [test cpu-$cpu = cpu-ppc])
 AM_CONDITIONAL(cpu_sparc,  [test cpu-$cpu = cpu-sparc])
 AM_CONDITIONAL(cpu_x86,    [test cpu-$cpu = cpu-x86])
+AM_CONDITIONAL(cpu_ia64,   [test cpu-$cpu = cpu-ia64])
 
 # Test x87 if both, x87 and sse2 available
 ac_cv_test_x86_x87=
diff --git a/include/lightning.h b/include/lightning.h
index a5c7cc8..88029b8 100644
--- a/include/lightning.h
+++ b/include/lightning.h
@@ -74,6 +74,8 @@ typedef jit_int32_t           jit_fpr_t;
 #  include <lightning/jit_ppc.h>
 #elif defined(__sparc__)
 #  include <lightning/jit_sparc.h>
+#elif defined(__ia64__)
+#  include <lightning/jit_ia64.h>
 #endif
 
 #define jit_flag_node          0x00000001 /* patch node not absolute */
diff --git a/include/lightning/Makefile.am b/include/lightning/Makefile.am
index 9e1425a..23e232b 100644
--- a/include/lightning/Makefile.am
+++ b/include/lightning/Makefile.am
@@ -37,3 +37,7 @@ if cpu_x86
 lightning_include_HEADERS =    \
        jit_x86.h
 endif
+if cpu_ia64
+lightning_include_HEADERS =    \
+       jit_ia64.h
+endif
diff --git a/include/lightning/jit_ia64.h b/include/lightning/jit_ia64.h
new file mode 100644
index 0000000..06c71a4
--- /dev/null
+++ b/include/lightning/jit_ia64.h
@@ -0,0 +1,125 @@
+/*
+ * Copyright (C) 2013  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#ifndef _jit_ia64_h
+#define _jit_ia64_h
+
+#define JIT_HASH_CONSTS                1
+#define JIT_NUM_OPERANDS       3
+
+/*
+ * Types
+ */
+#define JIT_FP                 _R4     /* Not abi specific */
+typedef enum {
+#define JIT_R0         _R14
+#define JIT_R1         _R15
+#define JIT_R2         _R16
+#define JIT_V0         _R40
+#define JIT_V1         _R41
+#define JIT_V2         _R42
+#define jit_r_num()    16
+#define jit_r(n)       (_R14 - (n))
+#define jit_v_num()    8
+#define jit_v(n)       (_R40 + (n))
+    _R0,       /* constant - Always 0 */
+    _R1,       /* special - Global Data pointer (gp) */
+    /* r2-r3    - scratch - Use with 22-bit immediate add - scratch */
+    _R2,       _R3,
+    /* r4-r7 - preserved */
+    _R4,       _R5,    _R6,    _R7,
+    _R8,       /* scratch - Return value; structure/union return pointer */
+    /* r9-r11   - scratch - Return values */
+    _R9,       _R10,   _R11,
+    _R12,      /* special - Memory stack pointer (sp) */
+    _R13,      /* special - Reserved as a thread pointer (tp)*/
+    /* r14-r31  - scratch */
+    _R31,      _R30,
+    _R29,      _R28,   _R27,   _R26,   _R25,   _R24,   _R23,   _R22,
+    _R21,      _R20,   _R19,   _R18,   _R17,   _R16,   _R15,   _R14,
+    /* r32-r39  - aka in0-in7 - Incoming register arguments */
+    _R32,      _R33,   _R34,   _R35,   _R36,   _R37,   _R38,   _R39,
+    /* r40-r127         - loc0...locn,out0...outn */
+    _R40,      _R41,   _R42,   _R43,   _R44,   _R45,   _R46,   _R47,
+    _R48,      _R49,   _R50,   _R51,   _R52,   _R53,   _R54,   _R55,
+    _R56,      _R57,   _R58,   _R59,   _R60,   _R61,   _R62,   _R63,
+    _R64,      _R65,   _R66,   _R67,   _R68,   _R69,   _R70,   _R71,
+    _R72,      _R73,   _R74,   _R75,   _R76,   _R77,   _R78,   _R79,
+    _R80,      _R81,   _R82,   _R83,   _R84,   _R85,   _R86,   _R87,
+    _R88,      _R89,   _R90,   _R91,   _R92,   _R93,   _R94,   _R95,
+    _R96,      _R97,   _R98,   _R99,   _R100,  _R101,  _R102,  _R103,
+    _R104,     _R105,  _R106,  _R107,  _R108,  _R109,  _R110,  _R111,
+    _R112,     _R113,  _R114,  _R115,  _R116,  _R117,  _R118,  _R119,
+    _R120,     _R121,  _R122,  _R123,  _R124,  _R125,  _R126,  _R127,
+#define JIT_F0         _F32
+#define JIT_F1         _F33
+#define JIT_F2         _F34
+#define JIT_F3         _F35
+#define JIT_F4         _F36
+#define JIT_F5         _F37
+#define JIT_F6         _F38
+#define JIT_F7         _F39
+#define jit_f_num()    8
+#define jit_f(n)       (_F32 + (n))
+    _F0,       /* constant - Always 0.0 */
+    _F1,       /* constant - Always 1.0 */
+    /* f2-f5    - preserved */
+    _F2,       _F3,    _F4,    _F5,
+    /* f6-f7    - scratch */
+    _F6,       _F7,
+    /* f8-f15   - scratch - Argument/return registers */
+    _F8,       _F9,    _F10,   _F11,   _F12,   _F13,   _F14,   _F15,
+    /* f16-f31  - preserved */
+    _F16,      _F17,   _F18,   _F19,   _F20,   _F21,   _F22,   _F23,
+    _F24,      _F25,   _F26,   _F27,   _F28,   _F29,   _F30,   _F31,
+    /* f32-f127         - scratch - Rotating registers or scratch */
+    _F32,      _F33,   _F34,   _F35,   _F36,   _F37,   _F38,   _F39,
+    _F40,      _F41,   _F42,   _F43,   _F44,   _F45,   _F46,   _F47,
+    _F48,      _F49,   _F50,   _F51,   _F52,   _F53,   _F54,   _F55,
+    _F56,      _F57,   _F58,   _F59,   _F60,   _F61,   _F62,   _F63,
+    _F64,      _F65,   _F66,   _F67,   _F68,   _F69,   _F70,   _F71,
+    _F72,      _F73,   _F74,   _F75,   _F76,   _F77,   _F78,   _F79,
+    _F80,      _F81,   _F82,   _F83,   _F84,   _F85,   _F86,   _F87,
+    _F88,      _F89,   _F90,   _F91,   _F92,   _F93,   _F94,   _F95,
+    _F96,      _F97,   _F98,   _F99,   _F100,  _F101,  _F102,  _F103,
+    _F104,     _F105,  _F106,  _F107,  _F108,  _F109,  _F110,  _F111,
+    _F112,     _F113,  _F114,  _F115,  _F116,  _F117,  _F118,  _F119,
+
+#if 0
+    /* Do not list these, so that no unique identifier larger
+     * than 255 is needed for jit_regset_t */
+    _F120,     _F121,  _F122,  _F123,  _F124,  _F125,  _F126,  _F127,
+#endif
+
+    /* Fake registers. Required because while "in" parameters start at r32,
+     * "out" parameters start *after* the registers allocated for temporaries,
+     * which are supposed to be kept alive (which is desirable, that is, to
+     * not spill/reload them in memory) */
+    _OUT0,     _OUT1,  _OUT2,  _OUT3,  _OUT4,  _OUT5,  _OUT6,  _OUT7,
+
+#define JIT_NOREG              _NOREG
+    _NOREG,
+} jit_reg_t;
+
+typedef struct {
+    jit_int64_t                rl;
+    jit_int64_t                rh;
+    jit_int64_t                fl;
+    jit_int64_t                fh;
+} jit_regset_t;
+
+#endif /* _jit_ia64_h */
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index ae8a9e7..d6fd8ea 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -73,6 +73,10 @@
 #  define JIT_SP               _SP
 #  define JIT_RET              _I0
 #  define JIT_FRET             _F0
+#elif defined(__ia64__)
+#  define JIT_SP               _R12
+#  define JIT_RET              _R8
+#  define JIT_FRET             _F8
 #endif
 
 #define jit_size(vector)       (sizeof(vector) / sizeof((vector)[0]))
@@ -121,20 +125,62 @@
 #define jit_cc_a2_flt          0x00200000      /* arg2 is immediate float */
 #define jit_cc_a2_dbl          0x00400000      /* arg2 is immediate double */
 
-#define jit_regset_com(u, v)           (*(u) = ~*(v))
-#define jit_regset_and(u, v, w)                (*(u) = *(v) & *(w))
-#define jit_regset_ior(u, v, w)                (*(u) = *(v) | *(w))
-#define jit_regset_xor(u, v, w)                (*(u) = *(v) ^ *(w))
-#define jit_regset_set(u, v)           (*(u) = *(v))
-#define jit_regset_set_mask(u, v)      (*(u) = (1LL << (v)) - 1)
-#define jit_regset_cmp_ui(u, v)                (*(u) != (v))
-#define jit_regset_set_ui(u, v)                (*(u) = (v))
-#define jit_regset_set_p(set)          (*set)
-#define jit_regset_clrbit(set, bit)    (*(set) &= ~(1LL << (bit)))
-#define jit_regset_setbit(set, bit)    (*(set) |= 1LL << (bit))
-#define jit_regset_tstbit(set, bit)    (*(set) & (1LL << (bit)))
-#define jit_regset_new(set)            (*(set) = 0)
-#define jit_regset_del(set)            (*(set) = 0)
+#if __ia64__
+extern void
+jit_regset_com(jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_and(jit_regset_t*, jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_ior(jit_regset_t*, jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_xor(jit_regset_t*, jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_set(jit_regset_t*, jit_regset_t*);
+
+extern void
+jit_regset_set_mask(jit_regset_t*, jit_int32_t);
+
+extern jit_bool_t
+jit_regset_cmp_ui(jit_regset_t*, jit_word_t);
+
+extern void
+jit_regset_set_ui(jit_regset_t*, jit_word_t);
+
+extern jit_bool_t
+jit_regset_set_p(jit_regset_t*);
+
+extern void
+jit_regset_clrbit(jit_regset_t*, jit_int32_t);
+
+extern void
+jit_regset_setbit(jit_regset_t*, jit_int32_t);
+
+extern jit_bool_t
+jit_regset_tstbit(jit_regset_t*, jit_int32_t);
+#  define jit_regset_new(set)                                          \
+    do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0)
+#  define jit_regset_del(set)                                          \
+    do { (set)->rl = (set)->rh = (set)->fl = (set)->fh = 0; } while (0)
+#else
+#  define jit_regset_com(u, v)         (*(u) = ~*(v))
+#  define jit_regset_and(u, v, w)      (*(u) = *(v) & *(w))
+#  define jit_regset_ior(u, v, w)      (*(u) = *(v) | *(w))
+#  define jit_regset_xor(u, v, w)      (*(u) = *(v) ^ *(w))
+#  define jit_regset_set(u, v)         (*(u) = *(v))
+#  define jit_regset_set_mask(u, v)    (*(u) = (1LL << (v)) - 1)
+#  define jit_regset_cmp_ui(u, v)      (*(u) != (v))
+#  define jit_regset_set_ui(u, v)      (*(u) = (v))
+#  define jit_regset_set_p(set)                (*set)
+#  define jit_regset_clrbit(set, bit)  (*(set) &= ~(1LL << (bit)))
+#  define jit_regset_setbit(set, bit)  (*(set) |= 1LL << (bit))
+#  define jit_regset_tstbit(set, bit)  (*(set) & (1LL << (bit)))
+#  define jit_regset_new(set)          (*(set) = 0)
+#  define jit_regset_del(set)          (*(set) = 0)
+#endif
 extern unsigned long
 jit_regset_scan1(jit_regset_t*, jit_int32_t);
 
@@ -255,12 +301,30 @@ struct jit_function {
 
 /* data used only during jit generation */
 struct jit_compiler {
+#if __ia64__
+    struct {
+       jit_uint64_t      i : 41;
+       jit_uint64_t      t :  4;
+    } inst[3];
+    jit_regset_t         gprs;         /* changed gprs since last stop */
+    jit_regset_t         fprs;         /* changed fprs since last stop */
+    jit_int32_t                  ioff;         /* offset in inst vector */
+    jit_int32_t                  rout;         /* first output register */
+    jit_int32_t                  breg;         /* base register for prolog/epilog */
+#endif
+#if __mips__ || __ia64__
+    jit_int32_t                  carry;
+#define jit_carry        _jitc->carry
+#endif
     jit_node_t          *head;
     jit_node_t          *tail;
     jit_uint32_t         done  : 1;    /* emit state finished */
     jit_uint32_t         emit  : 1;    /* emit state entered */
     jit_uint32_t         again : 1;    /* start over emiting function */
     jit_uint32_t         prepare : 1;  /* inside prepare/finish* block */
+#if DEBUG
+    jit_uint32_t         getreg : 1;
+#endif
     jit_int32_t                  reglen;       /* number of registers */
     jit_regset_t         regarg;       /* cannot allocate */
     jit_regset_t         regsav;       /* automatic spill only once */
@@ -307,7 +371,6 @@ struct jit_compiler {
     struct {
        jit_node_t       *head;         /* first note node */
        jit_node_t       *tail;         /* linked list insertion */
-
        /* fields to store temporary state information */
        jit_word_t        size;
        jit_node_t       *name;
@@ -338,7 +401,7 @@ struct jit_compiler {
        jit_int32_t       values[1024]; /* pending constants */
        jit_word_t        patches[2048];
     } consts;
-#elif __powerpc64__
+#elif __powerpc64__ || __ia64__
     /* Keep track of prolog addresses, just for the sake of making
      * jit that starts with a jump to a "main" label work like other
      * backends. */
@@ -488,5 +551,8 @@ extern jit_bool_t _jit_remap(jit_state_t*);
  */
 extern jit_register_t   _rvs[];
 extern const char      *jit_progname;
+#if __ia64__
+extern int missing_count;
+#endif
 
 #endif /* _jit_private_h */
diff --git a/lib/Makefile.am b/lib/Makefile.am
index a723e73..6b5dbc2 100644
--- a/lib/Makefile.am
+++ b/lib/Makefile.am
@@ -28,6 +28,9 @@ EXTRA_DIST =                  \
        jit_arm-cpu.c           \
        jit_arm-swf.c           \
        jit_arm-vfp.c           \
+       jit_ia64.c              \
+       jit_ia64-cpu.c          \
+       jit_ia64-fpu.c          \
        jit_mips.c              \
        jit_mips-cpu.c          \
        jit_mips-fpu.c          \
diff --git a/lib/jit_ia64-cpu.c b/lib/jit_ia64-cpu.c
new file mode 100644
index 0000000..981ca68
--- /dev/null
+++ b/lib/jit_ia64-cpu.c
@@ -0,0 +1,5030 @@
+/*
+ * Copyright (C) 2013  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+#define INST_NONE                      0       /* should never be generated */
+#define INST_STOP                      1       /* or'ed if stop is required */
+#define INST_A                         2       /* M- or I- unit */
+#define INST_As                                3
+#define INST_I                         4
+#define INST_Is                                5
+#define INST_M                         6
+#define INST_Ms                                7
+#define INST_F                         8
+#define INST_Fs                                9
+#define INST_B                         10
+#define INST_Bs                                11
+#define INST_L                         12
+#define INST_Ls                                13
+#define INST_X                         14
+#define INST_Xs                                15
+
+typedef union {
+    struct {
+       jit_uint64_t    tm : 5;
+       jit_uint64_t    s0 : 41;
+       jit_uint64_t    s1 : 18;
+    } b;
+    jit_int64_t        w;
+} inst_lo_t;
+
+typedef union {
+    struct {
+       jit_uint64_t    s1 : 23;
+       jit_uint64_t    s2 : 41;
+    } b;
+    jit_int64_t        w;
+} inst_hi_t;
+#define il(ii)                         *_jit->pc.ul++ = ii
+#define to_tm(v)                       l.b.tm = v
+#define get_tm(v)                      v = l.b.tm
+#define to_s0(v)                       l.b.s0 = v
+#define get_s0(v)                      v = l.b.s0
+#define to_s1(v)                                                       \
+    do {                                                               \
+       l.b.s1 = (v) & 0x3ffff;                                         \
+       h.b.s1 = ((v) >> 18) & 0x7fffff;                                \
+    } while (0)
+#define get_s1(v)                      v = l.b.s1 | ((jit_word_t)h.b.s1 << 18)
+#define to_s2(v)                       h.b.s2 = v
+#define get_s2(v)                      v = h.b.s2
+
+/*  Need to insert a stop if a modified register would (or could)
+ *  be read in the same cycle.
+ */
+#define TSTREG1(r0)                                                    \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->gprs, r0))                        \
+           stop();                                                     \
+    } while (0)
+#define TSTREG2(r0, r1)                                                        \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->gprs, r0) ||                      \
+           jit_regset_tstbit(&_jitc->gprs, r1))                        \
+           stop();                                                     \
+    } while (0)
+/* Record register was modified */
+#define SETREG(r0)             jit_regset_setbit(&_jitc->gprs, r0)     
+
+/* Avoid using constants in macros and code */
+typedef enum {
+    GR_0,              GR_1,           GR_2,           GR_3,
+    GR_4,              GR_5,           GR_6,           GR_7,
+    GR_8,              GR_9,           GR_10,          GR_11,
+    GR_12,             GR_13,          GR_14,          GR_15,
+    GR_16,             GR_17,          GR_18,          GR_19,
+    GR_20,             GR_21,          GR_22,          GR_23,
+    GR_24,             GR_25,          GR_26,          GR_27,
+    GR_28,             GR_29,          GR_30,          GR_31,
+    GR_32,             GR_33,          GR_34,          GR_35,
+    GR_36,             GR_37,          GR_38,          GR_39,
+    GR_40,             GR_41,          GR_42,          GR_43,
+    /* GR_44...GR_127 */
+} gr_reg_t;
+
+typedef enum {
+    PR_0,              /* constant - always 1 */
+    /* p1-p5            - preserved */
+    PR_1,              PR_2,           PR_3,           PR_4,
+    PR_5,
+    /* p6-p15           - scratch */
+    PR_6,              PR_7,           PR_8,           PR_9,
+    PR_10,             PR_11,          PR_12,          PR_13,
+    PR_14,             PR_15,
+    /* p16-...          - preserved - rotating */
+} pr_reg_t;
+
+typedef enum {
+    BR_0,              /* scratch - Return link */
+    /* b1-b5            - preserved */
+    BR_1,              BR_2,           BR_3,           BR_4,
+    BR_5,
+    /* b6-b7           - scratch */
+    BR_6,              BR_7
+} br_reg_t;
+
+typedef enum {
+    AR_KR0,            AR_KR1,         AR_KR2,         AR_KR3,
+    AR_KR4,            AR_KR5,         AR_KR6,         AR_KR7,
+    AR_8,              AR_9,           AR_10,          AR_11,
+    AR_12,             AR_13,          AR_14,          AR_15,
+    AR_RSC,            AR_BSP,         AR_BSPSTORE,    AR_RNAT,
+    AR_20,             AR_FCR,         AR_22,          AR_23,
+    AR_EFLAG,          AR_CSD,         AR_SSD,         AR_CFLG,
+    AR_FSR,            AR_FIR,         AR_FDR,         AR_31,
+    AR_CCV,            AR_33,          AR_34,          AR_35,
+    AR_UNAT,           AR_37,          AR_38,          AR_39,
+    AR_FPSR,           AR_41,          AR_42,          AR_43,
+    AR_ITC,            AR_RUC,         AR_46,          AR_47,
+    AR_48,             AR_49,          AR_50,          AR_51,
+    AR_52,             AR_53,          AR_54,          AR_55,
+    AR_56,             AR_57,          AR_58,          AR_59,
+    AR_60,             AR_61,          AR_62,          AR_63,
+    AR_PFS,            AR_LC,          AR_EC,
+    /* AR_67 ... AR_127 */
+} ar_reg_t;
+
+typedef enum {
+    TM_M_I_I_, TM_M_I_Is,      TM_M_IsI_,      TM_M_IsIs,
+    TM_M_L_X_, TM_M_L_Xs,      TM_ILL_06,      TM_ILL_07,
+    TM_M_M_I_, TM_M_M_Is,      TM_MsM_I_,      TM_MsM_Is,
+    TM_M_F_I_, TM_M_F_Is,      TM_M_M_F_,      TM_M_M_Fs,
+    TM_M_I_B_, TM_M_I_Bs,      TM_M_B_B_,      TM_M_B_Bs,
+    TM_ILL_14, TM_ILL_15,      TM_B_B_B_,      TM_B_B_Bs,
+    TM_M_M_B_, TM_M_M_Bs,      TM_ILL_1A,      TM_ILL_1B,
+    TM_M_F_B_, TM_M_F_Bs,      TM_ILL_1E,      TM_ILL_1F,
+} template_t;
+
+#define MWH_SPTK                       0
+#define MWH_NONE                       1
+#define MWH_DPTK                       2
+
+#define IH_NONE                                0
+#define IH_IMP                         1
+
+#define LD_NONE                                0
+#define LD_NT1                         1
+#define LD_NTA                         3
+
+#define ST_NONE                                0
+#define ST_NTA                         3
+
+#define LF_NONE                                0
+#define LF_NT1                         1
+#define LF_NT2                         2
+#define LF_NTA                         3
+
+#define BR_PH_FEW                      0
+#define BR_PH_MANY                     1
+
+#define BR_BWH_SPTK                    0
+#define BR_BWH_SPNT                    1
+#define BR_BWH_DPTK                    2
+#define BR_BWH_DPNT                    3
+
+#define BRI_BWH_SPTK                   1
+#define BRI_BWH_SPNT                   3
+#define BRI_BWH_DPTK                   5
+#define BRI_BWH_DPNT                   7
+
+#define BR_DH_NONE                     0
+#define BR_DH_CLR                      1
+
+#define BR_IH_NONE                     0
+#define BR_IH_IMP                      1
+
+#define BR_IPWH_SPTK                   0
+#define BR_IPWH_LOOP                   1
+#define BR_IPWH_DPTK                   2
+#define BR_IPWH_EXIT                   3
+
+#define BR_INDWH_SPTK                  0
+#define BR_INDWH_DPTK                  2
+
+#define MUX_BRCST                      0
+/* FIXME only @name values in manual and only @brcst disassembled by binutils */
+#define MUX_REV                                1       /* not disassembled as @rev */
+#define MUX_MIX                                2       /* not disassembled as @mix */
+#define MUX_SHUF                       3       /* not disassembled as @shuf */
+#define MUX_ALT                                4       /* not disassembled as @alt */
+
+/* Unsuffixed load/store names forward to the corresponding _l variants. */
+#define ldr(r0,r1)                     ldr_l(r0,r1)
+#define ldi(r0,i0)                     ldi_l(r0,i0)
+#define str(r0,r1)                     str_l(r0,r1)
+/* sti forwards to sti_l, like every other alias here forwards to its
+ * same-named _l form (it previously expanded to str_l). */
+#define sti(i0,r0)                     sti_l(i0,r0)
+#define ldxr(r0,r1,r2)                 ldxr_l(r0,r1,r2)
+#define ldxi(r0,r1,i0)                 ldxi_l(r0,r1,i0)
+#define stxr(r0,r1,r2)                 stxr_l(r0,r1,r2)
+#define stxi(i0,r0,r1)                 stxi_l(i0,r0,r1)
+
+/* libgcc */
+/* NOTE(review): these helpers are declared as returning void, although the
+ * libgcc division/remainder routines return a value; presumably only their
+ * addresses are used here -- confirm before calling them directly from C. */
+extern void __divdi3(long,long);
+extern void __udivdi3(unsigned long,unsigned long);
+extern void __moddi3(long,long);
+extern void __umoddi3(unsigned long,unsigned long);
+
+#define out(n,tm,s0,s1,s2)             _out(_jit,n,tm,s0,s1,s2)
+static void _out(jit_state_t*,int,int,jit_word_t,jit_word_t,jit_word_t);
+#define stop()                         _stop(_jit)
+static void _stop(jit_state_t*);
+#define sync()                         _sync(_jit)
+static void _sync(jit_state_t*);
+#define flush()                                _flush(_jit)
+static void _flush(jit_state_t*);
+#define inst(w, t)                     _inst(_jit, w, t)
+static void _inst(jit_state_t*, jit_word_t, jit_uint8_t);
+#define A1(x4,x2,r3,r2,r1)             _A1(_jit,0,x4,x2,r3,r2,r1)
+static void _A1(jit_state_t*, jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A2(x4,ct,r3,r2,r1)             A1(x4,ct,r3,r2,r1)
+#define A3(x4,x2,r3,im,r1)             _A3(_jit,0,x4,x2,r3,im,r1)
+static void _A3(jit_state_t*, jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A4(x2a,r3,im,r1)               _A4(_jit,0,x2a,r3,im,r1)
+static void _A4(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A5(r3,im,r1)                   _A5(_jit,0,r3,im,r1)
+static void _A5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define A6(o,x2,ta,p2,r3,r2,c,p1)      _A6(_jit,0,o,x2,ta,p2,r3,r2,c,p1)
+static void _A6(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A7(o,x2,ta,p2,r3,c,p1)         _A7(_jit,0,o,x2,ta,p2,r3,c,p1)
+static void _A7(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A8(o,x2,ta,p2,r3,im,c,p1)      _A8(_jit,0,o,x2,ta,p2,r3,im,c,p1)
+static void _A8(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A9(za,zb,x4,x2,r3,r2,r1)       _A9(_jit,0,za,zb,x4,x2,r3,r2,r1)
+static void _A9(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define A10(x4,ct,r3,r2,r1)            A9(0,1,x4,ct,r3,r2,r1)
+#define I1(ct,x2,r3,r2,r1)             _I1(_jit,0,ct,x2,r3,r2,r1)
+static void _I1(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I2(za,x2a,zb,x2c,x2b,r3,r2,r1) _I2(_jit,0,za,x2a,zb,x2c,x2b,r3,r2,r1)
+static void _I2(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I3(mbt,r2,r1)                  _I3(_jit,0,mbt,r2,r1)
+static void _I3(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define I4(mht,r2,r1)                  _I4(_jit,0,mht,r2,r1)
+static void _I4(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define I5(za,zb,x2b,r3,r2,r1)         _I5(_jit,0,za,zb,x2b,r3,r2,r1)
+static void _I5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define I6(za,zb,x2b,r3,ct,r1)         _I6(_jit,0,za,zb,x2b,r3,ct,r1)
+static void _I6(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define I7(za,zb,r3,r2,r1)             _I7(_jit,0,za,zb,r3,r2,r1)
+static void _I7(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I8(za,zb,ct,r2,r1)             _I8(_jit,0,za,zb,ct,r2,r1)
+static void _I8(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I9(x2c,r3,r1)                  _I9(_jit,0,x2c,r3,r1)
+static void _I9(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define I10(ct,r3,r2,r1)               _I10(_jit,0,ct,r3,r2,r1)
+static void _I10(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I11(len,r3,pos,y,r1)           _I11(_jit,0,len,r3,pos,y,r1)
+static void _I11(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I12(len,pos,r2,r1)             _I12(_jit,0,len,pos,r2,r1)
+static void _I12(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I13(len,pos,im,r1)             _I13(_jit,0,len,pos,im,r1)
+static void _I13(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I14(s,len,r3,pos,r1)           _I14(_jit,0,s,len,r3,pos,r1)
+static void _I14(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I15(pos,len,r3,r2,r1)          _I15(_jit,0,pos,len,r3,r2,r1)
+static void _I15(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I16(tb,ta,p2,r3,pos,c,p1)      _I16(_jit,0,tb,ta,p2,r3,pos,c,p1)
+static void _I16(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I17(tb,ta,p2,r3,c,p1)          _I17(_jit,0,tb,ta,p2,r3,c,p1)
+static void _I17(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define I18(im,y)                      _I18(_jit,0,im,y)
+static void _I18(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I19(im)                                _I19(_jit,0,im)
+static void _I19(jit_state_t*,jit_word_t,
+                jit_word_t);
+#define I20(r2,im)                     _I20(_jit,0,r2,im)
+static void _I20(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I21(im,ih,x,wh,r2,b1)          _I21(_jit,0,im,ih,x,wh,r2,b1)
+static void _I21(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define I22(b2,r1)                     _I22(_jit,0,b2,r1)
+static void _I22(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I23(r2,im)                     _I23(_jit,0,r2,im)
+static void _I23(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I24(im)                                _I24(_jit,0,im)
+static void _I24(jit_state_t*,jit_word_t,
+                jit_word_t);
+#define I25(x6,r1)                     _I25(_jit,0,x6,r1)
+static void _I25(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I26(ar,r2)                     _I26(_jit,0,ar,r2)
+static void _I26(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I27(ar,im)                     _I27(_jit,0,ar,im)
+static void _I27(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I28(ar,r1)                     _I28(_jit,0,ar,r1)
+static void _I28(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define I29(x6,r3,r1)                  _I29(_jit,0,x6,r3,r1)
+static void _I29(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+/* NOTE(review): ta and tb are forwarded to _I30 in the opposite order from
+ * this macro's parameter list, whereas I16 and I17 forward tb,ta unchanged;
+ * confirm this matches the parameter order of the _I30 definition. */
+#define I30(tb,ta,p2,im,c,p1)          _I30(_jit,0,ta,tb,p2,im,c,p1)
+static void _I30(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M1(x6,ht,x,r3,r1)              _M1(_jit,0,x6,ht,x,r3,r1)
+static void _M1(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M2(x6,ht,r3,r2,r1)             _M2(_jit,0,x6,ht,r3,r2,r1)
+static void _M2(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M3(x6,ht,r3,im,r1)             _M3(_jit,0,x6,ht,r3,im,r1)
+static void _M3(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M5(x6,ht,r3,r2,im)             _M5(_jit,0,x6,ht,r3,r2,im)
+static void _M5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M6(x6,ht,x,r3,r2)              _M6(_jit,0,x6,ht,x,r3,r2)
+static void _M6(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M13(x6,ht,r3,f2)               _M13(_jit,0,x6,ht,r3,f2)
+static void _M13(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M14(x6,ht,r3,r2)               _M14(_jit,0,x6,ht,r3,r2)
+static void _M14(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M15(x6,ht,r3,im)               _M15(_jit,0,x6,ht,r3,im)
+static void _M15(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M16(x6,ht,r3,r2,r1)            _M16(_jit,0,x6,ht,r3,r2,r1)
+static void _M16(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M17(x6,ht,r3,im,r1)            _M17(_jit,0,x6,ht,r3,im,r1)
+static void _M17(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M20(r2,im)                     M20x(0x1,r2,im)
+#define M20x(x3,r2,im)                 _M20x(_jit,0,x3,r2,im)
+static void _M20x(jit_state_t*,jit_word_t,
+                 jit_word_t,jit_word_t,jit_word_t);
+#define M22(x3,im,r1)                  M22x(x3,im,r1)
+#define M22x(x3,im,r1)                 _M22x(_jit,0,x3,im,r1)
+static void _M22x(jit_state_t*,jit_word_t,
+                 jit_word_t,jit_word_t,jit_word_t);
+#define M24(x2,x4)                     _M24(_jit,0,x2,x4)
+#define M25(x4)                                M24(0,x4)
+static void _M24(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M26(r1)                                M26x(2,r1)
+#define M26x(x4,r1)                    _M26x(_jit,0,x4,r1)
+static void _M26x(jit_state_t*,jit_word_t,
+                 jit_word_t,jit_word_t);
+#define M28(x,r3)                      _M28(_jit,0,x,r3)
+static void _M28(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M29(ar,r2)                     _M29(_jit,0,ar,r2)
+static void _M29(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M30(ar,im)                     _M30(_jit,0,ar,im)
+static void _M30(jit_state_t*,jit_word_t,jit_word_t,jit_word_t);
+#define M31(ar,r1)                     _M31(_jit,0,ar,r1)
+static void _M31(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M32(cr,r2)                     _M32(_jit,0,cr,r2)
+static void _M32(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M33(cr,r1)                     _M33(_jit,0,cr,r1)
+static void _M33(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M34(sor,sol,sof,r1)            _M34(_jit,0,sor,sol,sof,r1)
+static void _M34(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M35(x6,r2)                     _M35(_jit,0,x6,r2)
+static void _M35(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M36(x6,r1)                     _M36(_jit,0,x6,r1)
+static void _M36(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M37(im)                                _M37(_jit,0,im)
+static void _M37(jit_state_t*,jit_word_t,
+                jit_word_t);
+#define M38(x6,r3,r2,r1)               _M38(_jit,0,x6,r3,r2,r1)
+static void _M38(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M39(x6,r3,im,r1)               _M39(_jit,0,x6,r3,im,r1)
+static void _M39(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M40(x6,r3,im)                  _M40(_jit,0,x6,r3,im)
+static void _M40(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M41(x6,r2)                     _M41(_jit,0,x6,r2)
+static void _M41(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M42(x6,r3,r2)                  _M42(_jit,0,x6,r3,r2)
+static void _M42(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M43(x6,r3,r1)                  _M43(_jit,0,x6,r3,r1)
+static void _M43(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M44(x4,im)                     _M44(_jit,0,x4,im)
+static void _M44(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define M45(x6,r3,r2)                  _M45(_jit,0,x6,r3,r2)
+static void _M45(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M46(x6,r3,r1)                  _M46(_jit,0,x6,r3,r1)
+#define M47(x6,r3)                     M46(x6,r3,0)
+static void _M46(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M48(y,im)                      _M48(_jit,0,y,im)
+static void _M48(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+#define B1(d,wh,im,p,tp)               _B1(_jit,0,d,wh,im,p,tp)
+#define B2(d,wh,im,p,tp)               B1(d,wh,im,p,tp)
+static void _B1(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define B3(d,wh,im,p,b)                        _B3(_jit,0,d,wh,im,p,b)
+static void _B3(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define B4(d,wh,x6,b,p,tp)             _B4(_jit,0,d,wh,x6,b,p,tp)
+static void _B4(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define B5(d,wh,b2,p,b1)               _B5(_jit,0,d,wh,b2,p,b1)
+static void _B5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define B6(ih,im,tag,wh)               _B6(_jit,0,ih,im,tag,wh)
+static void _B6(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define B7(ih,x6,b2,tag,wh)            _B7(_jit,0,ih,x6,b2,tag,wh)
+static void _B7(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define B8(x6)                         _B8(_jit,0,x6)
+static void _B8(jit_state_t*,jit_word_t,
+               jit_word_t);
+#define B9(op,x6,im)                   _B9(_jit,0,op,x6,im)
+static void _B9(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define X1(im)                         _X1(_jit,0,im)
+static void _X1(jit_state_t*,jit_word_t,
+               jit_word_t);
+#define X2(r1,im)                      _X2(_jit,0,r1,im)
+static void _X2(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t);
+#define X3x(o,d,wh,p,tp,im)            _X3x(_jit,0,o,d,wh,p,tp,im)
+#define X3(d,wh,p,tp,im)               X3x(0xc,d,wh,p,tp,im)
+#define X4(d,wh,p,tp,im)               X3x(0xd,d,wh,p,tp,im)
+static void _X3x(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define X5(y,im)                       _X5(_jit,0,y,im)
+static void _X5(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t);
+
+/* add */
+#define ADD(r1,r2,r3)                  A1(0,0,r3,r2,r1)
+#define ADD1(r1,r2,r3)                 A1(0,1,r3,r2,r1)
+#define ADDS(r1,im,r3)                 A4(2,r3,im,r1)
+#define ADDS_p(r1,im,r3,_p)            _A4(_jit,_p,2,r3,im,r1)
+#define ADDL(r1,im,r3)                 A5(r3,im,r1)
+#define ADDL_p(r1,im,r3,_p)            _A5(_jit,_p,r3,im,r1)
+/* addp4 */
+#define ADDP4(r1,r2,r3)                        A1(2,0,r3,r2,r1)
+#define ADDIP4(r1,im,r3)               A4(3,r3,im,r1)
+/* alloc */
+#define ALLOCR(r1,i,l,o,r)             M34((r)>>3,(i)+(l),(i)+(l)+(o),r1)
+#define ALLOC(r1,i,o)                  ALLOCR(r1,i,0,o,0)
+/* and */
+#define AND(r1,r2,r3)                  A1(3,0,r3,r2,r1)
+#define ANDI(r1,im,r3)                 A3(0xb,0,r3,im,r1)
+/* andcm */
+#define ANDCM(r1,r2,r3)                        A1(3,1,r3,r2,r1)
+#define ANDCMI(r1,im,r3)               A3(0xb,1,r3,im,r1)
+/* br */
+/* Each body sits on the same line as its #define; with the body on a
+ * separate line the macro would expand to nothing.  The *_COND forms call
+ * the underlying emitter directly so that _p is passed instead of 0. */
+#define BRI(im)                        B1(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_FEW,0)
+#define BRI_COND(im,_p)                _B1(_jit,_p,BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_FEW,0)
+#define BRI_WEXIT(im)                  B1(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,2)
+#define BRI_WTOP(im)                   B1(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,3)
+#define BRI_CALL(b,im)                 B3(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,b)
+#define BRI_CLOOP(im)                  B2(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,5)
+#define BRI_CEXIT(im)                  B2(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,6)
+#define BRI_CTOP(im)                   B2(BR_DH_NONE,BR_BWH_SPTK,im,BR_PH_MANY,7)
+#define BR_COND(b,_p)                  _B4(_jit,_p,BR_DH_NONE,BR_BWH_SPTK,0x20,b,BR_PH_FEW,0)
+#define BR(b)                          B4(BR_DH_NONE,BR_BWH_SPTK,0x20,b,BR_PH_FEW,0)
+#define BR_IA(b)                       B4(BR_DH_NONE,BR_BWH_SPTK,0x20,b,BR_PH_MANY,1)
+#define BR_RET(b)                      B4(BR_DH_NONE,BR_BWH_SPTK,0x21,b,BR_PH_MANY,4)
+#define BR_CALL(b1,b2)                 B5(BR_DH_NONE,BRI_BWH_SPTK,b2,BR_PH_MANY,b1)
+/* break */
+#define BREAK_I(im)                    I19(im)
+#define BREAK_M(im)                    M37(im)
+#define BREAK_B(im)                    B9(0,0,im)
+#define BREAK_X(im)                    X1(im)
+/* brl */
+#define BRL(im)                        X3(BR_DH_NONE,BR_BWH_SPTK,BR_PH_MANY,0,im)
+/* Same encoding as BRL but with _p in place of 0.  Only _X3x is declared
+ * (there is no _X3), so the 0xc opcode that X3 supplies is spelled out. */
+#define BRL_COND(im,_p)                _X3x(_jit,_p,0xc,BR_DH_NONE,BR_BWH_SPTK,BR_PH_MANY,0,im)
+#define BRL_CALL(b1,im)                X4(BR_DH_NONE,BR_BWH_SPTK,BR_PH_MANY,b1,im)
+/* brp */
+#define BRP(im,tag)                    B6(BR_IH_NONE,im,tag,BR_IPWH_SPTK)
+#define BRPI(b2,tag)                   B7(BR_IH_NONE,0x10,b2,tag,BR_INDWH_SPTK)
+#define BRPI_RET(b2,tag)               B7(BR_IH_NONE,0x11,b2,tag,BR_INDWH_SPTK)
+/* bsw */
+#define BSW_0()                                B8(0x0c)
+#define BSW_1()                                B8(0x0d)
+/* chk */
+#define CHK_S_I(r2,im)                 I20(r2,im)
+#define CHK_S_M(r2,im)                 M20(r2,im)
+#define CHK_A_NC(r1,im)                        M22(0x4,im,r1)
+#define CHK_A_CLR(r1,im)               M22(0x5,im,r1)
+/* clrrrb */
+#define CLRRRB()                       B8(0x04)
+#define CLRRRB_PR()                    B8(0x05)
+/* clz */
+#define CLZ(r1,r3)                     I9(3,r3,r1)
+/* cmp */
+#define CMP_LT(p1,p2,r2,r3)            A6(0xc,0,0,p2,r3,r2,0,p1)
+/* CMP_LT with _p passed as _A6's second argument instead of 0; it must call
+ * _A6 directly because the A6 macro takes only eight arguments. */
+#define CMP_LT_p(p1,p2,r2,r3,_p)       _A6(_jit,_p,0xc,0,0,p2,r3,r2,0,p1)
+#define CMP_LTU(p1,p2,r2,r3)           A6(0xd,0,0,p2,r3,r2,0,p1)
+#define CMP_EQ(p1,p2,r2,r3)            A6(0xe,0,0,p2,r3,r2,0,p1)
+#define CMP_LT_UNC(p1,p2,r2,r3)                A6(0xc,0,0,p2,r3,r2,1,p1)
+#define CMP_LTU_UNC(p1,p2,r2,r3)       A6(0xd,0,0,p2,r3,r2,1,p1)
+#define CMP_EQ_UNC(p1,p2,r2,r3)                A6(0xe,0,0,p2,r3,r2,1,p1)
+#define CMP_EQ_AND(p1,p2,r2,r3)                A6(0xc,0,1,p2,r3,r2,0,p1)
+#define CMP_EQ_OR(p1,p2,r2,r3)         A6(0xd,0,1,p2,r3,r2,0,p1)
+#define CMP_EQ_OR_ANDCM(p1,p2,r2,r3)   A6(0xe,0,1,p2,r3,r2,0,p1)
+#define CMP_NE_AND(p1,p2,r2,r3)                A6(0xc,0,1,p2,r3,r2,1,p1)
+#define CMP_NE_OR(p1,p2,r2,r3)         A6(0xd,0,1,p2,r3,r2,1,p1)
+#define CMP_NE_OR_ANDCM(p1,p2,r2,r3)   A6(0xe,0,1,p2,r3,r2,1,p1)
+#define CMPI_LT(p1,p2,im,r3)           A8(0xc,2,0,p2,r3,im,0,p1)
+#define CMPI_LTU(p1,p2,im,r3)          A8(0xd,2,0,p2,r3,im,0,p1)
+#define CMPI_EQ(p1,p2,im,r3)           A8(0xe,2,0,p2,r3,im,0,p1)
+#define CMPI_EQ_p(p1,p2,im,r3,_p)      _A8(_jit,_p,0xe,2,0,p2,r3,im,0,p1)
+#define CMPI_LT_UNC(p1,p2,im,r3)       A8(0xc,2,0,p2,r3,im,1,p1)
+#define CMPI_LTU_UNC(p1,p2,im,r3)      A8(0xd,2,0,p2,r3,im,1,p1)
+#define CMPI_EQ_UNC(p1,p2,im,r3)       A8(0xe,2,0,p2,r3,im,1,p1)
+#define CMPI_EQ_AND(p1,p2,im,r3)       A8(0xc,2,1,p2,r3,im,0,p1)
+#define CMPI_EQ_OR(p1,p2,im,r3)                A8(0xd,2,1,p2,r3,im,0,p1)
+#define CMPI_EQ_ANDCM(p1,p2,im,r3)     A8(0xe,2,1,p2,r3,im,0,p1)
+#define CMPI_NE_AND(p1,p2,im,r3)       A8(0xc,2,1,p2,r3,im,1,p1)
+#define CMPI_NE_OR(p1,p2,im,r3)                A8(0xd,2,1,p2,r3,im,1,p1)
+#define CMPI_NE_ANDCM(p1,p2,im,r3)     A8(0xe,2,1,p2,r3,im,1,p1)
+#define ZCMP_GT_AND(p1,p2,r3)          A7(0xc,0,0,p2,r3,0,p1)
+#define ZCMP_GT_OR(p1,p2,r3)           A7(0xd,0,0,p2,r3,0,p1)
+#define ZCMP_GT_ANDCM(p1,p2,r3)                A7(0xe,0,0,p2,r3,0,p1)
+#define ZCMP_LE_AND(p1,p2,r3)          A7(0xc,0,0,p2,r3,1,p1)
+#define ZCMP_LE_OR(p1,p2,r3)           A7(0xd,0,0,p2,r3,1,p1)
+#define ZCMP_LE_ANDCM(p1,p2,r3)                A7(0xe,0,0,p2,r3,1,p1)
+#define ZCMP_GE_AND(p1,p2,r3)          A7(0xc,0,1,p2,r3,0,p1)
+#define ZCMP_GE_OR(p1,p2,r3)           A7(0xd,0,1,p2,r3,0,p1)
+#define ZCMP_GE_ANDCM(p1,p2,r3)                A7(0xe,0,1,p2,r3,0,p1)
+#define ZCMP_LT_AND(p1,p2,r3)          A7(0xc,0,1,p2,r3,1,p1)
+#define ZCMP_LT_OR(p1,p2,r3)           A7(0xd,0,1,p2,r3,1,p1)
+#define ZCMP_LT_ANDCM(p1,p2,r3)                A7(0xe,0,1,p2,r3,1,p1)
+/* cmp4 */
+#define CMP4_LT(p1,p2,r2,r3)           A6(0xc,1,0,p2,r3,r2,0,p1)
+#define CMP4_LTU(p1,p2,r2,r3)          A6(0xd,1,0,p2,r3,r2,0,p1)
+#define CMP4_EQ(p1,p2,r2,r3)           A6(0xe,1,0,p2,r3,r2,0,p1)
+#define CMP4_LT_UNC(p1,p2,r2,r3)       A6(0xc,1,0,p2,r3,r2,1,p1)
+#define CMP4_LTU_UNC(p1,p2,r2,r3)      A6(0xd,1,0,p2,r3,r2,1,p1)
+#define CMP4_EQ_UNC(p1,p2,r2,r3)       A6(0xe,1,0,p2,r3,r2,1,p1)
+#define CMP4_EQ_AND(p1,p2,r2,r3)       A6(0xc,1,1,p2,r3,r2,0,p1)
+#define CMP4_EQ_OR(p1,p2,r2,r3)                A6(0xd,1,1,p2,r3,r2,0,p1)
+#define CMP4_EQ_XOR(p1,p2,r2,r3)       A6(0xe,1,1,p2,r3,r2,0,p1)
+#define CMP4_NE_AND(p1,p2,r2,r3)       A6(0xc,1,1,p2,r3,r2,1,p1)
+#define CMP4_NE_OR(p1,p2,r2,r3)                A6(0xd,1,1,p2,r3,r2,1,p1)
+#define CMP4_NE_XOR(p1,p2,r2,r3)       A6(0xe,1,1,p2,r3,r2,1,p1)
+#define CMP4I_LT(p1,p2,im,r3)          A8(0xc,3,0,p2,r3,im,0,p1)
+#define CMP4I_LTU(p1,p2,im,r3)         A8(0xd,3,0,p2,r3,im,0,p1)
+#define CMP4I_EQ(p1,p2,im,r3)          A8(0xe,3,0,p2,r3,im,0,p1)
+#define CMP4I_LT_UNC(p1,p2,im,r3)      A8(0xc,3,0,p2,r3,im,1,p1)
+#define CMP4I_LTU_UNC(p1,p2,im,r3)     A8(0xd,3,0,p2,r3,im,1,p1)
+#define CMP4I_EQ_UNC(p1,p2,im,r3)      A8(0xe,3,0,p2,r3,im,1,p1)
+#define CMP4I_EQ_AND(p1,p2,im,r3)      A8(0xc,3,1,p2,r3,im,0,p1)
+#define CMP4I_EQ_OR(p1,p2,im,r3)       A8(0xd,3,1,p2,r3,im,0,p1)
+#define CMP4I_EQ_ANDCM(p1,p2,im,r3)    A8(0xe,3,1,p2,r3,im,0,p1)
+#define CMP4I_NE_AND(p1,p2,im,r3)      A8(0xc,3,1,p2,r3,im,1,p1)
+#define CMP4I_NE_OR(p1,p2,im,r3)       A8(0xd,3,1,p2,r3,im,1,p1)
+#define CMP4I_NE_ANDCM(p1,p2,im,r3)    A8(0xe,3,1,p2,r3,im,1,p1)
+#define ZCMP4_GT_AND(p1,p2,r3)         A7(0xc,1,0,p2,r3,0,p1)
+#define ZCMP4_GT_OR(p1,p2,r3)          A7(0xd,1,0,p2,r3,0,p1)
+#define ZCMP4_GT_ANDCM(p1,p2,r3)       A7(0xe,1,0,p2,r3,0,p1)
+#define ZCMP4_LE_AND(p1,p2,r3)         A7(0xc,1,0,p2,r3,1,p1)
+#define ZCMP4_LE_OR(p1,p2,r3)          A7(0xd,1,0,p2,r3,1,p1)
+#define ZCMP4_LE_ANDCM(p1,p2,r3)       A7(0xe,1,0,p2,r3,1,p1)
+#define ZCMP4_GE_AND(p1,p2,r3)         A7(0xc,1,1,p2,r3,0,p1)
+#define ZCMP4_GE_OR(p1,p2,r3)          A7(0xd,1,1,p2,r3,0,p1)
+#define ZCMP4_GE_ANDCM(p1,p2,r3)       A7(0xe,1,1,p2,r3,0,p1)
+#define ZCMP4_LT_AND(p1,p2,r3)         A7(0xc,1,1,p2,r3,1,p1)
+#define ZCMP4_LT_OR(p1,p2,r3)          A7(0xd,1,1,p2,r3,1,p1)
+#define ZCMP4_LT_ANDCM(p1,p2,r3)       A7(0xe,1,1,p2,r3,1,p1)
+/* cmpxchg */
+#define CMPXCHG1_ACQ(r1,r3,r2)         M16(0x00,LD_NONE,r3,r2,r1)
+#define CMPXCHG2_ACQ(r1,r3,r2)         M16(0x01,LD_NONE,r3,r2,r1)
+#define CMPXCHG4_ACQ(r1,r3,r2)         M16(0x02,LD_NONE,r3,r2,r1)
+#define CMPXCHG8_ACQ(r1,r3,r2)         M16(0x03,LD_NONE,r3,r2,r1)
+#define CMPXCHG1_REL(r1,r3,r2)         M16(0x04,LD_NONE,r3,r2,r1)
+#define CMPXCHG2_REL(r1,r3,r2)         M16(0x05,LD_NONE,r3,r2,r1)
+#define CMPXCHG4_REL(r1,r3,r2)         M16(0x06,LD_NONE,r3,r2,r1)
+#define CMPXCHG8_REL(r1,r3,r2)         M16(0x07,LD_NONE,r3,r2,r1)
+#define CMP8XCHG16_ACQ(r1,r3,r2)       M16(0x20,LD_NONE,r3,r2,r1)
+#define CMP8XCHG16_REL(r1,r3,r2)       M16(0x24,LD_NONE,r3,r2,r1)
+/* cover */
+#define COVER()                                B8(0x02)
+/* czx */
+#define CZX1_L(r1,r3)                  I29(0x18,r3,r1)
+#define CZX2_L(r1,r3)                  I29(0x19,r3,r1)
+#define CZX1_R(r1,r3)                  I29(0x1c,r3,r1)
+#define CZX2_R(r1,r3)                  I29(0x1d,r3,r1)
+/* dep */
+#define DEP_Z(r1,r2,pos,len)           I12(len,pos,r2,r1)
+#define DEPI_Z(r1,im,pos,len)          I13(len,pos,im,r1)
+#define DEPs(r1,r2,r3,pos,len)         I14(1,len,r3,pos,r1)
+#define DEPu(r1,r2,r3,pos,len)         I14(0,len,r3,pos,r1)
+#define DEP(r1,r2,r3,pos,len)          I15(pos,len,r3,r2,r1)
+/* epc */
+#define EPC()                          B8(0x10)
+/* extr */
+#define EXTR(r1,r3,pos,len)            I11(len,r3,pos,1,r1)
+#define EXTR_U(r1,r3,pos,len)          I11(len,r3,pos,0,r1)
+/* fc */
+#define FC(r3)                         M28(0,r3)
+#define FC_I(r3)                       M28(1,r3)
+/* fetchadd */
+#define FETCHADD4_ACQ(r1,r3,im)                M17(0x12,LD_NONE,r3,im,r1)
+#define FETCHADD8_ACQ(r1,r3,im)                M17(0x13,LD_NONE,r3,im,r1)
+#define FETCHADD4_REL(r1,r3,im)                M17(0x16,LD_NONE,r3,im,r1)
+#define FETCHADD8_REL(r1,r3,im)                M17(0x17,LD_NONE,r3,im,r1)
+/* flushrs */
+#define FLUSHRS()                      M25(0xc)
+/* fwb */
+#define FWB()                          M24(2,0)
+/* hint */
+#define HINT_I(im)                     I18(im,1)
+#define HINT_M(im)                     M48(1,im)
+#define HINT_B(im)                     B9(2,1,im)
+#define HINT_X(im)                     X5(1,im)
+/* invala */
+#define INVALA()                       M24(1,0)
+#define INVALA_E(r1)                   M26(r1)
+/* itc */
+#define ITC_I(r2)                      M41(0x2f,r2)
+#define ITC_D(r2)                      M41(0x2e,r2)
+/* itr */
+#define ITR_I(r3,r2)                   M42(0x0f,r3,r2)
+#define ITR_D(r3,r2)                   M42(0x0e,r3,r2)
+/* ld */
+#define LD1(r1,r3)                     M1(0x00,LD_NONE,0,r3,r1)
+#define LD2(r1,r3)                     M1(0x01,LD_NONE,0,r3,r1)
+#define LD4(r1,r3)                     M1(0x02,LD_NONE,0,r3,r1)
+#define LD8(r1,r3)                     M1(0x03,LD_NONE,0,r3,r1)
+#define LD1_S(r1,r3)                   M1(0x04,LD_NONE,0,r3,r1)
+#define LD2_S(r1,r3)                   M1(0x05,LD_NONE,0,r3,r1)
+#define LD4_S(r1,r3)                   M1(0x06,LD_NONE,0,r3,r1)
+#define LD8_S(r1,r3)                   M1(0x07,LD_NONE,0,r3,r1)
+#define LD1_A(r1,r3)                   M1(0x08,LD_NONE,0,r3,r1)
+#define LD2_A(r1,r3)                   M1(0x09,LD_NONE,0,r3,r1)
+#define LD4_A(r1,r3)                   M1(0x0a,LD_NONE,0,r3,r1)
+#define LD8_A(r1,r3)                   M1(0x0b,LD_NONE,0,r3,r1)
+#define LD1_SA(r1,r3)                  M1(0x0c,LD_NONE,0,r3,r1)
+#define LD2_SA(r1,r3)                  M1(0x0d,LD_NONE,0,r3,r1)
+#define LD4_SA(r1,r3)                  M1(0x0e,LD_NONE,0,r3,r1)
+#define LD8_SA(r1,r3)                  M1(0x0f,LD_NONE,0,r3,r1)
+#define LD1_BIAS(r1,r3)                        M1(0x10,LD_NONE,0,r3,r1)
+#define LD2_BIAS(r1,r3)                        M1(0x11,LD_NONE,0,r3,r1)
+#define LD4_BIAS(r1,r3)                        M1(0x12,LD_NONE,0,r3,r1)
+#define LD8_BIAS(r1,r3)                        M1(0x13,LD_NONE,0,r3,r1)
+#define LD1_ACQ(r1,r3)                 M1(0x14,LD_NONE,0,r3,r1)
+#define LD2_ACQ(r1,r3)                 M1(0x15,LD_NONE,0,r3,r1)
+#define LD4_ACQ(r1,r3)                 M1(0x16,LD_NONE,0,r3,r1)
+#define LD8_ACQ(r1,r3)                 M1(0x17,LD_NONE,0,r3,r1)
+#define LD8_FILL(r1,r3)                        M1(0x1b,LD_NONE,0,r3,r1)
+#define LD1_C_CLR(r1,r3)               M1(0x20,LD_NONE,0,r3,r1)
+#define LD2_C_CLR(r1,r3)               M1(0x21,LD_NONE,0,r3,r1)
+#define LD4_C_CLR(r1,r3)               M1(0x22,LD_NONE,0,r3,r1)
+#define LD8_C_CLR(r1,r3)               M1(0x23,LD_NONE,0,r3,r1)
+#define LD1_C_NC(r1,r3)                        M1(0x24,LD_NONE,0,r3,r1)
+#define LD2_C_NC(r1,r3)                        M1(0x25,LD_NONE,0,r3,r1)
+#define LD4_C_NC(r1,r3)                        M1(0x26,LD_NONE,0,r3,r1)
+#define LD8_C_NC(r1,r3)                        M1(0x27,LD_NONE,0,r3,r1)
+#define LD1_C_CLR_ACQ(r1,r3)           M1(0x28,LD_NONE,0,r3,r1)
+#define LD2_C_CLR_ACQ(r1,r3)           M1(0x29,LD_NONE,0,r3,r1)
+#define LD4_C_CLR_ACQ(r1,r3)           M1(0x2a,LD_NONE,0,r3,r1)
+#define LD8_C_CLR_ACQ(r1,r3)           M1(0x2b,LD_NONE,0,r3,r1)
+#define LD16(r1,r3)                    M1(0x28,LD_NONE,1,r3,r1)
+#define LD16_ACQ(r1,r3)                        M1(0x2c,LD_NONE,1,r3,r1)
+#define LD1_inc(r1,r3,im)              M3(0x00,LD_NONE,r3,im,r1)
+#define LD2_inc(r1,r3,im)              M3(0x01,LD_NONE,r3,im,r1)
+#define LD4_inc(r1,r3,im)              M3(0x02,LD_NONE,r3,im,r1)
+#define LD8_inc(r1,r3,im)              M3(0x03,LD_NONE,r3,im,r1)
+#define LD1_S_inc(r1,r3,im)            M3(0x04,LD_NONE,r3,im,r1)
+#define LD2_S_inc(r1,r3,im)            M3(0x05,LD_NONE,r3,im,r1)
+#define LD4_S_inc(r1,r3,im)            M3(0x06,LD_NONE,r3,im,r1)
+#define LD8_S_inc(r1,r3,im)            M3(0x07,LD_NONE,r3,im,r1)
+#define LD1_A_inc(r1,r3,im)            M3(0x08,LD_NONE,r3,im,r1)
+#define LD2_A_inc(r1,r3,im)            M3(0x09,LD_NONE,r3,im,r1)
+#define LD4_A_inc(r1,r3,im)            M3(0x0a,LD_NONE,r3,im,r1)
+#define LD8_A_inc(r1,r3,im)            M3(0x0b,LD_NONE,r3,im,r1)
+#define LD1_SA_inc(r1,r3,im)           M3(0x0c,LD_NONE,r3,im,r1)
+#define LD2_SA_inc(r1,r3,im)           M3(0x0d,LD_NONE,r3,im,r1)
+#define LD4_SA_inc(r1,r3,im)           M3(0x0e,LD_NONE,r3,im,r1)
+#define LD8_SA_inc(r1,r3,im)           M3(0x0f,LD_NONE,r3,im,r1)
+#define LD1_BIAS_inc(r1,r3,im)         M3(0x10,LD_NONE,r3,im,r1)
+#define LD2_BIAS_inc(r1,r3,im)         M3(0x11,LD_NONE,r3,im,r1)
+#define LD4_BIAS_inc(r1,r3,im)         M3(0x12,LD_NONE,r3,im,r1)
+#define LD8_BIAS_inc(r1,r3,im)         M3(0x13,LD_NONE,r3,im,r1)
+#define LD1_ACQ_inc(r1,r3,im)          M3(0x14,LD_NONE,r3,im,r1)
+#define LD2_ACQ_inc(r1,r3,im)          M3(0x15,LD_NONE,r3,im,r1)
+#define LD4_ACQ_inc(r1,r3,im)          M3(0x16,LD_NONE,r3,im,r1)
+#define LD8_ACQ_inc(r1,r3,im)          M3(0x17,LD_NONE,r3,im,r1)
+/* Misspelled name kept as an alias so existing users still compile. */
+#define LD8_AVQ_inc(r1,r3,im)          LD8_ACQ_inc(r1,r3,im)
+#define LD8_FILL_inc(r1,r3,im)         M3(0x1b,LD_NONE,r3,im,r1)
+#define LD1_C_CLR_inc(r1,r3,im)                M3(0x20,LD_NONE,r3,im,r1)
+#define LD2_C_CLR_inc(r1,r3,im)                M3(0x21,LD_NONE,r3,im,r1)
+#define LD4_C_CLR_inc(r1,r3,im)                M3(0x22,LD_NONE,r3,im,r1)
+#define LD8_C_CLR_inc(r1,r3,im)                M3(0x23,LD_NONE,r3,im,r1)
+#define LD1_C_NC_inc(r1,r3,im)         M3(0x24,LD_NONE,r3,im,r1)
+#define LD2_C_NC_inc(r1,r3,im)         M3(0x25,LD_NONE,r3,im,r1)
+#define LD4_C_NC_inc(r1,r3,im)         M3(0x26,LD_NONE,r3,im,r1)
+#define LD8_C_NC_inc(r1,r3,im)         M3(0x27,LD_NONE,r3,im,r1)
+#define LD1_C_CLR_ACQ_inc(r1,r3,im)    M3(0x28,LD_NONE,r3,im,r1)
+#define LD2_C_CLR_ACQ_inc(r1,r3,im)    M3(0x29,LD_NONE,r3,im,r1)
+#define LD4_C_CLR_ACQ_inc(r1,r3,im)    M3(0x2a,LD_NONE,r3,im,r1)
+#define LD8_C_CLR_ACQ_inc(r1,r3,im)    M3(0x2b,LD_NONE,r3,im,r1)
+#define LDX1(r1,r3,r2)                 M2(0x00,LD_NONE,r3,r2,r1)
+#define LDX2(r1,r3,r2)                 M2(0x01,LD_NONE,r3,r2,r1)
+#define LDX4(r1,r3,r2)                 M2(0x02,LD_NONE,r3,r2,r1)
+#define LDX8(r1,r3,r2)                 M2(0x03,LD_NONE,r3,r2,r1)
+#define LDX1_S(r1,r3,r2)               M2(0x04,LD_NONE,r3,r2,r1)
+#define LDX2_S(r1,r3,r2)               M2(0x05,LD_NONE,r3,r2,r1)
+#define LDX4_S(r1,r3,r2)               M2(0x06,LD_NONE,r3,r2,r1)
+#define LDX8_S(r1,r3,r2)               M2(0x07,LD_NONE,r3,r2,r1)
+#define LDX1_A(r1,r3,r2)               M2(0x08,LD_NONE,r3,r2,r1)
+#define LDX2_A(r1,r3,r2)               M2(0x09,LD_NONE,r3,r2,r1)
+#define LDX4_A(r1,r3,r2)               M2(0x0a,LD_NONE,r3,r2,r1)
+#define LDX8_A(r1,r3,r2)               M2(0x0b,LD_NONE,r3,r2,r1)
+#define LDX1_SA(r1,r3,r2)              M2(0x0c,LD_NONE,r3,r2,r1)
+#define LDX2_SA(r1,r3,r2)              M2(0x0d,LD_NONE,r3,r2,r1)
+#define LDX4_SA(r1,r3,r2)              M2(0x0e,LD_NONE,r3,r2,r1)
+#define LDX8_SA(r1,r3,r2)              M2(0x0f,LD_NONE,r3,r2,r1)
+#define LDX1_BIAS(r1,r3,r2)            M2(0x10,LD_NONE,r3,r2,r1)
+#define LDX2_BIAS(r1,r3,r2)            M2(0x11,LD_NONE,r3,r2,r1)
+#define LDX4_BIAS(r1,r3,r2)            M2(0x12,LD_NONE,r3,r2,r1)
+#define LDX8_BIAS(r1,r3,r2)            M2(0x13,LD_NONE,r3,r2,r1)
+#define LDX1_ACQ(r1,r3,r2)             M2(0x14,LD_NONE,r3,r2,r1)
+#define LDX2_ACQ(r1,r3,r2)             M2(0x15,LD_NONE,r3,r2,r1)
+#define LDX4_ACQ(r1,r3,r2)             M2(0x16,LD_NONE,r3,r2,r1)
+#define LDX8_ACQ(r1,r3,r2)             M2(0x17,LD_NONE,r3,r2,r1)
+#define LDX8_FILL(r1,r3,r2)            M2(0x1b,LD_NONE,r3,r2,r1)
+#define LDX1_C_CLR(r1,r3,r2)           M2(0x20,LD_NONE,r3,r2,r1)
+#define LDX2_C_CLR(r1,r3,r2)           M2(0x21,LD_NONE,r3,r2,r1)
+#define LDX4_C_CLR(r1,r3,r2)           M2(0x22,LD_NONE,r3,r2,r1)
+#define LDX8_C_CLR(r1,r3,r2)           M2(0x23,LD_NONE,r3,r2,r1)
+#define LDX1_C_NC(r1,r3,r2)            M2(0x24,LD_NONE,r3,r2,r1)
+#define LDX2_C_NC(r1,r3,r2)            M2(0x25,LD_NONE,r3,r2,r1)
+#define LDX4_C_NC(r1,r3,r2)            M2(0x26,LD_NONE,r3,r2,r1)
+#define LDX8_C_NC(r1,r3,r2)            M2(0x27,LD_NONE,r3,r2,r1)
+#define LDX1_C_CLR_ACQ(r1,r3,r2)       M2(0x28,LD_NONE,r3,r2,r1)
+#define LDX2_C_CLR_ACQ(r1,r3,r2)       M2(0x29,LD_NONE,r3,r2,r1)
+#define LDX4_C_CLR_ACQ(r1,r3,r2)       M2(0x2a,LD_NONE,r3,r2,r1)
+#define LDX8_C_CLR_ACQ(r1,r3,r2)       M2(0x2b,LD_NONE,r3,r2,r1)
+/* lfetch */
+/* Three families, one per encoder: M13 (address register only, GR_0 passed
+ * as the last argument), M14 (LXFETCH*, extra register r2) and M15
+ * (LFETCHI*, immediate im).  Within each family the first argument runs
+ * 0x2c (base), 0x2d (_EXCL), 0x2e (_FAULT), 0x2f (_FAULT_EXCL).
+ * The base M13 form was absent from the original table and is added here
+ * so the three families are complete. */
+#define LFETCH(r3)                     M13(0x2c,LF_NONE,r3,GR_0)
+#define LFETCH_EXCL(r3)                M13(0x2d,LF_NONE,r3,GR_0)
+#define LFETCH_FAULT(r3)               M13(0x2e,LF_NONE,r3,GR_0)
+#define LFETCH_FAULT_EXCL(r3)          M13(0x2f,LF_NONE,r3,GR_0)
+#define LXFETCH(r3,r2)                 M14(0x2c,LF_NONE,r3,r2)
+#define LXFETCH_EXCL(r3,r2)            M14(0x2d,LF_NONE,r3,r2)
+#define LXFETCH_FAULT(r3,r2)           M14(0x2e,LF_NONE,r3,r2)
+#define LXFETCH_FAULT_EXCL(r3,r2)      M14(0x2f,LF_NONE,r3,r2)
+#define LFETCHI(r3,im)                 M15(0x2c,LF_NONE,r3,im)
+#define LFETCHI_EXCL(r3,im)            M15(0x2d,LF_NONE,r3,im)
+#define LFETCHI_FAULT(r3,im)           M15(0x2e,LF_NONE,r3,im)
+#define LFETCHI_FAULT_EXCL(r3,im)      M15(0x2f,LF_NONE,r3,im)
+/* loadrs */
+#define LOADRS()                       M25(0xa)
+/* mf */
+#define MF()                           M24(2,2)
+#define MF_A()                         M24(2,3)
+/* mix */
+#define MIX1_R(r1,r2,r3)               I2(0,2,0,2,0,r3,r2,r1)
+#define MIX2_R(r1,r2,r3)               I2(0,2,1,2,0,r3,r2,r1)
+#define MIX4_R(r1,r2,r3)               I2(1,2,0,2,0,r3,r2,r1)
+#define MIX1_L(r1,r2,r3)               I2(0,2,0,2,2,r3,r2,r1)
+#define MIX2_L(r1,r2,r3)               I2(0,2,1,2,2,r3,r2,r1)
+#define MIX4_L(r1,r2,r3)               I2(1,2,0,2,2,r3,r2,r1)
+/* mov - Move Application Register */
+/* Name suffix is <destination>_<source>: rn = general register,
+ * ar = application register, im = immediate.  MOV_I_* go through the
+ * I26/I27/I28 encoders, MOV_M_* through M29/M30/M31.
+ * NOTE(review): I28 is called as (ar,r1) but M31 as (r1,ar); the encoder
+ * definitions are outside this view -- confirm the M31 operand order. */
+#define MOV_I_rn_ar(r1,ar)             I28(ar,r1)
+#define MOV_I_ar_rn(ar,r2)             I26(ar,r2)
+#define MOV_I_ar_im(ar,im)             I27(ar,im)
+#define MOV_M_rn_ar(r1,ar)             M31(r1,ar)
+/* Truncated spelling from the original table, kept as an alias. */
+#define MOV_M_rn_a(r1,ar)              MOV_M_rn_ar(r1,ar)
+#define MOV_M_ar_rn(ar,r2)             M29(ar,r2)
+#define MOV_M_ar_im(ar,im)             M30(ar,im)
+/* mov - Move Branch Register */
+/* MOV_rn_br copies a branch register into a general register (I22).
+ * The *_tg forms write a branch register through I21 and take an explicit
+ * `tag`; MOV_br_rn is the tag == 0 shorthand.  The _RET variant differs
+ * from the plain one only in the third I21 argument (1 instead of 0).
+ * The MOV_br_rn_tg definition must stay on a single line: a wrapped body
+ * leaves the macro empty. */
+#define MOV_rn_br(r1,b2)               I22(b2,r1)
+#define MOV_br_rn_tg(b1,r2,tag)        I21(tag,IH_NONE,0,MWH_NONE,r2,b1)
+#define MOV_br_rn(b1,r2)               MOV_br_rn_tg(b1,r2,0)
+#define MOV_RET_br_rn_tg(b1,r2,tag)    I21(tag,IH_NONE,1,MWH_NONE,r2,b1)
+/* mov - Move Control Register */
+/* Both macros forward (cr, general register) unchanged to M33 / M32.
+ * NOTE(review): "MOV_cr_rr" looks like a typo for "MOV_cr_rn" (the other
+ * mov groups use the _rn suffix), and MOV_rn_cr takes (cr,r1) whereas
+ * MOV_rn_br / MOV_I_rn_ar take the general register first.  Confirm against
+ * the M32/M33 encoders and any callers before renaming or reordering. */
+#define MOV_rn_cr(cr,r1)               M33(cr,r1)
+#define MOV_cr_rr(cr,r2)               M32(cr,r2)
+/* mov - Move General Register */
+#define MOV(r0,r1)                     ADDS(r0,0,r1)
+#define MOV_p(r0,r1,_p)                        ADDS_p(r0,0,r1,_p)
+/* mov - Move Immediate */
+#define MOVI(r1,im)                    ADDL(r1,im,0)
+#define MOVI_p(r1,im,_p)               ADDL_p(r1,im,0,_p)
+/* mov - Move Indirect Register */
+/* MOV_rn_<FILE>(r1,r3) goes through M43 with codes 0x10..0x15 and 0x17;
+ * MOV_<FILE>_rn(r3,r2) goes through M42 with codes 0x00..0x05.
+ * There is no M42 counterpart for CPUID in this table. */
+#define MOV_rn_RR(r1,r3)               M43(0x10,r3,r1)
+#define MOV_rn_DBR(r1,r3)              M43(0x11,r3,r1)
+#define MOV_rn_IBR(r1,r3)              M43(0x12,r3,r1)
+#define MOV_rn_PKR(r1,r3)              M43(0x13,r3,r1)
+#define MOV_rn_PMC(r1,r3)              M43(0x14,r3,r1)
+#define MOV_rn_PMD(r1,r3)              M43(0x15,r3,r1)
+#define MOV_rn_CPUID(r1,r3)            M43(0x17,r3,r1)
+#define MOV_RR_rn(r3,r2)               M42(0x00,r3,r2)
+#define MOV_DBR_rn(r3,r2)              M42(0x01,r3,r2)
+#define MOV_IBR_rn(r3,r2)              M42(0x02,r3,r2)
+#define MOV_PKR_rn(r3,r2)              M42(0x03,r3,r2)
+#define MOV_PMC_rn(r3,r2)              M42(0x04,r3,r2)
+#define MOV_PMD_rn(r3,r2)              M42(0x05,r3,r2)
+/* mov - Move Instruction Pointer */
+#define MOV_rn_ip(r1)                  I25(0x30,r1)
+/* mov - Move Predicates */
+#define MOV_rn_pr(r1)                  I25(0x33,r1)
+#define MOV_pr_rn(r2,im)               I23(r2,im)
+#define MOVI_pr(im)                    I24(im)
+/* mov - Move Processor Status Register */
+#define MOV_rn_psr(r1)                 M36(0x25,r1)
+#define MOV_psr_l_rn(r2)               M35(0x2d,r2)
+/* mov - Move User Mask */
+#define MOV_rn_psr_um(r1)              M36(0x21,r1)
+#define MOV_psr_um_rn(r2)              M35(0x29,r2)
+/* movl */
+#define MOVL(r1,im)                    X2(r1,im)
+/* mpy4 */
+#define MPY4(r1,r2,r3)                 I2(1,0,0,3,1,r3,r2,r1)
+/* mpyshl4 */
+#define MPYSHL4(r1,r2,r3)              I2(1,0,0,3,3,r3,r2,r1)
+/* mux */
+#define MUX1(r1,r2,mbt)                        I3(mbt,r2,r1)
+#define MUX2(r1,r2,mht)                        I4(mht,r2,r1)
+/* nop */
+#define NOP_I(im)                      I18(im,0)
+#define NOP_M(im)                      M48(0,im)
+#define NOP_B(im)                      B9(2,0,im)
+#define NOP_X(im)                      X5(0,im)
+/* or */
+#define OR(r1,r2,r3)                   A1(3,2,r3,r2,r1)
+#define ORI(r1,im,r3)                  A3(0xb,2,r3,im,r1)
+/* pack */
+#define PACK2_USS(r1,r2,r3)            I2(0,2,1,0,0,r3,r2,r1)
+#define PACK2_SSS(r1,r2,r3)            I2(0,2,1,0,2,r3,r2,r1)
+#define PACK4_SSS(r1,r2,r3)            I2(1,2,0,0,2,r3,r2,r1)
+/* padd */
+#define PADD1(r1,r2,r3)                        A9(0,0,0,0,r3,r2,r1)
+#define PADD1_SSS(r1,r2,r3)            A9(0,0,0,1,r3,r2,r1)
+#define PADD1_UUU(r1,r2,r3)            A9(0,0,0,2,r3,r2,r1)
+#define PADD1_UUS(r1,r2,r3)            A9(0,0,0,3,r3,r2,r1)
+#define PADD2(r1,r2,r3)                        A9(0,1,0,0,r3,r2,r1)
+#define PADD2_SSS(r1,r2,r3)            A9(0,1,0,1,r3,r2,r1)
+#define PADD2_UUU(r1,r2,r3)            A9(0,1,0,2,r3,r2,r1)
+#define PADD2_UUS(r1,r2,r3)            A9(0,1,0,3,r3,r2,r1)
+#define PADD4(r1,r2,r3)                        A9(1,0,0,0,r3,r2,r1)
+/* pavg */
+#define PAVG1(r1,r2,r3)                        A9(0,0,2,2,r3,r2,r1)
+#define PAVG2(r1,r2,r3)                        A9(0,1,2,2,r3,r2,r1)
+#define PAVG1_RAZ(r1,r2,r3)            A9(0,0,2,3,r3,r2,r1)
+#define PAVG2_RAZ(r1,r2,r3)            A9(0,1,2,3,r3,r2,r1)
+/* pavgsub */
+#define PAVGSUB1(r1,r2,r3)             A9(0,0,3,2,r3,r2,r1)
+#define PAVGSUB2(r1,r2,r3)             A9(0,1,3,2,r3,r2,r1)
+/* pcmp */
+#define PCMP1_EQ(r1,r2,r3)             A9(0,0,9,0,r3,r2,r1)
+#define PCMP2_EQ(r1,r2,r3)             A9(0,1,9,0,r3,r2,r1)
+#define PCMP4_EQ(r1,r2,r3)             A9(1,0,9,0,r3,r2,r1)
+#define PCMP1_GT(r1,r2,r3)             A9(0,0,9,1,r3,r2,r1)
+#define PCMP2_GT(r1,r2,r3)             A9(0,1,9,1,r3,r2,r1)
+#define PCMP4_GT(r1,r2,r3)             A9(1,0,9,1,r3,r2,r1)
+/* pmax */
+#define PMAX1_U(r1,r2,r3)              I2(0,2,0,1,1,r3,r2,r1)
+#define PMAX2(r1,r2,r3)                        I2(0,2,1,1,3,r3,r2,r1)
+/* pmin */
+#define PMIN1_U(r1,r2,r3)              I2(0,2,0,0,1,r3,r2,r1)
+#define PMIN2(r1,r2,r3)                        I2(0,2,1,0,3,r3,r2,r1)
+/* pmpy */
+#define PMPY2_R(r1,r2,r3)              I2(0,2,1,3,1,r3,r2,r1)
+#define PMPY2_L(r1,r2,r3)              I2(0,2,1,3,3,r3,r2,r1)
+/* pmpyshr */
+#define PMPYSHR2(r1,r2,r3,im)          I1(im,3,r3,r2,r1)
+#define PMPYSHR2_U(r1,r2,r3,im)                I1(im,1,r3,r2,r1)
+/* popcnt */
+#define POPCNT(r1,r3)                  I9(2,r3,r1)
+/* probe */
+#define PROBE_R(r1,r3,r2)              M38(0x38,r3,r2,r1)
+#define PROBE_W(r1,r3,r2)              M38(0x39,r3,r2,r1)
+#define PROBEI_R(r1,r3,im)             M39(0x18,r3,im,r1)
+#define PROBEI_W(r1,r3,im)             M39(0x19,r3,im,r1)
+#define PROBE_RW_FAULT(r3,im)          M40(0x31,r3,im)
+#define PROBE_R_FAULT(r3,im)           M40(0x32,r3,im)
+#define PROBE_W_FAULT(r3,im)           M40(0x33,r3,im)
+/* psad */
+#define PSAD1(r1,r2,r3)                        I2(0,2,0,2,3,r3,r2,r1)
+/* pshl */
+#define PSHL2(r1,r2,r3)                        I7(0,1,r3,r2,r1)
+#define PSHL4(r1,r2,r3)                        I7(1,0,r3,r2,r1)
+#define PSHL2I(r1,r2,im)               I8(0,1,im,r2,r1)
+#define PSHL4I(r1,r2,im)               I8(1,0,im,r2,r1)
+/* pshladd */
+#define PSHLADD2(r1,r2,im,r3)          A10(4,im,r3,r2,r1)
+/* pshr */
+#define PSHR2(r1,r3,r2)                        I5(0,1,2,r3,r2,r1)
+#define PSHR2I(r1,r3,im)               I6(0,1,3,r3,im,r1)
+#define PSHR2_U(r1,r3,r2)              I5(0,1,0,r3,r2,r1)
+#define PSHR2I_U(r1,r3,im)             I6(0,1,1,r3,im,r1)
+#define PSHR4(r1,r3,r2)                        I5(1,0,2,r3,r2,r1)
+#define PSHR4I(r1,r3,im)               I6(1,0,3,r3,im,r1)
+#define PSHR4_U(r1,r3,r2)              I5(1,0,0,r3,r2,r1)
+#define PSHR4I_U(r1,r3,im)             I6(1,0,1,r3,im,r1)
+/* pshradd */
+#define PSHRADD2(r1,r2,im,r3)          A10(6,im,r3,r2,r1)
+/* psub */
+#define PSUB1(r1,r2,r3)                        A9(0,0,1,0,r3,r2,r1)
+#define PSUB1_SSS(r1,r2,r3)            A9(0,0,1,1,r3,r2,r1)
+#define PSUB1_UUU(r1,r2,r3)            A9(0,0,1,2,r3,r2,r1)
+#define PSUB1_UUS(r1,r2,r3)            A9(0,0,1,3,r3,r2,r1)
+#define PSUB2(r1,r2,r3)                        A9(0,1,1,0,r3,r2,r1)
+#define PSUB2_SSS(r1,r2,r3)            A9(0,1,1,1,r3,r2,r1)
+#define PSUB2_UUU(r1,r2,r3)            A9(0,1,1,2,r3,r2,r1)
+#define PSUB2_UUS(r1,r2,r3)            A9(0,1,1,3,r3,r2,r1)
+#define PSUB4(r1,r2,r3)                        A9(1,0,1,0,r3,r2,r1)
+/* ptc.e */
+#define PTC_E(r3)                      M47(0x34,r3)
+/* ptc.g, ptc.ga */
+#define PTC_G(r3,r2)                   M45(0xa,r3,r2)
+#define PTC_GA(r3,r2)                  M45(0xb,r3,r2)
+/* ptc.l */
+#define PTC_L(r3,r2)                   M45(0x9,r3,r2)
+/* ptr */
+#define PTR_D(r3,r2)                   M45(0xc,r3,r2)
+#define PTR_I(r3,r2)                   M45(0xd,r3,r2)
+/* rfi */
+#define RFI()                          B8(0x08)
+/* rsm */
+#define RSM(im)                                M44(7,im)
+/* rum */
+#define RUM(im)                                M44(5,im)
+/* shl */
+#define SHL(r1,r2,r3)                  I7(1,1,r3,r2,r1)
+/* shladd */
+#define SHLADD(r1,r2,im,r3)            A2(4,im,r3,r2,r1)
+/* shladdp4 */
+#define SHLADDP4(r1,r2,im,r3)          A2(6,im,r3,r2,r1)
+/* shr */
+#define SHR(r1,r3,r2)                  I5(1,1,2,r3,r2,r1)
+#define SHR_U(r1,r3,r2)                        I5(1,1,0,r3,r2,r1)
+/* shrp */
+#define SHRP(r1,r2,r3,im)              I10(im,r3,r2,r1)
+/* srlz */
+#define SRLZ_I()                       M24(3,1)
+#define SRLZ_D()                       M24(3,0)
+/* ssm */
+#define SSM(im)                                M44(6,im)
+/* st */
+#define ST1(r3,r2)                     M6(0x30,ST_NONE,0,r3,r2)
+#define ST2(r3,r2)                     M6(0x31,ST_NONE,0,r3,r2)
+#define ST4(r3,r2)                     M6(0x32,ST_NONE,0,r3,r2)
+#define ST8(r3,r2)                     M6(0x33,ST_NONE,0,r3,r2)
+#define ST1_REL(r3,r2)                 M6(0x34,ST_NONE,0,r3,r2)
+#define ST2_REL(r3,r2)                 M6(0x35,ST_NONE,0,r3,r2)
+#define ST4_REL(r3,r2)                 M6(0x36,ST_NONE,0,r3,r2)
+#define ST8_REL(r3,r2)                 M6(0x37,ST_NONE,0,r3,r2)
+#define ST8_SPILL(r3,r2)               M6(0x3b,ST_NONE,0,r3,r2)
+#define ST16(r3,r2)                    M6(0x30,ST_NONE,1,r3,r2)
+#define ST16_REL(r3,r2)                        M6(0x34,ST_NONE,1,r3,r2)
+#define ST1_inc(r3,r2,im)              M5(0x30,ST_NONE,r3,r2,im)
+#define ST2_inc(r3,r2,im)              M5(0x31,ST_NONE,r3,r2,im)
+#define ST4_inc(r3,r2,im)              M5(0x32,ST_NONE,r3,r2,im)
+#define ST8_inc(r3,r2,im)              M5(0x33,ST_NONE,r3,r2,im)
+#define ST1_REL_inc(r3,r2,im)          M5(0x34,ST_NONE,r3,r2,im)
+#define ST2_REL_inc(r3,r2,im)          M5(0x35,ST_NONE,r3,r2,im)
+#define ST4_REL_inc(r3,r2,im)          M5(0x36,ST_NONE,r3,r2,im)
+#define ST8_REL_inc(r3,r2,im)          M5(0x37,ST_NONE,r3,r2,im)
+#define ST8_SPILL_inc(r3,r2,im)                M5(0x3b,ST_NONE,r3,r2,im)
+/* sub */
+#define SUB(r1,r2,r3)                  A1(1,1,r3,r2,r1)
+#define SUB1(r1,r2,r3)                 A1(1,0,r3,r2,r1)
+#define SUBI(r1,im,r3)                 A3(9,1,r3,im,r1)
+/* sum */
+#define SUM(im)                                M44(4,im)
+/* sxt */
+#define SXT1(r1,r3)                    I29(0x14,r3,r1)
+#define SXT2(r1,r3)                    I29(0x15,r3,r1)
+#define SXT4(r1,r3)                    I29(0x16,r3,r1)
+/* sync */
+#define SYNC_I()                       M24(3,3)
+/* tak */
+#define TAK(r1,r3)                     M46(0x1f,r3,r1)
+/* tbit */
+#define TBIT_Z(p1,p2,r3,pos)           I16(0,0,p2,r3,pos,0,p1)
+#define TBIT_Z_UNC(p1,p2,r3,pos)       I16(0,0,p2,r3,pos,1,p1)
+#define TBIT_Z_AND(p1,p2,r3,pos)       I16(1,0,p2,r3,pos,0,p1)
+#define TBIT_NZ_AND(p1,p2,r3,pos)      I16(1,0,p2,r3,pos,1,p1)
+#define TBIT_Z_OR(p1,p2,r3,pos)                I16(0,1,p2,r3,pos,0,p1)
+#define TBIT_NZ_OR(p1,p2,r3,pos)       I16(0,1,p2,r3,pos,1,p1)
+#define TBIT_Z_ANDCM(p1,p2,r3,pos)     I16(1,1,p2,r3,pos,0,p1)
+#define TBIT_NZ_ANDCM(p1,p2,r3,pos)    I16(1,1,p2,r3,pos,1,p1)
+/* tf */
+#define TF_Z(p1,p2,im)                 I30(0,0,p2,im,0,p1)
+#define TF_Z_UNC(p1,p2,im)             I30(0,0,p2,im,1,p1)
+#define TF_Z_AND(p1,p2,im)             I30(1,0,p2,im,0,p1)
+#define TF_NZ_AND(p1,p2,im)            I30(1,0,p2,im,1,p1)
+#define TF_Z_OR(p1,p2,im)              I30(0,1,p2,im,0,p1)
+#define TF_NZ_OR(p1,p2,im)             I30(0,1,p2,im,1,p1)
+#define TF_Z_ANDCM(p1,p2,im)           I30(1,1,p2,im,0,p1)
+#define TF_NZ_ANDCM(p1,p2,im)          I30(1,1,p2,im,1,p1)
+/* thash */
+#define THASH(r1,r3)                   M46(0x1a,r3,r1)
+/* tnat */
+#define TNAT_Z(p1,p2,r3)               I17(0,0,p2,r3,0,p1)
+#define TNAT_Z_UNC(p1,p2,r3)           I17(0,0,p2,r3,1,p1)
+#define TNAT_Z_AND(p1,p2,r3)           I17(1,0,p2,r3,0,p1)
+#define TNAT_NZ_AND(p1,p2,r3)          I17(1,0,p2,r3,1,p1)
+#define TNAT_Z_OR(p1,p2,r3)            I17(0,1,p2,r3,0,p1)
+#define TNAT_NZ_OR(p1,p2,r3)           I17(0,1,p2,r3,1,p1)
+#define TNAT_Z_ANDCM(p1,p2,r3)         I17(1,1,p2,r3,0,p1)
+#define TNAT_NZ_ANDCM(p1,p2,r3)                I17(1,1,p2,r3,1,p1)
+/* tpa */
+#define TPA(r1,r3)                     M46(0x1e,r3,r1)
+/* ttag */
+#define TTAG(r1,r3)                    M46(0x1b,r3,r1)
+/* unpack */
+#define UNPACK1_H(r1,r2,r3)            I2(0,2,0,1,0,r3,r2,r1)
+#define UNPACK2_H(r1,r2,r3)            I2(0,2,1,1,0,r3,r2,r1)
+#define UNPACK4_H(r1,r2,r3)            I2(1,2,0,1,0,r3,r2,r1)
+#define UNPACK1_L(r1,r2,r3)            I2(0,2,0,1,2,r3,r2,r1)
+#define UNPACK2_L(r1,r2,r3)            I2(0,2,1,1,2,r3,r2,r1)
+#define UNPACK4_L(r1,r2,r3)            I2(1,2,0,1,2,r3,r2,r1)
+/* vmsw */
+#define VMSW_0()                       B8(0x18)
+#define VMSW_1()                       B8(0x19)
+/* xchg */
+#define XCHG1_ACQ(r1,r3,r2)            M16(0x08,LD_NONE,r3,r2,r1)
+#define XCHG2_ACQ(r1,r3,r2)            M16(0x09,LD_NONE,r3,r2,r1)
+#define XCHG4_ACQ(r1,r3,r2)            M16(0x0a,LD_NONE,r3,r2,r1)
+#define XCHG8_ACQ(r1,r3,r2)            M16(0x0b,LD_NONE,r3,r2,r1)
+/* xor */
+#define XOR(r1,r2,r3)                  A1(3,3,r3,r2,r1)
+#define XORI(r1,im,r3)                 A3(0xb,3,r3,im,r1)
+/* zxt */
+#define ZXT1(r1,r3)                    I29(0x10,r3,r1)
+#define ZXT2(r1,r3)                    I29(0x11,r3,r1)
+#define ZXT4(r1,r3)                    I29(0x12,r3,r1)
+
+/* Integer add / subtract.
+ * The plain register-register forms expand directly to ADD / SUB.  Every
+ * other form forwards its operands, together with the implicit `_jit`
+ * state, to the static helper declared immediately after the macro.
+ * Suffix "r" = third operand is a register (jit_int32_t), "i" = third
+ * operand is an immediate (jit_word_t).  The "c" and "x" variants
+ * presumably produce / consume a carry -- TODO confirm in the helpers. */
+#define addr(r0,r1,r2)                 ADD(r0,r1,r2)
+#define addi(r0,r1,i0)                 _addi(_jit,r0,r1,i0)
+static void _addi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define addcr(r0,r1,r2)                _addcr(_jit,r0,r1,r2)
+static void _addcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define addci(r0,r1,i0)                _addci(_jit,r0,r1,i0)
+static void _addci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define addxr(r0,r1,r2)                _addxr(_jit,r0,r1,r2)
+static void _addxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define addxi(r0,r1,i0)                _addxi(_jit,r0,r1,i0)
+static void _addxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define subr(r0,r1,r2)                 SUB(r0,r1,r2)
+#define subi(r0,r1,i0)                 _subi(_jit,r0,r1,i0)
+static void _subi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define subcr(r0,r1,r2)                _subcr(_jit,r0,r1,r2)
+static void _subcr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define subci(r0,r1,i0)                _subci(_jit,r0,r1,i0)
+static void _subci(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define subxr(r0,r1,r2)                _subxr(_jit,r0,r1,r2)
+static void _subxr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define subxi(r0,r1,i0)                _subxi(_jit,r0,r1,i0)
+static void _subxi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define mulr(r0,r1,r2)                 _mulr(_jit,r0,r1,r2)
+static void _mulr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define muli(r0,r1,i0)                 _muli(_jit,r0,r1,i0)
+static void _muli(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define divr(r0,r1,r2)                 _divr(_jit,r0,r1,r2)
+static void _divr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi(r0,r1,i0)                 _divi(_jit,r0,r1,i0)
+static void _divi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define divr_u(r0,r1,r2)               _divr_u(_jit,r0,r1,r2)
+static void _divr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi_u(r0,r1,i0)               _divi_u(_jit,r0,r1,i0)
+static void _divi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define remr(r0,r1,r2)                 _remr(_jit,r0,r1,r2)
+static void _remr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define remi(r0,r1,i0)                 _remi(_jit,r0,r1,i0)
+static void _remi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define remr_u(r0,r1,r2)               _remr_u(_jit,r0,r1,r2)
+static void _remr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define remi_u(r0,r1,i0)               _remi_u(_jit,r0,r1,i0)
+static void _remi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* mulh forwards (r0, r1, r2, sign) to _mulh after the implicit _jit state.
+ * NOTE(review): the macro passes `sign` as the LAST argument, but the
+ * prototype below lists jit_bool_t as the SECOND parameter type (the
+ * sibling helpers _iqmulr / _iqmuli put jit_bool_t last).  Check the _mulh
+ * definition: if its parameter order really matches this prototype, `sign`
+ * is being received as a register number and r0 as the sign flag. */
+#define mulh(r0,r1,r2,sign)            _mulh(_jit,r0,r1,r2,sign)
+static void _mulh(jit_state_t*,jit_bool_t,jit_int32_t,jit_int32_t,jit_int32_t);
+#define qmulr(r0,r1,r2,r3)             iqmulr(r0,r1,r2,r3,1)
+#define qmulr_u(r0,r1,r2,r3)           iqmulr(r0,r1,r2,r3,0)
+#define iqmulr(r0,r1,r2,r3,sign)       _iqmulr(_jit,r0,r1,r2,r3,sign)
+static void _iqmulr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#define qmuli(r0,r1,r2,i0)             iqmuli(r0,r1,r2,i0,1)
+#define qmuli_u(r0,r1,r2,i0)           iqmuli(r0,r1,r2,i0,0)
+#define iqmuli(r0,r1,r2,i0,sign)       _iqmuli(_jit,r0,r1,r2,i0,sign)
+static void _iqmuli(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#define qdivr(r0,r1,r2,r3)             iqdivr(r0,r1,r2,r3,1)
+#define qdivr_u(r0,r1,r2,r3)           iqdivr(r0,r1,r2,r3,0)
+#define iqdivr(r0,r1,r2,r3,sign)       _iqdivr(_jit,r0,r1,r2,r3,sign)
+static void _iqdivr(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_int32_t,jit_bool_t);
+#define qdivi(r0,r1,r2,i0)             iqdivi(r0,r1,r2,i0,1)
+#define qdivi_u(r0,r1,r2,i0)           iqdivi(r0,r1,r2,i0,0)
+#define iqdivi(r0,r1,r2,i0,sign)       _iqdivi(_jit,r0,r1,r2,i0,sign)
+static void _iqdivi(jit_state_t*,jit_int32_t,jit_int32_t,
+                   jit_int32_t,jit_word_t,jit_bool_t);
+#define andr(r0,r1,r2)                 AND(r0,r1,r2)
+#define andi(r0,r1,i0)                 _andi(_jit,r0,r1,i0)
+static void _andi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define orr(r0,r1,r2)                  OR(r0,r1,r2)
+#define ori(r0,r1,i0)                  _ori(_jit,r0,r1,i0)
+static void _ori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define xorr(r0,r1,r2)                 XOR(r0,r1,r2)
+#define xori(r0,r1,i0)                 _xori(_jit,r0,r1,i0)
+static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define lshr(r0,r1,r2)                 SHL(r0,r1,r2)
+#define lshi(r0,r1,i0)                 _lshi(_jit,r0,r1,i0)
+static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define rshr(r0,r1,r2)                 SHR(r0,r1,r2)
+#define rshi(r0,r1,i0)                 _rshi(_jit,r0,r1,i0)
+static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define rshr_u(r0,r1,r2)               SHR_U(r0,r1,r2)
+#define rshi_u(r0,r1,i0)               _rshi_u(_jit,r0,r1,i0)
+static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ltr(r0,r1,r2)                  _ltr(_jit,r0,r1,r2)
+static void _ltr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lti(r0,r1,i0)                  _lti(_jit,r0,r1,i0)
+static void _lti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ltr_u(r0,r1,r2)                        _ltr_u(_jit,r0,r1,r2)
+static void _ltr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lti_u(r0,r1,i0)                        _lti_u(_jit,r0,r1,i0)
+static void _lti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ler(r0,r1,r2)                  _ler(_jit,r0,r1,r2)
+static void _ler(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei(r0,r1,i0)                  _lei(_jit,r0,r1,i0)
+static void _lei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ler_u(r0,r1,r2)                        _ler_u(_jit,r0,r1,r2)
+static void _ler_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei_u(r0,r1,i0)                        _lei_u(_jit,r0,r1,i0)
+static void _lei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define eqr(r0,r1,r2)                  _eqr(_jit,r0,r1,r2)
+static void _eqr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define eqi(r0,r1,i0)                  _eqi(_jit,r0,r1,i0)
+static void _eqi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ger(r0,r1,r2)                  _ger(_jit,r0,r1,r2)
+static void _ger(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei(r0,r1,i0)                  _gei(_jit,r0,r1,i0)
+static void _gei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ger_u(r0,r1,r2)                        _ger_u(_jit,r0,r1,r2)
+static void _ger_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei_u(r0,r1,i0)                        _gei_u(_jit,r0,r1,i0)
+static void _gei_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define gtr(r0,r1,r2)                  _gtr(_jit,r0,r1,r2)
+static void _gtr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gti(r0,r1,i0)                  _gti(_jit,r0,r1,i0)
+static void _gti(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define gtr_u(r0,r1,r2)                        _gtr_u(_jit,r0,r1,r2)
+static void _gtr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gti_u(r0,r1,i0)                        _gti_u(_jit,r0,r1,i0)
+static void _gti_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ner(r0,r1,r2)                  _ner(_jit,r0,r1,r2)
+static void _ner(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define nei(r0,r1,i0)                  _nei(_jit,r0,r1,i0)
+static void _nei(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* negr: expands to subr(r0,0,r1), i.e. SUB with register number 0 as the
+ * first source operand (assumes general register 0 reads as zero, giving
+ * r0 = 0 - r1 -- TODO confirm). */
+#define negr(r0,r1)                    subr(r0,0,r1)
+/* comr: ANDCMI with immediate -1 and source r1; presumably computes
+ * -1 & ~r1, i.e. the bitwise complement of r1.  ANDCMI is defined outside
+ * this view -- verify its operand order. */
+#define comr(r0,r1)                    ANDCMI(r0,-1,r1)
+#define movr(r0,r1)                    _movr(_jit,r0,r1)
+static void _movr(jit_state_t*,jit_int32_t,jit_int32_t);
+#define movi(r0,i0)                    _movi(_jit,r0,i0)
+static void _movi(jit_state_t*,jit_int32_t,jit_word_t);
+#define movi_p(r0,i0)                  _movi_p(_jit,r0,i0)
+static jit_word_t _movi_p(jit_state_t*,jit_int32_t,jit_word_t);
+#define htonr(r0,r1)                   MUX1(r0,r1,MUX_REV)
+#define extr_c(r0,r1)                  SXT1(r0,r1)
+#define extr_uc(r0,r1)                 ZXT1(r0,r1)
+#define extr_s(r0,r1)                  SXT2(r0,r1)
+#define extr_us(r0,r1)                 ZXT2(r0,r1)
+#define extr_i(r0,r1)                  SXT4(r0,r1)
+#define extr_ui(r0,r1)                 ZXT4(r0,r1)
+#define bltr(i0,r0,r1)                 _bltr(_jit,i0,r0,r1)
+static jit_word_t _bltr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti(i0,r0,i1)                 _blti(_jit,i0,r0,i1)
+static jit_word_t _blti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bltr_u(i0,r0,r1)               _bltr_u(_jit,i0,r0,r1)
+static jit_word_t _bltr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti_u(i0,r0,i1)               _blti_u(_jit,i0,r0,i1)
+static jit_word_t _blti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler(i0,r0,r1)                 _bler(_jit,i0,r0,r1)
+static jit_word_t _bler(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei(i0,r0,i1)                 _blei(_jit,i0,r0,i1)
+static jit_word_t _blei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bler_u(i0,r0,r1)               _bler_u(_jit,i0,r0,r1)
+static jit_word_t _bler_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei_u(i0,r0,i1)               _blei_u(_jit,i0,r0,i1)
+static jit_word_t _blei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define beqr(i0,r0,r1)                 _beqr(_jit,i0,r0,r1)
+static jit_word_t _beqr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define beqi(i0,r0,i1)                 _beqi(_jit,i0,r0,i1)
+static jit_word_t _beqi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger(i0,r0,r1)                 _bger(_jit,i0,r0,r1)
+static jit_word_t _bger(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei(i0,r0,i1)                 _bgei(_jit,i0,r0,i1)
+static jit_word_t _bgei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bger_u(i0,r0,r1)               _bger_u(_jit,i0,r0,r1)
+static jit_word_t _bger_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei_u(i0,r0,i1)               _bgei_u(_jit,i0,r0,i1)
+static jit_word_t _bgei_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr(i0,r0,r1)                 _bgtr(_jit,i0,r0,r1)
+static jit_word_t _bgtr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti(i0,r0,i1)                 _bgti(_jit,i0,r0,i1)
+static jit_word_t _bgti(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bgtr_u(i0,r0,r1)               _bgtr_u(_jit,i0,r0,r1)
+static jit_word_t _bgtr_u(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti_u(i0,r0,i1)               _bgti_u(_jit,i0,r0,i1)
+static jit_word_t _bgti_u(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bner(i0,r0,r1)                 _bner(_jit,i0,r0,r1)
+static jit_word_t _bner(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bnei(i0,r0,i1)                 _bnei(_jit,i0,r0,i1)
+static jit_word_t _bnei(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bmsr(i0,r0,r1)                 _bmsr(_jit,i0,r0,r1)
+static jit_word_t _bmsr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bmsi(i0,r0,i1)                 _bmsi(_jit,i0,r0,i1)
+static jit_word_t _bmsi(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define bmcr(i0,r0,r1)                 _bmcr(_jit,i0,r0,r1)
+static jit_word_t _bmcr(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bmci(i0,r0,i1)                 _bmci(_jit,i0,r0,i1)
+static jit_word_t _bmci(jit_state_t*,jit_word_t,jit_int32_t,jit_word_t);
+#define baddr(i0,r0,r1,cc)             _baddr(_jit,i0,r0,r1,cc)
+static jit_word_t _baddr(jit_state_t*,jit_word_t,
+                        jit_int32_t,jit_int32_t,jit_bool_t);
+#define baddi(i0,r0,i1,cc)             _baddi(_jit,i0,r0,i1,cc)
+static jit_word_t _baddi(jit_state_t*,jit_word_t,
+                        jit_int32_t,jit_word_t,jit_bool_t);
+#define baddr_u(i0,r0,r1,cc)           _baddr_u(_jit,i0,r0,r1,cc)
+static jit_word_t _baddr_u(jit_state_t*,jit_word_t,
+                          jit_int32_t,jit_int32_t,jit_bool_t);
+#define baddi_u(i0,r0,i1,cc)           _baddi_u(_jit,i0,r0,i1,cc)
+static jit_word_t _baddi_u(jit_state_t*,jit_word_t,
+                          jit_int32_t,jit_word_t,jit_bool_t);
+#define bsubr(i0,r0,r1,cc)             _bsubr(_jit,i0,r0,r1,cc)
+static jit_word_t _bsubr(jit_state_t*,jit_word_t,
+                        jit_int32_t,jit_int32_t,jit_bool_t);
+#define bsubi(i0,r0,i1,cc)             _bsubi(_jit,i0,r0,i1,cc)
+static jit_word_t _bsubi(jit_state_t*,jit_word_t,
+                        jit_int32_t,jit_word_t,jit_bool_t);
+#define bsubr_u(i0,r0,r1,cc)           _bsubr_u(_jit,i0,r0,r1,cc)
+static jit_word_t _bsubr_u(jit_state_t*,jit_word_t,
+                          jit_int32_t,jit_int32_t,jit_bool_t);
+#define bsubi_u(i0,r0,i1,cc)           _bsubi_u(_jit,i0,r0,i1,cc)
+static jit_word_t _bsubi_u(jit_state_t*,jit_word_t,
+                          jit_int32_t,jit_word_t,jit_bool_t);
+/* The bo* and bx* add/sub branches share one implementation each;
+ * the bo* forms pass 1 and the bx* forms pass 0 as the last (cc)
+ * argument. */
+#define boaddr(i0,r0,r1)               baddr(i0,r0,r1,1)
+#define boaddi(i0,r0,i1)               baddi(i0,r0,i1,1)
+#define boaddr_u(i0,r0,r1)             baddr_u(i0,r0,r1,1)
+#define boaddi_u(i0,r0,i1)             baddi_u(i0,r0,i1,1)
+#define bxaddr(i0,r0,r1)               baddr(i0,r0,r1,0)
+#define bxaddi(i0,r0,i1)               baddi(i0,r0,i1,0)
+#define bxaddr_u(i0,r0,r1)             baddr_u(i0,r0,r1,0)
+#define bxaddi_u(i0,r0,i1)             baddi_u(i0,r0,i1,0)
+#define bosubr(i0,r0,r1)               bsubr(i0,r0,r1,1)
+#define bosubi(i0,r0,i1)               bsubi(i0,r0,i1,1)
+#define bosubr_u(i0,r0,r1)             bsubr_u(i0,r0,r1,1)
+#define bosubi_u(i0,r0,i1)             bsubi_u(i0,r0,i1,1)
+#define bxsubr(i0,r0,r1)               bsubr(i0,r0,r1,0)
+#define bxsubi(i0,r0,i1)               bsubi(i0,r0,i1,0)
+#define bxsubr_u(i0,r0,r1)             bsubr_u(i0,r0,r1,0)
+#define bxsubi_u(i0,r0,i1)             bsubi_u(i0,r0,i1,0)
+/* Loads.  The register forms map directly to a single LD* macro, the
+ * immediate-address and indexed forms are out of line.
+ * NOTE(review): the signed forms (ldr_c, ldr_s, ldr_i) use LD1_S, LD2_S
+ * and LD4_S, which are defined outside this view.  If those emit the
+ * Itanium ld.s (speculative) completer they do not sign extend the
+ * result -- confirm against the LD*_S definitions. */
+#define ldr_c(r0,r1)                   LD1_S(r0,r1)
+#define ldi_c(r0,i0)                   _ldi_c(_jit,r0,i0)
+static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_uc(r0,r1)                  LD1(r0,r1)
+#define ldi_uc(r0,i0)                  _ldi_uc(_jit,r0,i0)
+static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_s(r0,r1)                   LD2_S(r0,r1)
+#define ldi_s(r0,i0)                   _ldi_s(_jit,r0,i0)
+static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_us(r0,r1)                  LD2(r0,r1)
+#define ldi_us(r0,i0)                  _ldi_us(_jit,r0,i0)
+static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_i(r0,r1)                   LD4_S(r0,r1)
+#define ldi_i(r0,i0)                   _ldi_i(_jit,r0,i0)
+static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_ui(r0,r1)                  LD4(r0,r1)
+#define ldi_ui(r0,i0)                  _ldi_ui(_jit,r0,i0)
+static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldr_l(r0,r1)                   LD8(r0,r1)
+#define ldi_l(r0,i0)                   _ldi_l(_jit,r0,i0)
+static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_c(r0,r1,r2)               _ldxr_c(_jit,r0,r1,r2)
+static void _ldxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_c(r0,r1,i0)               _ldxi_c(_jit,r0,r1,i0)
+static void _ldxi_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_uc(r0,r1,r2)              _ldxr_uc(_jit,r0,r1,r2)
+static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_uc(r0,r1,i0)              _ldxi_uc(_jit,r0,r1,i0)
+static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_s(r0,r1,r2)               _ldxr_s(_jit,r0,r1,r2)
+static void _ldxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_s(r0,r1,i0)               _ldxi_s(_jit,r0,r1,i0)
+static void _ldxi_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_us(r0,r1,r2)              _ldxr_us(_jit,r0,r1,r2)
+static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_us(r0,r1,i0)              _ldxi_us(_jit,r0,r1,i0)
+static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_i(r0,r1,r2)               _ldxr_i(_jit,r0,r1,r2)
+static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_i(r0,r1,i0)               _ldxi_i(_jit,r0,r1,i0)
+static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_ui(r0,r1,r2)              _ldxr_ui(_jit,r0,r1,r2)
+static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_ui(r0,r1,i0)              _ldxi_ui(_jit,r0,r1,i0)
+static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_l(r0,r1,r2)               _ldxr_l(_jit,r0,r1,r2)
+static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ldxi_l(r0,r1,i0)               _ldxi_l(_jit,r0,r1,i0)
+static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+/* Stores.  The register forms map directly to a single ST* macro, the
+ * immediate-address and indexed forms are out of line. */
+#define str_c(r0,r1)                   ST1(r0,r1)
+#define sti_c(i0,r0)                   _sti_c(_jit,i0,r0)
+static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
+#define str_s(r0,r1)                   ST2(r0,r1)
+#define sti_s(i0,r0)                   _sti_s(_jit,i0,r0)
+static void _sti_s(jit_state_t*,jit_word_t,jit_int32_t);
+#define str_i(r0,r1)                   ST4(r0,r1)
+#define sti_i(i0,r0)                   _sti_i(_jit,i0,r0)
+static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
+#define str_l(r0,r1)                   ST8(r0,r1)
+#define sti_l(i0,r0)                   _sti_l(_jit,i0,r0)
+static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_c(r0,r1,r2)               _stxr_c(_jit,r0,r1,r2)
+static void _stxr_c(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_c(i0,r0,r1)               _stxi_c(_jit,i0,r0,r1)
+static void _stxi_c(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define stxr_s(r0,r1,r2)               _stxr_s(_jit,r0,r1,r2)
+static void _stxr_s(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_s(i0,r0,r1)               _stxi_s(_jit,i0,r0,r1)
+static void _stxi_s(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define stxr_i(r0,r1,r2)               _stxr_i(_jit,r0,r1,r2)
+static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_i(i0,r0,r1)               _stxi_i(_jit,i0,r0,r1)
+static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define stxr_l(r0,r1,r2)               _stxr_l(_jit,r0,r1,r2)
+static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_l(i0,r0,r1)               _stxi_l(_jit,i0,r0,r1)
+static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+/* Jumps, calls, function prolog/epilog and branch patching.  The *_p
+ * variants return a jit_word_t (presumably the address to patch
+ * later -- confirm against their definitions). */
+#define jmpr(r0)                       _jmpr(_jit,r0)
+static void _jmpr(jit_state_t*,jit_int32_t);
+#define jmpi(i0)                       _jmpi(_jit,i0)
+static void _jmpi(jit_state_t*,jit_word_t);
+#define jmpi_p(i0)                     _jmpi_p(_jit,i0)
+static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
+#define callr(r0)                      _callr(_jit,r0)
+static void _callr(jit_state_t*,jit_int32_t);
+#define calli(i0)                      _calli(_jit,i0)
+static void _calli(jit_state_t*,jit_word_t);
+#define calli_p(i0)                    _calli_p(_jit,i0)
+static jit_word_t _calli_p(jit_state_t*,jit_word_t);
+#define prolog(node)                   _prolog(_jit,node)
+static void _prolog(jit_state_t*,jit_node_t*);
+#define epilog(node)                   _epilog(_jit,node)
+static void _epilog(jit_state_t*,jit_node_t*);
+#define patch_at(node,instr,label)     _patch_at(_jit,node,instr,label)
+static void _patch_at(jit_state_t*,jit_node_t*,jit_word_t,jit_word_t);
+#endif
+
+#if CODE
+/* Emit one bundle and remove the instructions it consumed from the
+ * pending queue.
+ *   n          number of queued instructions placed in this bundle
+ *   tm         bundle template
+ *   s0,s1,s2   contents of the three slots
+ * NOTE(review): to_tm(), to_s0(), to_s1(), to_s2() and il() are macros
+ * defined outside this view; presumably they pack the template and
+ * slots into l/h and append both words to the code buffer -- confirm.
+ */
+static void
+_out(jit_state_t *_jit, int n, int tm,
+     jit_word_t s0, jit_word_t s1, jit_word_t s2)
+{
+    int                         i;
+    inst_lo_t           l;
+    inst_hi_t           h;
+    jit_word_t         *w;
+    to_tm(tm);
+    to_s0(s0);
+    to_s1(s1);
+    to_s2(s2);
+    il(l.w);
+    il(h.w);
+    /* Slide the instructions that were not emitted to the start of the
+     * queue.  NOTE(review): entries are moved as jit_word_t units, so
+     * this assumes each inst[] element is exactly one jit_word_t wide
+     * -- confirm against the declaration of inst[]. */
+    w = (jit_word_t *)_jitc->inst;
+    for (i = n; i < _jitc->ioff; i++)
+       w[i - n] = w[i];
+    _jitc->ioff -= n;
+}
+
+/* Slot fillers used by flush() when a bundle has fewer than three
+ * real instructions: nop_m and nop_i only have bit 27 set, nop_b is
+ * the value 2 shifted to bit 37 (where the encoders below place the
+ * major opcode). */
+#define nop_m          0x0008000000L
+#define nop_i          0x0008000000L
+#define nop_b          0x4000000000L
+/* Request a stop after the last queued instruction.  The sets of live
+ * general and floating point registers are reset; when the queue is
+ * empty a nop carrying the stop is queued instead. */
+static void
+_stop(jit_state_t *_jit)
+{
+    /* No register is live past the stop */
+    jit_regset_set_ui(&_jitc->gprs, 0);
+    jit_regset_set_ui(&_jitc->fprs, 0);
+    if (!_jitc->ioff) {
+       /* Nothing queued that could carry the stop */
+       inst(nop_m, INST_Ms);
+       return;
+    }
+    /* Tag the most recently queued instruction */
+    _jitc->inst[_jitc->ioff - 1].t |= INST_STOP;
+}
+
+/* Drain the instruction queue: issue a stop first if any register is
+ * still flagged live, then flush until nothing is pending. */
+static void
+_sync(jit_state_t *_jit)
+{
+    /* Taken branches are not supposed to need a stop, so no stop
+     * should be required when no register is live in sequential
+     * code */
+    if (jit_regset_cmp_ui(&_jitc->gprs, 0) != 0 ||
+       jit_regset_cmp_ui(&_jitc->fprs, 0) != 0)
+       stop();
+    for (;;) {
+       flush();
+       if (!_jitc->ioff)
+           break;
+    }
+}
+
+/* Instruction type codes positioned for each bundle slot: the _0 / s0
+ * names are the raw INST_* value (bits 0-3), the _1 / s1 names are
+ * shifted left by 4 and the _2 / s2 names by 8.  The "s" spelling
+ * selects the INST_*s variant of the type (presumably the type with
+ * the stop flag set -- confirm against the INST_* definitions). */
+#define A_0            INST_A
+#define As0            INST_As
+#define I_0            INST_I
+#define Is0            INST_Is
+#define M_0            INST_M
+#define Ms0            INST_Ms
+#define F_0            INST_F
+#define Fs0            INST_Fs
+#define B_0            INST_B
+#define Bs0            INST_Bs
+#define L_0            INST_L
+#define Ls0            INST_Ls
+#define X_0            INST_X
+#define Xs0            INST_Xs
+#define A_1            (INST_A<<4)
+#define As1            (INST_As<<4)
+#define I_1            (INST_I<<4)
+#define Is1            (INST_Is<<4)
+#define M_1            (INST_M<<4)
+#define Ms1            (INST_Ms<<4)
+#define F_1            (INST_F<<4)
+#define Fs1            (INST_Fs<<4)
+#define B_1            (INST_B<<4)
+#define Bs1            (INST_Bs<<4)
+#define L_1            (INST_L<<4)
+#define Ls1            (INST_Ls<<4)
+#define X_1            (INST_X<<4)
+#define Xs1            (INST_Xs<<4)
+#define A_2            (INST_A<<8)
+#define As2            (INST_As<<8)
+#define I_2            (INST_I<<8)
+#define Is2            (INST_Is<<8)
+#define M_2            (INST_M<<8)
+#define Ms2            (INST_Ms<<8)
+#define F_2            (INST_F<<8)
+#define Fs2            (INST_Fs<<8)
+#define B_2            (INST_B<<8)
+#define Bs2            (INST_Bs<<8)
+#define L_2            (INST_L<<8)
+#define Ls2            (INST_Ls<<8)
+#define X_2            (INST_X<<8)
+#define Xs2            (INST_Xs<<8)
+
+/* Keys describing the instruction types of one, two or three queued
+ * instructions (first instruction in the low 4 bits).  These are the
+ * combinations templat() accepts and flush() knows how to bundle.
+ * Every multi-term expansion is parenthesized so that a key remains a
+ * single operand wherever it is used in an expression. */
+#define I_             I_0
+#define I_I_           (I_0|I_1)
+#define I_Is           (I_0|Is1)
+#define I_B_           (I_0|B_1)
+#define I_Bs           (I_0|Bs1)
+#define Is             Is0
+#define IsI_           (Is0|I_1)
+#define IsIs           (Is0|Is1)
+#define M_             M_0
+#define M_I_           (M_0|I_1)
+#define M_Is           (M_0|Is1)
+#define M_M_           (M_0|M_1)
+#define M_Ms           (M_0|Ms1)
+#define M_F_           (M_0|F_1)
+#define M_Fs           (M_0|Fs1)
+#define M_B_           (M_0|B_1)
+#define M_Bs           (M_0|Bs1)
+#define M_I_I_         (M_0|I_1|I_2)
+#define M_I_Is         (M_0|I_1|Is2)
+#define M_I_B_         (M_0|I_1|B_2)
+#define M_I_Bs         (M_0|I_1|Bs2)
+#define M_IsI_         (M_0|Is1|I_2)
+#define M_IsIs         (M_0|Is1|Is2)
+#define M_M_I_         (M_0|M_1|I_2)
+#define M_M_Is         (M_0|M_1|Is2)
+#define M_M_F_         (M_0|M_1|F_2)
+#define M_M_Fs         (M_0|M_1|Fs2)
+#define M_M_B_         (M_0|M_1|B_2)
+#define M_M_Bs         (M_0|M_1|Bs2)
+#define M_F_I_         (M_0|F_1|I_2)
+#define M_F_Is         (M_0|F_1|Is2)
+#define M_F_B_         (M_0|F_1|B_2)
+#define M_F_Bs         (M_0|F_1|Bs2)
+#define M_B_B_         (M_0|B_1|B_2)
+#define M_B_Bs         (M_0|B_1|Bs2)
+#define M_L_X_         (M_0|L_1|X_2)
+#define M_L_Xs         (M_0|L_1|Xs2)
+#define Ms             Ms0
+#define MsI_           (Ms0|I_1)
+#define MsIs           (Ms0|Is1)
+#define MsM_           (Ms0|M_1)
+#define MsMs           (Ms0|Ms1)
+#define MsM_I_         (Ms0|M_1|I_2)
+#define MsM_Is         (Ms0|M_1|Is2)
+#define F_             F_0
+#define F_I_           (F_0|I_1)
+#define F_Is           (F_0|Is1)
+#define F_B_           (F_0|B_1)
+#define F_Bs           (F_0|Bs1)
+#define Fs             Fs0
+#define B_             B_0
+#define B_B_           (B_0|B_1)
+#define B_Bs           (B_0|Bs1)
+#define B_B_B_         (B_0|B_1|B_2)
+#define B_B_Bs         (B_0|B_1|Bs2)
+#define Bs             Bs0
+#define L_X_           (L_0|X_1)
+#define L_Xs           (L_0|Xs1)
+
+/* Check whether cc is one of the instruction type combinations that
+ * flush() can turn into a bundle.  Returns cc itself when it is, and
+ * zero otherwise. */
+static jit_word_t
+templat(jit_word_t cc)
+{
+    switch (cc) {
+       case I_:
+       case I_I_:      case I_Is:
+       case I_B_:      case I_Bs:
+       case Is:
+       case IsI_:      case IsIs:
+       case M_:
+       case M_I_:      case M_Is:
+       case M_M_:      case M_Ms:
+       case M_F_:      case M_Fs:
+       case M_B_:      case M_Bs:
+       case M_I_I_:    case M_I_Is:
+       case M_I_B_:    case M_I_Bs:
+       case M_IsI_:    case M_IsIs:
+       case M_M_I_:    case M_M_Is:
+       case M_M_F_:    case M_M_Fs:
+       case M_M_B_:    case M_M_Bs:
+       case M_F_I_:    case M_F_Is:
+       case M_F_B_:    case M_F_Bs:
+       case M_B_B_:    case M_B_Bs:
+       case M_L_X_:    case M_L_Xs:
+       case Ms:
+       case MsI_:      case MsIs:
+       case MsM_:      case MsMs:
+       case MsM_I_:    case MsM_Is:
+       case F_:
+       case F_I_:      case F_Is:
+       case F_B_:      case F_Bs:
+       case Fs:
+       case B_:
+       case B_B_:      case B_Bs:
+       case B_B_B_:    case B_B_Bs:
+       case Bs:
+       case L_X_:      case L_Xs:
+           /* Known combination, hand it back unchanged */
+           break;
+       default:
+           cc = 0;
+           break;
+    }
+    return (cc);
+}
+
+/* The match* functions recurse trying to find a template for A-
+ * instructions, which may be executed in either an M- or an I- unit.
+ * As a heuristic they try M- first for slots 0 and 2, and I- first
+ * for slot 1, but all possible matches are tried.
+ */
+/* Slot 2 holds an A- instruction without stop: try it as M-, then as
+ * I-.  Returns the matching key, or zero. */
+static jit_word_t
+match_2(jit_word_t cc)
+{
+    jit_word_t         found;
+    found = templat(cc | M_2);
+    if (!found)
+       found = templat(cc | I_2);
+    return (found);
+}
+
+/* Slot 2 holds an A- instruction of the As kind: try it as Ms, then as
+ * Is.  Returns the matching key, or zero. */
+static jit_word_t
+matchs2(jit_word_t cc)
+{
+    jit_word_t         found;
+    found = templat(cc | Ms2);
+    if (!found)
+       found = templat(cc | Is2);
+    return (found);
+}
+
+/* Resolve an A- instruction in slot 2, if there is one; the slot is
+ * cleared before the candidates are tried.  Returns the matching key,
+ * or zero when slot 2 is not A_/As or nothing matches. */
+static jit_word_t
+match2(jit_word_t cc)
+{
+    jit_word_t         slot = cc & 0xf00;
+    if (slot != A_2 && slot != As2)
+       return (0);
+    cc &= ~0xf00;
+    return (slot == A_2 ? match_2(cc) : matchs2(cc));
+}
+
+/* Slot 1 holds an A- instruction without stop: try it as I- and then
+ * as M-, first as a complete key and then with slot 2 resolved by
+ * match2().  Returns the first matching key, or zero. */
+static jit_word_t
+match_1(jit_word_t cc)
+{
+    jit_word_t         found;
+    found = templat(cc | I_1);
+    if (!found)
+       found = templat(cc | M_1);
+    if (!found)
+       found = match2(cc | I_1);
+    if (!found)
+       found = match2(cc | M_1);
+    return (found);
+}
+
+/* Slot 1 holds an A- instruction of the As kind: try it as Is and then
+ * as Ms, first as a complete key and then with slot 2 resolved by
+ * match2().  Returns the first matching key, or zero. */
+static jit_word_t
+matchs1(jit_word_t cc)
+{
+    jit_word_t         found;
+    found = templat(cc | Is1);
+    if (!found)
+       found = templat(cc | Ms1);
+    if (!found)
+       found = match2(cc | Is1);
+    if (!found)
+       found = match2(cc | Ms1);
+    return (found);
+}
+
+/* Resolve an A- instruction in slot 1, if there is one; the slot is
+ * cleared before the candidates are tried.  Returns the matching key,
+ * or zero when slot 1 is not A_/As or nothing matches. */
+static jit_word_t
+match1(jit_word_t cc)
+{
+    jit_word_t         slot = cc & 0x0f0;
+    if (slot != A_1 && slot != As1)
+       return (0);
+    cc &= ~0x0f0;
+    return (slot == A_1 ? match_1(cc) : matchs1(cc));
+}
+
+/* Slot 0 holds an A- instruction without stop: try it as M- and then
+ * as I-, first as a complete key and then with slot 1 resolved by
+ * match1().  Returns the first matching key, or zero. */
+static jit_word_t
+match_0(jit_word_t cc)
+{
+    jit_word_t         found;
+    found = templat(cc | M_0);
+    if (!found)
+       found = templat(cc | I_0);
+    if (!found)
+       found = match1(cc | M_0);
+    if (!found)
+       found = match1(cc | I_0);
+    return (found);
+}
+
+/* Slot 0 holds an A- instruction of the As kind: try it as Ms and then
+ * as Is, first as a complete key and then with slot 1 resolved by
+ * match1().  Returns the first matching key, or zero. */
+static jit_word_t
+matchs0(jit_word_t cc)
+{
+    jit_word_t         found;
+    found = templat(cc | Ms0);
+    if (!found)
+       found = templat(cc | Is0);
+    if (!found)
+       found = match1(cc | Ms0);
+    if (!found)
+       found = match1(cc | Is0);
+    return (found);
+}
+
+/* Entry point of the A- instruction resolution: handles an A_/As in
+ * slot 0 (the slot is cleared before the candidates are tried).
+ * Returns the matching key, or zero when slot 0 is not A_/As or
+ * nothing matches. */
+static jit_word_t
+match0(jit_word_t cc)
+{
+    jit_word_t         slot = cc & 0x00f;
+    if (slot != A_0 && slot != As0)
+       return (0);
+    cc &= ~0x00f;
+    return (slot == A_0 ? match_0(cc) : matchs0(cc));
+}
+
+/* Emit one bundle from the head of the instruction queue.
+ *
+ * A key is built with the type of every queued instruction (4 bits per
+ * instruction, first one in the low bits).  If the key is not a known
+ * combination, not even after resolving A- instructions to M- or I-,
+ * the last instruction is dropped from the key and the search is
+ * retried, down to a single instruction.  The key found selects the
+ * number of instructions consumed (n), the template (tm) and the
+ * contents of the three slots, with nops filling the unused slots,
+ * and the result is handed to out().  Does nothing if the queue is
+ * empty.
+ */
+static void
+_flush(jit_state_t *_jit)
+{
+    int                        n, soff;
+    jit_word_t         t, cc, tm, s0, s1, s2;
+
+    if (!_jitc->ioff)
+       return;
+    for (cc = 0, n = soff = 0; n < _jitc->ioff; n++, soff += 4)
+       cc |= (jit_uword_t)(_jitc->inst[n].t) << soff;
+
+    soff = 0xf00;
+    while (soff) {
+       /* Try to find a template, or reduce down
+        * to one instruction if no template match */
+       if ((t = templat(cc))) {
+           cc = t;
+           break;
+       }
+       /* A- instructions may be executed in M- or I- unit */
+       if ((t = match0(cc))) {
+           cc = t;
+           break;
+       }
+       cc &= ~soff;
+       soff >>= 4;
+    }
+    /* A single instruction must always be encodable */
+    assert(soff);
+
+    /* Prefer tail nop if need to add some nop, so that patching is easier */
+#define ii(n)          _jitc->inst[n].i
+    switch (cc) {
+       case I_:
+           n = 1;              tm = TM_M_I_I_;
+           s0 = nop_m;         s1 = ii(0);             s2 = nop_i;
+           break;
+       case I_I_:
+           n = 2;              tm = TM_M_I_I_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case I_Is:
+           n = 2;              tm = TM_M_I_Is;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case I_B_:
+           n = 2;              tm = TM_M_I_B_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case I_Bs:
+           n = 2;              tm = TM_M_I_Bs;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case Is:
+           n = 1;              tm = TM_M_IsI_;
+           s0 = nop_m;         s1 = ii(0);             s2 = nop_i;
+           break;
+       case IsI_:
+           n = 2;              tm = TM_M_IsI_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case IsIs:
+           n = 2;              tm = TM_M_IsIs;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case M_:
+           n = 1;              tm = TM_M_I_I_;
+           s0 = ii(0);         s1 = nop_i;             s2 = nop_i;
+           break;
+       case M_I_:
+           n = 2;              tm = TM_M_I_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_Is:
+           n = 2;              tm = TM_M_IsI_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_M_:
+           n = 2;              tm = TM_M_M_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_Ms:
+           n = 2;              tm = TM_M_M_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_F_:
+           n = 2;              tm = TM_M_F_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_Fs:
+           n = 2;              tm = TM_M_F_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case M_B_:
+           n = 2;              tm = TM_M_B_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_b;
+           break;
+       case M_Bs:
+           n = 2;              tm = TM_M_B_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_b;
+           break;
+       case M_I_I_:
+           n = 3;              tm = TM_M_I_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_I_Is:
+           n = 3;              tm = TM_M_I_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_I_B_:
+           n = 3;              tm = TM_M_I_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_I_Bs:
+           n = 3;              tm = TM_M_I_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_IsI_:
+           n = 3;              tm = TM_M_IsI_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_IsIs:
+           n = 3;              tm = TM_M_IsIs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_I_:
+           n = 3;              tm = TM_M_M_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_Is:
+           n = 3;              tm = TM_M_M_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_F_:
+           n = 3;              tm = TM_M_M_F_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_Fs:
+           n = 3;              tm = TM_M_M_Fs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_B_:
+           n = 3;              tm = TM_M_M_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_M_Bs:
+           n = 3;              tm = TM_M_M_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_F_I_:
+           n = 3;              tm = TM_M_F_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_F_Is:
+           n = 3;              tm = TM_M_F_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_F_B_:
+           n = 3;              tm = TM_M_F_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_F_Bs:
+           n = 3;              tm = TM_M_F_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_B_B_:
+           n = 3;              tm = TM_M_B_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_B_Bs:
+           n = 3;              tm = TM_M_B_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_L_X_:
+           n = 3;              tm = TM_M_L_X_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case M_L_Xs:
+           n = 3;              tm = TM_M_L_Xs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case Ms:
+           n = 1;              tm = TM_MsM_I_;
+           s0 = ii(0);         s1 = nop_m;             s2 = nop_i;
+           break;
+       case MsI_:
+           n = 2;              tm = TM_MsM_I_;
+           s0 = ii(0);         s1 = nop_m;             s2 = ii(1);
+           break;
+       case MsIs:
+           n = 2;              tm = TM_MsM_Is;
+           s0 = ii(0);         s1 = nop_m;             s2 = ii(1);
+           break;
+       case MsM_:
+           n = 2;              tm = TM_MsM_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case MsMs:
+           /* The second instruction also requires a stop; as for
+            * M_Ms it is provided by the stop at the end of the
+            * bundle, after the trailing nop */
+           n = 2;              tm = TM_MsM_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_i;
+           break;
+       case MsM_I_:
+           n = 3;              tm = TM_MsM_I_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case MsM_Is:
+           n = 3;              tm = TM_MsM_Is;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case F_:
+           n = 1;              tm = TM_M_F_I_;
+           s0 = nop_m;         s1 = ii(0);             s2 = nop_i;
+           break;
+       case F_I_:
+           n = 2;              tm = TM_M_F_I_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case F_Is:
+           n = 2;              tm = TM_M_F_Is;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case F_B_:
+           n = 2;              tm = TM_M_F_B_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case F_Bs:
+           n = 2;              tm = TM_M_F_Bs;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case Fs:
+           n = 1;              tm = TM_M_F_Is;
+           s0 = nop_m;         s1 = ii(0);             s2 = nop_i;
+           break;
+       case B_:
+           n = 1;              tm = TM_B_B_B_;
+           s0 = ii(0);         s1 = nop_b;             s2 = nop_b;
+           break;
+       case B_B_:
+           n = 2;              tm = TM_B_B_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_b;
+           break;
+       case B_Bs:
+           n = 2;              tm = TM_B_B_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = nop_b;
+           break;
+       case B_B_B_:
+           n = 3;              tm = TM_B_B_B_;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case B_B_Bs:
+           n = 3;              tm = TM_B_B_Bs;
+           s0 = ii(0);         s1 = ii(1);             s2 = ii(2);
+           break;
+       case Bs:
+           n = 1;              tm = TM_B_B_Bs;
+           s0 = ii(0);         s1 = nop_b;             s2 = nop_b;
+           break;
+       case L_X_:
+           n = 2;              tm = TM_M_L_X_;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       case L_Xs:
+           n = 2;              tm = TM_M_L_Xs;
+           s0 = nop_m;         s1 = ii(0);             s2 = ii(1);
+           break;
+       default:
+           abort();
+    }
+    out(n, tm, s0, s1, s2);
+}
+
+/* Append an encoded instruction to the pending queue.
+ *   i   encoded instruction; must fit in the low 41 bits
+ *   t   instruction type (INST_*) used later to choose the template
+ * The queue holds at most three instructions, so it is flushed first
+ * when it is already full.
+ */
+static void
+_inst(jit_state_t *_jit, jit_word_t i, jit_uint8_t t)
+{
+    if (_jitc->ioff > 2)
+       flush();
+    /* The encoders place the major opcode in bits 37-40; no bit above
+     * that may be set */
+    assert(!(i & ~0x1ffffffffffL));
+    _jitc->inst[_jitc->ioff].i = i;
+    _jitc->inst[_jitc->ioff].t = t;
+    ++_jitc->ioff;
+}
+
+/* Queue an INST_A instruction with major opcode 8 and fields x4 (bit
+ * 29), x2 (bit 27), r3 (bit 20), r2 (bit 13), r1 (bit 6) and the
+ * predicate _p in the low 6 bits.  r2 and r3 are passed to TSTREG2()
+ * before and r1 to SETREG() after queuing (both defined outside this
+ * view). */
+static void
+_A1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x4, jit_word_t x2, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(x4 &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    bits  = (8L<<37)|(x4<<29)|(x2<<27);
+    bits |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(bits, INST_A);
+    SETREG(r1);
+}
+
+/* Queue an INST_A instruction with major opcode 8 taking an 8 bit
+ * signed immediate: bit 7 of im goes to bit 36 and the low 7 bits to
+ * bit 13.  Other fields: x4 (bit 29), x2 (bit 27), r3 (bit 20), r1
+ * (bit 6) and the predicate _p in the low 6 bits.
+ * im must be in the range -128 to 127, inclusive. */
+static void
+_A3(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x4, jit_word_t x2, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(x4 &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    /* 127 is representable in 8 bits; same check as in _A8 */
+    assert(im >= -128 && im <= 127);
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    inst((8L<<37)|(((im>>7)&1L)<<36)|(x4<<29)|(x2<<27)|
+        (r3<<20)|((im&0x7fL)<<13)|(r1<<6)|_p, INST_A);
+    SETREG(r1);
+}
+
+/* Queue an INST_A instruction with major opcode 8 taking a 14 bit
+ * signed immediate: bit 13 of im goes to bit 36, bits 7-12 to bit 27
+ * and the low 7 bits to bit 13.  Other fields: x2 (bit 34), r3 (bit
+ * 20), r1 (bit 6) and the predicate _p in the low 6 bits.
+ * im must be in the range -8192 to 8191, inclusive. */
+static void
+_A4(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x2, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    /* 8191 is the largest value representable in 14 bits */
+    assert(im >= -8192 && im <= 8191);
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    inst((8L<<37)|(((im>>13)&1L)<<36)|(x2<<34)|(((im>>7)&0x3fL)<<27)|
+        (r3<<20)|((im&0x7fL)<<13)|(r1<<6)|_p, INST_A);
+    SETREG(r1);
+}
+
+/* Queue an INST_A instruction with major opcode 9 taking a 22 bit
+ * signed immediate and a 2 bit source register number r3 (bit 20).
+ * The immediate is scattered over four fields,
+ *   imm22 = sign_ext(s << 21 | imm5c << 16 | imm9d << 7 | imm7b)
+ * with s at bit 36, imm9d at bit 27, imm5c at bit 22 and imm7b at bit
+ * 13 (A5 format of the Itanium instruction set manual).  r1 is at bit
+ * 6 and the predicate _p in the low 6 bits.
+ * im must be in the range -2097152 to 2097151, inclusive. */
+static void
+_A5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_word_t         s, i5, i9, i7;
+    assert(!(_p & ~0x3fL));
+    assert(!(r3  & ~0x3L));
+    /* 2097151 is the largest value representable in 22 bits */
+    assert(im >= -2097152 && im <= 2097151);
+    assert(!(r1  & ~0x7fL));
+    s  = (im >> 21) & 0x1L;
+    i5 = (im >> 16) & 0x1fL;
+    i9 = (im >>  7) & 0x1ffL;
+    i7 = im & 0x7fL;
+    TSTREG1(r3);
+    /* imm9d sits above imm5c in the instruction word, so the bits of
+     * im cannot be copied as one contiguous group */
+    inst((9L<<37)|(s<<36)|(i9<<27)|(i5<<22)|(r3<<20)|
+        (i7<<13)|(r1<<6)|_p, INST_A);
+    SETREG(r1);
+}
+
+/* Queue an INST_A instruction with major opcode o writing two
+ * predicates from registers r2 and r3.  Fields: x2 (bit 34), ta (bit
+ * 33), p2 (bit 27), r3 (bit 20), r2 (bit 13), c (bit 12), p1 (bit 6)
+ * and the qualifying predicate _p in the low 6 bits.  No SETREG() is
+ * done, as no general register is written here. */
+static void
+_A6(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t o, jit_word_t x2, jit_word_t ta, jit_word_t p2,
+    jit_word_t r3, jit_word_t r2, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(o  &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(ta &  ~0x1L));
+    /* p2 only has bits 27-32; a seventh bit would overwrite ta */
+    assert(!(p2 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG2(r2, r3);
+    inst((o<<37)|(x2<<34)|(ta<<33)|(p2<<27)|(r3<<20)|
+        (r2<<13)|(c<<12)|(p1<<6)|_p, INST_A);
+}
+
+/* Queue an INST_A instruction with major opcode o writing two
+ * predicates from register r3 only; bit 36 is always set and the
+ * field at bit 13 is left as zero.  Fields: x2 (bit 34), ta (bit 33),
+ * p2 (bit 27), r3 (bit 20), c (bit 12), p1 (bit 6) and the qualifying
+ * predicate _p in the low 6 bits. */
+static void
+_A7(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t o, jit_word_t x2, jit_word_t ta,
+    jit_word_t p2, jit_word_t r3, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(o  &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(ta &  ~0x1L));
+    /* p2 only has bits 27-32; a seventh bit would overwrite ta */
+    assert(!(p2 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG1(r3);
+    inst((o<<37)|(1L<<36)|(x2<<34)|(ta<<33)|
+        (p2<<27)|(r3<<20)|(c<<12)|(p1<<6)|_p, INST_A);
+}
+
+/* Queue an INST_A instruction with major opcode o writing two
+ * predicates from register r3 and an 8 bit signed immediate (bit 7 of
+ * im at bit 36, low 7 bits at bit 13).  Fields: x2 (bit 34), ta (bit
+ * 33), p2 (bit 27), r3 (bit 20), c (bit 12), p1 (bit 6) and the
+ * qualifying predicate _p in the low 6 bits.
+ * im must be in the range -128 to 127, inclusive. */
+static void
+_A8(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t o, jit_word_t x2, jit_word_t ta, jit_word_t p2,
+    jit_word_t r3, jit_word_t im, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(o  &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(ta &  ~0x1L));
+    /* p2 only has bits 27-32; a seventh bit would overwrite ta */
+    assert(!(p2 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(im >= -128 && im <= 127);
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG1(r3);
+    inst((o<<37)|(((im>>7)&1L)<<36)|(x2<<34)|(ta<<33)|(p2<<27)|(r3<<20)|
+        ((im&0x7fL)<<13)|(c<<12)|(p1<<6)|_p, INST_A);
+}
+
+/* Queue an INST_A instruction with major opcode 8 and bit 34 set.
+ * Fields: za (bit 36), zb (bit 33), x4 (bit 29), x2 (bit 27), r3 (bit
+ * 20), r2 (bit 13), r1 (bit 6) and the predicate _p in the low 6
+ * bits. */
+static void
+_A9(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t x4,
+    jit_word_t x2, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(zb &  ~0x1L));
+    assert(!(x4 &  ~0xfL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    bits  = (8L<<37)|(za<<36)|(1L<<34)|(zb<<33)|(x4<<29)|(x2<<27);
+    bits |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(bits, INST_A);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7 and bit 33 set.
+ * Fields: ct (bit 30), x2 (bit 28), r3 (bit 20), r2 (bit 13), r1 (bit
+ * 6) and the predicate _p in the low 6 bits. */
+static void
+_I1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t ct, jit_word_t x2, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(ct &  ~0x3L));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    bits  = (7L<<37)|(1L<<33)|(ct<<30)|(x2<<28);
+    bits |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(bits, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7.  Fields: za (bit
+ * 36), xa (bit 34), zb (bit 33), xc (bit 30), xb (bit 28), r3 (bit
+ * 20), r2 (bit 13), r1 (bit 6) and the predicate _p in the low 6
+ * bits. */
+static void
+_I2(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t xa, jit_word_t zb, jit_word_t xc,
+    jit_word_t xb ,jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(xa &  ~0x3L));
+    assert(!(zb &  ~0x1L));
+    assert(!(xc &  ~0x3L));
+    assert(!(xb &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    /* The predicate must be part of the encoding, otherwise the
+     * instruction is always qualified by predicate 0 */
+    inst((7L<<37)|(za<<36)|(xa<<34)|(zb<<33)|(xc<<30)|
+        (xb<<28)|(r3<<20)|(r2<<13)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7 and the constants 3
+ * at bit 34, 2 at bit 30 and 2 at bit 28.  Fields: mb (bit 20, 4
+ * bits), r2 (bit 13), r1 (bit 6) and the predicate _p in the low 6
+ * bits. */
+static void
+_I3(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t mb, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(mb &  ~0xfL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r2);
+    bits  = (7L<<37)|(3L<<34)|(2L<<30)|(2L<<28);
+    bits |= (mb<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(bits, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7, the constants 3 at
+ * bit 34, 2 at bit 30 and 2 at bit 28, and bit 33 set.  Fields: mh
+ * (bit 20, 8 bits), r2 (bit 13), r1 (bit 6) and the predicate _p in
+ * the low 6 bits. */
+static void
+_I4(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t mh, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(mh & ~0xffL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r2);
+    bits  = (7L<<37)|(3L<<34)|(1L<<33)|(2L<<30)|(2L<<28);
+    bits |= (mh<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(bits, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7.  Fields: za (bit
+ * 36), zb (bit 33), x2 (bit 28), r3 (bit 20), r2 (bit 13), r1 (bit 6)
+ * and the predicate _p in the low 6 bits. */
+static void
+_I5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t x2,
+    jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(zb &  ~0x1L));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    bits  = (7L<<37)|(za<<36)|(zb<<33)|(x2<<28);
+    bits |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(bits, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7 and bit 34 set.
+ * Fields: za (bit 36), zb (bit 33), x2 (bit 28), r3 (bit 20), ct (bit
+ * 14, 5 bits), r1 (bit 6) and the predicate _p in the low 6 bits. */
+static void
+_I6(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t x2,
+    jit_word_t r3, jit_word_t ct, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(zb &  ~0x1L));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(ct & ~0x1fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    bits  = (7L<<37)|(za<<36)|(1L<<34)|(zb<<33)|(x2<<28);
+    bits |= (r3<<20)|(ct<<14)|(r1<<6)|_p;
+    inst(bits, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7 and bit 30 set.
+ * Fields: za (bit 36), zb (bit 33), r3 (bit 20), r2 (bit 13), r1 (bit
+ * 6) and the predicate _p in the low 6 bits. */
+static void
+_I7(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(zb &  ~0x1L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    bits  = (7L<<37)|(za<<36)|(zb<<33)|(1L<<30);
+    bits |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(bits, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7, the constant 3 at
+ * bit 34 and bits 30 and 28 set.  Fields: za (bit 36), zb (bit 33),
+ * im (bit 20, 5 bits), r2 (bit 13), r1 (bit 6) and the predicate _p
+ * in the low 6 bits. */
+static void
+_I8(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t za, jit_word_t zb, jit_word_t im, jit_word_t r2, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(za &  ~0x1L));
+    assert(!(zb &  ~0x1L));
+    assert(!(im & ~0x1fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r2);
+    /* The predicate must be part of the encoding, otherwise the
+     * instruction is always qualified by predicate 0 */
+    inst((7L<<37)|(za<<36)|(3L<<34)|(zb<<33)|(1L<<30)|(1L<<28)|
+        (im<<20)|(r2<<13)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 7 and bits 34, 33 and
+ * 28 set.  Fields: x2 (bit 30), r3 (bit 20), r1 (bit 6) and the
+ * predicate _p in the low 6 bits; the field at bit 13 is left as
+ * zero. */
+static void
+_I9(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x2, jit_word_t r3, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG1(r3);
+    bits  = (7L<<37)|(1L<<34)|(1L<<33)|(x2<<30)|(1L<<28);
+    bits |= (r3<<20)|(r1<<6)|_p;
+    inst(bits, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 5 and the constant 3
+ * at bit 34.  Fields: ct (bit 27, 6 bits), r3 (bit 20), r2 (bit 13),
+ * r1 (bit 6) and the predicate _p in the low 6 bits. */
+static void
+_I10(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ct, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         bits;
+    assert(!(_p & ~0x3fL));
+    assert(!(ct & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    TSTREG2(r2, r3);
+    bits  = (5L<<37)|(3L<<34)|(ct<<27);
+    bits |= (r3<<20)|(r2<<13)|(r1<<6)|_p;
+    inst(bits, INST_I);
+    SETREG(r1);
+}
+
+/* Queue an INST_I instruction with major opcode 5 and bit 34 set.
+ * Fields: len (bit 27, 6 bits), r3 (bit 20), pos (bit 14, 6 bits), y
+ * (bit 13), r1 (bit 6) and the predicate _p in the low 6 bits. */
+static void
+_I11(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t len, jit_word_t r3,
+     jit_word_t pos, jit_word_t y, jit_word_t r1)
+{
+    assert(!(_p  & ~0x3fL));
+    assert(!(len & ~0x3fL));
+    assert(!(r3  & ~0x7fL));
+    /* pos occupies bits 14-19, so bit positions up to 63 are valid */
+    assert(!(pos & ~0x3fL));
+    assert(!(y   &  ~0x1L));
+    assert(!(r1  & ~0x7fL));
+    TSTREG1(r3);
+    inst((5L<<37)|(1L<<34)|(len<<27)|(r3<<20)|
+        (pos<<14)|(y<<13)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I12 layout: 5 at bit 37, constant 1 at bits
+ * 34 and 33, len at 27, pos at 20, r2 at 13, r1 at 6, predicate _p in
+ * bits 0-5.  r2 is checked with TSTREG1(); r1 is passed to SETREG().
+ */
+static void
+_I12(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t len, jit_word_t pos, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p  & ~0x3fL));
+    assert(!(len & ~0x3fL));
+    assert(!(pos & ~0x3fL));
+    assert(!(r2  & ~0x7fL));
+    assert(!(r1  & ~0x7fL));
+    word  = (5L << 37) | (1L << 34) | (1L << 33) | (len << 27);
+    word |= (pos << 20) | (r2 << 13) | (r1 << 6) | _p;
+    TSTREG1(r2);
+    inst(word, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I13 layout: 5 at bit 37, bit 7 of im at 36,
+ * constant 1 at bits 34, 33 and 26, len at 27, pos at 20, the low seven
+ * bits of im at 13, r1 at 6, predicate _p in bits 0-5.
+ * No register is checked before emission; r1 is passed to SETREG().
+ */
+static void
+_I13(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t len, jit_word_t pos, jit_word_t im, jit_word_t r1)
+{
+    assert(!(_p  & ~0x3fL));
+    assert(!(len & ~0x3fL));
+    assert(!(pos & ~0x3fL));
+    /* the encoder consumes eight bits of im (bit 7 goes to bit 36), so
+     * the range check has to admit all eight, as _I27 does */
+    assert(!(im  & ~0xffL));
+    assert(!(r1  & ~0x7fL));
+    inst((5L<<37)|(((im>>7)&1L)<<36)|(1L<<34)|(1L<<33)|(len<<27)|
+        (1L<<26)|(pos<<20)|((im&0x7fL)<<13)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I14 layout: 5 at bit 37, s at 36, 3 at 34,
+ * constant 1 at 33, len at 27, r3 at 20, pos at 14, r1 at 6, predicate _p
+ * in bits 0-5.  r3 is checked with TSTREG1(); r1 is passed to SETREG().
+ */
+static void
+_I14(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t s, jit_word_t len, jit_word_t r3, jit_word_t pos,
+     jit_word_t r1)
+{
+    assert(!(_p  & ~0x3fL));
+    assert(!(s   &  ~0x1L));
+    assert(!(len & ~0x3fL));
+    assert(!(r3  & ~0x7fL));
+    /* pos sits in bits 14-19 (six bits); a five-bit check would reject
+     * positions in the upper half of a 64-bit register */
+    assert(!(pos & ~0x3fL));
+    assert(!(r1  & ~0x7fL));
+    TSTREG1(r3);
+    inst((5L<<37)|(s<<36)|(3L<<34)|(1L<<33)|
+        (len<<27)|(r3<<20)|(pos<<14)|(r1<<6)|_p, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I15 layout: 4 at bit 37, the 6-bit pos at
+ * 31, the 4-bit len at 27, r3 at 20, r2 at 13, r1 at 6, predicate _p in
+ * bits 0-5.  r2/r3 are checked with TSTREG2(); r1 is passed to SETREG().
+ */
+static void
+_I15(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t pos, jit_word_t len,
+     jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p  & ~0x3fL));
+    assert(!(pos & ~0x3fL));
+    assert(!(len &  ~0xfL));
+    assert(!(r3  & ~0x7fL));
+    assert(!(r2  & ~0x7fL));
+    assert(!(r1  & ~0x7fL));
+    word  = (4L << 37) | (pos << 31) | (len << 27);
+    word |= (r3 << 20) | (r2 << 13) | (r1 << 6) | _p;
+    TSTREG2(r2, r3);
+    inst(word, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I16 layout: 5 at bit 37, tb at 36, ta at 33,
+ * p2 at 27, r3 at 20, ps at 14, c at 12, p1 at 6, predicate _p in bits
+ * 0-5.  r3 is checked with TSTREG1(); only predicates are written.
+ * NOTE(review): p2 is range-checked against 7 bits but only bits 27-32
+ * are free before ta at bit 33 -- confirm whether ~0x3fL was intended.
+ */
+static void
+_I16(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t tb, jit_word_t ta, jit_word_t p2,
+     jit_word_t r3, jit_word_t ps, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(tb &  ~0x1L));
+    assert(!(ta &  ~0x1L));
+    assert(!(p2 & ~0x7fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(ps & ~0x3fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    TSTREG1(r3);
+    /* the qualifying predicate has to be part of the word, exactly as
+     * in _I17 */
+    inst((5L<<37)|(tb<<36)|(ta<<33)|(p2<<27)|
+        (r3<<20)|(ps<<14)|(c<<12)|(p1<<6)|_p, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I17 layout: 5 at bit 37, tb at 36, ta at 33,
+ * p2 at 27, r3 at 20, constant 1 at 13, c at 12, p1 at 6, predicate _p in
+ * bits 0-5.  r3 is checked with TSTREG1().
+ */
+static void
+_I17(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t tb, jit_word_t ta, jit_word_t p2,
+     jit_word_t r3, jit_word_t c, jit_word_t p1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(tb &  ~0x1L));
+    assert(!(ta &  ~0x1L));
+    assert(!(p2 & ~0x7fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(c  &  ~0x1L));
+    assert(!(p1 & ~0x3fL));
+    word  = (5L << 37) | (tb << 36) | (ta << 33) | (p2 << 27);
+    word |= (r3 << 20) | (1L << 13) | (c << 12) | (p1 << 6) | _p;
+    TSTREG1(r3);
+    inst(word, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I18 layout: bit 20 of im at bit 36, constant
+ * 1 at bit 27, y at 26, the low 20 bits of im at bits 6-25, predicate _p
+ * in bits 0-5.  This is the I-unit twin of _M48 and uses the same field
+ * placement.
+ */
+static void
+_I18(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im, jit_word_t y)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(im & ~0x1fffffL));
+    assert(!(y  &      ~0x1L));
+    /* no register referenced */
+    /* the top immediate bit belongs at bit 36 (bit 26 is the y field) and
+     * the low part is 20 bits wide, not 16 */
+    inst((((im>>20)&1L)<<36)|(1L<<27)|(y<<26)|((im&0xfffffL)<<6)|_p, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I19 layout: bit 20 of the 21-bit immediate
+ * im at bit 36, its low 20 bits at bits 6-25, predicate _p in bits 0-5.
+ */
+static void
+_I19(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(im & ~0x1fffffL));
+    /* no register referenced */
+    /* the top immediate bit must be shifted to bit 36; left unshifted it
+     * would be OR-ed into the predicate field.  The low part is 20 bits. */
+    inst((((im>>20)&1L)<<36)|((im&0xfffffL)<<6)|_p, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I20 layout: bit 20 of im at bit 36, constant
+ * 1 at bit 33, bits 7-19 of im at bit 20, r2 at 13, the low seven bits of
+ * im at 6, predicate _p in bits 0-5.  Same immediate split as _M20x.
+ * r2 is checked with TSTREG1().
+ */
+static void
+_I20(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t r2, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(r2 &     ~0x7fL));
+    assert(!(im & ~0x1fffffL));
+    TSTREG1(r2);
+    /* the top immediate bit goes to bit 36; unshifted it would corrupt
+     * the predicate field */
+    inst((((im>>20)&1L)<<36)|(1L<<33)|(((im>>7)&0x1fffL)<<20)|
+        (r2<<13)|((im&0x7fL)<<6)|_p, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I21 layout: 7 at bit 33, the 9-bit im at 24,
+ * ih at 23, x at 22, wh at 20, r2 at 13, b1 at 6, predicate _p in bits
+ * 0-5.  r2 is checked with TSTREG1().
+ */
+static void
+_I21(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im, jit_word_t ih, jit_word_t x,
+     jit_word_t wh, jit_word_t r2, jit_word_t b1)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(im &    ~0x1ffL));
+    assert(!(ih &      ~0x1L));
+    assert(!(x  &      ~0x1L));
+    assert(!(wh &      ~0x3L));
+    assert(!(r2 &     ~0x7fL));
+    assert(!(b1 &      ~0x7L));
+    TSTREG1(r2);
+    /* _p is validated above and must be encoded like in the sibling
+     * encoders */
+    inst((7L<<33)|(im<<24)|(ih<<23)|(x<<22)|(wh<<20)|
+        (r2<<13)|(b1<<6)|_p, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I22 layout: 0x31 at bit 27, b2 at 13, r1 at
+ * 6, predicate _p in bits 0-5.
+ */
+static void
+_I22(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t b2, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(b2 &  ~0x7L));
+    assert(!(r1 & ~0x7fL));
+    inst((0x31L<<27)|(b2<<13)|(r1<<6)|_p, INST_I);
+    /* r1 is encoded in the same field as in _I25/_I28, which both pass it
+     * to SETREG(); do the same here so the register is tracked alike */
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I23 layout: bit 15 of im at bit 36, 3 at 33,
+ * bits 7-14 of im at bit 24, r2 at 13, the low seven bits of im at bit 6,
+ * predicate _p in bits 0-5.  r2 is checked with TSTREG1().
+ */
+static void
+_I23(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t r2, jit_word_t im)
+{
+    assert(!(_p &   ~0x3fL));
+    assert(!(r2 &   ~0x7fL));
+    assert(!(im & ~0xffffL));
+    TSTREG1(r2);
+    /* the low seven immediate bits live at bits 6-12; without the shift
+     * they would overlap the predicate field */
+    inst((((im>>15)&1L)<<36)|(3L<<33)|(((im>>7)&0xffL)<<24)|
+        (r2<<13)|((im&0x7fL)<<6)|_p, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I24 layout: bit 27 of the 28-bit immediate
+ * im at bit 36, 2 at bit 33, the low 27 bits of im at bit 6, predicate _p
+ * in bits 0-5.
+ */
+static void
+_I24(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im)
+{
+    jit_uint8_t                unit = INST_I;
+    jit_word_t         word;
+    assert(!(_p &      ~0x3fL));
+    assert(!(im & ~0xfffffffL));
+    /* no register referenced */
+    word  = ((im >> 27) & 1L) << 36;
+    word |= (2L << 33) | ((im & 0x7ffffffL) << 6) | _p;
+    inst(word, unit);
+}
+
+/*
+ * Emit one I-unit word in the I25 layout: x6 at bit 27, r1 at 6, predicate
+ * _p in bits 0-5; r1 is then passed to SETREG().
+ */
+static void
+_I25(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r1 & ~0x7fL));
+    word = (x6 << 27) | (r1 << 6) | _p;
+    inst(word, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I26 layout: 0x2a at bit 27, ar at 20, r2 at
+ * 13, predicate _p in bits 0-5.  r2 is checked with TSTREG1().
+ */
+static void
+_I26(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar,jit_word_t r2)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    word = (0x2aL << 27) | (ar << 20) | (r2 << 13) | _p;
+    TSTREG1(r2);
+    inst(word, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I27 layout: bit 7 of the 8-bit immediate im
+ * at bit 36, 0xa at bit 27, ar at 20, the low seven bits of im at 13,
+ * predicate _p in bits 0-5.
+ */
+static void
+_I27(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar,jit_word_t im)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(im & ~0xffL));
+    /* no register referenced */
+    word  = ((im >> 7) & 1L) << 36;
+    word |= (0xaL << 27) | (ar << 20) | ((im & 0x7fL) << 13) | _p;
+    inst(word, INST_I);
+}
+
+/*
+ * Emit one I-unit word in the I28 layout: 0x32 at bit 27, ar at 20, r1 at
+ * 6, predicate _p in bits 0-5; r1 is then passed to SETREG().
+ */
+static void
+_I28(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(ar & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    word = (0x32L << 27) | (ar << 20) | (r1 << 6) | _p;
+    inst(word, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I29 layout: x6 at bit 27, r3 at 20, r1 at 6,
+ * predicate _p in bits 0-5.  r3 is checked with TSTREG1(); r1 is passed
+ * to SETREG().
+ */
+static void
+_I29(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3,jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    word = (x6 << 27) | (r3 << 20) | (r1 << 6) | _p;
+    TSTREG1(r3);
+    inst(word, INST_I);
+    SETREG(r1);
+}
+
+/*
+ * Emit one I-unit word in the I30 layout: 5 at bit 37, tb at 36, ta at 33,
+ * p2 at 27, constant 1 at bits 19 and 13, the 5-bit im at 14, c at 12,
+ * p1 at 6, predicate _p in bits 0-5.
+ */
+static void
+_I30(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ta, jit_word_t tb, jit_word_t p2,
+     jit_word_t im, jit_word_t c, jit_word_t p1)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(ta &  ~0x1L));
+    assert(!(tb &  ~0x1L));
+    assert(!(p2 & ~0x3fL));
+    assert(!(im & ~0x1fL));
+    assert(!(c  &  ~0x1L));
+    /* p1 uses the same 6-bit field at bit 6 as in _I16/_I17 */
+    assert(!(p1 & ~0x3fL));
+    /* no register referenced (only predicates) */
+    /* p2 must be encoded (bits 27-32, as in _I16/_I17); otherwise the
+     * second predicate target is silently always 0 */
+    inst((5L<<37)|(tb<<36)|(ta<<33)|(p2<<27)|(1L<<19)|(im<<14)|
+        (1L<<13)|(c<<12)|(p1<<6)|_p, INST_I);
+}
+
+/*
+ * Emit one M-unit word in the M1 layout: 4 at bit 37, x6 at 30, ht at 28,
+ * x at 27, r3 at 20, r1 at 6, predicate _p in bits 0-5.
+ * r3 is checked with TSTREG1(); r1 is passed to SETREG().
+ */
+static void
+_M1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t x, jit_word_t r3, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(x  &  ~0x1L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    word  = (4L << 37) | (x6 << 30) | (ht << 28) | (x << 27);
+    word |= (r3 << 20) | (r1 << 6) | _p;
+    TSTREG1(r3);
+    inst(word, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M2 layout: 4 at bit 37, constant 1 at 36,
+ * x6 at 30, ht at 28, r3 at 20, r2 at 13, r1 at 6, predicate _p in bits
+ * 0-5.  r2/r3 are checked with TSTREG2(); both r1 and r3 are passed to
+ * SETREG() afterwards.
+ */
+static void
+_M2(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    word  = (4L << 37) | (1L << 36) | (x6 << 30) | (ht << 28);
+    word |= (r3 << 20) | (r2 << 13) | (r1 << 6) | _p;
+    TSTREG2(r2, r3);
+    inst(word, INST_M);
+    SETREG(r1);
+    SETREG(r3);
+}
+
+/*
+ * Emit one M-unit word in the M3 layout: 5 at bit 37, bit 8 of im at 36,
+ * x6 at 30, ht at 28, bit 7 of im at 27, r3 at 20, the low seven bits of
+ * im at 13, r1 at 6, predicate _p in bits 0-5.  im is a signed 9-bit
+ * value.  r3 is checked with TSTREG1(); r1 and r3 are passed to SETREG().
+ */
+static void
+_M3(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_uint8_t                cc = INST_M;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    /* nine encoded bits cover -256..255 inclusive */
+    assert(im >= -256 && im <= 255);
+    assert(!(r1 &  ~0x7fL));
+    TSTREG1(r3);
+    inst((5L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28)|
+        (((im>>7)&1L)<<27)|(r3<<20)|((im&0x7fL)<<13)|(r1<<6)|_p, cc);
+    SETREG(r1);
+    SETREG(r3);
+}
+
+/*
+ * Emit one M-unit word in the M5 layout: 5 at bit 37, bit 8 of im at 36,
+ * x6 at 30, ht at 28, bit 7 of im at 27, r3 at 20, r2 at 13, the low seven
+ * bits of im at 6, predicate _p in bits 0-5.  im is a signed 9-bit value.
+ * r2/r3 are checked with TSTREG2(); r3 is passed to SETREG().
+ */
+static void
+_M5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2, jit_word_t im)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r2 &  ~0x7fL));
+    /* nine encoded bits cover -256..255 inclusive */
+    assert(im >= -256 && im <= 255);
+    TSTREG2(r2, r3);
+    inst((5L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28)|
+        (((im>>7)&1L)<<27)|(r3<<20)|(r2<<13)|((im&0x7fL)<<6)|_p, INST_M);
+    SETREG(r3);
+}
+
+/*
+ * Emit one M-unit word laid out as: 4 at bit 37, x6 at 30, ht at 28, x at
+ * 27, r3 at 20, r2 at 13, predicate _p in bits 0-5.
+ * r2/r3 are checked with TSTREG2(); nothing is passed to SETREG().
+ */
+static void
+_M6(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t x, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(x  &  ~0x1L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    word  = (4L << 37) | (x6 << 30) | (ht << 28) | (x << 27);
+    word |= (r3 << 20) | (r2 << 13) | _p;
+    TSTREG2(r2, r3);
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word laid out as: 6 at bit 37, x6 at 30, ht at 28, r3 at
+ * 20, r2 at 13, predicate _p in bits 0-5.  r3 is checked with TSTREG1();
+ * r2 is checked with TSTFREG1() only when it is non-zero.
+ */
+static void
+_M13(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    word = (6L << 37) | (x6 << 30) | (ht << 28) | (r3 << 20) | (r2 << 13) | _p;
+    TSTREG1(r3);
+    if (r2 != 0) {
+       TSTFREG1(r2);
+    }
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M14 layout: 6 at bit 37, constant 1 at 36,
+ * x6 at 30, ht at 28, r3 at 20, r2 at 13, predicate _p in bits 0-5.
+ * r2/r3 are checked with TSTREG2().
+ */
+static void
+_M14(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    word  = (6L << 37) | (1L << 36) | (x6 << 30) | (ht << 28);
+    word |= (r3 << 20) | (r2 << 13) | _p;
+    TSTREG2(r2, r3);
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M15 layout: 7 at bit 37, bit 8 of im at 36,
+ * x6 at 30, ht at 28, bit 7 of im at 27, r3 at 20, the low seven bits of
+ * im at 13, predicate _p in bits 0-5.  im is checked as an unsigned 9-bit
+ * value here.  r3 is checked with TSTREG1().
+ */
+static void
+_M15(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t im)
+{
+    jit_word_t         word;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(im & ~0x1ffL));
+    word  = (7L << 37) | (((im >> 8) & 1L) << 36) | (x6 << 30) | (ht << 28);
+    word |= (((im >> 7) & 1L) << 27) | (r3 << 20) | ((im & 0x7fL) << 13) | _p;
+    TSTREG1(r3);
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M16 layout: 4 at bit 37, x6 at 30, ht at 28,
+ * constant 1 at 27, r3 at 20, r2 at 13, r1 at 6, predicate _p in bits 0-5.
+ * r2/r3 are checked with TSTREG2(); r1 is passed to SETREG().
+ */
+static void
+_M16(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    word  = (4L << 37) | (x6 << 30) | (ht << 28) | (1L << 27);
+    word |= (r3 << 20) | (r2 << 13) | (r1 << 6) | _p;
+    TSTREG2(r2, r3);
+    inst(word, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M17 layout: 4 at bit 37, x6 at 30, ht at 28,
+ * constant 1 at 27, r3 at 20, the 3-bit im at 13, r1 at 6, predicate _p
+ * in bits 0-5.  r3 is checked with TSTREG1(); r1 is passed to SETREG().
+ */
+static void
+_M17(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(ht &  ~0x3L));
+    assert(!(r3 & ~0x7fL));
+    assert(!(im &  ~0x7L));
+    assert(!(r1 & ~0x7fL));
+    word  = (4L << 37) | (x6 << 30) | (ht << 28) | (1L << 27);
+    word |= (r3 << 20) | (im << 13) | (r1 << 6) | _p;
+    TSTREG1(r3);
+    inst(word, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word laid out as: 1 at bit 37, bit 20 of im at 36, x3 at
+ * 33, bits 7-19 of im at 20, r2 at 13, the low seven bits of im at 6,
+ * predicate _p in bits 0-5.  r2 is checked with TSTREG1() when x3 is 1
+ * and with TSTFREG1() for every other x3.
+ */
+static void
+_M20x(jit_state_t *_jit, jit_word_t _p,
+      jit_word_t x3, jit_word_t r2, jit_word_t im)
+{
+    jit_word_t         word;
+    assert(!(_p &     ~0x3fL));
+    assert(!(x3 &      ~0x7L));
+    assert(!(r2 &     ~0x7fL));
+    assert(!(im & ~0x1fffffL));
+    word  = (1L << 37) | (((im >> 20) & 1L) << 36) | (x3 << 33);
+    word |= (((im >> 7) & 0x1fffL) << 20) | (r2 << 13);
+    word |= ((im & 0x7fL) << 6) | _p;
+    if (x3 != 1) {
+       TSTFREG1(r2);
+    }
+    else {
+       TSTREG1(r2);
+    }
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word laid out as: bit 20 of im at bit 36, x3 at 33, the
+ * low 20 bits of im at bits 13-32, r1 at 6, predicate _p in bits 0-5.
+ * After emission r1 is passed to SETREG() when x3 < 6 and to SETFREG()
+ * otherwise.
+ */
+static void
+_M22x(jit_state_t *_jit, jit_word_t _p,
+      jit_word_t x3, jit_word_t im, jit_word_t r1)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(x3 &      ~0x7L));
+    assert(!(im & ~0x1fffffL));
+    assert(!(r1 &     ~0x7fL));
+    /* 21 bits are accepted: bit 20 goes to bit 36, so the remaining part
+     * is 20 bits wide (0xfffff); a 16-bit mask would drop bits 16-19 */
+    inst((((im>>20)&1L)<<36)|(x3<<33)|((im&0xfffffL)<<13)|(r1<<6)|_p, INST_M);
+    if (x3 < 6)
+       SETREG(r1);
+    else
+       SETFREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M24 layout: x2 at bit 31, x4 at 27,
+ * predicate _p in bits 0-5.
+ */
+static void
+_M24(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x2, jit_word_t x4)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x2 &  ~0x3L));
+    assert(!(x4 &  ~0xfL));
+    /* no registers referenced */
+    word = (x2 << 31) | (x4 << 27) | _p;
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word laid out as: constant 1 at bit 31, x4 at 27, r1 at
+ * 6, predicate _p in bits 0-5.  r1 is checked with TSTREG1() when x4 is 2
+ * and with TSTFREG1() otherwise.
+ */
+static void
+_M26x(jit_state_t *_jit, jit_word_t _p,
+      jit_word_t x4, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x4 &  ~0xfL));
+    assert(!(r1 & ~0x7fL));
+    word = (1L << 31) | (x4 << 27) | (r1 << 6) | _p;
+    if (x4 != 2) {
+       TSTFREG1(r1);
+    }
+    else {
+       TSTREG1(r1);
+    }
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M28 layout: constant 1 at bit 37, x at 36,
+ * 0x30 at bit 27, r3 at 20, predicate _p in bits 0-5.
+ * r3 is checked with TSTREG1().
+ */
+static void
+_M28(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x, jit_word_t r3)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x  &  ~0x1L));
+    assert(!(r3 & ~0x7fL));
+    word = (1L << 37) | (x << 36) | (0x30L << 27) | (r3 << 20) | _p;
+    TSTREG1(r3);
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M29 layout: constant 1 at bit 37, 0x2a at
+ * bit 27, ar at 20, r2 at 13, predicate _p in bits 0-5.
+ * r2 is checked with TSTREG1().
+ */
+static void
+_M29(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar, jit_word_t r2)
+{
+    assert(!(_p & ~0x3fL));
+    /* ar fills the 7-bit field at bits 20-26, exactly as in the I-unit
+     * variant _I26; a 3-bit check would reject every register above 7 */
+    assert(!(ar & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG1(r2);
+    inst((1L<<37)|(0x2aL<<27)|(ar<<20)|(r2<<13)|_p, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M30 layout: bit 7 of the 8-bit immediate im
+ * at bit 36, 2 at bit 31, 0x8 at bit 27, ar at 20, the low seven bits of
+ * im at 13, predicate _p in bits 0-5.
+ */
+static void
+_M30(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar, jit_word_t im)
+{
+    assert(!(_p & ~0x3fL));
+    /* ar fills the 7-bit field at bits 20-26 (compare _I27) */
+    assert(!(ar & ~0x7fL));
+    assert(!(im & ~0xffL));
+    /* no registers referenced (only "application registers") */
+    inst((((im>>7)&1L)<<36)|(2L<<31)|(0x8L<<27)|
+        (ar<<20)|((im&0x7fL)<<13)|_p, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M31 layout: constant 1 at bit 37, 0x22 at
+ * bit 27, ar at 20, r1 at 6, predicate _p in bits 0-5; r1 is then passed
+ * to SETREG().
+ */
+static void
+_M31(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t ar, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    /* ar fills the 7-bit field at bits 20-26 (compare _I28) */
+    assert(!(ar & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    inst((1L<<37)|(0x22L<<27)|(ar<<20)|(r1<<6)|_p, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M32 layout: constant 1 at bit 37, 0x2c at
+ * bit 27, cr at 20, r2 at 13, predicate _p in bits 0-5.
+ * r2 is checked with TSTREG1().
+ */
+static void
+_M32(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t cr, jit_word_t r2)
+{
+    assert(!(_p & ~0x3fL));
+    /* cr fills the 7-bit field at bits 20-26; a 3-bit check would reject
+     * every register number above 7 */
+    assert(!(cr & ~0x7fL));
+    assert(!(r2 & ~0x7fL));
+    TSTREG1(r2);
+    inst((1L<<37)|(0x2cL<<27)|(cr<<20)|(r2<<13)|_p, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M33 layout: constant 1 at bit 37, 0x24 at
+ * bit 27, cr at 20, r1 at 6, predicate _p in bits 0-5; r1 is then passed
+ * to SETREG().
+ */
+static void
+_M33(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t cr, jit_word_t r1)
+{
+    assert(!(_p & ~0x3fL));
+    /* cr fills the 7-bit field at bits 20-26 */
+    assert(!(cr & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    inst((1L<<37)|(0x24L<<27)|(cr<<20)|(r1<<6)|_p, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M34 layout: constant 1 at bit 37, 6 at bit
+ * 33, sor at 27, sol at 20, sof at 13, r1 at 6, predicate _p in bits 0-5.
+ * No TSTREG/SETREG bookkeeping is done for this word.
+ */
+static void
+_M34(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t sor, jit_word_t sol, jit_word_t sof, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p  & ~0x3fL));
+    assert(!(sor &  ~0xfL));
+    assert(!(sol & ~0x7fL));
+    assert(!(sof & ~0x7fL));
+    assert(!(r1  & ~0x7fL));
+    word  = (1L << 37) | (6L << 33) | (sor << 27);
+    word |= (sol << 20) | (sof << 13) | (r1 << 6) | _p;
+    /* specification says changes are immediate, no need to "stop" */
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M35 layout: constant 1 at bit 37, x6 at 27,
+ * r2 at 13, predicate _p in bits 0-5.  r2 is checked with TSTREG1().
+ */
+static void
+_M35(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r2)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r2 & ~0x7fL));
+    word = (1L << 37) | (x6 << 27) | (r2 << 13) | _p;
+    TSTREG1(r2);
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M36 layout: constant 1 at bit 37, x6 at 27,
+ * r1 at 6, predicate _p in bits 0-5; r1 is then passed to SETREG().
+ */
+static void
+_M36(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r1 & ~0x7fL));
+    word = (1L << 37) | (x6 << 27) | (r1 << 6) | _p;
+    inst(word, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M37 layout: bit 20 of the 21-bit immediate
+ * im at bit 36, its low 20 bits at bits 6-25, predicate _p in bits 0-5.
+ */
+static void
+_M37(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    /* the encoder reads bit 20 of im, so 21 bits have to be admitted and
+     * the low part masked with 20 bits (as _I19 validates) */
+    assert(!(im & ~0x1fffffL));
+    /* no registers referenced */
+    inst((((im>>20)&1L)<<36)|((im&0xfffffL)<<6)|_p, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M38 layout: constant 1 at bit 37, x6 at 27,
+ * r3 at 20, r2 at 13, r1 at 6, predicate _p in bits 0-5.
+ * r2/r3 are checked with TSTREG2(); r1 is passed to SETREG().
+ */
+static void
+_M38(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r2, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r2 &  ~0x7fL));
+    assert(!(r1 &  ~0x7fL));
+    word  = (1L << 37) | (x6 << 27);
+    word |= (r3 << 20) | (r2 << 13) | (r1 << 6) | _p;
+    TSTREG2(r2, r3);
+    inst(word, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M39 layout: constant 1 at bit 37, x6 at 27,
+ * r3 at 20, the 3-bit im at 13, r1 at 6, predicate _p in bits 0-5.
+ * r3 is checked with TSTREG1(); r1 is passed to SETREG().
+ */
+static void
+_M39(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t im, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(im &   ~0x7L));
+    assert(!(r1 &  ~0x7fL));
+    word  = (1L << 37) | (x6 << 27);
+    word |= (r3 << 20) | (im << 13) | (r1 << 6) | _p;
+    TSTREG1(r3);
+    inst(word, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M40 layout: constant 1 at bit 37, x6 at 27,
+ * r3 at 20, the 3-bit im at 13, predicate _p in bits 0-5.
+ * r3 is checked with TSTREG1().
+ */
+static void
+_M40(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t im)
+{
+    jit_word_t         word;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(im &   ~0x7L));
+    word = (1L << 37) | (x6 << 27) | (r3 << 20) | (im << 13) | _p;
+    TSTREG1(r3);
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M41 layout: constant 1 at bit 37, x6 at 27,
+ * r2 at 13, predicate _p in bits 0-5.  r2 is checked with TSTREG1().
+ */
+static void
+_M41(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r2)
+{
+    jit_word_t         word;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r2 &  ~0x7fL));
+    word = (1L << 37) | (x6 << 27) | (r2 << 13) | _p;
+    TSTREG1(r2);
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M42 layout: constant 1 at bit 37, x6 at 27,
+ * r3 at 20, r2 at 13, predicate _p in bits 0-5.
+ */
+static void
+_M42(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r2)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r2 &  ~0x7fL));
+    /* both encoded registers are checked, as _M45 does for the identical
+     * field layout; presumably r3 is read as an index -- confirm */
+    TSTREG2(r2, r3);
+    inst((1L<<37)|(x6<<27)|(r3<<20)|(r2<<13)|_p, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M43 layout: constant 1 at bit 37, x6 at 27,
+ * r3 at 20, r1 at 6, predicate _p in bits 0-5; r1 is then passed to
+ * SETREG().
+ */
+static void
+_M43(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r1)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r1 &  ~0x7fL));
+    /* r3 is encoded in the word, so check it like _M46 does for the same
+     * field layout */
+    TSTREG1(r3);
+    inst((1L<<37)|(x6<<27)|(r3<<20)|(r1<<6)|_p, INST_M);
+    SETREG(r1);
+}
+
+/*
+ * Emit one M-unit word in the M44 layout: bit 23 of the 24-bit immediate
+ * im at bit 36, bits 21-22 of im at bit 31, x4 at 27, the low 21 bits of
+ * im at bits 6-26, predicate _p in bits 0-5.
+ */
+static void
+_M44(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x4, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(x4 &      ~0xfL));
+    /* bits up to 23 are consumed below, so 24 bits must be admitted */
+    assert(!(im & ~0xffffffL));
+    /* no registers referenced */
+    /* the pieces taken from bit 21 upward leave a 21-bit low part */
+    inst((((im>>23)&1L)<<36)|(((im>>21)&3L)<<31)|
+        (x4<<27)|((im&0x1fffffL)<<6)|_p, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M45 layout: constant 1 at bit 37, x6 at 27,
+ * r3 at 20, r2 at 13, predicate _p in bits 0-5.
+ * r2/r3 are checked with TSTREG2().
+ */
+static void
+_M45(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r2)
+{
+    jit_word_t         word;
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(r2 &  ~0x7fL));
+    word = (1L << 37) | (x6 << 27) | (r3 << 20) | (r2 << 13) | _p;
+    TSTREG2(r2, r3);
+    inst(word, INST_M);
+}
+
+/*
+ * Emit one M-unit word in the M46 layout: constant 1 at bit 37, x6 at 27,
+ * r3 at 20, r1 at 6, predicate _p in bits 0-5.  r3 is checked with
+ * TSTREG1(); r1 is passed to SETREG() only when it is non-zero.
+ */
+static void
+_M46(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r3, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    assert(!(r3 & ~0x7fL));
+    assert(!(r1 & ~0x7fL));
+    word = (1L << 37) | (x6 << 27) | (r3 << 20) | (r1 << 6) | _p;
+    TSTREG1(r3);
+    inst(word, INST_M);
+    if (r1 != 0) {
+       SETREG(r1);
+    }
+}
+
+/*
+ * Emit one M-unit word in the M48 layout: bit 20 of the 21-bit immediate
+ * im at bit 36, constant 1 at bit 27, y at 26, the low 20 bits of im at
+ * bits 6-25, predicate _p in bits 0-5.
+ */
+static void
+_M48(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t y, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(y  &      ~0x1L));
+    /* bit 20 of im is consumed below, so 21 bits must be admitted and the
+     * low part masked with 20 bits */
+    assert(!(im & ~0x1fffffL));
+    /* no registers referenced */
+    inst((((im>>20)&1L)<<36)|(1L<<27)|(y<<26)|((im&0xfffffL)<<6)|_p, INST_M);
+}
+
+/*
+ * Emit one B-unit word in the B1 layout: 4 at bit 37, bit 20 of im at 36,
+ * d at 35, wh at 33, the low 20 bits of im at 13, p at 12, tp at 6,
+ * predicate _p in bits 0-5.  im is a signed 21-bit value.
+ */
+static void
+_B1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t d, jit_word_t wh, jit_word_t im, jit_word_t p, jit_word_t tp)
+{
+    assert(!(_p &    ~0x3fL));
+    assert(!(d  &     ~0x1L));
+    assert(!(wh &     ~0x3L));
+    /* 21 encoded bits cover -1048576..1048575 inclusive */
+    assert(im >= -1048576 && im <= 1048575);
+    assert(!(p  &     ~0x1L));
+    assert(!(tp &     ~0x7L));
+    /* no registers referenced */
+    inst((4L<<37)|(((im>>20)&1L)<<36)|(d<<35)|(wh<<33)|
+        ((im&0xfffffL)<<13)|(p<<12)|(tp<<6)|_p, INST_B);
+}
+
+/*
+ * Emit one B-unit word in the B3 layout: 5 at bit 37, bit 20 of im at 36,
+ * d at 35, wh at 33, the low 20 bits of im at 13, p at 12, b at 6,
+ * predicate _p in bits 0-5.  im is a signed 21-bit value.
+ */
+static void
+_B3(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t d, jit_word_t wh, jit_word_t im, jit_word_t p, jit_word_t b)
+{
+    assert(!(_p &    ~0x3fL));
+    assert(!(d  &     ~0x1L));
+    assert(!(wh &     ~0x3L));
+    /* 21 encoded bits cover -1048576..1048575 inclusive */
+    assert(im >= -1048576 && im <= 1048575);
+    assert(!(p  &     ~0x1L));
+    /* b is a 3-bit register number, validated like b1/b2 in _B5 */
+    assert(!(b  &     ~0x7L));
+    /* no registers referenced */
+    inst((5L<<37)|(((im>>20)&1L)<<36)|(d<<35)|(wh<<33)|
+        ((im&0xfffffL)<<13)|(p<<12)|(b<<6)|_p, INST_B);
+}
+
+/*
+ * Emit one B-unit word in the B4 layout: d at bit 35, wh at 33, x6 at 27,
+ * b at 13, p at 12, tp at 6, predicate _p in bits 0-5.
+ */
+static void
+_B4(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t d, jit_word_t wh, jit_word_t x6,
+    jit_word_t b, jit_word_t p, jit_word_t tp)
+{
+    assert(!(_p & ~0x3fL));
+    assert(!(d  &  ~0x1L));
+    assert(!(wh &  ~0x3L));
+    assert(!(x6 & ~0x3fL));
+    assert(!(b  &  ~0x7L));
+    assert(!(p  &  ~0x1L));
+    assert(!(tp &  ~0x7L));
+    /* no registers referenced */
+    /* d lives at bit 35 as in _B1/_B3/_B5; at bit 37 it would land in
+     * the field where the siblings place their leading constant */
+    inst((d<<35)|(wh<<33)|(x6<<27)|(b<<13)|(p<<12)|(tp<<6)|_p, INST_B);
+}
+
+/*
+ * Emit one B-unit word in the B5 layout: constant 1 at bit 37, d at 35,
+ * wh at 32, b2 at 13, p at 12, b1 at 6, predicate _p in bits 0-5.
+ */
+static void
+_B5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t d, jit_word_t wh, jit_word_t b2, jit_word_t p, jit_word_t b1)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(d  &  ~0x1L));
+    assert(!(wh &  ~0x3L));
+    assert(!(b2 &  ~0x7L));
+    assert(!(p  &  ~0x1L));
+    assert(!(b1 &  ~0x7L));
+    /* no registers referenced */
+    word  = (1L << 37) | (d << 35) | (wh << 32);
+    word |= (b2 << 13) | (p << 12) | (b1 << 6) | _p;
+    inst(word, INST_B);
+}
+
+/*
+ * Emit one B-unit word in the B6 layout: 7 at bit 37, bit 20 of im at 36,
+ * ih at 35, bits 7-8 of tag at 33, the low 20 bits of im at 13, the low
+ * seven bits of tag at 6, wh at 3, with _p OR-ed into the low bits.
+ */
+static void
+_B6(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t ih, jit_word_t im, jit_word_t tag, jit_word_t wh)
+{
+    assert(!(_p &      ~0x3fL));
+    assert(!(ih  &      ~0x1L));
+    /* 21 bits of im are encoded below (bit 20 plus a 20-bit field), so
+     * the check must admit all of them */
+    assert(!(im  & ~0x1fffffL));
+    assert(!(tag &    ~0x1ffL));
+    assert(!(wh  &      ~0x3L));
+    /* no registers referenced */
+    inst((7L<<37)|(((im>>20)&1L)<<36)|(ih<<35)|(((tag>>7)&3L)<<33)|
+        ((im&0xfffffL)<<13)|((tag&0x7fL)<<6)|(wh<<3)|_p, INST_B);
+}
+
+/*
+ * Emit one B-unit word in the B7 layout: 2 at bit 37, ih at 35, bits 7-8
+ * of tag at 33, x6 at 27, b2 at 13, the low seven bits of tag at 6, wh at
+ * 3, with _p OR-ed into the low bits.
+ */
+static void
+_B7(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t ih, jit_word_t x6, jit_word_t b2, jit_word_t tag, jit_word_t wh)
+{
+    jit_word_t         word;
+    assert(!(_p &   ~0x3fL));
+    assert(!(ih  &   ~0x1L));
+    assert(!(x6  &  ~0x3fL));
+    assert(!(b2  &   ~0x7L));
+    assert(!(tag & ~0x1ffL));
+    assert(!(wh  &   ~0x3L));
+    /* no registers referenced */
+    word  = (2L << 37) | (ih << 35) | (((tag >> 7) & 3L) << 33) | (x6 << 27);
+    word |= (b2 << 13) | ((tag & 0x7fL) << 6) | (wh << 3) | _p;
+    inst(word, INST_B);
+}
+
+/*
+ * Emit one B-unit word in the B8 layout: x6 at bit 27 and the predicate
+ * _p in bits 0-5.
+ */
+static void
+_B8(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6)
+{
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(x6 & ~0x3fL));
+    /* no registers referenced */
+    word = (x6 << 27) | _p;
+    inst(word, INST_B);
+}
+
+/*
+ * Emit one B-unit word in the B9 layout: op at bit 37, bit 20 of the
+ * 21-bit immediate im at 36, x6 at 27, the low 20 bits of im at bits
+ * 6-25, predicate _p in bits 0-5.
+ */
+static void
+_B9(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t op, jit_word_t x6, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(op &      ~0xfL));
+    assert(!(x6 &     ~0x3fL));
+    /* bit 20 of im is consumed below, so 21 bits must be admitted and the
+     * low part masked with 20 bits */
+    assert(!(im & ~0x1fffffL));
+    /* no registers referenced */
+    inst((op<<37)|(((im>>20)&1L)<<36)|(x6<<27)|((im&0xfffffL)<<6)|_p, INST_B);
+}
+
+/*
+ * Emit an L+X slot pair in the X1 layout.  The immediate is split into a
+ * 41-bit high part (emitted alone as the L slot), one middle bit placed
+ * at bit 36 of the X word and a 20-bit low part placed at bits 6-25,
+ * followed by the predicate _p in bits 0-5.
+ */
+static void
+_X1(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t im)
+{
+    jit_word_t         i41, i1, i20;
+    assert(!(_p &               ~0x3fL));
+    assert(im > -0x2000000000000000 && im < 0x1fffffffffffffff);
+    /* 41 + 1 + 20 contiguous bits: the low field covers bits 0-19, so the
+     * single bit is bit 20 and the high part starts at bit 21; starting
+     * them one bit later would silently drop bit 20 of the immediate */
+    i41 = (im >> 21) & 0x1ffffffffffL;
+    i1  = (im >> 20) &           0x1L;
+    i20 =  im        &       0xfffffL;
+    /* no registers referenced */
+    inst(i41, INST_L);
+    inst((i1<<36)|(i20<<6)|_p, INST_X);
+}
+
+/*
+ * Emit an L+X slot pair in the X2 layout carrying a 64-bit immediate.
+ * Bits 22-62 of im form the L slot.  The X word holds 6 at bit 37, bit 63
+ * of im at 36, bits 7-15 at 27, bits 16-20 at 22, bit 21 at 21, bits 0-6
+ * at 13, r1 at 6 and the predicate _p in bits 0-5.  r1 is passed to
+ * SETREG() afterwards.
+ */
+static void
+_X2(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t r1, jit_word_t im)
+{
+    jit_word_t         sign, high41, mid1, mid5, mid9, low7;
+    jit_word_t         word;
+    assert(!(_p & ~0x3fL));
+    assert(!(r1 & ~0x7fL));
+    sign   = (im >> 63) &           0x1L;
+    high41 = (im >> 22) & 0x1ffffffffffL;
+    mid1   = (im >> 21) &           0x1L;
+    mid5   = (im >> 16) &          0x1fL;
+    mid9   = (im >>  7) &         0x1ffL;
+    low7   =  im        &          0x7fL;
+    word  = (6L << 37) | (sign << 36) | (mid9 << 27) | (mid5 << 22);
+    word |= (mid1 << 21) | (low7 << 13) | (r1 << 6) | _p;
+    inst(high41, INST_L);
+    inst(word, INST_X);
+    SETREG(r1);
+}
+
+/*
+ * Emit an L+X slot pair for a long-displacement instruction.  The L slot
+ * receives a 41-bit slice of im; the X word holds op at bit 37, one
+ * immediate bit at 36, d at 35, wh at 33, a 20-bit immediate slice at 13,
+ * p at 12, tp at 6 and the predicate _p in bits 0-5.
+ * NOTE(review): the slices taken below (bit 61, bits 22-62, bits 0-19) do
+ * not match the i1<<59 | imm39<<20 | imm20b split described in the
+ * comment inside the body; confirm the intended layout before relying on
+ * this encoder.
+ */
+static void
+_X3x(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t op, jit_word_t d, jit_word_t wh,
+     jit_word_t p, jit_word_t tp, jit_word_t im)
+{
+    /* target64 = IP + ((i1 << 59 | imm39 << 20 | imm20b) << 4) */
+    jit_word_t         i1, i41, i20;
+    assert(!(_p &               ~0x3fL));
+    assert(!(op &                ~0xfL));
+    assert(!(d  &                ~0x1L));
+    assert(!(wh &                ~0x3L));
+    assert(!(p  &                ~0x1L));
+    assert(!(tp &                ~0x7L));
+    /* im itself is not range-checked */
+    i1  = (im >> 61) &           0x1L;
+    i41 = (im >> 22) & 0x1ffffffffffL;
+    i20 =  im        &       0xfffffL;
+    /* no registers referenced */
+    inst(i41, INST_L);
+    inst((op<<37)|(i1<<36)|(d<<35)|(wh<<33)|
+        (i20<<13)|(p<<12)|(tp<<6)|_p, INST_X);
+}
+
+/*
+ * Emit an L+X slot pair in the X5 layout.  The immediate is split into a
+ * 41-bit high part (the L slot), one middle bit at bit 36 of the X word
+ * and a 20-bit low part at bits 6-25; the X word also carries constant 1
+ * at bit 27, y at 26 and the predicate _p in bits 0-5.
+ */
+static void
+_X5(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t y, jit_word_t im)
+{
+    jit_word_t         i41, i1, i20;
+    assert(!(_p &               ~0x3fL));
+    assert(im > -0x2000000000000000 && im < 0x1fffffffffffffff);
+    /* 41 + 1 + 20 contiguous bits: the low field covers bits 0-19, so the
+     * single bit is bit 20 and the high part starts at bit 21; starting
+     * them one bit later would silently drop bit 20 of the immediate */
+    i41 = (im >> 21) & 0x1ffffffffffL;
+    i1  = (im >> 20) &           0x1L;
+    i20 =  im        &       0xfffffL;
+    /* no registers referenced */
+    inst(i41, INST_L);
+    inst((i1<<36)|(1L<<27)|(y<<26)|(i20<<6)|_p, INST_X);
+}
+
+/*
+ * Register-to-register move.  Destination numbers of 120 and above are
+ * rebased onto _jitc->rout before MOV() is issued.
+ */
+static void
+_movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                dst = r0;
+
+    /* patch pushargr */
+    if (dst >= 120)
+       dst = _jitc->rout + (dst - 120);
+
+    MOV(dst, r1);
+}
+
+/*
+ * Load the constant i0 into r0.  Destination numbers of 120 and above are
+ * rebased onto _jitc->rout first.  Values in [-2097152, 2097151) use
+ * MOVI(); everything else uses MOVL().
+ */
+static void
+_movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                dst = r0;
+
+    /* patch pushargi */
+    if (dst >= 120)
+       dst = _jitc->rout + (dst - 120);
+
+    if (i0 < -2097152 || i0 >= 2097151) {
+       MOVL(dst, i0);
+    }
+    else {
+       MOVI(dst, i0);
+    }
+}
+
+/*
+ * Load i0 into r0 with MOVL() after a sync(), and return the value
+ * _jit->pc.w had right after the sync(), i.e. where the MOVL begins.
+ */
+static jit_word_t
+_movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_word_t         patch_at;
+
+    sync();
+    patch_at = _jit->pc.w;
+    MOVL(r0, i0);
+
+    return (patch_at);
+}
+
+/*
+ * r0 = r1 + i0.  Uses ADDS() when i0 is in [-8192, 8191], ADDL() when r1
+ * is one of registers 0-3 and i0 is in [-2097152, 2097151), and otherwise
+ * loads i0 into a scratch register and calls addr().
+ * NOTE(review): ADDL() receives its register operands in the opposite
+ * order to ADDS() -- confirm against the macro definitions.
+ */
+static void
+_addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 >= -8192 && i0 <= 8191) {
+       ADDS(r0, i0, r1);
+       return;
+    }
+    if (!(r1 & ~3) && i0 >= -2097152 && i0 < 2097151) {
+       ADDL(r1, i0, r0);
+       return;
+    }
+    /* general case: materialize the constant in a scratch register */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    addr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 + r2, leaving the carry-out in the jit_carry register (which is
+ * allocated on first use).  The carry is computed as "sum < r1" unsigned,
+ * so when r0 aliases r1 the sum is built in a scratch register first.
+ */
+static void
+_addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                sum;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r0, r1);
+       return;
+    }
+    /* r0 == r1: keep r1 intact until the comparison is done */
+    sum = jit_get_reg(jit_class_gpr);
+    addr(rn(sum), r1, r2);
+    ltr_u(rn(jit_carry), rn(sum), r1);
+    movr(r0, rn(sum));
+    jit_unget_reg(sum);
+}
+
+/*
+ * r0 = r1 + i0, leaving the carry-out in the jit_carry register (which is
+ * allocated on first use).  The carry is computed as "sum < r1" unsigned,
+ * so when r0 aliases r1 the sum is built in a scratch register first.
+ */
+static void
+_addci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                sum;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       addi(r0, r1, i0);
+       ltr_u(rn(jit_carry), r0, r1);
+       return;
+    }
+    /* r0 == r1: keep r1 intact until the comparison is done */
+    sum = jit_get_reg(jit_class_gpr);
+    addi(rn(sum), r1, i0);
+    ltr_u(rn(jit_carry), rn(sum), r1);
+    movr(r0, rn(sum));
+    jit_unget_reg(sum);
+}
+
+/*
+ * r0 = r1 + r2 + incoming carry.  The current jit_carry value is copied to
+ * a scratch register, then two carry-producing additions are issued:
+ * r1 + r2 followed by the saved carry.  jit_carry must already exist.
+ */
+static void
+_addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                carry_in;
+
+    assert(jit_carry != _NOREG);
+    carry_in = jit_get_reg(jit_class_gpr);
+    /* save the incoming carry: addcr() overwrites jit_carry */
+    movr(rn(carry_in), rn(jit_carry));
+    addcr(r0, r1, r2);
+    addcr(r0, r0, rn(carry_in));
+    jit_unget_reg(carry_in);
+}
+
+/*
+ * r0 = r1 + i0 + incoming carry.  The current jit_carry value is copied to
+ * a scratch register, then r1 + i0 and the saved carry are added with the
+ * carry-producing helpers.  jit_carry must already exist.
+ */
+static void
+_addxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                carry_in;
+
+    assert(jit_carry != _NOREG);
+    carry_in = jit_get_reg(jit_class_gpr);
+    /* save the incoming carry: addci() overwrites jit_carry */
+    movr(rn(carry_in), rn(jit_carry));
+    addci(r0, r1, i0);
+    addcr(r0, r0, rn(carry_in));
+    jit_unget_reg(carry_in);
+}
+
+/*
+ * r0 = r1 - i0.
+ *
+ * Small constants are handled as an addition of the negated value through
+ * addi(); anything else is loaded into a scratch register and subtracted
+ * with subr().  The immediate form of the IA-64 "sub" instruction
+ * computes imm - reg, so it cannot express reg - imm and is not used.
+ */
+static void
+_subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* -i0 then lies in [-8192, 8191], addi()'s short immediate range */
+    if (i0 >= -8191 && i0 <= 8192)
+       addi(r0, r1, -i0);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       /* subtract, not add: this is the fallback of a subtraction */
+       subr(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/*
+ * r0 = r1 - r2, leaving the borrow in the jit_carry register (which is
+ * allocated on first use).  The borrow is computed as "r1 < difference"
+ * unsigned, so when r0 aliases r1 the difference is built in a scratch
+ * register first.
+ */
+static void
+_subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                diff;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       subr(r0, r1, r2);
+       ltr_u(rn(jit_carry), r1, r0);
+       return;
+    }
+    /* r0 == r1: keep r1 intact until the comparison is done */
+    diff = jit_get_reg(jit_class_gpr);
+    subr(rn(diff), r1, r2);
+    ltr_u(rn(jit_carry), r1, rn(diff));
+    movr(r0, rn(diff));
+    jit_unget_reg(diff);
+}
+
+/*
+ * r0 = r1 - i0, leaving the borrow in the jit_carry register (which is
+ * allocated on first use).  The borrow is computed as "r1 < difference"
+ * unsigned, so when r0 aliases r1 the difference is built in a scratch
+ * register first.
+ */
+static void
+_subci(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                diff;
+
+    if (jit_carry == _NOREG)
+       jit_carry = jit_get_reg(jit_class_gpr);
+    if (r0 != r1) {
+       subi(r0, r1, i0);
+       ltr_u(rn(jit_carry), r1, r0);
+       return;
+    }
+    /* r0 == r1: keep r1 intact until the comparison is done */
+    diff = jit_get_reg(jit_class_gpr);
+    subi(rn(diff), r1, i0);
+    ltr_u(rn(jit_carry), r1, rn(diff));
+    movr(r0, rn(diff));
+    jit_unget_reg(diff);
+}
+
+/*
+ * r0 = r1 - r2 - incoming borrow.  The current jit_carry value is copied
+ * to a scratch register, then r2 and the saved borrow are subtracted with
+ * the borrow-producing helper.  jit_carry must already exist.
+ */
+static void
+_subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                borrow_in;
+
+    assert(jit_carry != _NOREG);
+    borrow_in = jit_get_reg(jit_class_gpr);
+    /* save the incoming borrow: subcr() overwrites jit_carry */
+    movr(rn(borrow_in), rn(jit_carry));
+    subcr(r0, r1, r2);
+    subcr(r0, r0, rn(borrow_in));
+    jit_unget_reg(borrow_in);
+}
+
+/*
+ * r0 = r1 - i0 - incoming borrow.  The current jit_carry value is copied
+ * to a scratch register, then i0 and the saved borrow are subtracted with
+ * the borrow-producing helpers.  jit_carry must already exist.
+ */
+static void
+_subxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                borrow_in;
+
+    assert(jit_carry != _NOREG);
+    borrow_in = jit_get_reg(jit_class_gpr);
+    /* save the incoming borrow: subci() overwrites jit_carry */
+    movr(rn(borrow_in), rn(jit_carry));
+    subci(r0, r1, i0);
+    subcr(r0, r0, rn(borrow_in));
+    jit_unget_reg(borrow_in);
+}
+
+/*
+ * r0 = r1 * r2.  Both operands are transferred to two scratch
+ * floating-point registers with SETF_SIG(), multiplied with XMPY_L() and
+ * the result is transferred back with GETF_SIG().
+ */
+static void
+_mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                lhs, rhs;
+
+    lhs = jit_get_reg(jit_class_fpr);
+    rhs = jit_get_reg(jit_class_fpr);
+    SETF_SIG(rn(lhs), r1);
+    SETF_SIG(rn(rhs), r2);
+    /* the product replaces the left operand */
+    XMPY_L(rn(lhs), rn(lhs), rn(rhs));
+    GETF_SIG(r0, rn(lhs));
+    jit_unget_reg(lhs);
+    jit_unget_reg(rhs);
+}
+
+/*
+ * r0 = r1 * i0: loads i0 into a scratch general register and defers to
+ * mulr().
+ */
+static void
+_muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                factor;
+
+    factor = jit_get_reg(jit_class_gpr);
+    movi(rn(factor), i0);
+    mulr(r0, r1, rn(factor));
+    jit_unget_reg(factor);
+}
+
+/*
+ * Signed division through a call to __divdi3.  The displacement from the
+ * current position is computed in 16-byte units; a short call is used
+ * when it fits the short-branch immediate, a long call otherwise.
+ * Argument and result register handling is not emitted here (see the
+ * placeholder comments), so r0/r1/r2 are currently unused.
+ */
+static void
+_divr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         d;
+    /* @arg0 = r1, @arg1 = r2 */
+    sync();
+    d = ((jit_word_t)__divdi3 - _jit->pc.w) >> 4;
+    /* d is already scaled by 16, so the short form is limited by the
+     * 21-bit signed immediate accepted by _B3, not by the +/-16MiB byte
+     * range; the upper bound stays exclusive to match _B3's own check.
+     * The test must be a range test: "below min AND above max" can never
+     * be true. */
+    if (d >= -1048576 && d < 1048575)
+       BRI_CALL(0, d);
+    else
+       /* FIXME displacement likely wrong (in either case) */
+       BRL_CALL(0, d);
+    /* r0 = @ret */
+}
+
+/*
+ * r0 = r1 / i0 (signed, truncating toward zero).
+ *
+ * Divisors 1 and -1 become a move and a negation.  Positive powers of two
+ * are done with shifts, after adding (2^k - 1) to negative dividends so
+ * that the arithmetic shift truncates toward zero instead of rounding
+ * toward minus infinity (-7 / 2 must give -3, not -4).  Every other
+ * divisor is loaded into a scratch register and handed to divr().
+ * Assumes 64-bit registers and that rshi()/rshi_u() are the arithmetic
+ * and logical right shifts, as their use in divi()/divi_u() suggests.
+ */
+static void
+_divi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    jit_int32_t                shift;
+    switch (i0) {
+       case 1:
+           movr(r0, r1);
+           return;
+       case -1:
+           negr(r0, r1);
+           return;
+       default:
+           break;
+    }
+    reg = jit_get_reg(jit_class_gpr);
+    if (i0 > 0 && !(i0 & (i0 - 1))) {
+       /* i0 == 2^shift with 1 <= shift <= 62 */
+       shift = ffsl(i0) - 1;
+       /* reg = all ones when r1 is negative, zero otherwise */
+       rshi(rn(reg), r1, 63);
+       /* reg = 2^shift - 1 for negative r1, zero otherwise */
+       rshi_u(rn(reg), rn(reg), 64 - shift);
+       addr(rn(reg), r1, rn(reg));
+       rshi(r0, rn(reg), shift);
+    }
+    else {
+       movi(rn(reg), i0);
+       divr(r0, r1, rn(reg));
+    }
+    jit_unget_reg(reg);
+}
+
+/*
+ * Unsigned division through a call to __udivdi3.  The displacement from
+ * the current position is computed in 16-byte units; a short call is used
+ * when it fits the short-branch immediate, a long call otherwise.
+ * Argument and result register handling is not emitted here (see the
+ * placeholder comments), so r0/r1/r2 are currently unused.
+ */
+static void
+_divr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         d;
+    /* @arg0 = r1, @arg1 = r2 */
+    sync();
+    d = ((jit_word_t)__udivdi3 - _jit->pc.w) >> 4;
+    /* d is already scaled by 16, so the short form is limited by the
+     * 21-bit signed immediate accepted by _B3; the upper bound stays
+     * exclusive to match _B3's own check.  "below min AND above max" can
+     * never be true, so this has to be a range test. */
+    if (d >= -1048576 && d < 1048575)
+       BRI_CALL(0, d);
+    else
+       /* FIXME displacement likely wrong (in either case) */
+       BRL_CALL(0, d);
+    /* r0 = @ret */
+}
+
+/*
+ * r0 = r1 / i0 (unsigned).  A divisor of 1 is a plain move, a positive
+ * power of two is a move followed by rshi_u() by its bit index, and
+ * anything else is loaded into a scratch register for divr_u().
+ */
+static void
+_divi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                divisor;
+
+    if (i0 == 1) {
+       movr(r0, r1);
+       return;
+    }
+    if (i0 > 0 && !(i0 & (i0 - 1))) {
+       /* single bit set: divide by shifting */
+       movr(r0, r1);
+       rshi_u(r0, r0, ffsl(i0) - 1);
+       return;
+    }
+    divisor = jit_get_reg(jit_class_gpr);
+    movi(rn(divisor), i0);
+    divr_u(r0, r1, rn(divisor));
+    jit_unget_reg(divisor);
+}
+
+/*
+ * Signed remainder through a call to __moddi3.  The displacement from the
+ * current position is computed in 16-byte units; a short call is used
+ * when it fits the short-branch immediate, a long call otherwise.
+ * Argument and result register handling is not emitted here (see the
+ * placeholder comments), so r0/r1/r2 are currently unused.
+ */
+static void
+_remr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         d;
+    /* @arg0 = r1, @arg1 = r2 */
+    sync();
+    d = ((jit_word_t)__moddi3 - _jit->pc.w) >> 4;
+    /* d is already scaled by 16, so the short form is limited by the
+     * 21-bit signed immediate accepted by _B3; the upper bound stays
+     * exclusive to match _B3's own check.  "below min AND above max" can
+     * never be true, so this has to be a range test. */
+    if (d >= -1048576 && d < 1048575)
+       BRI_CALL(0, d);
+    else
+       /* FIXME displacement likely wrong (in either case) */
+       BRL_CALL(0, d);
+    /* r0 = @ret */
+}
+
+/* r0 = r1 % i0 (signed); for a divisor of 1 or -1 r0 is just set from GR_0. */
+static void
+_remi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 != 1 && i0 != -1) {
+       /* Generic path: load the divisor and use the register form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       remr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+       return;
+    }
+    MOV(r0, GR_0);
+}
+
+/*
+ * Emit a call to the libgcc helper __umoddi3 (unsigned remainder) for
+ * r0 = r1 % r2.
+ * FIXME (inherited): argument and result moves are still missing; only
+ * the call itself is emitted.
+ */
+static void
+_remr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_word_t         d;
+    /* @arg0 = r1, @arg1 = r2 */
+    sync();
+    /* Convert the helper address to an integer before subtracting; the
+     * cast used to wrap the whole difference, i.e. arithmetic on a
+     * function pointer, unlike the sibling helpers. */
+    d = ((jit_word_t)__umoddi3 - _jit->pc.w) >> 4;
+    /* Was "d < MIN && d > MAX", which can never hold. */
+    if (d >= -16777216 && d <= 16777215)
+       BRI_CALL(0, d);
+    else
+       /* FIXME displacement likely wrong (in either case) */
+       BRL_CALL(0, d);
+    /* r0 = @ret */
+}
+
+/* r0 = r1 % i0 (unsigned), with shortcuts for 1 and positive powers of two. */
+static void
+_remi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    /* Divisor 1: r0 is set from GR_0. */
+    if (i0 == 1) {
+       MOV(r0, GR_0);
+       return;
+    }
+    /* Positive power of two: keep only the bits below it. */
+    if (i0 > 0 && !(i0 & (i0 - 1))) {
+       movr(r0, r1);
+       andi(r0, r0, i0 - 1);
+       return;
+    }
+    /* General case: the divisor goes through a scratch register. */
+    tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    remr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/*
+ * Multiply r1 by r2 through two scratch floating-point registers and move
+ * the XMPY_H (sign != 0) or XMPY_HU (sign == 0) result into r0.
+ */
+static void
+_mulh(jit_state_t *_jit,
+      jit_int32_t r0, jit_int32_t r1, jit_int32_t r2, jit_bool_t sign)
+{
+    jit_int32_t                fa = jit_get_reg(jit_class_fpr);
+    jit_int32_t                fb = jit_get_reg(jit_class_fpr);
+
+    /* Transfer both integer operands to the floating-point side. */
+    SETF_SIG(rn(fa), r1);
+    SETF_SIG(rn(fb), r2);
+    if (!sign)
+       XMPY_HU(rn(fa), rn(fa), rn(fb));
+    else
+       XMPY_H(rn(fa), rn(fa), rn(fb));
+    /* Bring the result back to the integer register. */
+    GETF_SIG(r0, rn(fa));
+    jit_unget_reg(fa);
+    jit_unget_reg(fb);
+}
+
+/*
+ * Two-result multiply of r2 by r3: r0 receives the mulr() result and r1
+ * the mulh() result (signed or unsigned according to sign).
+ */
+static void
+_iqmulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{
+    jit_int32_t                reg;
+    /* When r0 aliases an operand, build the mulr() result in a scratch
+     * register so both operands are still intact for mulh(). */
+    if (r0 == r2 || r0 == r3) {
+       reg = jit_get_reg(jit_class_gpr);
+       mulr(rn(reg), r2, r3);
+    }
+    else
+       mulr(r0, r2, r3);
+    /* _mulh() takes (r0, r1, r2, sign); the call used to pass the sign
+     * flag first, shifting every argument by one position. */
+    mulh(r1, r2, r3, sign);
+    if (r0 == r2 || r0 == r3) {
+       movr(r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/* Immediate form of iqmulr(): i0 is loaded into a scratch register first. */
+static void
+_iqmuli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    iqmulr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+static void
+_iqdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_int32_t r3, jit_bool_t sign)
+{   /* r0 = r2 / r3 and r1 = r2 - r3 * r0, signed or unsigned per sign. */
+    jit_int32_t                sv0, rg0;       /* scratch handle / register used for the quotient */
+    jit_int32_t                sv1, rg1;       /* scratch handle / register used for the remainder */
+
+    if (r0 == r2 || r0 == r3) {        /* r0 aliases an operand: compute in a scratch register */
+       sv0 = jit_get_reg(jit_class_gpr);
+       rg0 = rn(sv0);
+    }
+    else
+       rg0 = r0;
+    if (r1 == r2 || r1 == r3) {        /* same precaution for r1 */
+       sv1 = jit_get_reg(jit_class_gpr);
+       rg1 = rn(sv1);
+    }
+    else
+       rg1 = r1;
+
+    if (sign)
+       divr(rg0, r2, r3);
+    else
+       divr_u(rg0, r2, r3);
+    mulr(rg1, r3, rg0);                /* rg1 = r3 * quotient */
+    subr(rg1, r2, rg1);                /* rg1 = r2 - r3 * quotient */
+    if (rg0 != r0) {                   /* a scratch register was used: copy out and release */
+       movr(r0, rg0);
+       jit_unget_reg(sv0);
+    }
+    if (rg1 != r1) {
+       movr(r1, rg1);
+       jit_unget_reg(sv1);
+    }
+}
+
+/* Immediate form of iqdivr(): i0 is loaded into a scratch register first. */
+static void
+_iqdivi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1,
+       jit_int32_t r2, jit_word_t i0, jit_bool_t sign)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    iqdivr(r0, r1, r2, rn(tmp), sign);
+    jit_unget_reg(tmp);
+}
+
+/*
+ * r0 = r1 & i0.  ANDI is used when i0 is within [-128, 127]; otherwise i0
+ * is loaded into a scratch register and andr() is used.
+ */
+static void
+_andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* A former "else if (~i0 >= -128 && ~i0 <= 127) ANDCMI(...)" branch was
+     * removed: since ~i0 == -i0 - 1, that test is identical to the one
+     * below and could never be reached. */
+    if (i0 >= -128 && i0 <= 127)
+       ANDI(r0, i0, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       andr(r0, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+}
+
+/* r0 = r1 | i0; ORI when i0 is within [-128, 127], otherwise via orr(). */
+static void
+_ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 < -128 || i0 > 127) {
+       /* Too wide for the immediate form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       orr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       ORI(r0, i0, r1);
+}
+
+/* r0 = r1 ^ i0; XORI when i0 is within [-128, 127], otherwise via xorr(). */
+static void
+_xori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 < -128 || i0 > 127) {
+       /* Too wide for the immediate form. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       xorr(r0, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       XORI(r0, i0, r1);
+}
+
+/* Immediate form of lshr(): the shift count i0 goes through a scratch register. */
+static void
+_lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                count = jit_get_reg(jit_class_gpr);
+
+    movi(rn(count), i0);
+    lshr(r0, r1, rn(count));
+    jit_unget_reg(count);
+}
+
+/* Immediate form of rshr(): the shift count i0 goes through a scratch register. */
+static void
+_rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                count = jit_get_reg(jit_class_gpr);
+
+    movi(rn(count), i0);
+    rshr(r0, r1, rn(count));
+    jit_unget_reg(count);
+}
+
+/* Immediate form of rshr_u(): the shift count i0 goes through a scratch register. */
+static void
+_rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                count = jit_get_reg(jit_class_gpr);
+
+    movi(rn(count), i0);
+    rshr_u(r0, r1, rn(count));
+    jit_unget_reg(count);
+}
+
+static void
+_ltr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = (r1 < r2), produced as 1 or GR_0 by two predicated moves. */
+    CMP_LT(PR_6, PR_7, r1, r2);        /* compare r1 with r2 into PR_6/PR_7 */
+    MOVI_p(r0, 1, PR_6);               /* under PR_6: r0 = 1 */
+    MOV_p(r0, GR_0, PR_7);             /* under PR_7: r0 = GR_0 */
+}
+
+/* r0 = (r1 < i0), signed, produced as 1 or GR_0. */
+static void
+_lti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* The immediate compare tests "imm8 < register", so the former
+     * CMPI_LT(PR_6, PR_7, i0, r1) set PR_6 for i0 < r1, the reverse of the
+     * register path below.  r1 < i0 is !(i0 - 1 < r1): swap the predicate
+     * targets and bias the immediate; i0 - 1 must fit in 8 signed bits. */
+    if (i0 >= -127 && i0 <= 128)
+       CMPI_LT(PR_7, PR_6, i0 - 1, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       CMP_LT(PR_6, PR_7, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+
+static void
+_ltr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = (r1 < r2) using the CMP_LTU compare, produced as 1 or GR_0. */
+    CMP_LTU(PR_6, PR_7, r1, r2);       /* compare r1 with r2 into PR_6/PR_7 */
+    MOVI_p(r0, 1, PR_6);               /* under PR_6: r0 = 1 */
+    MOV_p(r0, GR_0, PR_7);             /* under PR_7: r0 = GR_0 */
+}
+
+/* r0 = (r1 < i0), unsigned, produced as 1 or GR_0. */
+static void
+_lti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* The immediate compare tests "imm8 < register", so the former
+     * CMPI_LTU(PR_6, PR_7, i0, r1) computed the reverse relation.  Encode
+     * r1 < i0 as !(i0 - 1 < r1) with swapped predicate targets.  i0 == 0
+     * is excluded because i0 - 1 would wrap to the largest unsigned value;
+     * it takes the register path instead. */
+    if (i0 != 0 && i0 >= -127 && i0 <= 128)
+       CMPI_LTU(PR_7, PR_6, i0 - 1, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       CMP_LTU(PR_6, PR_7, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+
+static void
+_ler(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = !(r2 < r1), i.e. r1 <= r2, produced as 1 or GR_0. */
+    CMP_LT(PR_6, PR_7, r2, r1);        /* operands swapped: tests r2 < r1 */
+    MOV_p(r0, GR_0, PR_6);             /* under PR_6: r0 = GR_0 */
+    MOVI_p(r0, 1, PR_7);               /* under PR_7: r0 = 1 */
+}
+
+/* Immediate form of ler(): i0 is loaded into a scratch register first. */
+static void
+_lei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ler(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_ler_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = !(r2 < r1) using the CMP_LTU compare, produced as 1 or GR_0. */
+    CMP_LTU(PR_6, PR_7, r2, r1);       /* operands swapped: tests r2 < r1 */
+    MOV_p(r0, GR_0, PR_6);             /* under PR_6: r0 = GR_0 */
+    MOVI_p(r0, 1, PR_7);               /* under PR_7: r0 = 1 */
+}
+
+/* Immediate form of ler_u(): i0 is loaded into a scratch register first. */
+static void
+_lei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ler_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_eqr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = (r1 == r2), produced as 1 or GR_0 by two predicated moves. */
+    CMP_EQ(PR_6, PR_7, r1, r2);        /* compare r1 with r2 into PR_6/PR_7 */
+    MOVI_p(r0, 1, PR_6);               /* under PR_6: r0 = 1 */
+    MOV_p(r0, GR_0, PR_7);             /* under PR_7: r0 = GR_0 */
+}
+
+/* r0 = (r1 == i0), produced as 1 or GR_0. */
+static void
+_eqi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 < -128 || i0 > 127) {
+       /* Outside the immediate range: compare against a scratch register. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       CMP_EQ(PR_6, PR_7, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       CMPI_EQ(PR_6, PR_7, i0, r1);
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+
+static void
+_ger(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = !(r1 < r2), i.e. r1 >= r2, produced as 1 or GR_0. */
+    CMP_LT(PR_6, PR_7, r1, r2);        /* tests r1 < r2 into PR_6/PR_7 */
+    MOV_p(r0, GR_0, PR_6);             /* under PR_6: r0 = GR_0 */
+    MOVI_p(r0, 1, PR_7);               /* under PR_7: r0 = 1 */
+}
+
+/* r0 = (r1 >= i0), signed, produced as 1 or GR_0. */
+static void
+_gei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* The immediate compare tests "imm8 < register", so the former
+     * CMPI_LT(PR_6, PR_7, i0, r1) set PR_6 for i0 < r1 instead of
+     * r1 < i0.  r1 >= i0 is (i0 - 1 < r1): put that result in PR_7 and
+     * its complement in PR_6; i0 - 1 must fit in 8 signed bits. */
+    if (i0 >= -127 && i0 <= 128)
+       CMPI_LT(PR_7, PR_6, i0 - 1, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       CMP_LT(PR_6, PR_7, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+
+static void
+_ger_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = !(r1 < r2) using the CMP_LTU compare, produced as 1 or GR_0. */
+    CMP_LTU(PR_6, PR_7, r1, r2);       /* tests r1 < r2 into PR_6/PR_7 */
+    MOV_p(r0, GR_0, PR_6);             /* under PR_6: r0 = GR_0 */
+    MOVI_p(r0, 1, PR_7);               /* under PR_7: r0 = 1 */
+}
+
+/* r0 = (r1 >= i0), unsigned, produced as 1 or GR_0. */
+static void
+_gei_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                reg;
+    /* The immediate compare tests "imm8 < register", so the former
+     * CMPI_LTU(PR_6, PR_7, i0, r1) computed the reverse relation.  Encode
+     * r1 >= i0 as (i0 - 1 < r1) into PR_7.  i0 == 0 is excluded because
+     * i0 - 1 would wrap to the largest unsigned value; it takes the
+     * register path instead. */
+    if (i0 != 0 && i0 >= -127 && i0 <= 128)
+       CMPI_LTU(PR_7, PR_6, i0 - 1, r1);
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i0);
+       CMP_LTU(PR_6, PR_7, r1, rn(reg));
+       jit_unget_reg(reg);
+    }
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+
+static void
+_gtr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = (r2 < r1), i.e. r1 > r2, produced as 1 or GR_0. */
+    CMP_LT(PR_6, PR_7, r2, r1);        /* operands swapped: tests r2 < r1 */
+    MOVI_p(r0, 1, PR_6);               /* under PR_6: r0 = 1 */
+    MOV_p(r0, GR_0, PR_7);             /* under PR_7: r0 = GR_0 */
+}
+
+/* Immediate form of gtr(): i0 is loaded into a scratch register first. */
+static void
+_gti(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    gtr(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_gtr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = (r2 < r1) using the CMP_LTU compare, produced as 1 or GR_0. */
+    CMP_LTU(PR_6, PR_7, r2, r1);       /* operands swapped: tests r2 < r1 */
+    MOVI_p(r0, 1, PR_6);               /* under PR_6: r0 = 1 */
+    MOV_p(r0, GR_0, PR_7);             /* under PR_7: r0 = GR_0 */
+}
+
+/* Immediate form of gtr_u(): i0 is loaded into a scratch register first. */
+static void
+_gti_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    gtr_u(r0, r1, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+static void
+_ner(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{   /* r0 = !(r1 == r2), produced as 1 or GR_0 by two predicated moves. */
+    CMP_EQ(PR_6, PR_7, r1, r2);        /* compare r1 with r2 into PR_6/PR_7 */
+    MOV_p(r0, GR_0, PR_6);             /* under PR_6: r0 = GR_0 */
+    MOVI_p(r0, 1, PR_7);               /* under PR_7: r0 = 1 */
+}
+
+/* r0 = (r1 != i0), produced as 1 or GR_0. */
+static void
+_nei(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp;
+
+    if (i0 < -128 || i0 > 127) {
+       /* Outside the immediate range: compare against a scratch register. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i0);
+       CMP_EQ(PR_6, PR_7, r1, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else
+       CMPI_EQ(PR_6, PR_7, i0, r1);
+    MOV_p(r0, GR_0, PR_6);
+    MOVI_p(r0, 1, PR_7);
+}
+
+/* ldr_c() from the absolute address i0, which is first loaded into a scratch register. */
+static void
+_ldi_c(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    ldr_c(r0, rn(base));
+    jit_unget_reg(base);
+}
+
+/* ldr_uc() from the absolute address i0, which is first loaded into a scratch register. */
+static void
+_ldi_uc(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    ldr_uc(r0, rn(base));
+    jit_unget_reg(base);
+}
+
+/* ldr_s() from the absolute address i0, which is first loaded into a scratch register. */
+static void
+_ldi_s(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    ldr_s(r0, rn(base));
+    jit_unget_reg(base);
+}
+
+/* ldr_us() from the absolute address i0, which is first loaded into a scratch register. */
+static void
+_ldi_us(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    ldr_us(r0, rn(base));
+    jit_unget_reg(base);
+}
+
+/* ldr_i() from the absolute address i0, which is first loaded into a scratch register. */
+static void
+_ldi_i(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    ldr_i(r0, rn(base));
+    jit_unget_reg(base);
+}
+
+/* ldr_ui() from the absolute address i0, which is first loaded into a scratch register. */
+static void
+_ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    ldr_ui(r0, rn(base));
+    jit_unget_reg(base);
+}
+
+/* ldr_l() from the absolute address i0, which is first loaded into a scratch register. */
+static void
+_ldi_l(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    ldr_l(r0, rn(base));
+    jit_unget_reg(base);
+}
+
+/* ldr_c() from the address r1 + r2, formed in a scratch register. */
+static void
+_ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_c(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/* Immediate-offset form of ldxr_c(): i0 goes through a scratch register. */
+static void
+_ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off = jit_get_reg(jit_class_gpr);
+
+    movi(rn(off), i0);
+    ldxr_c(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* ldr_uc() from the address r1 + r2, formed in a scratch register. */
+static void
+_ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_uc(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/* Immediate-offset form of ldxr_uc(): i0 goes through a scratch register. */
+static void
+_ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off = jit_get_reg(jit_class_gpr);
+
+    movi(rn(off), i0);
+    ldxr_uc(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* ldr_s() from the address r1 + r2, formed in a scratch register. */
+static void
+_ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_s(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/* Immediate-offset form of ldxr_s(): i0 goes through a scratch register. */
+static void
+_ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off = jit_get_reg(jit_class_gpr);
+
+    movi(rn(off), i0);
+    ldxr_s(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* ldr_us() from the address r1 + r2, formed in a scratch register. */
+static void
+_ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_us(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/* Immediate-offset form of ldxr_us(): i0 goes through a scratch register. */
+static void
+_ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off = jit_get_reg(jit_class_gpr);
+
+    movi(rn(off), i0);
+    ldxr_us(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* ldr_i() from the address r1 + r2, formed in a scratch register. */
+static void
+_ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_i(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/* Immediate-offset form of ldxr_i(): i0 goes through a scratch register. */
+static void
+_ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off = jit_get_reg(jit_class_gpr);
+
+    movi(rn(off), i0);
+    ldxr_i(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* ldr_ui() from the address r1 + r2, formed in a scratch register. */
+static void
+_ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_ui(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/* Immediate-offset form of ldxr_ui(): i0 goes through a scratch register. */
+static void
+_ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off = jit_get_reg(jit_class_gpr);
+
+    movi(rn(off), i0);
+    ldxr_ui(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* ldr_l() from the address r1 + r2, formed in a scratch register. */
+static void
+_ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r1, r2);
+    ldr_l(r0, rn(ea));
+    jit_unget_reg(ea);
+}
+
+/* Immediate-offset form of ldxr_l(): i0 goes through a scratch register. */
+static void
+_ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                off = jit_get_reg(jit_class_gpr);
+
+    movi(rn(off), i0);
+    ldxr_l(r0, r1, rn(off));
+    jit_unget_reg(off);
+}
+
+/* str_c() of r0 to the absolute address i0, loaded into a scratch register first. */
+static void
+_sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    str_c(rn(base), r0);
+    jit_unget_reg(base);
+}
+
+/* str_s() of r0 to the absolute address i0, loaded into a scratch register first. */
+static void
+_sti_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    str_s(rn(base), r0);
+    jit_unget_reg(base);
+}
+
+/* str_i() of r0 to the absolute address i0, loaded into a scratch register first. */
+static void
+_sti_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    str_i(rn(base), r0);
+    jit_unget_reg(base);
+}
+
+/* str_l() of r0 to the absolute address i0, loaded into a scratch register first. */
+static void
+_sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                base = jit_get_reg(jit_class_gpr);
+
+    movi(rn(base), i0);
+    str_l(rn(base), r0);
+    jit_unget_reg(base);
+}
+
+/* str_c() of r2 to the address r0 + r1, formed in a scratch register. */
+static void
+_stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r0, r1);
+    str_c(rn(ea), r2);
+    jit_unget_reg(ea);
+}
+
+/* str_c() of r1 to the address r0 + i0, formed in a scratch register. */
+static void
+_stxi_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addi(rn(ea), r0, i0);
+    str_c(rn(ea), r1);
+    jit_unget_reg(ea);
+}
+
+/* str_s() of r2 to the address r0 + r1, formed in a scratch register. */
+static void
+_stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r0, r1);
+    str_s(rn(ea), r2);
+    jit_unget_reg(ea);
+}
+
+/* str_s() of r1 to the address r0 + i0, formed in a scratch register. */
+static void
+_stxi_s(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addi(rn(ea), r0, i0);
+    str_s(rn(ea), r1);
+    jit_unget_reg(ea);
+}
+
+/* str_i() of r2 to the address r0 + r1, formed in a scratch register. */
+static void
+_stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r0, r1);
+    str_i(rn(ea), r2);
+    jit_unget_reg(ea);
+}
+
+/* str_i() of r1 to the address r0 + i0, formed in a scratch register. */
+static void
+_stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addi(rn(ea), r0, i0);
+    str_i(rn(ea), r1);
+    jit_unget_reg(ea);
+}
+
+/* str_l() of r2 to the address r0 + r1, formed in a scratch register. */
+static void
+_stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addr(rn(ea), r0, r1);
+    str_l(rn(ea), r2);
+    jit_unget_reg(ea);
+}
+
+/* str_l() of r1 to the address r0 + i0, formed in a scratch register. */
+static void
+_stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                ea = jit_get_reg(jit_class_gpr);
+
+    addi(rn(ea), r0, i0);
+    str_l(rn(ea), r1);
+    jit_unget_reg(ea);
+}
+
+/*
+ * Branch to i0 when r0 < r1.  Returns the code address recorded right
+ * after sync(), presumably so the caller can patch the branch later.
+ */
+static jit_word_t
+_bltr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    CMP_LT(PR_6, PR_7, r0, r1);
+    /* Displacement is expressed in 16-byte units. */
+    BRI_COND((i0 - from) >> 4, PR_6);
+    return (from);
+}
+
+/*
+ * Branch to i0 when r0 < i1 (signed).  Returns the code address recorded
+ * after sync(), presumably so the caller can patch the branch later.
+ */
+static jit_word_t
+_blti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    /* The immediate compare tests "imm8 < register", so the former
+     * CMPI_LT(PR_6, PR_7, i1, r0) set PR_6 for i1 < r0, the reverse of the
+     * register path.  r0 < i1 is !(i1 - 1 < r0): swap the predicate
+     * targets and bias the immediate; i1 - 1 must fit in 8 signed bits. */
+    if (i1 >= -127 && i1 <= 128) {
+       sync();
+       w = _jit->pc.w;
+       CMPI_LT(PR_7, PR_6, i1 - 1, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       sync();
+       w = _jit->pc.w;
+       CMP_LT(PR_6, PR_7, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    BRI_COND((i0 - w) >> 4, PR_6);
+    return (w);
+}
+
+/*
+ * Branch to i0 when r0 < r1 under the CMP_LTU compare.  Returns the code
+ * address recorded right after sync().
+ */
+static jit_word_t
+_bltr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    CMP_LTU(PR_6, PR_7, r0, r1);
+    /* Displacement is expressed in 16-byte units. */
+    BRI_COND((i0 - from) >> 4, PR_6);
+    return (from);
+}
+
+/*
+ * Branch to i0 when r0 < i1 (unsigned).  Returns the code address recorded
+ * after sync(), presumably so the caller can patch the branch later.
+ */
+static jit_word_t
+_blti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    /* The immediate compare tests "imm8 < register", so the former
+     * CMPI_LTU(PR_6, PR_7, i1, r0) computed the reverse relation.  Encode
+     * r0 < i1 as !(i1 - 1 < r0) with swapped predicate targets.  i1 == 0
+     * is excluded because i1 - 1 would wrap to the largest unsigned value;
+     * it takes the register path instead. */
+    if (i1 != 0 && i1 >= -127 && i1 <= 128) {
+       sync();
+       w = _jit->pc.w;
+       CMPI_LTU(PR_7, PR_6, i1 - 1, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       sync();
+       w = _jit->pc.w;
+       CMP_LTU(PR_6, PR_7, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    BRI_COND((i0 - w) >> 4, PR_6);
+    return (w);
+}
+
+/*
+ * Branch to i0 when !(r1 < r0), i.e. r0 <= r1.  Returns the code address
+ * recorded right after sync().
+ */
+static jit_word_t
+_bler(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    /* Operands are swapped and the branch uses the complement predicate. */
+    CMP_LT(PR_6, PR_7, r1, r0);
+    BRI_COND((i0 - from) >> 4, PR_7);
+    return (from);
+}
+
+/* Immediate form of bler(): i1 is loaded into a scratch register first. */
+static jit_word_t
+_blei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i1);
+    from = bler(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/*
+ * Branch to i0 when !(r1 < r0) under the CMP_LTU compare.  Returns the
+ * code address recorded right after sync().
+ */
+static jit_word_t
+_bler_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    /* Operands are swapped and the branch uses the complement predicate. */
+    CMP_LTU(PR_6, PR_7, r1, r0);
+    BRI_COND((i0 - from) >> 4, PR_7);
+    return (from);
+}
+
+/* Immediate form of bler_u(): i1 is loaded into a scratch register first. */
+static jit_word_t
+_blei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i1);
+    from = bler_u(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/*
+ * Branch to i0 when r0 == r1.  Returns the code address recorded right
+ * after sync().
+ */
+static jit_word_t
+_beqr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    CMP_EQ(PR_6, PR_7, r0, r1);
+    /* Displacement is expressed in 16-byte units. */
+    BRI_COND((i0 - from) >> 4, PR_6);
+    return (from);
+}
+
+/*
+ * Branch to i0 when r0 == i1.  Returns the code address recorded right
+ * after sync().
+ */
+static jit_word_t
+_beqi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp;
+
+    if (i1 < -128 || i1 > 127) {
+       /* Outside the immediate range: compare against a scratch register. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       sync();
+       from = _jit->pc.w;
+       CMP_EQ(PR_6, PR_7, r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else {
+       sync();
+       from = _jit->pc.w;
+       CMPI_EQ(PR_6, PR_7, i1, r0);
+    }
+    BRI_COND((i0 - from) >> 4, PR_6);
+    return (from);
+}
+
+/*
+ * Branch to i0 when !(r0 < r1), i.e. r0 >= r1.  Returns the code address
+ * recorded right after sync().
+ */
+static jit_word_t
+_bger(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    CMP_LT(PR_6, PR_7, r0, r1);
+    /* The branch is taken on the complement predicate. */
+    BRI_COND((i0 - from) >> 4, PR_7);
+    return (from);
+}
+
+/*
+ * Branch to i0 when r0 >= i1 (signed).  Returns the code address recorded
+ * after sync(), presumably so the caller can patch the branch later.
+ */
+static jit_word_t
+_bgei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    /* The immediate compare tests "imm8 < register", so the former
+     * CMPI_LT(PR_6, PR_7, i1, r0) set PR_6 for i1 < r0 instead of
+     * r0 < i1.  r0 >= i1 is (i1 - 1 < r0): put that result in PR_7 and
+     * its complement in PR_6; i1 - 1 must fit in 8 signed bits. */
+    if (i1 >= -127 && i1 <= 128) {
+       sync();
+       w = _jit->pc.w;
+       CMPI_LT(PR_7, PR_6, i1 - 1, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       sync();
+       w = _jit->pc.w;
+       CMP_LT(PR_6, PR_7, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    BRI_COND((i0 - w) >> 4, PR_7);
+    return (w);
+}
+
+/*
+ * Branch to i0 when !(r0 < r1) under the CMP_LTU compare.  Returns the
+ * code address recorded right after sync().
+ */
+static jit_word_t
+_bger_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    CMP_LTU(PR_6, PR_7, r0, r1);
+    /* The branch is taken on the complement predicate. */
+    BRI_COND((i0 - from) >> 4, PR_7);
+    return (from);
+}
+
+/*
+ * Branch to i0 when r0 >= i1 (unsigned).  Returns the code address
+ * recorded after sync(), presumably so the caller can patch the branch
+ * later.
+ */
+static jit_word_t
+_bgei_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         w;
+    jit_int32_t                reg;
+    /* The immediate compare tests "imm8 < register", so the former
+     * CMPI_LTU(PR_6, PR_7, i1, r0) computed the reverse relation.  Encode
+     * r0 >= i1 as (i1 - 1 < r0) into PR_7.  i1 == 0 is excluded because
+     * i1 - 1 would wrap to the largest unsigned value; it takes the
+     * register path instead. */
+    if (i1 != 0 && i1 >= -127 && i1 <= 128) {
+       sync();
+       w = _jit->pc.w;
+       CMPI_LTU(PR_7, PR_6, i1 - 1, r0);
+    }
+    else {
+       reg = jit_get_reg(jit_class_gpr);
+       movi(rn(reg), i1);
+       sync();
+       w = _jit->pc.w;
+       CMP_LTU(PR_6, PR_7, r0, rn(reg));
+       jit_unget_reg(reg);
+    }
+    BRI_COND((i0 - w) >> 4, PR_7);
+    return (w);
+}
+
+/*
+ * Branch to i0 when r1 < r0, i.e. r0 > r1.  Returns the code address
+ * recorded right after sync().
+ */
+static jit_word_t
+_bgtr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    /* Operands are swapped relative to the less-than branch. */
+    CMP_LT(PR_6, PR_7, r1, r0);
+    BRI_COND((i0 - from) >> 4, PR_6);
+    return (from);
+}
+
+/* Immediate form of bgtr(): i1 is loaded into a scratch register first. */
+static jit_word_t
+_bgti(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i1);
+    from = bgtr(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/*
+ * Branch to i0 when r1 < r0 under the CMP_LTU compare.  Returns the code
+ * address recorded right after sync().
+ */
+static jit_word_t
+_bgtr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    /* Operands are swapped relative to the less-than branch. */
+    CMP_LTU(PR_6, PR_7, r1, r0);
+    BRI_COND((i0 - from) >> 4, PR_6);
+    return (from);
+}
+
+/* Immediate form of bgtr_u(): i1 is loaded into a scratch register first. */
+static jit_word_t
+_bgti_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i1);
+    from = bgtr_u(i0, r0, rn(tmp));
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/*
+ * Branch to i0 when !(r0 == r1).  Returns the code address recorded right
+ * after sync().
+ */
+static jit_word_t
+_bner(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+
+    sync();
+    from = _jit->pc.w;
+    CMP_EQ(PR_6, PR_7, r0, r1);
+    /* The branch is taken on the complement predicate. */
+    BRI_COND((i0 - from) >> 4, PR_7);
+    return (from);
+}
+
+/*
+ * Branch to i0 when !(r0 == i1).  Returns the code address recorded right
+ * after sync().
+ */
+static jit_word_t
+_bnei(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp;
+
+    if (i1 < -128 || i1 > 127) {
+       /* Outside the immediate range: compare against a scratch register. */
+       tmp = jit_get_reg(jit_class_gpr);
+       movi(rn(tmp), i1);
+       sync();
+       from = _jit->pc.w;
+       CMP_EQ(PR_6, PR_7, r0, rn(tmp));
+       jit_unget_reg(tmp);
+    }
+    else {
+       sync();
+       from = _jit->pc.w;
+       CMPI_EQ(PR_6, PR_7, i1, r0);
+    }
+    BRI_COND((i0 - from) >> 4, PR_7);
+    return (from);
+}
+
+/*
+ * Branch to i0 when (r0 & r1) is not equal to 0.  Returns the code address
+ * recorded right after sync().
+ */
+static jit_word_t
+_bmsr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+    jit_int32_t                bits = jit_get_reg(jit_class_gpr);
+
+    andr(rn(bits), r0, r1);
+    sync();
+    from = _jit->pc.w;
+    CMPI_EQ(PR_6, PR_7, 0, rn(bits));
+    jit_unget_reg(bits);
+    /* PR_7 is the "not equal to 0" side of the compare. */
+    BRI_COND((i0 - from) >> 4, PR_7);
+    return (from);
+}
+
+/*
+ * Branch to i0 when (r0 & i1) is not equal to 0.  Returns the code address
+ * recorded right after sync().
+ */
+static jit_word_t
+_bmsi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         from;
+    jit_int32_t                bits = jit_get_reg(jit_class_gpr);
+
+    /* The scratch register holds the mask, then the masked value. */
+    movi(rn(bits), i1);
+    andr(rn(bits), r0, rn(bits));
+    sync();
+    from = _jit->pc.w;
+    CMPI_EQ(PR_6, PR_7, 0, rn(bits));
+    jit_unget_reg(bits);
+    BRI_COND((i0 - from) >> 4, PR_7);
+    return (from);
+}
+
+/*
+ * Branch to i0 when (r0 & r1) is equal to 0.  Returns the code address
+ * recorded right after sync().
+ */
+static jit_word_t
+_bmcr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         from;
+    jit_int32_t                bits = jit_get_reg(jit_class_gpr);
+
+    andr(rn(bits), r0, r1);
+    sync();
+    from = _jit->pc.w;
+    CMPI_EQ(PR_6, PR_7, 0, rn(bits));
+    jit_unget_reg(bits);
+    /* PR_6 is the "equal to 0" side of the compare. */
+    BRI_COND((i0 - from) >> 4, PR_6);
+    return (from);
+}
+
+/*
+ * Branch to i0 when (r0 & i1) is equal to 0.  Returns the code address
+ * recorded right after sync().
+ */
+static jit_word_t
+_bmci(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1)
+{
+    jit_word_t         from;
+    jit_int32_t                bits = jit_get_reg(jit_class_gpr);
+
+    /* The scratch register holds the mask, then the masked value. */
+    movi(rn(bits), i1);
+    andr(rn(bits), r0, rn(bits));
+    sync();
+    from = _jit->pc.w;
+    CMPI_EQ(PR_6, PR_7, 0, rn(bits));
+    jit_unget_reg(bits);
+    BRI_COND((i0 - from) >> 4, PR_6);
+    return (from);
+}
+
+/*
+ * r0 += r1 (signed) and branch to i0 on overflow (carry != 0) or on no
+ * overflow (carry == 0).  Returns the code address recorded after sync(),
+ * presumably so the caller can patch the branch later.
+ */
+static jit_word_t
+_baddr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+       jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+    /* t1 = r0 + r1;   overflow = r1 < 0 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    lti(rn(t0), r1, 0);                        /* t0 = r1 < 0 */
+    addr(rn(t1), r0, r1);              /* t1 = r0 + r1 */
+    ltr(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    ltr(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    CMPI_EQ(PR_6, PR_7, 0, rn(t0));
+    CMPI_EQ_p(PR_8, PR_9, 0, rn(t2), PR_6);/* if (t0==0) p8=t2==0,p9=t2!=0; */
+    /* The r1 < 0 case must test t1; it used to test t2 again. */
+    CMPI_EQ_p(PR_8, PR_9, 0, rn(t1), PR_7);/* if (t0!=0) p8=t1==0,p9=t1!=0; */
+    sync();
+    w = _jit->pc.w;
+    /* t1 no longer holds the sum, so recompute it straight into r0.  The
+     * former MOV(r0, rn(t0)) stored the "r1 < 0" flag instead. */
+    addr(r0, r0, r1);
+    BRI_COND((i0 - w) >> 4, carry ? PR_9 : PR_8);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of baddr(): i1 is loaded into a scratch register first. */
+static jit_word_t
+_baddi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+       jit_bool_t carry)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i1);
+    from = baddr(i0, r0, rn(tmp), carry);
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/*
+ * r0 += r1 and branch to i0 depending on whether the sum compares below
+ * the original r0 under ltr_u(): taken when it does if carry != 0, when it
+ * does not if carry == 0.  Returns the code address recorded after sync().
+ */
+static jit_word_t
+_baddr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+        jit_bool_t carry)
+{
+    jit_word_t         from;
+    jit_int32_t                sum = jit_get_reg(jit_class_gpr);
+    jit_int32_t                wrapped = jit_get_reg(jit_class_gpr);
+
+    addr(rn(sum), r0, r1);
+    ltr_u(rn(wrapped), rn(sum), r0);   /* wrapped = sum < r0 */
+    CMPI_EQ(PR_6, PR_7, 0, rn(wrapped));
+    sync();
+    from = _jit->pc.w;
+    MOV(r0, rn(sum));
+    BRI_COND((i0 - from) >> 4, carry ? PR_7 : PR_6);
+    jit_unget_reg(wrapped);
+    jit_unget_reg(sum);
+    return (from);
+}
+
+/* Immediate form of baddr_u(): i1 is loaded into a scratch register first. */
+static jit_word_t
+_baddi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+        jit_bool_t carry)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i1);
+    from = baddr_u(i0, r0, rn(tmp), carry);
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/*
+ * r0 -= r1 (signed) and branch to i0 on overflow (carry != 0) or on no
+ * overflow (carry == 0).  Returns the code address recorded after sync(),
+ * presumably so the caller can patch the branch later.
+ */
+static jit_word_t
+_bsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+       jit_bool_t carry)
+{
+    jit_word_t         w;
+    jit_int32_t                t0;
+    jit_int32_t                t1;
+    jit_int32_t                t2;
+    /* t1 = r0 - r1;   overflow = 0 < r1 ? r0 < t1 : t1 < r0 */
+    t0 = jit_get_reg(jit_class_gpr);
+    t1 = jit_get_reg(jit_class_gpr);
+    t2 = jit_get_reg(jit_class_gpr);
+    gti(rn(t0), r1, 0);                        /* t0 = r1 > 0 */
+    subr(rn(t1), r0, r1);              /* t1 = r0 - r1 */
+    ltr(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
+    ltr(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
+    CMPI_EQ(PR_6, PR_7, 0, rn(t0));
+    CMPI_EQ_p(PR_8, PR_9, 0, rn(t2), PR_6);/* if (t0==0) p8=t2==0,p9=t2!=0; */
+    /* The r1 > 0 case must test t1; it used to test t2 again. */
+    CMPI_EQ_p(PR_8, PR_9, 0, rn(t1), PR_7);/* if (t0!=0) p8=t1==0,p9=t1!=0; */
+    sync();
+    w = _jit->pc.w;
+    /* t1 no longer holds the difference, so recompute it straight into
+     * r0.  The former MOV(r0, rn(t0)) stored the "r1 > 0" flag instead. */
+    subr(r0, r0, r1);
+    BRI_COND((i0 - w) >> 4, carry ? PR_9 : PR_8);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+    return (w);
+}
+
+/* Immediate form of bsubr(): i1 is loaded into a scratch register first. */
+static jit_word_t
+_bsubi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+       jit_bool_t carry)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i1);
+    from = bsubr(i0, r0, rn(tmp), carry);
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/*
+ * r0 -= r1 and branch to i0 depending on whether the original r0 compares
+ * below the difference under ltr_u(): taken when it does if carry != 0,
+ * when it does not if carry == 0.  Returns the code address recorded after
+ * sync().
+ */
+static jit_word_t
+_bsubr_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1,
+        jit_bool_t carry)
+{
+    jit_word_t         from;
+    jit_int32_t                diff = jit_get_reg(jit_class_gpr);
+    jit_int32_t                wrapped = jit_get_reg(jit_class_gpr);
+
+    subr(rn(diff), r0, r1);
+    ltr_u(rn(wrapped), r0, rn(diff));  /* wrapped = r0 < diff */
+    CMPI_EQ(PR_6, PR_7, 0, rn(wrapped));
+    sync();
+    from = _jit->pc.w;
+    MOV(r0, rn(diff));
+    BRI_COND((i0 - from) >> 4, carry ? PR_7 : PR_6);
+    jit_unget_reg(wrapped);
+    jit_unget_reg(diff);
+    return (from);
+}
+
+/* Immediate form of bsubr_u(): i1 is loaded into a scratch register first. */
+static jit_word_t
+_bsubi_u(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_word_t i1,
+        jit_bool_t carry)
+{
+    jit_word_t         from;
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i1);
+    from = bsubr_u(i0, r0, rn(tmp), carry);
+    jit_unget_reg(tmp);
+    return (from);
+}
+
+/* Indirect jump to the address held in general register r0.
+ * The target is copied into branch register BR_6 and the branch is
+ * then taken through BR_6. */
+static void
+_jmpr(jit_state_t *_jit, jit_int32_t r0)
+{
+    /* Destination operand comes first: BR_6 = r0.  MOV_rn_br() goes the
+     * other way (general register = branch register), which is how
+     * _prolog saves BR_0, and would overwrite r0 instead of loading the
+     * jump target. */
+    MOV_br_rn(BR_6, r0);
+    BR(BR_6);
+}
+
+/* Unconditional jump to the absolute address i0.
+ * The displacement is computed in 16-byte bundles relative to the
+ * bundle that starts after sync().  The short IP-relative branch (BRI)
+ * is used when the displacement fits its signed 21-bit field (one sign
+ * bit plus 20 bits, the same layout _patch_at writes for short
+ * branches); anything farther uses the long form (BRL). */
+static void
+_jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         d;
+    sync();
+    d = ((jit_word_t)i0 - _jit->pc.w) >> 4;
+    /* in-range test; "d < min && d > max" could never be true and made
+     * every jump take the long form */
+    if (d >= -1048576 && d <= 1048575)
+       BRI(d);
+    else
+       BRL(d);
+}
+
+/* Patchable unconditional jump: always emits the long branch form so
+ * the target can be rewritten later.  Returns the pc.w value sampled
+ * after sync(), which is also the base of the displacement. */
+static jit_word_t
+_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+{
+    jit_word_t         here;
+    jit_word_t         disp;
+
+    sync();
+    here = _jit->pc.w;
+    disp = ((jit_word_t)i0 - here) >> 4;       /* in 16-byte bundles */
+    BRL(disp);
+    return (here);
+}
+
+/* Indirect call through the two-word record addressed by r0
+ * (presumably an IA-64 function descriptor: entry point, then gp --
+ * TODO confirm against the ABI).
+ * The first word goes to BR_6 via a scratch register, the second word
+ * is loaded into GR_1, then the call is made through BR_6 with BR_0 as
+ * the link register. */
+static void
+_callr(jit_state_t *_jit, jit_int32_t r0)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    /* load first word; the "_inc" form is given 8 as increment, so r0
+     * presumably addresses the second word afterwards */
+    LD8_inc(rn(reg), r0, 8);
+    MOV_br_rn(BR_6, rn(reg));
+    jit_unget_reg(reg);
+    /* NOTE(review): r0 is used again after the post-increment load, so
+     * the caller's register appears to be left modified -- confirm */
+    LD8(GR_1, r0);
+    BR_CALL(BR_0, BR_6);
+}
+
+/* Call the function whose address i0 is known at code-generation time.
+ * Only the "#if 1" branch is compiled: i0 is loaded into a scratch
+ * register and the call goes through callr(). */
+static void
+_calli(jit_state_t *_jit, jit_word_t i0)
+{
+#if 1
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    movi(rn(reg), i0);
+    callr(rn(reg));
+    jit_unget_reg(reg);
+#else
+    /* Disabled direct IP-relative variant, kept for reference.
+     * NOTE(review): the range test below can never be true (d cannot be
+     * below -16777216 and above 16777215 at once), so this branch would
+     * always emit BRL_CALL if it were enabled. */
+    jit_word_t         d;
+    sync();
+    i0 = *(jit_word_t **)i0;
+    d = ((jit_word_t)i0 - _jit->pc.w) >> 4;
+    if (d < -16777216 && d > 16777215)
+       BRI_CALL(BR_6, d);
+    else
+       BRL_CALL(BR_6, d);
+#endif
+}
+
+/* Patchable call to i0.
+ * Compiled branch: i0 is loaded with movi_p() into a scratch register
+ * and called through callr(); the value returned by movi_p() is
+ * returned so the immediate load can be patched later (see the
+ * jit_code_calli case of _patch_at, which shares the movi encoding). */
+static jit_word_t
+_calli_p(jit_state_t *_jit, jit_word_t i0)
+{
+#if 1
+    jit_word_t         w;
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_gpr);
+    w = movi_p(rn(reg), i0);
+    callr(rn(reg));
+    jit_unget_reg(reg);
+    return (w);
+#else
+    /* Disabled direct variant: dereferences i0 once and emits a long
+     * IP-relative call; returns the pc.w sampled after sync(). */
+    jit_word_t         d, w;
+    sync();
+    i0 = *(jit_word_t *)i0;
+    w = _jit->pc.w;
+    d = ((jit_word_t)i0 - w) >> 4;
+    BRL_CALL(BR_6, d);
+    return (w);
+#endif
+}
+
+/* Emit the function prolog.
+ * Computes the 16-byte aligned stack size, picks the block of five
+ * registers starting at _jitc->breg that hold the saved state, and
+ * emits the save sequence.  Layout, as used here and in _epilog:
+ *   breg     <- BR_0
+ *   breg + 1 <- result of ALLOC (restored into AR_PFS by _epilog)
+ *   breg + 2 <- GR_12 (the register adjusted as stack below)
+ *   breg + 3 <- GR_1
+ *   breg + 4 <- GR_4 (used as frame pointer)
+ * _jitc->rout (breg + 5) is the first register after that block. */
+static void
+_prolog(jit_state_t *_jit, jit_node_t *node)
+{
+    jit_int32_t                reg, ruse, rout;
+
+    /* round the argument area (alen - aoff) up to a multiple of 16 */
+    _jitc->function->stack = ((_jitc->function->self.alen -
+                              _jitc->function->self.aoff) + 15) & -16;
+
+    /* First lowest unused register is first output register */
+    /* scan downwards for the highest register marked in the regset */
+    for (reg = _R115; reg >= _R40; reg--) {
+       if (jit_regset_tstbit(&_jitc->function->regset, reg))
+           break;
+    }
+    _jitc->breg = rn(reg) + 1;
+    _jitc->rout = _jitc->breg + 5;
+    /* register count relative to GR_32, passed to ALLOC below */
+    ruse = _jitc->rout - GR_32;
+
+    /* How many out argument registers required? */
+    /* counts the leading run of _OUT registers marked in the regset */
+    for (reg = _OUT0; reg <= _OUT7; reg++) {
+       if (!jit_regset_tstbit(&_jitc->function->regset, reg))
+           break;
+    }
+    rout = reg - _OUT0;
+
+    /* Match gcc prolog */
+    ALLOC(_jitc->breg + 1, ruse, rout);
+    MOV(_jitc->breg + 2, GR_12);
+    MOV_rn_br(_jitc->breg, BR_0);
+    MOV(_jitc->breg + 3, GR_1);
+    /* lightning specific, use r4 as frame pointer */
+    MOV(_jitc->breg + 4, GR_4);
+
+    /* setup frame pointer */
+    /* NOTE(review): the test is on _R3 while the register written is
+     * GR_4; the relation between the two is not visible here -- confirm */
+    if (!jit_regset_tstbit(&_jitc->function->regset, _R3))
+       MOV(GR_4, GR_12);
+
+    /* adjust stack if required */
+    if (_jitc->function->stack)
+       subi(GR_12, GR_12, _jitc->function->stack);
+}
+
+/* Emit the function epilog: restore, from the register block that
+ * starts at _jitc->breg, everything _prolog saved (GR_4, GR_1, AR_PFS,
+ * BR_0, GR_12), return through BR_0 and flush the pending
+ * instructions. */
+static void
+_epilog(jit_state_t *_jit, jit_node_t *node)
+{
+    /* frame pointer saved by the prolog in breg + 4 */
+    MOV(GR_4, _jitc->breg + 4);
+    /* Match gcc epilog */
+    MOV(GR_1, _jitc->breg + 3);
+    MOV_I_ar_rn(AR_PFS, _jitc->breg + 1);
+    MOV_br_rn(BR_0, _jitc->breg);
+    MOV(GR_12, _jitc->breg + 2);
+    BR_RET(BR_0);
+    flush();
+}
+
+/* Rewrite the immediate of the already emitted bundle at address
+ * `instr` so that it refers to `label`.
+ * The bundle is read as two words, unpacked into its template (tm) and
+ * three slots (s0, s1, s2) with the get_xx() macros, modified according
+ * to node->code, repacked with the to_xx() macros and stored back.
+ *   - movi/calli: `label` itself is stored as a 64-bit immediate split
+ *     between s1 and several fields of s2;
+ *   - jmpi: the displacement (label - instr) in 16-byte bundles is
+ *     stored in the long-branch encoding (s1 plus fields of s2);
+ *   - anything else: a short branch is expected in s1 (after a compare
+ *     or mov in s0) or in s0, and receives a signed 21-bit displacement.
+ * Unexpected encodings are caught by assert() or abort(). */
+static void
+_patch_at(jit_state_t *_jit, jit_node_t *node,
+         jit_word_t instr, jit_word_t label)
+{
+    inst_lo_t           l;
+    inst_hi_t           h;
+    jit_word_t          tm, s0, s1, s2;
+    union {
+       jit_word_t       w;
+       jit_word_t      *p;
+    } c;
+    jit_word_t          i1, i41, i20, ic, i5, i9, i7;
+
+    /* view the bundle address as a pointer and fetch both halves */
+    c.w = instr;       l.w = c.p[0];   h.w = c.p[1];
+    get_tm(tm);                get_s0(s0);     get_s1(s1);     get_s2(s2);
+    switch (node->code) {
+       case jit_code_movi:
+       case jit_code_calli:
+           /* split the 64-bit value into the immediate fields */
+           i1  = (label >> 63) &           0x1L;
+           i41 = (label >> 22) & 0x1ffffffffffL;
+           ic  = (label >> 21) &           0x1L;
+           i5  = (label >> 16) &          0x1fL;
+           i9  = (label >>  7) &         0x1ffL;
+           i7  =  label        &          0x7fL;
+           s1 = i41;
+           assert((tm & ~1) == TM_M_L_X_ &&
+                  (s2 & 0xfL<<37) == (6L<<37) &&
+                  s0 == nop_m);
+           /* keep opcode and the 7-bit field at bit 6, replace the rest */
+           s2 &= (6L<<37)|(0x7fL<<6);
+           s2 |= (i1<<36)|(i9<<27)|(i5<<22)|(ic<<21)|(i7<<13);
+           break;
+       case jit_code_jmpi:
+           if (_jitc->jump) {
+               /* kludge to hide function descriptors; check that gp
+                * is zero, what is done for all jit functions */
+               if (((long *)label)[1] == 0) {
+                   /* a label matching a recorded prolog address is
+                    * advanced by 16 bytes */
+                   for (ic = 0; ic < _jitc->prolog.offset; ic++) {
+                       if (_jitc->prolog.ptr[ic] == label) {
+                           label += 16;
+                           break;
+                       }
+                   }
+               }
+           }
+           /* displacement in bundles */
+           ic = (label - instr) >> 4;
+           i1  = (ic >> 61) &           0x1L;
+           i41 = (ic >> 22) & 0x1ffffffffffL;
+           i20 =  ic        &       0xfffffL;
+           assert((tm & ~1) == TM_M_L_X_ &&
+                  (s2 & 0xfL<<37) == (0xcL<<37) &&
+                  s0 == nop_m);
+           s1 = i41;
+           s2 &= (0xcL<<37)|(0x7L<<33)|(1L<<12);
+           s2 |= (i1<<36)|(i20<<13);
+           break;
+#if 0
+       case jit_code_calli:
+           label = *(jit_word_t *)label;
+           ic = (label - instr) >> 4;
+           i1  = (ic >> 61) &           0x1L;
+           i41 = (ic >> 22) & 0x1ffffffffffL;
+           i20 =  ic        &       0xfffffL;
+           assert((tm & ~1) == TM_M_L_X_ &&
+                  (s2 & 0xfL<<37) == (0xdL<<37) &&
+                  s0 == nop_m);
+           s1 = i41;
+           s2 &= (0xdL<<37)|(0x7L<<33)|(1L<<12)|(0x3L<<6);
+           s2 |= (i1<<36)|(i20<<13);
+           break;
+#endif
+       default:
+           /* expected sequences are:
+            * A6, B1           (cmp_xxx, br_cond, ???)
+            * A8, B1           (cmpi_xxx, br_cond, ???)
+            * A4, B1           (mov, br_cond, ???)
+            * F4, B1           (fcmp_xxx, br_cond, ???)
+            * B1               (br, ???, ???)
+            */
+           ic = (label - instr) >> 4;
+           /* dispatch on the major opcode of slot 0 */
+           switch (s0 >> 37) {
+               case 0x8:                               /* A4, B1 */
+                   /* validate s0 is register mov */
+                   assert((s0 & 0x3ffe0007e000L) == 0x800000000L);
+                   /* validate s1 is br.cond */
+               check_s1:
+                   assert((s1 >> 37) == 4 &&
+                          (s1 & (7 << 6)) == 0 &&
+                          (s1 & 0x1f) != 0);
+                   /* keep opcode, bits 33-35, bit 12 and the low five
+                    * bits; store sign (bit 36) and 20-bit displacement */
+                   s1 &= (4L<<37)|(7L<<33)|(1L<<12)|0x1f;
+                   s1 |= (((ic>>20)&1L)<<36)|((ic&0xfffffL)<<13);
+                   break;
+               case 0xc: case 0xd: case 0xe:           /* A6, B1 or A8, B1 */
+                   /* validate s0 is cmp.lt, cmp.ltu or cmp.eq */
+                   assert((s0 & ((1L<<33)|(1<<12))) == 0);
+                   goto check_s1;
+               case 0x4:                               /* B1 or F4, B1 */
+                   if (s0 & 0x1f)                      /* F4,B1 */
+                       goto check_s1;
+                   else {                              /* B1 */
+                       /* validate s0 is br */
+                       assert((s0 >> 37) == 4 &&
+                              (s0 & (7 << 6)) == 0 &&
+                              (s0 & 0x1f) == 0);
+                       s0 &= (4L<<37)|(7L<<33)|(1L<<12)|0x1f;
+                       s0 |= (((ic>>20)&1L)<<36)|((ic&0xfffffL)<<13);
+                   }
+                   break;
+               default:
+                   abort();
+           }
+           break;
+    }
+    /* repack and store the modified bundle */
+    to_tm(tm);         to_s0(s0);      to_s1(s1);      to_s2(s2);
+    c.p[0] = l.w;
+    c.p[1] = h.w;
+}
+#endif
diff --git a/lib/jit_ia64-fpu.c b/lib/jit_ia64-fpu.c
new file mode 100644
index 0000000..30da635
--- /dev/null
+++ b/lib/jit_ia64-fpu.c
@@ -0,0 +1,1530 @@
+/*
+ * Copyright (C) 2013  Free Software Foundation, Inc.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This software is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * Authors:
+ *     Paulo Cesar Pereira de Andrade
+ */
+
+#if PROTO
+/* Values for the "sf" field of the floating-point formats below
+ * (presumably the status-field selector s0..s3 -- TODO confirm). */
+#define SF_S0                          0
+#define SF_S1                          1
+#define SF_S2                          2
+#define SF_S3                          3
+
+/* TSTFREGn(): call stop() when any of the n given floating-point
+ * registers is already marked in _jitc->fprs.
+ * SETFREG(): mark a floating-point register in _jitc->fprs. */
+#define TSTFREG1(r0)                                                   \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->fprs, r0))                        \
+           stop();                                                     \
+    } while (0)
+#define TSTFREG2(r0, r1)                                               \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->fprs, r0) ||                      \
+           jit_regset_tstbit(&_jitc->fprs, r1))                        \
+           stop();                                                     \
+    } while (0)
+#define TSTFREG3(r0, r1, r2)                                           \
+    do {                                                               \
+       if (jit_regset_tstbit(&_jitc->fprs, r0) ||                      \
+           jit_regset_tstbit(&_jitc->fprs, r1) ||                      \
+           jit_regset_tstbit(&_jitc->fprs, r2))                        \
+           stop();                                                     \
+    } while (0)
+#define SETFREG(r0)            jit_regset_setbit(&_jitc->fprs, r0)     
+
+/* libm */
+/* declared by hand instead of including <math.h> */
+extern float sqrtf(float);
+extern double sqrt(double);
+
+/* Instruction-format emitters.
+ * Each Mn()/Fn() wrapper forwards its fields to the matching static
+ * function, passing _jit first and 0 as second argument; the "_p"
+ * instruction macros further down (e.g. FMA_p) call the function
+ * directly and pass a predicate in that second position.
+ * M21, M23 and M27 are expressed through M20x/M22x/M26x, and F2/F3,
+ * F6/F7, F9/F10/F11, F13 and F14/F15 through the format they share
+ * fields with.  M20x, M22x, M26x (and M13, used by the stf macros) are
+ * not defined in this part of the file. */
+#define M7(x6,ht,r3,r2,f1)             _M7(_jit,0,x6,ht,r3,r2,f1)
+static void _M7(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M8(x6,ht,r3,im,f1)             _M8(_jit,0,x6,ht,r3,im,f1)
+static void _M8(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M9(x6,ht,r3,f1)                        _M9(_jit,0,x6,ht,r3,f1)
+static void _M9(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M10(x6,ht,r3,r2,im)            _M10(_jit,0,x6,ht,r3,r2,im)
+static void _M10(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M11(x6,ht,r3,f2,f1)            _M11(_jit,0,x6,ht,r3,f2,f1)
+static void _M11(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M12(x6,ht,r3,f2,f1)            _M12(_jit,0,x6,ht,r3,f2,f1)
+static void _M12(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define M18(x6,r2,f1)                  _M18(_jit,0,x6,r2,f1)
+static void _M18(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M19(x6,f2,r1)                  _M19(_jit,0,x6,f2,r1)
+static void _M19(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t);
+#define M21(f2,im)                     M20x(0x3,f2,im)
+#define M23(x3,im,f1)                  M22x(x3,im,f1)
+#define M27(f1)                                M26x(3,f1)
+/* F1 field order is (op, x, sf, f4, f3, f2, f1); F2 passes its x2 in
+ * the sf position and F3 passes 0 there */
+#define F1(op,x,sf,f4,f3,f2,f1)                F1_(_jit,0,op,x,sf,f4,f3,f2,f1)
+#define F2(x2,f4,f3,f2,f1)             F1(0xe,1,x2,f4,f3,f2,f1)
+#define F3(f4,f3,f2,f1)                        F1(0xe,0,0,f4,f3,f2,f1)
+static void F1_(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define F4(rb,sf,ra,p2,f3,f2,ta,p1)    F4_(_jit,0,rb,sf,ra,p2,f3,f2,ta,p1)
+static void F4_(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define F5(p2,fc,f2,ta,p1)             F5_(_jit,0,p2,fc,f2,ta,p1)
+static void F5_(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+/* F6 and F7 differ only in the q argument (0 / 1); F7 has no f2 */
+#define F6x(op,q,sf,p2,f3,f2,f1)       F6x_(_jit,0,op,q,sf,p2,f3,f2,f1)
+#define F6(op,sf,p2,f3,f2,f1)          F6x(op,0,sf,p2,f3,f2,f1)
+#define F7(op,sf,p2,f3,f1)             F6x(op,1,sf,p2,f3,0,f1)
+static void F6x_(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+/* F8 field order is (op, sf, x6, f3, f2, f1); F9 fixes sf to 0,
+ * F10 fixes f3 to 0, F11 fixes op, sf and f3 to 0 */
+#define F8(op,sf,x6,f3,f2,f1)          F8_(_jit,0,op,sf,x6,f3,f2,f1)
+#define F9(op,x6,f3,f2,f1)             F8(op,0,x6,f3,f2,f1)
+#define F10(op,sf,x6,f2,f1)            F8(op,sf,x6,0,f2,f1)
+#define F11(x6,f2,f1)                  F8(0,0,x6,0,f2,f1)
+static void F8_(jit_state_t*,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t,
+               jit_word_t,jit_word_t,jit_word_t);
+#define F12(sf,x6,omsk,amsk)           F12_(_jit,0,sf,x6,omsk,amsk)
+#define F13(sf,x6)                     F12(sf,x6,0,0)
+static void F12_(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define F14x(sf,x,x6,im)               F14x_(_jit,0,sf,x,x6,im)
+#define F14(sf,im)                     F14x(sf,0,8,im)
+#define F15(im)                                F14x(0,0,0,im)
+static void F14x_(jit_state_t*,jit_word_t,
+                 jit_word_t,jit_word_t,jit_word_t,jit_word_t);
+#define F16(y,im)                      F16_(_jit,0,y,im)
+static void F16_(jit_state_t*,jit_word_t,
+                jit_word_t,jit_word_t);
+
+/* Instruction macros, one group per mnemonic.  Operands are named
+ * after the instruction fields they fill; in every macro of this group
+ * the first f1/p1 argument ends up in the last (f1/p1) field of the
+ * format macro. */
+/* break */
+#define BREAK_F(im)                    F15(im)
+/* chk */
+#define CHK_S(f2,im)                   M21(f2,im)
+#define CHKF_A_NC(f1,im)               M23(0x6,im,f1)
+#define CHKF_A_CLR(f1,im)              M23(0x7,im,f1)
+/* fabs */
+/* built on fmerge.s with register 0 as first source */
+#define FABS(f1,f3)                    FMERGE_S(f1,0,f3)
+/* fadd */
+/* built on fma with register 1 as multiplier */
+#define FADD(f1,f3,f2)                 FMA(f1,f3,1,f2)
+#define FADD_S(f1,f3,f2)               FMA_S(f1,f3,1,f2)
+#define FADD_D(f1,f3,f2)               FMA_D(f1,f3,1,f2)
+/* famax */
+#define FAMAX(f1,f2,f3)                        F8(0,SF_S0,0x17,f3,f2,f1)
+/* famin */
+#define FAMIN(f1,f2,f3)                        F8(0,SF_S0,0x16,f3,f2,f1)
+/* fand */
+#define FAND(f1,f2,f3)                 F9(0,0x2c,f3,f2,f1)
+/* fandcm */
+#define FANDCM(f1,f2,f3)               F9(0,0x2d,f3,f2,f1)
+/* fchkf */
+#define FCHKF(im)                      F14(SF_S0,im)
+/* fclass */
+/* the _UNC form sets the ta field to 1 */
+#define FCLASS_M(p1,p2,f2,fc)          F5(p2,fc,f2,0,p1)
+#define FCLASS_M_UNC(p1,p2,f2,fc)      F5(p2,fc,f2,1,p1)
+/* fclrf */
+#define FCLRF()                                F13(SF_S0,5)
+/* fcmp */
+/* relation is selected by the (rb, ra) pair: eq (0,0), lt (1,0),
+ * le (0,1), unord (1,1); the _UNC forms set the ta field to 1 */
+#define FCMP_EQ(p1,p2,f2,f3)           F4(0,SF_S0,0,p2,f3,f2,0,p1)
+#define FCMP_LT(p1,p2,f2,f3)           F4(1,SF_S0,0,p2,f3,f2,0,p1)
+#define FCMP_LE(p1,p2,f2,f3)           F4(0,SF_S0,1,p2,f3,f2,0,p1)
+#define FCMP_UNORD(p1,p2,f2,f3)                F4(1,SF_S0,1,p2,f3,f2,0,p1)
+#define FCMP_EQ_UNC(p1,p2,f2,f3)       F4(0,SF_S0,0,p2,f3,f2,1,p1)
+#define FCMP_LT_UNC(p1,p2,f2,f3)       F4(1,SF_S0,0,p2,f3,f2,1,p1)
+#define FCMP_LE_UNC(p1,p2,f2,f3)       F4(0,SF_S0,1,p2,f3,f2,1,p1)
+#define FCMP_UNORD_UNC(p1,p2,f2,f3)    F4(1,SF_S0,1,p2,f3,f2,1,p1)
+/* fcvt.fx */
+#define FCVT_FX(f1,f2)                 F10(0,SF_S0,0x18,f2,f1)
+#define FCVT_FXU(f1,f2)                        F10(0,SF_S0,0x19,f2,f1)
+#define FCVT_FX_TRUNC(f1,f2)           F10(0,SF_S0,0x1a,f2,f1)
+#define FCVT_FXU_TRUNC(f1,f2)          F10(0,SF_S0,0x1b,f2,f1)
+/* fcvt.xf */
+/* F11() takes (x6, f2, f1): the source goes in the f2 field and the
+ * destination in the f1 field, like the FCVT_FX macros do through F10 */
+#define FCVT_XF(f1,f2)                 F11(0x1c,f2,f1)
+/* fcvt.xuf */
+/* expressed as an fma with register 1 as multiplier and register 0 as
+ * addend */
+#define FCVT_XUF(f1,f3)                        FMA(f1,f3,1,0)
+/* fma */
+/* Arguments are (f1, f3, f4, f2) and F1() takes its register fields as
+ * (f4, f3, f2, f1).  FADD/FMPY pass register 1 as f4 and register 0 as
+ * f2 respectively, so f2 is the addend; it must reach the f2 field
+ * (passing f3 twice made FADD ignore its second operand).
+ * The _p forms additionally take the sf field and a predicate. */
+#define FMA(f1,f3,f4,f2)               F1(0x8,0,SF_S0,f4,f3,f2,f1)
+#define FMA_p(f1,f3,f4,f2,sf,_p)       F1_(_jit,_p,0x8,0,sf,f4,f3,f2,f1)
+#define FMA_S(f1,f3,f4,f2)             F1(0x8,1,SF_S0,f4,f3,f2,f1)
+#define FMA_S_p(f1,f3,f4,f2,sf,_p)     F1_(_jit,_p,0x8,1,sf,f4,f3,f2,f1)
+#define FMA_D(f1,f3,f4,f2)             F1(0x9,0,SF_S0,f4,f3,f2,f1)
+#define FMA_D_p(f1,f3,f4,f2,sf,_p)     F1_(_jit,_p,0x9,0,sf,f4,f3,f2,f1)
+/* fmax */
+#define FMAX(f1,f2,f3)                 F8(0,SF_S0,0x15,f3,f2,f1)
+/* fmerge */
+/* x6 0x10 / 0x11 / 0x12 select the .s / .ns / .se variants */
+#define FMERGE_S(f1,f2,f3)             F9(0,0x10,f3,f2,f1)
+#define FMERGE_NS(f1,f2,f3)            F9(0,0x11,f3,f2,f1)
+#define FMERGE_SE(f1,f2,f3)            F9(0,0x12,f3,f2,f1)
+/* fmin */
+#define FMIN(f1,f2,f3)                 F8(0,SF_S0,0x14,f3,f2,f1)
+/* fmix */
+#define FMIX_LR(f1,f2,f3)              F9(0,0x39,f3,f2,f1)
+#define FMIX_R(f1,f2,f3)               F9(0,0x3a,f3,f2,f1)
+#define FMIX_L(f1,f2,f3)               F9(0,0x3b,f3,f2,f1)
+/* fmpy */
+/* built on the fma macros with register 0 as the f2 argument */
+#define FMPY(f1,f3,f4)                 FMA(f1,f3,f4,0)
+#define FMPY_p(f1,f3,f4,sf,_p)         FMA_p(f1,f3,f4,0,sf,_p)
+#define FMPY_S(f1,f3,f4)               FMA_S(f1,f3,f4,0)
+#define FMPY_S_p(f1,f3,f4,sf,_p)       FMA_S_p(f1,f3,f4,0,sf,_p)
+#define FMPY_D(f1,f3,f4)               FMA_D(f1,f3,f4,0)
+#define FMPY_D_p(f1,f3,f4,sf,_p)       FMA_D_p(f1,f3,f4,0,sf,_p)
+/* fms */
+/* Arguments are (f1, f3, f4, f2) and F1() takes its register fields as
+ * (f4, f3, f2, f1); the f2 argument must reach the f2 field (FSUB
+ * passes its second operand there), not be replaced by f3. */
+#define FMS(f1,f3,f4,f2)               F1(0xa,0,SF_S0,f4,f3,f2,f1)
+#define FMS_S(f1,f3,f4,f2)             F1(0xa,1,SF_S0,f4,f3,f2,f1)
+#define FMS_D(f1,f3,f4,f2)             F1(0xb,0,SF_S0,f4,f3,f2,f1)
+/* fneg */
+#define FNEG(f1,f3)                    FMERGE_NS(f1,f3,f3)
+/* fnegabs */
+#define FNEGABS(f1,f3)                 FMERGE_NS(f1,0,f3)
+/* fnma */
+/* same operand layout as fms; the _p form additionally takes the sf
+ * field and a predicate */
+#define FNMA(f1,f3,f4,f2)              F1(0xc,0,SF_S0,f4,f3,f2,f1)
+#define FNMA_p(f1,f3,f4,f2,sf,_p)      F1_(_jit,_p,0xc,0,sf,f4,f3,f2,f1)
+#define FNMA_S(f1,f3,f4,f2)            F1(0xc,1,SF_S0,f4,f3,f2,f1)
+#define FNMA_D(f1,f3,f4,f2)            F1(0xd,0,SF_S0,f4,f3,f2,f1)
+/* fnmpy */
+#define FNMPY(f1,f3,f4)                        FNMA(f1,f3,f4,0)
+/* fnorm */
+/* expressed as an fma with register 1 as multiplier and register 0 as
+ * addend */
+#define FNORM(f1,f3)                   FMA(f1,f3,1,0)
+/* for */
+#define FOR(f1,f2,f3)                  F9(0,0x2e,f3,f2,f1)
+/* The "FP" macros below use the same x6 values as their scalar
+ * counterparts and differ by passing 1 instead of 0 as the op
+ * argument of F8/F9/F10. */
+/* fpabs */
+#define FPABS(f1,f3)                   FPMERGE_S(f1,0,f3)
+/* fpack */
+#define FPACK(f1,f2,f3)                        F9(0,0x28,f3,f2,f1)
+/* fpamax */
+#define FPAMAX(f1,f2,f3)               F8(1,SF_S0,0x17,f3,f2,f1)
+/* fpamin */
+#define FPAMIN(f1,f2,f3)               F8(1,SF_S0,0x16,f3,f2,f1)
+/* fpcmp */
+/* unlike FCMP_xx these take a register (f1) as first argument, not a
+ * predicate pair; x6 0x30-0x33 are eq/lt/le/unord and 0x34-0x37 the
+ * negated relations */
+#define FPCMP_EQ(f1,f2,f3)             F8(1,SF_S0,0x30,f3,f2,f1)
+#define FPCMP_LT(f1,f2,f3)             F8(1,SF_S0,0x31,f3,f2,f1)
+#define FPCMP_LE(f1,f2,f3)             F8(1,SF_S0,0x32,f3,f2,f1)
+#define FPCMP_UNORD(f1,f2,f3)          F8(1,SF_S0,0x33,f3,f2,f1)
+#define FPCMP_NEQ(f1,f2,f3)            F8(1,SF_S0,0x34,f3,f2,f1)
+#define FPCMP_NLT(f1,f2,f3)            F8(1,SF_S0,0x35,f3,f2,f1)
+#define FPCMP_NLE(f1,f2,f3)            F8(1,SF_S0,0x36,f3,f2,f1)
+#define FPCMP_ORD(f1,f2,f3)            F8(1,SF_S0,0x37,f3,f2,f1)
+/* fpcvt.fx */
+#define FPCVT_FX(f1,f2)                        F10(1,SF_S0,0x18,f2,f1)
+#define FPCVT_FXU(f1,f2)               F10(1,SF_S0,0x19,f2,f1)
+#define FPCVT_FX_TRUNC(f1,f2)          F10(1,SF_S0,0x1a,f2,f1)
+#define FPCVT_FXU_TRUNC(f1,f2)         F10(1,SF_S0,0x1b,f2,f1)
+/* fpma */
+/* Arguments are (f1, f3, f4, f2) and F1() takes its register fields as
+ * (f4, f3, f2, f1); the f2 argument must reach the f2 field (FPMPY
+ * passes register 0 there), not be replaced by f3.  The same applies
+ * to FPMS and FPNMA below. */
+#define FPMA(f1,f3,f4,f2)              F1(0x9,1,SF_S0,f4,f3,f2,f1)
+/* fpmax */
+#define FPMAX(f1,f2,f3)                        F8(1,SF_S0,0x15,f3,f2,f1)
+/* fpmerge */
+#define FPMERGE_S(f1,f2,f3)            F9(1,0x10,f3,f2,f1)
+#define FPMERGE_NS(f1,f2,f3)           F9(1,0x11,f3,f2,f1)
+#define FPMERGE_SE(f1,f2,f3)           F9(1,0x12,f3,f2,f1)
+/* fpmin */
+#define FPMIN(f1,f2,f3)                        F8(1,SF_S0,0x14,f3,f2,f1)
+/* fpmpy */
+#define FPMPY(f1,f3,f4)                        FPMA(f1,f3,f4,0)
+/* fpms */
+#define FPMS(f1,f3,f4,f2)              F1(0xb,1,SF_S0,f4,f3,f2,f1)
+/* fpneg */
+#define FPNEG(f1,f3)                   FPMERGE_NS(f1,f3,f3)
+/* fpnegabs */
+#define FPNEGABS(f1,f3)                        FPMERGE_NS(f1,0,f3)
+/* fpnma */
+#define FPNMA(f1,f3,f4,f2)             F1(0xd,1,SF_S0,f4,f3,f2,f1)
+/* fpnmpy */
+#define FPNMPY(f1,f3,f4)               FPNMA(f1,f3,f4,0)
+/* fprcpa */
+/* the reciprocal / reciprocal-square-root approximations take a
+ * register (f1) and a predicate (p2) as their first two arguments; the
+ * "FP" forms pass 1 as op, the scalar forms 0 */
+#define FPRCPA(f1,p2,f2,f3)            F6(1,SF_S0,p2,f3,f2,f1)
+/* fprsqrta */
+#define FPRSQRTA(f1,p2,f3)             F7(1,SF_S0,p2,f3,f1)
+/* frcpa */
+#define FRCPA(f1,p2,f2,f3)             F6(0,SF_S0,p2,f3,f2,f1)
+/* frsqrta */
+#define FRSQRTA(f1,p2,f3)              F7(0,SF_S0,p2,f3,f1)
+/* fselect */
+#define FSELECT(f1,f3,f4,f2)           F3(f4,f3,f2,f1)
+/* fsetc: note the (amsk, omsk) argument order is the reverse of F12's */
+#define FSETC(amsk,omsk)               F12(SF_S0,4,omsk,amsk)
+/* fsub */
+/* built on fms with register 1 as multiplier */
+#define FSUB(f1,f3,f2)                 FMS(f1,f3,1,f2)
+#define FSUB_S(f1,f3,f2)               FMS_S(f1,f3,1,f2)
+#define FSUB_D(f1,f3,f2)               FMS_D(f1,f3,1,f2)
+/* fswap */
+#define FSWAP(f1,f2,f3)                        F9(0,0x34,f3,f2,f1)
+#define FSWAP_NL(f1,f2,f3)             F9(0,0x35,f3,f2,f1)
+#define FSWAP_NR(f1,f2,f3)             F9(0,0x36,f3,f2,f1)
+/* fsxt */
+#define FSXT_R(f1,f2,f3)               F9(0,0x3c,f3,f2,f1)
+#define FSXT_L(f1,f2,f3)               F9(0,0x3d,f3,f2,f1)
+/* fxor */
+#define FXOR(f1,f2,f3)                 F9(0,0x2f,f3,f2,f1)
+/* getf */
+/* general register r1 is the first argument, FP register f2 the
+ * second; x6 0x1c-0x1f select sig / exp / s / d */
+#define GETF_S(r1,f2)                  M19(0x1e,f2,r1)
+#define GETF_D(r1,f2)                  M19(0x1f,f2,r1)
+#define GETF_EXP(r1,f2)                        M19(0x1d,f2,r1)
+#define GETF_SIG(r1,f2)                        M19(0x1c,f2,r1)
+/* hint */
+/* shares F16 with NOP_F; hint passes y = 1 */
+#define HINT_F(im)                     F16(1,im)
+/* invala */
+#define INVALAF_E(f1)                  M27(f1)
+/* ldf */
+/* Floating-point loads.  Three addressing forms are provided:
+ *   LDFx(f1,r3)        through M9  (no increment argument)
+ *   LDXFx(f1,r3,r2)    through M7  (extra register argument r2)
+ *   LDFx_inc(f1,f3,im) through M8  (extra immediate argument)
+ * All pass LD_NONE in the ht field.  The x6 value is built from a size
+ * code (0 = e, 1 = 8, 2 = s, 3 = d) plus a variant offset: 0x04 for
+ * _S, 0x08 for _A, 0x0c for _SA, 0x20 for _C_CLR and 0x24 for _C_NC;
+ * 0x1b is the fill form. */
+#define LDFS(f1,r3)                    M9(0x02,LD_NONE,r3,f1)
+#define LDFD(f1,r3)                    M9(0x03,LD_NONE,r3,f1)
+#define LDF8(f1,r3)                    M9(0x01,LD_NONE,r3,f1)
+#define LDFE(f1,r3)                    M9(0x00,LD_NONE,r3,f1)
+#define LDFS_S(f1,r3)                  M9(0x06,LD_NONE,r3,f1)
+#define LDFD_S(f1,r3)                  M9(0x07,LD_NONE,r3,f1)
+#define LDF8_S(f1,r3)                  M9(0x05,LD_NONE,r3,f1)
+#define LDFE_S(f1,r3)                  M9(0x04,LD_NONE,r3,f1)
+#define LDFS_A(f1,r3)                  M9(0x0a,LD_NONE,r3,f1)
+#define LDFD_A(f1,r3)                  M9(0x0b,LD_NONE,r3,f1)
+#define LDF8_A(f1,r3)                  M9(0x09,LD_NONE,r3,f1)
+#define LDFE_A(f1,r3)                  M9(0x08,LD_NONE,r3,f1)
+#define LDFS_SA(f1,r3)                 M9(0x0e,LD_NONE,r3,f1)
+#define LDFD_SA(f1,r3)                 M9(0x0f,LD_NONE,r3,f1)
+#define LDF8_SA(f1,r3)                 M9(0x0d,LD_NONE,r3,f1)
+#define LDFE_SA(f1,r3)                 M9(0x0c,LD_NONE,r3,f1)
+#define LDF_FILL(f1,r3)                        M9(0x1b,LD_NONE,r3,f1)
+#define LDFS_C_CLR(f1,r3)              M9(0x22,LD_NONE,r3,f1)
+#define LDFD_C_CLR(f1,r3)              M9(0x23,LD_NONE,r3,f1)
+#define LDF8_C_CLR(f1,r3)              M9(0x21,LD_NONE,r3,f1)
+#define LDFE_C_CLR(f1,r3)              M9(0x20,LD_NONE,r3,f1)
+#define LDFS_C_NC(f1,r3)               M9(0x26,LD_NONE,r3,f1)
+#define LDFD_C_NC(f1,r3)               M9(0x27,LD_NONE,r3,f1)
+#define LDF8_C_NC(f1,r3)               M9(0x25,LD_NONE,r3,f1)
+#define LDFE_C_NC(f1,r3)               M9(0x24,LD_NONE,r3,f1)
+/* register-increment forms */
+#define LDXFS(f1,r3,r2)                        M7(0x02,LD_NONE,r3,r2,f1)
+#define LDXFD(f1,r3,r2)                        M7(0x03,LD_NONE,r3,r2,f1)
+#define LDXF8(f1,r3,r2)                        M7(0x01,LD_NONE,r3,r2,f1)
+#define LDXFE(f1,r3,r2)                        M7(0x00,LD_NONE,r3,r2,f1)
+#define LDXFS_S(f1,r3,r2)              M7(0x06,LD_NONE,r3,r2,f1)
+#define LDXFD_S(f1,r3,r2)              M7(0x07,LD_NONE,r3,r2,f1)
+#define LDXF8_S(f1,r3,r2)              M7(0x05,LD_NONE,r3,r2,f1)
+#define LDXFE_S(f1,r3,r2)              M7(0x04,LD_NONE,r3,r2,f1)
+#define LDXFS_A(f1,r3,r2)              M7(0x0a,LD_NONE,r3,r2,f1)
+#define LDXFD_A(f1,r3,r2)              M7(0x0b,LD_NONE,r3,r2,f1)
+#define LDXF8_A(f1,r3,r2)              M7(0x09,LD_NONE,r3,r2,f1)
+#define LDXFE_A(f1,r3,r2)              M7(0x08,LD_NONE,r3,r2,f1)
+#define LDXFS_SA(f1,r3,r2)             M7(0x0e,LD_NONE,r3,r2,f1)
+#define LDXFD_SA(f1,r3,r2)             M7(0x0f,LD_NONE,r3,r2,f1)
+#define LDXF8_SA(f1,r3,r2)             M7(0x0d,LD_NONE,r3,r2,f1)
+#define LDXFE_SA(f1,r3,r2)             M7(0x0c,LD_NONE,r3,r2,f1)
+/* NOTE(review): x6 0x1b matches LDF_FILL and LDF_FILL_inc, so the name
+ * was probably meant to be LDXF_FILL -- confirm before renaming */
+#define LDXFS_FILL(f1,r3,r2)           M7(0x1b,LD_NONE,r3,r2,f1)
+#define LDXFS_C_CLR(f1,r3,r2)          M7(0x22,LD_NONE,r3,r2,f1)
+#define LDXFD_C_CLR(f1,r3,r2)          M7(0x23,LD_NONE,r3,r2,f1)
+#define LDXF8_C_CLR(f1,r3,r2)          M7(0x21,LD_NONE,r3,r2,f1)
+#define LDXFE_C_CLR(f1,r3,r2)          M7(0x20,LD_NONE,r3,r2,f1)
+#define LDXFS_C_NC(f1,r3,r2)           M7(0x26,LD_NONE,r3,r2,f1)
+#define LDXFD_C_NC(f1,r3,r2)           M7(0x27,LD_NONE,r3,r2,f1)
+#define LDXF8_C_NC(f1,r3,r2)           M7(0x25,LD_NONE,r3,r2,f1)
+#define LDXFE_C_NC(f1,r3,r2)           M7(0x24,LD_NONE,r3,r2,f1)
+/* immediate-increment forms; the parameter spelled f3 here is passed
+ * in M8's r3 position.  No _SA variants are defined for this form. */
+#define LDFS_inc(f1,f3,im)             M8(0x02,LD_NONE,f3,im,f1)
+#define LDFD_inc(f1,f3,im)             M8(0x03,LD_NONE,f3,im,f1)
+#define LDF8_inc(f1,f3,im)             M8(0x01,LD_NONE,f3,im,f1)
+#define LDFE_inc(f1,f3,im)             M8(0x00,LD_NONE,f3,im,f1)
+#define LDFS_S_inc(f1,f3,im)           M8(0x06,LD_NONE,f3,im,f1)
+#define LDFD_S_inc(f1,f3,im)           M8(0x07,LD_NONE,f3,im,f1)
+#define LDF8_S_inc(f1,f3,im)           M8(0x05,LD_NONE,f3,im,f1)
+#define LDFE_S_inc(f1,f3,im)           M8(0x04,LD_NONE,f3,im,f1)
+#define LDFS_A_inc(f1,f3,im)           M8(0x0a,LD_NONE,f3,im,f1)
+#define LDFD_A_inc(f1,f3,im)           M8(0x0b,LD_NONE,f3,im,f1)
+#define LDF8_A_inc(f1,f3,im)           M8(0x09,LD_NONE,f3,im,f1)
+#define LDFE_A_inc(f1,f3,im)           M8(0x08,LD_NONE,f3,im,f1)
+#define LDF_FILL_inc(f1,f3,im)         M8(0x1b,LD_NONE,f3,im,f1)
+#define LDFS_C_CLR_inc(f1,f3,im)       M8(0x22,LD_NONE,f3,im,f1)
+#define LDFD_C_CLR_inc(f1,f3,im)       M8(0x23,LD_NONE,f3,im,f1)
+#define LDF8_C_CLR_inc(f1,f3,im)       M8(0x21,LD_NONE,f3,im,f1)
+#define LDFE_C_CLR_inc(f1,f3,im)       M8(0x20,LD_NONE,f3,im,f1)
+#define LDFS_C_NC_inc(f1,f3,im)                M8(0x26,LD_NONE,f3,im,f1)
+#define LDFD_C_NC_inc(f1,f3,im)                M8(0x27,LD_NONE,f3,im,f1)
+#define LDF8_C_NC_inc(f1,f3,im)                M8(0x25,LD_NONE,f3,im,f1)
+#define LDFE_C_NC_inc(f1,f3,im)                M8(0x24,LD_NONE,f3,im,f1)
+/* ldfp */
+/* Floating-point pair loads into (f1, f2) from the address in r3.
+ * LDFPx goes through M11 and LDIFPx through M12; both pass LD_NONE in
+ * the ht field.  x6 uses the size codes 1 = 8, 2 = s, 3 = d plus the
+ * variant offsets 0x04 (_S), 0x08 (_A), 0x0c (_SA), 0x20 (_C_CLR) and
+ * 0x24 (_C_NC); there is no "e" size for pairs.
+ * NOTE(review): the LDIFPx macros take no increment argument, so the
+ * increment is presumably implied by the M12 format -- confirm. */
+#define LDFPS(f1,f2,r3)                        M11(0x02,LD_NONE,r3,f2,f1)
+#define LDFPD(f1,f2,r3)                        M11(0x03,LD_NONE,r3,f2,f1)
+#define LDFP8(f1,f2,r3)                        M11(0x01,LD_NONE,r3,f2,f1)
+#define LDFPS_S(f1,f2,r3)              M11(0x06,LD_NONE,r3,f2,f1)
+#define LDFPD_S(f1,f2,r3)              M11(0x07,LD_NONE,r3,f2,f1)
+#define LDFP8_S(f1,f2,r3)              M11(0x05,LD_NONE,r3,f2,f1)
+#define LDFPS_A(f1,f2,r3)              M11(0x0a,LD_NONE,r3,f2,f1)
+#define LDFPD_A(f1,f2,r3)              M11(0x0b,LD_NONE,r3,f2,f1)
+#define LDFP8_A(f1,f2,r3)              M11(0x09,LD_NONE,r3,f2,f1)
+#define LDFPS_SA(f1,f2,r3)             M11(0x0e,LD_NONE,r3,f2,f1)
+#define LDFPD_SA(f1,f2,r3)             M11(0x0f,LD_NONE,r3,f2,f1)
+#define LDFP8_SA(f1,f2,r3)             M11(0x0d,LD_NONE,r3,f2,f1)
+#define LDFPS_C_CLR(f1,f2,r3)          M11(0x22,LD_NONE,r3,f2,f1)
+#define LDFPD_C_CLR(f1,f2,r3)          M11(0x23,LD_NONE,r3,f2,f1)
+#define LDFP8_C_CLR(f1,f2,r3)          M11(0x21,LD_NONE,r3,f2,f1)
+#define LDFPS_C_NC(f1,f2,r3)           M11(0x26,LD_NONE,r3,f2,f1)
+#define LDFPD_C_NC(f1,f2,r3)           M11(0x27,LD_NONE,r3,f2,f1)
+#define LDFP8_C_NC(f1,f2,r3)           M11(0x25,LD_NONE,r3,f2,f1)
+#define LDIFPS(f1,f2,r3)               M12(0x02,LD_NONE,r3,f2,f1)
+#define LDIFPD(f1,f2,r3)               M12(0x03,LD_NONE,r3,f2,f1)
+#define LDIFP8(f1,f2,r3)               M12(0x01,LD_NONE,r3,f2,f1)
+#define LDIFPS_S(f1,f2,r3)             M12(0x06,LD_NONE,r3,f2,f1)
+#define LDIFPD_S(f1,f2,r3)             M12(0x07,LD_NONE,r3,f2,f1)
+#define LDIFP8_S(f1,f2,r3)             M12(0x05,LD_NONE,r3,f2,f1)
+#define LDIFPS_A(f1,f2,r3)             M12(0x0a,LD_NONE,r3,f2,f1)
+#define LDIFPD_A(f1,f2,r3)             M12(0x0b,LD_NONE,r3,f2,f1)
+#define LDIFP8_A(f1,f2,r3)             M12(0x09,LD_NONE,r3,f2,f1)
+#define LDIFPS_SA(f1,f2,r3)            M12(0x0e,LD_NONE,r3,f2,f1)
+#define LDIFPD_SA(f1,f2,r3)            M12(0x0f,LD_NONE,r3,f2,f1)
+#define LDIFP8_SA(f1,f2,r3)            M12(0x0d,LD_NONE,r3,f2,f1)
+#define LDIFPS_C_CLR(f1,f2,r3)         M12(0x22,LD_NONE,r3,f2,f1)
+#define LDIFPD_C_CLR(f1,f2,r3)         M12(0x23,LD_NONE,r3,f2,f1)
+#define LDIFP8_C_CLR(f1,f2,r3)         M12(0x21,LD_NONE,r3,f2,f1)
+#define LDIFPS_C_NC(f1,f2,r3)          M12(0x26,LD_NONE,r3,f2,f1)
+#define LDIFPD_C_NC(f1,f2,r3)          M12(0x27,LD_NONE,r3,f2,f1)
+#define LDIFP8_C_NC(f1,f2,r3)          M12(0x25,LD_NONE,r3,f2,f1)
+/* mov - Move Floating-point Register */
+/* expressed as fmerge.s with the source given twice */
+#define MOVF(f1,f3)                    FMERGE_S(f1,f3,f3)
+/* nop */
+/* shares F16 with HINT_F; nop passes y = 0 */
+#define NOP_F(im)                      F16(0,im)
+/* setf */
+/* FP register f1 is the first argument, general register r2 the
+ * second; x6 0x1c-0x1f select sig / exp / s / d, mirroring GETF_xx */
+#define SETF_S(f1,r2)                  M18(0x1e,r2,f1)
+#define SETF_D(f1,r2)                  M18(0x1f,r2,f1)
+#define SETF_EXP(f1,r2)                        M18(0x1d,r2,f1)
+#define SETF_SIG(f1,r2)                        M18(0x1c,r2,f1)
+/* stf */
+/* Stores take the address register r3 first and the FP register f2
+ * second, and pass ST_NONE in the ht field.  The plain forms use M13,
+ * which is not defined in this part of the file; the _inc forms use
+ * M10 with an extra immediate.  x6 is 0x30 plus the size code
+ * (0 = e, 1 = 8, 2 = s, 3 = d); 0x3b is the spill form. */
+#define STFS(r3,f2)                    M13(0x32,ST_NONE,r3,f2)
+#define STFD(r3,f2)                    M13(0x33,ST_NONE,r3,f2)
+#define STF8(r3,f2)                    M13(0x31,ST_NONE,r3,f2)
+#define STFE(r3,f2)                    M13(0x30,ST_NONE,r3,f2)
+#define STF_SPILL(r3,f2)               M13(0x3b,ST_NONE,r3,f2)
+#define STFS_inc(r3,f2,im)             M10(0x32,ST_NONE,r3,f2,im)
+#define STFD_inc(r3,f2,im)             M10(0x33,ST_NONE,r3,f2,im)
+#define STF8_inc(r3,f2,im)             M10(0x31,ST_NONE,r3,f2,im)
+#define STFE_inc(r3,f2,im)             M10(0x30,ST_NONE,r3,f2,im)
+#define STF_SPILL_inc(r3,f2,im)                M10(0x3b,ST_NONE,r3,f2,im)
+/* xma */
+/* F2's first argument (x2) selects the variant: 0 for .l (also used
+ * for .lu), 3 for .h and 2 for .hu */
+#define XMA_L(f1,f3,f4,f2)             F2(0,f4,f3,f2,f1)
+#define XMA_LU(f1,f3,f4,f2)            XMA_L(f1,f3,f4,f2)
+#define XMA_H(f1,f3,f4,f2)             F2(3,f4,f3,f2,f1)
+#define XMA_HU(f1,f3,f4,f2)            F2(2,f4,f3,f2,f1)
+/* xmpy */
+/* built on the xma macros with register 0 as the f2 argument */
+#define XMPY_L(f1,f3,f4)               XMA_L(f1,f3,f4,0)
+#define XMPY_LU(f1,f3,f4)              XMA_L(f1,f3,f4,0)
+#define XMPY_H(f1,f3,f4)               XMA_H(f1,f3,f4,0)
+#define XMPY_HU(f1,f3,f4)              XMA_HU(f1,f3,f4,0)
+
+#define movr_f(r0,r1)                  movr_d(r0,r1)
+#define movr_d(r0,r1)                  MOVF(r0,r1)
+#define movi_f(r0,i0)                  ldi_f(r0,(jit_word_t)i0)
+#define movi_d(r0,i0)                  ldi_d(r0,(jit_word_t)i0)
+
+#define absr_f(r0,r1)                  absr_d(r0,r1)
+#define absr_d(r0,r1)                  FABS(r0,r1)
+#define negr_f(r0,r1)                  negr_d(r0,r1)
+#define negr_d(r0,r1)                  FNEG(r0,r1)
+#define sqrtr_f(r0,r1)                 _sqrtr_f(_jit,r0,r1)
+static void _sqrtr_f(jit_state_t*,jit_int32_t,jit_int32_t);
+#define sqrtr_d(r0,r1)                 _sqrtr_d(_jit,r0,r1)
+static void _sqrtr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#define extr_f_d(r0,r1)                        /*FNORM(r0,r1)*/
+#define extr_d_f(r0,r1)                        /*FNORM(r0,r1)*/
+#define extr_f(r0,r1)                  extr_d(r0,r1)
+#define extr_d(r0,r1)                  _extr_d(_jit,r0,r1)
+static void _extr_d(jit_state_t*,jit_int32_t,jit_int32_t);
+#define truncr_f_i(r0,r1)              truncr_d_l(r0,r1)
+#define truncr_d_i(r0,r1)              truncr_d_l(r0,r1)
+#define truncr_f_l(r0,r1)              truncr_d_l(r0,r1)
+#define truncr_d_l(r0,r1)              _truncr_d_l(_jit,r0,r1)
+static void _truncr_d_l(jit_state_t*,jit_int32_t,jit_int32_t);
+#define addr_f(r0,r1,r2)               FADD_S(r0,r1,r2) /* register forms of add/sub/mul map to one instruction macro */
+#define addi_f(r0,r1,i0)               _addi_f(_jit,r0,r1,i0) /* immediate forms take a pointer to the constant */
+static void _addi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define addr_d(r0,r1,r2)               FADD_D(r0,r1,r2)
+#define addi_d(r0,r1,i0)               _addi_d(_jit,r0,r1,i0)
+static void _addi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define subr_f(r0,r1,r2)               FSUB_S(r0,r1,r2)
+#define subi_f(r0,r1,i0)               _subi_f(_jit,r0,r1,i0)
+static void _subi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define subr_d(r0,r1,r2)               FSUB_D(r0,r1,r2)
+#define subi_d(r0,r1,i0)               _subi_d(_jit,r0,r1,i0)
+static void _subi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define mulr_f(r0,r1,r2)               FMPY_S(r0,r1,r2)
+#define muli_f(r0,r1,i0)               _muli_f(_jit,r0,r1,i0)
+static void _muli_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define mulr_d(r0,r1,r2)               FMPY_D(r0,r1,r2)
+#define muli_d(r0,r1,i0)               _muli_d(_jit,r0,r1,i0)
+static void _muli_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define divr_f(r0,r1,r2)               _divr_f(_jit,r0,r1,r2) /* division is a multi-instruction sequence, hence a function */
+static void _divr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi_f(r0,r1,i0)               _divi_f(_jit,r0,r1,i0)
+static void _divi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define divr_d(r0,r1,r2)               _divr_d(_jit,r0,r1,r2)
+static void _divr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define divi_d(r0,r1,i0)               _divi_d(_jit,r0,r1,i0)
+static void _divi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ltr_f(r0,r1,r2)                        ltr_d(r0,r1,r2) /* register compares: float shares the double emitter */
+#define ltr_d(r0,r1,r2)                        _ltr_d(_jit,r0,r1,r2)
+static void _ltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lti_f(r0,r1,i0)                        _lti_f(_jit,r0,r1,i0) /* immediate compares take a pointer to the constant */
+static void _lti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define lti_d(r0,r1,i0)                        _lti_d(_jit,r0,r1,i0)
+static void _lti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ler_f(r0,r1,r2)                        ler_d(r0,r1,r2)
+#define ler_d(r0,r1,r2)                        _ler_d(_jit,r0,r1,r2)
+static void _ler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define lei_f(r0,r1,i0)                        _lei_f(_jit,r0,r1,i0)
+static void _lei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define lei_d(r0,r1,i0)                        _lei_d(_jit,r0,r1,i0)
+static void _lei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define eqr_f(r0,r1,r2)                        eqr_d(r0,r1,r2)
+#define eqr_d(r0,r1,r2)                        _eqr_d(_jit,r0,r1,r2)
+static void _eqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define eqi_f(r0,r1,i0)                        _eqi_f(_jit,r0,r1,i0)
+static void _eqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define eqi_d(r0,r1,i0)                        _eqi_d(_jit,r0,r1,i0)
+static void _eqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ger_f(r0,r1,r2)                        ger_d(r0,r1,r2)
+#define ger_d(r0,r1,r2)                        _ger_d(_jit,r0,r1,r2)
+static void _ger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gei_f(r0,r1,i0)                        _gei_f(_jit,r0,r1,i0)
+static void _gei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define gei_d(r0,r1,i0)                        _gei_d(_jit,r0,r1,i0)
+static void _gei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+/* Greater-than compares.  _gtr_d() already swaps its operands internally
+ * (it emits "r2 < r1"), so the macro must pass r1 and r2 straight through;
+ * swapping them here as well would turn the test into "r1 < r2". */
+#define gtr_f(r0,r1,r2)                        gtr_d(r0,r1,r2)
+#define gtr_d(r0,r1,r2)                        _gtr_d(_jit,r0,r1,r2)
+static void _gtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define gti_f(r0,r1,i0)                        _gti_f(_jit,r0,r1,i0)
+static void _gti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define gti_d(r0,r1,i0)                        _gti_d(_jit,r0,r1,i0)
+static void _gti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ner_f(r0,r1,r2)                        ner_d(r0,r1,r2) /* same pattern as the ordered compares: _f register forms alias _d */
+#define ner_d(r0,r1,r2)                        _ner_d(_jit,r0,r1,r2)
+static void _ner_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define nei_f(r0,r1,i0)                        _nei_f(_jit,r0,r1,i0)
+static void _nei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define nei_d(r0,r1,i0)                        _nei_d(_jit,r0,r1,i0)
+static void _nei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define unltr_f(r0,r1,r2)              unltr_d(r0,r1,r2) /* "un" prefix: presumably "unordered or <relation>" */
+#define unltr_d(r0,r1,r2)              _unltr_d(_jit,r0,r1,r2)
+static void _unltr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define unlti_f(r0,r1,i0)              _unlti_f(_jit,r0,r1,i0)
+static void _unlti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define unlti_d(r0,r1,i0)              _unlti_d(_jit,r0,r1,i0)
+static void _unlti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define unler_f(r0,r1,r2)              unler_d(r0,r1,r2)
+#define unler_d(r0,r1,r2)              _unler_d(_jit,r0,r1,r2)
+static void _unler_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define unlei_f(r0,r1,i0)              _unlei_f(_jit,r0,r1,i0)
+static void _unlei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define unlei_d(r0,r1,i0)              _unlei_d(_jit,r0,r1,i0)
+static void _unlei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define uneqr_f(r0,r1,r2)              uneqr_d(r0,r1,r2)
+#define uneqr_d(r0,r1,r2)              _uneqr_d(_jit,r0,r1,r2)
+static void _uneqr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define uneqi_f(r0,r1,i0)              _uneqi_f(_jit,r0,r1,i0)
+static void _uneqi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define uneqi_d(r0,r1,i0)              _uneqi_d(_jit,r0,r1,i0)
+static void _uneqi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define unger_f(r0,r1,r2)              unger_d(r0,r1,r2)
+#define unger_d(r0,r1,r2)              _unger_d(_jit,r0,r1,r2)
+static void _unger_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ungei_f(r0,r1,i0)              _ungei_f(_jit,r0,r1,i0)
+static void _ungei_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define ungei_d(r0,r1,i0)              _ungei_d(_jit,r0,r1,i0)
+static void _ungei_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ungtr_f(r0,r1,r2)              ungtr_d(r0,r1,r2)
+#define ungtr_d(r0,r1,r2)              _ungtr_d(_jit,r0,r1,r2)
+static void _ungtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ungti_f(r0,r1,i0)              _ungti_f(_jit,r0,r1,i0)
+static void _ungti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define ungti_d(r0,r1,i0)              _ungti_d(_jit,r0,r1,i0)
+static void _ungti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ltgtr_f(r0,r1,r2)              ltgtr_d(r0,r1,r2) /* ltgt: less-than or greater-than */
+#define ltgtr_d(r0,r1,r2)              _ltgtr_d(_jit,r0,r1,r2)
+static void _ltgtr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ltgti_f(r0,r1,i0)              _ltgti_f(_jit,r0,r1,i0)
+static void _ltgti_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define ltgti_d(r0,r1,i0)              _ltgti_d(_jit,r0,r1,i0)
+static void _ltgti_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ordr_f(r0,r1,r2)               ordr_d(r0,r1,r2) /* ord/unord are built on FCMP_UNORD in the emitters */
+#define ordr_d(r0,r1,r2)               _ordr_d(_jit,r0,r1,r2)
+static void _ordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define ordi_f(r0,r1,i0)               _ordi_f(_jit,r0,r1,i0)
+static void _ordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define ordi_d(r0,r1,i0)               _ordi_d(_jit,r0,r1,i0)
+static void _ordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define unordr_f(r0,r1,r2)             unordr_d(r0,r1,r2)
+#define unordr_d(r0,r1,r2)             _unordr_d(_jit,r0,r1,r2)
+static void _unordr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define unordi_f(r0,r1,i0)             _unordi_f(_jit,r0,r1,i0)
+static void _unordi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_float32_t*);
+#define unordi_d(r0,r1,i0)             _unordi_d(_jit,r0,r1,i0)
+static void _unordi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_float64_t*);
+#define ldr_f(r0,r1)                   LDFS(r0,r1) /* register-address loads map to one instruction macro */
+#define ldi_f(r0,i0)                   _ldi_f(_jit,r0,i0) /* absolute-address form: i0 is a jit_word_t address */
+static void _ldi_f(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxr_f(r0,r1,r2)               LDXFS(r0,r1,r2)
+#define ldxi_f(r0,r1,i0)               _ldxi_f(_jit,r0,r1,i0) /* base register r1 plus offset i0 */
+static void _ldxi_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldr_d(r0,r1)                   LDFD(r0,r1)
+#define ldi_d(r0,i0)                   _ldi_d(_jit,r0,i0)
+static void _ldi_d(jit_state_t*,jit_int32_t,jit_word_t);
+#define ldxi_d(r0,r1,i0)               _ldxi_d(_jit,r0,r1,i0)
+static void _ldxi_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#define ldxr_d(r0,r1,r2)               LDXFD(r0,r1,r2)
+#define str_f(r0,r1)                   STFS(r0,r1) /* stores: address operand(s) first, value register last */
+#define sti_f(i0,r0)                   _sti_f(_jit,i0,r0)
+static void _sti_f(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_f(r0,r1,r2)               _stxr_f(_jit,r0,r1,r2)
+static void _stxr_f(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_f(i0,r0,r1)               _stxi_f(_jit,i0,r0,r1)
+static void _stxi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define str_d(r0,r1)                   STFD(r0,r1)
+#define sti_d(i0,r0)                   _sti_d(_jit,i0,r0)
+static void _sti_d(jit_state_t*,jit_word_t,jit_int32_t);
+#define stxr_d(r0,r1,r2)               _stxr_d(_jit,r0,r1,r2)
+static void _stxr_d(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#define stxi_d(i0,r0,r1)               _stxi_d(_jit,i0,r0,r1)
+static void _stxi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bltr_f(i0,r0,r1)               bltr_d(i0,r0,r1) /* FP compare-and-branch: i0 is the jit_word_t target operand */
+#define bltr_d(i0,r0,r1)               _bltr_d(_jit,i0,r0,r1) /* emitters return a jit_word_t (presumably the branch location, for patching) */
+static jit_word_t _bltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blti_f(i0,r0,i1)               _blti_f(_jit,i0,r0,i1) /* immediate forms take a pointer to the constant as i1 */
+static jit_word_t _blti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define blti_d(i0,r0,i1)               _blti_d(_jit,i0,r0,i1)
+static jit_word_t _blti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bler_f(i0,r0,r1)               bler_d(i0,r0,r1)
+#define bler_d(i0,r0,r1)               _bler_d(_jit,i0,r0,r1)
+static jit_word_t _bler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define blei_f(i0,r0,i1)               _blei_f(_jit,i0,r0,i1)
+static jit_word_t _blei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define blei_d(i0,r0,i1)               _blei_d(_jit,i0,r0,i1)
+static jit_word_t _blei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define beqr_f(i0,r0,r1)               beqr_d(i0,r0,r1)
+#define beqr_d(i0,r0,r1)               _beqr_d(_jit,i0,r0,r1)
+static jit_word_t _beqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define beqi_f(i0,r0,i1)               _beqi_f(_jit,i0,r0,i1)
+static jit_word_t _beqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define beqi_d(i0,r0,i1)               _beqi_d(_jit,i0,r0,i1)
+static jit_word_t _beqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bger_f(i0,r0,r1)               bger_d(i0,r0,r1)
+#define bger_d(i0,r0,r1)               _bger_d(_jit,i0,r0,r1)
+static jit_word_t _bger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgei_f(i0,r0,i1)               _bgei_f(_jit,i0,r0,i1)
+static jit_word_t _bgei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bgei_d(i0,r0,i1)               _bgei_d(_jit,i0,r0,i1)
+static jit_word_t _bgei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bgtr_f(i0,r0,r1)               bgtr_d(i0,r0,r1)
+#define bgtr_d(i0,r0,r1)               _bgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bgti_f(i0,r0,i1)               _bgti_f(_jit,i0,r0,i1)
+static jit_word_t _bgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bgti_d(i0,r0,i1)               _bgti_d(_jit,i0,r0,i1)
+static jit_word_t _bgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bner_f(i0,r0,r1)               bner_d(i0,r0,r1)
+#define bner_d(i0,r0,r1)               _bner_d(_jit,i0,r0,r1)
+static jit_word_t _bner_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bnei_f(i0,r0,i1)               _bnei_f(_jit,i0,r0,i1)
+static jit_word_t _bnei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bnei_d(i0,r0,i1)               _bnei_d(_jit,i0,r0,i1)
+static jit_word_t _bnei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+/* Unordered-aware FP compare-and-branch emitters (bunlt ... bunord).
+ * As with the ordered branches, the _f register forms alias the _d ones and
+ * the immediate forms take a pointer to the constant.  Each prototype is
+ * kept on a single "+" line so the hunk stays a well-formed diff. */
+#define bunltr_f(i0,r0,r1)             bunltr_d(i0,r0,r1)
+#define bunltr_d(i0,r0,r1)             _bunltr_d(_jit,i0,r0,r1)
+static jit_word_t _bunltr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bunlti_f(i0,r0,i1)             _bunlti_f(_jit,i0,r0,i1)
+static jit_word_t _bunlti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bunlti_d(i0,r0,i1)             _bunlti_d(_jit,i0,r0,i1)
+static jit_word_t _bunlti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bunler_f(i0,r0,r1)             bunler_d(i0,r0,r1)
+#define bunler_d(i0,r0,r1)             _bunler_d(_jit,i0,r0,r1)
+static jit_word_t _bunler_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bunlei_f(i0,r0,i1)             _bunlei_f(_jit,i0,r0,i1)
+static jit_word_t _bunlei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bunlei_d(i0,r0,i1)             _bunlei_d(_jit,i0,r0,i1)
+static jit_word_t _bunlei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define buneqr_f(i0,r0,r1)             buneqr_d(i0,r0,r1)
+#define buneqr_d(i0,r0,r1)             _buneqr_d(_jit,i0,r0,r1)
+static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define buneqi_f(i0,r0,i1)             _buneqi_f(_jit,i0,r0,i1)
+static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define buneqi_d(i0,r0,i1)             _buneqi_d(_jit,i0,r0,i1)
+static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bunger_f(i0,r0,r1)             bunger_d(i0,r0,r1)
+#define bunger_d(i0,r0,r1)             _bunger_d(_jit,i0,r0,r1)
+static jit_word_t _bunger_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bungei_f(i0,r0,i1)             _bungei_f(_jit,i0,r0,i1)
+static jit_word_t _bungei_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bungei_d(i0,r0,i1)             _bungei_d(_jit,i0,r0,i1)
+static jit_word_t _bungei_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bungtr_f(i0,r0,r1)             bungtr_d(i0,r0,r1)
+#define bungtr_d(i0,r0,r1)             _bungtr_d(_jit,i0,r0,r1)
+static jit_word_t _bungtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bungti_f(i0,r0,i1)             _bungti_f(_jit,i0,r0,i1)
+static jit_word_t _bungti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bungti_d(i0,r0,i1)             _bungti_d(_jit,i0,r0,i1)
+static jit_word_t _bungti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bltgtr_f(i0,r0,r1)             bltgtr_d(i0,r0,r1)
+#define bltgtr_d(i0,r0,r1)             _bltgtr_d(_jit,i0,r0,r1)
+static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bltgti_f(i0,r0,i1)             _bltgti_f(_jit,i0,r0,i1)
+static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bltgti_d(i0,r0,i1)             _bltgti_d(_jit,i0,r0,i1)
+static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bordr_f(i0,r0,r1)              bordr_d(i0,r0,r1)
+#define bordr_d(i0,r0,r1)              _bordr_d(_jit,i0,r0,r1)
+static jit_word_t _bordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bordi_f(i0,r0,i1)              _bordi_f(_jit,i0,r0,i1)
+static jit_word_t _bordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bordi_d(i0,r0,i1)              _bordi_d(_jit,i0,r0,i1)
+static jit_word_t _bordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#define bunordr_f(i0,r0,r1)            bunordr_d(i0,r0,r1)
+#define bunordr_d(i0,r0,r1)            _bunordr_d(_jit,i0,r0,r1)
+static jit_word_t _bunordr_d(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#define bunordi_f(i0,r0,i1)            _bunordi_f(_jit,i0,r0,i1)
+static jit_word_t _bunordi_f(jit_state_t*,jit_word_t,jit_int32_t,jit_float32_t*);
+#define bunordi_d(i0,r0,i1)            _bunordi_d(_jit,i0,r0,i1)
+static jit_word_t _bunordi_d(jit_state_t*,jit_word_t,jit_int32_t,jit_float64_t*);
+#endif
+
+#if CODE
+/* Encode an M7-format instruction (INST_M): major opcode 6 with bit 36 set,
+ * x6 at bit 30, hint at 28, r3 at 20, r2 at 13, f1 at 6 and the qualifying
+ * predicate in the low six bits.  r2/r3 are checked as sources; f1 and r3
+ * are recorded as written. */
+static void
+_M7(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t r2, jit_word_t f1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((ht &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+    TSTREG2(r2, r3);
+    word  = (6L << 37) | (1L << 36);
+    word |= (x6 << 30) | (ht << 28);
+    word |= (r3 << 20) | (r2 << 13) | (f1 << 6) | _p;
+    inst(word, INST_M);
+    SETFREG(f1);
+    SETREG(r3);
+}
+
+/* Encode an M8-format instruction (INST_M): FP load with a 9-bit signed
+ * immediate applied to r3.
+ * The immediate is split over three fields: sign (im bit 8) at bit 36,
+ * "i" (im bit 7) at bit 27 and the low seven bits at bits 13-19.  The "i"
+ * field must come from bit 7, not bit 8, otherwise offsets in 128..255 and
+ * -256..-129 are encoded wrongly.  The whole signed 9-bit range
+ * -256..255 is accepted.
+ * r3 is checked as a source; f1 and r3 are recorded as written. */
+static void
+_M8(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t im, jit_word_t f1)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    assert(im >= -256 && im <= 255);
+    assert(!(f1 &  ~0x7fL));
+    TSTREG1(r3);
+    inst((7L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28)|
+        (((im>>7)&1L)<<27)|(r3<<20)|((im&0x7fL)<<13)|(f1<<6)|_p, INST_M);
+    SETFREG(f1);
+    SETREG(r3);
+}
+
+/* Encode an M9-format instruction (INST_M): major opcode 6, x6 at bit 30,
+ * hint at 28, r3 at 20, f1 at 6 and the predicate in the low six bits.
+ * r3 is checked as a source and f1 is recorded as written. */
+static void
+_M9(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t f1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((ht &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+    TSTREG1(r3);
+    word  = 6L << 37;
+    word |= (x6 << 30) | (ht << 28);
+    word |= (r3 << 20) | (f1 << 6) | _p;
+    inst(word, INST_M);
+    SETFREG(f1);
+}
+
+/* Encode an M10-format instruction (INST_M): FP store of f2 with a 9-bit
+ * signed immediate applied to r3.
+ * The immediate is split over three fields: sign (im bit 8) at bit 36,
+ * "i" (im bit 7) at bit 27 and the low seven bits at bits 6-12.  The "i"
+ * field must come from bit 7, not bit 8, and the whole signed 9-bit range
+ * -256..255 is accepted.
+ * r3 and f2 are checked as sources; r3 is recorded as written. */
+static void
+_M10(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t f2, jit_word_t im)
+{
+    assert(!(_p &  ~0x3fL));
+    assert(!(x6 &  ~0x3fL));
+    assert(!(ht &   ~0x3L));
+    assert(!(r3 &  ~0x7fL));
+    assert(!(f2 &  ~0x7fL));
+    assert(im >= -256 && im <= 255);
+    TSTREG1(r3);
+    TSTFREG1(f2);
+    inst((7L<<37)|(((im>>8)&1L)<<36)|(x6<<30)|(ht<<28)|
+        (((im>>7)&1L)<<27)|(r3<<20)|(f2<<13)|((im&0x7fL)<<6)|_p, INST_M);
+    SETREG(r3);
+}
+
+/* Encode an M11-format instruction (INST_M): major opcode 6 with bit 27
+ * set, x6 at bit 30, hint at 28, r3 at 20, f2 at 13, f1 at 6 and the
+ * predicate in the low six bits.  r3 is checked as a source; both f1 and
+ * f2 are recorded as written. */
+static void
+_M11(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((ht &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+    TSTREG1(r3);
+    word  = (6L << 37) | (1L << 27);
+    word |= (x6 << 30) | (ht << 28);
+    word |= (r3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(word, INST_M);
+    SETFREG(f1);
+    SETFREG(f2);
+}
+
+/* Encode an M12-format instruction (INST_M): same layout as M11 but with
+ * bit 36 set as well.  r3 is checked as a source; f1, f2 and r3 are all
+ * recorded as written. */
+static void
+_M12(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t ht, jit_word_t r3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((ht &  ~0x3L) == 0);
+    assert((r3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+    TSTREG1(r3);
+    word  = (6L << 37) | (1L << 36) | (1L << 27);
+    word |= (x6 << 30) | (ht << 28);
+    word |= (r3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(word, INST_M);
+    SETFREG(f1);
+    SETFREG(f2);
+    SETREG(r3);
+}
+
+/* Encode an M18-format instruction (INST_M): general register r2 is the
+ * source and FP register f1 the destination.  Major opcode 6, bit 27 set,
+ * x6 at bit 30, r2 at 13, f1 at 6, predicate in the low six bits. */
+static void
+_M18(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t r2, jit_word_t f1)
+{
+    jit_word_t         word;
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((r2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+    TSTREG1(r2);
+    word  = (6L << 37) | (1L << 27);
+    word |= (x6 << 30) | (r2 << 13) | (f1 << 6) | _p;
+    inst(word, INST_M);
+    SETFREG(f1);
+}
+
+/* Encode an M19-format instruction (INST_M): FP register f2 is the source
+ * and general register r1 the destination.  Major opcode 4, bit 27 set,
+ * x6 at bit 30, f2 at 13, r1 at 6, predicate in the low six bits. */
+static void
+_M19(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t x6, jit_word_t f2, jit_word_t r1)
+{
+    jit_word_t         word;
+    assert((_p & ~0x3fL) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((r1 & ~0x7fL) == 0);
+    TSTFREG1(f2);
+    word  = (4L << 37) | (1L << 27);
+    word |= (x6 << 30) | (f2 << 13) | (r1 << 6) | _p;
+    inst(word, INST_M);
+    SETREG(r1);
+}
+
+/* Encode an F1-format instruction (INST_F) with three FP sources
+ * (f2, f3, f4) and one FP destination (f1): opcode at bit 37, x at 36,
+ * status field at 34, f4 at 27, f3 at 20, f2 at 13, f1 at 6 and the
+ * predicate in the low six bits. */
+static void
+F1_(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t op, jit_word_t x, jit_word_t sf,
+    jit_word_t f4, jit_word_t f3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p & ~0x3fL) == 0);
+    assert((op &  ~0xfL) == 0);
+    assert((x  &  ~0x1L) == 0);
+    assert((sf &  ~0x3L) == 0);
+    assert((f4 & ~0x7fL) == 0);
+    assert((f3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+    TSTFREG3(f2, f3, f4);
+    word  = (op << 37) | (x << 36) | (sf << 34);
+    word |= (f4 << 27) | (f3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(word, INST_F);
+    SETFREG(f1);
+}
+
+/* Encode an F4-format instruction (INST_F), the FP compare form: major
+ * opcode 4, rb at bit 36, status field at 34, ra at 33, p2 at 27, f3 at 20,
+ * f2 at 13, ta at 12, p1 at 6 and the qualifying predicate in the low six
+ * bits.  f2 and f3 are checked as sources; no register is recorded as
+ * written here. */
+static void
+F4_(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t rb, jit_word_t sf, jit_word_t ra, jit_word_t p2,
+    jit_word_t f3, jit_word_t f2, jit_word_t ta, jit_word_t p1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p & ~0x3fL) == 0);
+    assert((rb &  ~0x1L) == 0);
+    assert((sf &  ~0x3L) == 0);
+    assert((ra &  ~0x1L) == 0);
+    assert((p2 & ~0x3fL) == 0);
+    assert((f3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((ta &  ~0x1L) == 0);
+    assert((p1 & ~0x3fL) == 0);
+    TSTFREG2(f2, f3);
+    word  = (4L << 37) | (rb << 36) | (sf << 34) | (ra << 33);
+    word |= (p2 << 27) | (f3 << 20) | (f2 << 13);
+    word |= (ta << 12) | (p1 << 6) | _p;
+    inst(word, INST_F);
+}
+
+/* Encode an F5-format instruction (INST_F): major opcode 5 with a 9-bit
+ * class mask fc split in two (bits 7-8 go to bit 33, bits 0-6 to bit 20),
+ * p2 at 27, f2 at 13, ta at 12, p1 at 6 and the qualifying predicate in
+ * the low six bits.  f2 is checked as a source. */
+static void
+F5_(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t p2, jit_word_t fc, jit_word_t f2, jit_word_t ta, jit_word_t p1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p &  ~0x3fL) == 0);
+    assert((p2 &  ~0x3fL) == 0);
+    assert((fc & ~0x1ffL) == 0);
+    assert((f2 &  ~0x7fL) == 0);
+    assert((ta &   ~0x1L) == 0);
+    assert((p1 &  ~0x3fL) == 0);
+    TSTFREG1(f2);
+    word  = (5L << 37) | (((fc >> 7) & 3L) << 33) | (p2 << 27);
+    word |= ((fc & 0x7fL) << 20) | (f2 << 13);
+    word |= (ta << 12) | (p1 << 6) | _p;
+    inst(word, INST_F);
+}
+
+/* Encode an F6-format instruction (INST_F): opcode at bit 37, q at 36,
+ * status field at 34, bit 33 set, p2 at 27, f3 at 20, f2 at 13, f1 at 6
+ * and the qualifying predicate in the low six bits.  f2 and f3 are checked
+ * as sources and f1 is recorded as written. */
+static void
+F6x_(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t op, jit_word_t q, jit_word_t sf,
+     jit_word_t p2,  jit_word_t f3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p & ~0x3fL) == 0);
+    assert((op &  ~0xfL) == 0);
+    assert((q  &  ~0x1L) == 0);
+    assert((sf &  ~0x3L) == 0);
+    assert((p2 & ~0x3fL) == 0);
+    assert((f3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+    TSTFREG2(f2, f3);
+    word  = (op << 37) | (q << 36) | (sf << 34) | (1L << 33);
+    word |= (p2 << 27) | (f3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(word, INST_F);
+    SETFREG(f1);
+}
+
+/* Encode an F8-format instruction (INST_F): opcode at bit 37, status field
+ * at 34, x6 at 27, f3 at 20, f2 at 13, f1 at 6 and the qualifying predicate
+ * in the low six bits.  f2 and f3 are checked as sources and f1 is
+ * recorded as written. */
+static void
+F8_(jit_state_t *_jit, jit_word_t _p,
+    jit_word_t op, jit_word_t sf, jit_word_t x6,
+    jit_word_t f3, jit_word_t f2, jit_word_t f1)
+{
+    jit_word_t         word;
+    /* every operand must fit the width of its field */
+    assert((_p & ~0x3fL) == 0);
+    assert((op &  ~0xfL) == 0);
+    assert((sf &  ~0x3L) == 0);
+    assert((x6 & ~0x3fL) == 0);
+    assert((f3 & ~0x7fL) == 0);
+    assert((f2 & ~0x7fL) == 0);
+    assert((f1 & ~0x7fL) == 0);
+    TSTFREG2(f2, f3);
+    word  = (op << 37) | (sf << 34) | (x6 << 27);
+    word |= (f3 << 20) | (f2 << 13) | (f1 << 6) | _p;
+    inst(word, INST_F);
+    SETFREG(f1);
+}
+
+/* Encode an F12-format instruction (INST_F): status field at bit 34, x6 at
+ * 27, omsk at 20, amsk at 13 and the qualifying predicate in the low six
+ * bits.
+ * The predicate is now OR-ed into the word like in every other encoder in
+ * this file; without it a predicated request was emitted as an
+ * unconditional instruction. */
+static void
+F12_(jit_state_t *_jit, jit_word_t _p,
+     jit_word_t sf, jit_word_t x6, jit_word_t omsk, jit_word_t amsk)
+{
+    assert(!(_p   & ~0x3fL));
+    assert(!(sf   &  ~0x3L));
+    assert(!(x6   & ~0x3fL));
+    assert(!(omsk & ~0x7fL));
+    assert(!(amsk & ~0x7fL));
+    /* no registers referenced */
+    inst((sf<<34)|(x6<<27)|(omsk<<20)|(amsk<<13)|_p, INST_F);
+}
+
+/* Encode an F14-format instruction (INST_F) carrying a 21-bit immediate:
+ * im bit 20 goes to bit 36 and im bits 0-19 to bits 6-25; status field at
+ * 34, x at 33, x6 at 27, predicate in the low six bits.
+ * The range check and the field mask now both cover the full 21 bits.
+ * Before, the assert accepted 17 bits while the encoder kept only 16, so
+ * im bit 16 was silently dropped and bit 20 could never be set. */
+static void
+F14x_(jit_state_t* _jit, jit_word_t _p,
+      jit_word_t sf,  jit_word_t x, jit_word_t x6, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(sf &      ~0x3L));
+    assert(!(x  &      ~0x1L));
+    assert(!(x6 &     ~0x3fL));
+    assert(!(im & ~0x1fffffL));
+    /* no registers referenced */
+    inst((((im>>20)&1L)<<36)|(sf<<34)|(x<<33)|
+        (x6<<27)|((im&0xfffffL)<<6)|_p, INST_F);
+}
+
+/* Encode an F16-format instruction (INST_F), the F-unit nop/hint form,
+ * carrying a 21-bit immediate: im bit 20 goes to bit 36 and im bits 0-19 to
+ * bits 6-25.
+ * Per the Itanium F16 layout, x6 occupies bits 27-32 and must be 1, while
+ * the y selector is the single bit 26.  The two were swapped, which put y
+ * into x6 and a constant 1 into the y slot.  The immediate check and mask
+ * now cover all 21 bits (the old mask kept only 16). */
+static void
+F16_(jit_state_t* _jit, jit_word_t _p,
+     jit_word_t y, jit_word_t im)
+{
+    assert(!(_p &     ~0x3fL));
+    assert(!(y  &      ~0x1L));
+    assert(!(im & ~0x1fffffL));
+    /* no registers referenced */
+    inst((((im>>20)&1L)<<36)|(1L<<27)|(y<<26)|((im&0xfffffL)<<6)|_p, INST_F);
+}
+
+/* fpr_opi(name, type, size) defines _<name>i_<type>(): the immediate form
+ * of a three-operand FP operation.  It loads the constant pointed to by i0
+ * into a scratch FP register, runs the register form <name>r_<type>, and
+ * releases the scratch register.
+ * Every line of the macro body must end in a backslash: a continuation
+ * pushed onto a line of its own cuts the definition short. */
+#define fpr_opi(name, type, size)                                      \
+static void                                                            \
+_##name##i_##type(jit_state_t *_jit,                                   \
+                 jit_int32_t r0, jit_int32_t r1,                       \
+                 jit_float##size##_t *i0)                              \
+{                                                                      \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    movi_##type(rn(reg), i0);                                          \
+    name##r_##type(r0, r1, rn(reg));                                   \
+    jit_unget_reg(reg);                                                \
+}
+/* fpr_bopi(name, type, size) defines _b<name>i_<type>(): the immediate
+ * form of an FP compare-and-branch.  It works like fpr_opi and returns the
+ * jit_word_t produced by the register form b<name>r_<type>. */
+#define fpr_bopi(name, type, size)                                     \
+static jit_word_t                                                      \
+_b##name##i_##type(jit_state_t *_jit,                                  \
+                 jit_word_t i0, jit_int32_t r0,                        \
+                 jit_float##size##_t *i1)                              \
+{                                                                      \
+    jit_word_t         word;                                           \
+    jit_int32_t                reg = jit_get_reg(jit_class_fpr);       \
+    movi_##type(rn(reg), i1);                                          \
+    word = b##name##r_##type(i0, r0, rn(reg));                         \
+    jit_unget_reg(reg);                                                \
+    return (word);                                                     \
+}
+/* shorthands: f = jit_float32_t operand, d = jit_float64_t operand */
+#define fopi(name)                     fpr_opi(name, f, 32)
+#define fbopi(name)                    fpr_bopi(name, f, 32)
+#define dopi(name)                     fpr_opi(name, d, 64)
+#define dbopi(name)                    fpr_bopi(name, d, 64)
+
+/* immediate forms of the arithmetic operations: _addi_f ... _divi_d */
+fopi(add)
+fopi(sub)
+fopi(mul)
+fopi(div)
+dopi(add)
+dopi(sub)
+dopi(mul)
+dopi(div)
+
+/* r0 = r1 / r2 in single precision (sequence derived from gcc -O0 output).
+ *
+ * FRCPA gives an initial reciprocal approximation y0 and sets PR_6 when it
+ * needs refinement.  When PR_6 is cleared, the Itanium manual defines the
+ * FRCPA result as the final IEEE quotient (zero, infinity, NaN operands).
+ * The refinement is predicated on PR_6 and its result is written back
+ * into t0, so one unconditional move to r0 at the end covers both cases;
+ * writing r0 only under PR_6 left it stale in the special cases.
+ *
+ * Refinement (FP register 1 is used as the constant 1.0):
+ *   e  = 1 - r2*y0,  y1 = y0 + y0*e,
+ *   e  = e*e,        y2 = y1 + y1*e,
+ *   e  = e*e,        y3 = y2 + y2*e,
+ *   q  = r1*y3,      rem = r1 - r2*q,   result = q + rem*y3
+ * The last three steps must use q: squaring y3 and adding 1.0 does not
+ * yield a quotient.  r0 is written only at the end, so it may alias r1/r2. */
+static void
+_divr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                t0, t1, t2, t3;
+    t0 = jit_get_reg(jit_class_fpr);
+    t1 = jit_get_reg(jit_class_fpr);
+    t2 = jit_get_reg(jit_class_fpr);
+    t3 = jit_get_reg(jit_class_fpr);
+    FRCPA(rn(t0), PR_6, r1, r2);                               /* y0 */
+    FNMA_p(rn(t1), r2, rn(t0), 1, SF_S1, PR_6);                /* e */
+    FMA_p(rn(t2), rn(t0), rn(t1), rn(t0), SF_S1, PR_6);        /* y1 */
+    FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);               /* e^2 */
+    FMA_p(rn(t2), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);        /* y2 */
+    FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);               /* e^4 */
+    FMA_p(rn(t1), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);        /* y3 */
+    FMPY_S_p(rn(t2), r1, rn(t1), SF_S1, PR_6);                 /* q */
+    FNMA_p(rn(t3), r2, rn(t2), r1, SF_S1, PR_6);               /* rem */
+    FMA_S_p(rn(t0), rn(t3), rn(t1), rn(t2), SF_S0, PR_6);      /* q + rem*y3 */
+    movr_f(r0, rn(t0));
+    jit_unget_reg(t3);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/* r0 = r1 / r2 in double precision.
+ *
+ * FRCPA gives an initial reciprocal approximation y0 and sets PR_6 when it
+ * needs refinement.  When PR_6 is cleared, the Itanium manual defines the
+ * FRCPA result as the final IEEE quotient (zero, infinity, NaN operands).
+ * The refinement is predicated on PR_6 and its result is written back
+ * into t0, so one unconditional move to r0 at the end covers both cases;
+ * writing r0 only under PR_6 left it stale in the special cases.
+ *
+ * Refinement (FP register 1 is used as the constant 1.0):
+ *   e  = 1 - r2*y0,  y1 = y0 + y0*e,
+ *   e  = e*e,        y2 = y1 + y1*e,
+ *   e  = e*e,        y3 = y2 + y2*e,
+ *   q  = r1*y3,      rem = r1 - r2*q,   result = q + rem*y3
+ * The last three steps must use q: squaring y3 and adding 1.0 does not
+ * yield a quotient.  r0 is written only at the end, so it may alias r1/r2. */
+static void
+_divr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                t0, t1, t2, t3;
+    t0 = jit_get_reg(jit_class_fpr);
+    t1 = jit_get_reg(jit_class_fpr);
+    t2 = jit_get_reg(jit_class_fpr);
+    t3 = jit_get_reg(jit_class_fpr);
+    FRCPA(rn(t0), PR_6, r1, r2);                               /* y0 */
+    FNMA_p(rn(t1), r2, rn(t0), 1, SF_S1, PR_6);                /* e */
+    FMA_p(rn(t2), rn(t0), rn(t1), rn(t0), SF_S1, PR_6);        /* y1 */
+    FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);               /* e^2 */
+    FMA_p(rn(t2), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);        /* y2 */
+    FMPY_p(rn(t1), rn(t1), rn(t1), SF_S1, PR_6);               /* e^4 */
+    FMA_p(rn(t1), rn(t2), rn(t1), rn(t2), SF_S1, PR_6);        /* y3 */
+    FMPY_D_p(rn(t2), r1, rn(t1), SF_S1, PR_6);                 /* q */
+    FNMA_p(rn(t3), r2, rn(t2), r1, SF_S1, PR_6);               /* rem */
+    FMA_D_p(rn(t0), rn(t3), rn(t1), rn(t2), SF_S0, PR_6);      /* q + rem*y3 */
+    movr_d(r0, rn(t0));
+    jit_unget_reg(t3);
+    jit_unget_reg(t2);
+    jit_unget_reg(t1);
+    jit_unget_reg(t0);
+}
+
+/* Convert the integer in general register r1 to floating point in r0:
+ * the value is first transferred into a scratch FP register (SETF_SIG)
+ * and then converted with FCVT_XF. */
+static void
+_extr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_fpr);
+
+    SETF_SIG(rn(tmp), r1);
+    FCVT_XF(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Truncate the FP value in r1 to an integer in general register r0.
+ * The conversion happens in a scratch FP register (FCVT_FX_TRUNC) and the
+ * result is then transferred to r0 with GETF_SIG.
+ * No FNORM follows: r0 is a general register number here, so
+ * FNORM(r0, r0) would operate on the unrelated FP register that happens
+ * to share that number. */
+static void
+_truncr_d_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                reg;
+    reg = jit_get_reg(jit_class_fpr);
+    FCVT_FX_TRUNC(rn(reg), r1);
+    GETF_SIG(r0, rn(reg));
+    jit_unget_reg(reg);
+}
+
+static void
+_ltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = 1 if r1 < r2, else 0 */
+    FCMP_LT(PR_6, PR_7, r1, r2);       /* PR_6: compare holds; PR_7: presumably its complement */
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);             /* GR_0 supplies the zero */
+}
+fopi(lt)       /* defines _lti_f */
+dopi(lt)       /* defines _lti_d */
+
+static void
+_ler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = 1 if r1 <= r2, else 0 */
+    FCMP_LE(PR_6, PR_7, r1, r2);       /* PR_6: compare holds; PR_7: presumably its complement */
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+fopi(le)       /* defines _lei_f */
+dopi(le)       /* defines _lei_d */
+
+static void
+_eqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = 1 if r1 == r2, else 0 */
+    FCMP_EQ(PR_6, PR_7, r1, r2);       /* PR_6: compare holds; PR_7: presumably its complement */
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+fopi(eq)       /* defines _eqi_f */
+dopi(eq)       /* defines _eqi_d */
+
+static void
+_ger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = 1 if r1 >= r2, else 0 */
+    FCMP_LE(PR_6, PR_7, r2, r1);       /* operands swapped: r2 <= r1 */
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+fopi(ge)       /* defines _gei_f */
+dopi(ge)       /* defines _gei_d */
+
+static void
+_gtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = 1 if r1 > r2, else 0; callers pass r1/r2 in natural order */
+    FCMP_LT(PR_6, PR_7, r2, r1);       /* operands swapped here: r2 < r1 */
+    MOVI_p(r0, 1, PR_6);
+    MOV_p(r0, GR_0, PR_7);
+}
+fopi(gt)       /* defines _gti_f */
+dopi(gt)       /* defines _gti_d */
+
+static void
+_ner_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = 0 if r1 == r2, else 1 */
+    FCMP_EQ(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_6);             /* equal: result 0 */
+    MOVI_p(r0, 1, PR_7);               /* otherwise: result 1 */
+}
+fopi(ne)       /* defines _nei_f */
+dopi(ne)       /* defines _nei_d */
+
+static void
+_unltr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = !(r2 <= r1): presumably true for r1 < r2 and for unordered operands */
+    FCMP_LE(PR_6, PR_7, r2, r1);
+    MOV_p(r0, GR_0, PR_6);             /* r2 <= r1 holds: result 0 */
+    MOVI_p(r0, 1, PR_7);
+}
+fopi(unlt)     /* defines _unlti_f */
+dopi(unlt)     /* defines _unlti_d */
+
+static void
+_unler_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = !(r2 < r1): presumably true for r1 <= r2 and for unordered operands */
+    FCMP_LT(PR_6, PR_7, r2, r1);
+    MOV_p(r0, GR_0, PR_6);             /* r2 < r1 holds: result 0 */
+    MOVI_p(r0, 1, PR_7);
+}
+fopi(unle)     /* defines _unlei_f */
+dopi(unle)     /* defines _unlei_d */
+
+static void
+_uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 starts at 1 and is cleared when either strict compare holds */
+    MOVI(r0, 1);
+    FCMP_LT(PR_8, PR_9, r1, r2);
+    FCMP_LT(PR_6, PR_7, r2, r1);
+    MOV_p(r0, GR_0, PR_8);             /* !(r1 < r2) && !(r2 < r1) */
+    MOV_p(r0, GR_0, PR_6);
+}
+fopi(uneq)     /* defines _uneqi_f */
+dopi(uneq)     /* defines _uneqi_d */
+
+static void
+_unger_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = !(r1 < r2): presumably true for r1 >= r2 and for unordered operands */
+    FCMP_LT(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_6);             /* r1 < r2 holds: result 0 */
+    MOVI_p(r0, 1, PR_7);
+}
+fopi(unge)     /* defines _ungei_f */
+dopi(unge)     /* defines _ungei_d */
+
+static void
+_ungtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = !(r1 <= r2): presumably true for r1 > r2 and for unordered operands */
+    FCMP_LE(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_6);             /* r1 <= r2 holds: result 0 */
+    MOVI_p(r0, 1, PR_7);
+}
+fopi(ungt)     /* defines _ungti_f */
+dopi(ungt)     /* defines _ungti_d */
+
+/* r0 = 1 if r1 < r2 or r1 > r2, else 0.
+ * "Less than or greater than" is an ordered relation, so it must be false
+ * both for equal operands and for unordered ones (a NaN on either side).
+ * Negating two <= compares does not work: with a NaN both compares fail
+ * and both complement predicates fire, yielding 1.  Instead, start from 1
+ * and clear the result when the operands are equal or unordered. */
+static void
+_ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    MOVI(r0, 1);
+    FCMP_EQ(PR_8, PR_9, r1, r2);
+    FCMP_UNORD(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_8);             /* equal: result 0 */
+    MOV_p(r0, GR_0, PR_6);             /* unordered: result 0 */
+}
+fopi(ltgt)
+dopi(ltgt)
+
+static void
+_ordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = 0 if r1, r2 are unordered, else 1 */
+    FCMP_UNORD(PR_6, PR_7, r1, r2);
+    MOV_p(r0, GR_0, PR_6);             /* unordered: result 0 */
+    MOVI_p(r0, 1, PR_7);
+}
+fopi(ord)      /* defines _ordi_f */
+dopi(ord)      /* defines _ordi_d */
+
+static void
+_unordr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{      /* r0 = 1 if r1, r2 are unordered, else 0 */
+    FCMP_UNORD(PR_6, PR_7, r1, r2);
+    MOVI_p(r0, 1, PR_6);               /* unordered: result 1 */
+    MOV_p(r0, GR_0, PR_7);
+}
+fopi(unord)    /* defines _unordi_f */
+dopi(unord)    /* defines _unordi_d */
+
+/* Load a single-precision value from the absolute address i0 into r0.
+ * The address is materialised in a scratch general register, which is
+ * released before returning. */
+static void
+_ldi_f(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ldr_f(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a single-precision value from address r1 + i0 into r0.
+ * The effective address is computed in a scratch general register, which
+ * is released before returning. */
+static void
+_ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    addi(rn(tmp), r1, i0);
+    ldr_f(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a double-precision value from the absolute address i0 into r0.
+ * The address is materialised in a scratch general register, which is
+ * released before returning. */
+static void
+_ldi_d(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+
+    movi(rn(tmp), i0);
+    ldr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Load a double into r0 from the address r1 + i0.  The sum is computed into
+ * a scratch GPR, which is released before returning. */
+static void
+_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r1, i0);
+    ldr_d(r0, rn(tmp));
+    jit_unget_reg(tmp);
+}
+
+/* Store the float in r0 at the absolute address i0.  The address is first
+ * materialized in a scratch GPR, which is released before returning. */
+static void
+_sti_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_f(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store the float in r2 at the address r0 + r1.  The sum is computed into
+ * a scratch GPR, which is released before returning. */
+static void
+_stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    str_f(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Store the float in r1 at the address r0 + i0.  The sum is computed into
+ * a scratch GPR, which is released before returning. */
+static void
+_stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_f(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Store the double in r0 at the absolute address i0.  The address is first
+ * materialized in a scratch GPR, which is released before returning. */
+static void
+_sti_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    movi(rn(tmp), i0);
+    str_d(rn(tmp), r0);
+    jit_unget_reg(tmp);
+}
+
+/* Store the double in r2 at the address r0 + r1.  The sum is computed into
+ * a scratch GPR, which is released before returning. */
+static void
+_stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    addr(rn(tmp), r0, r1);
+    str_d(rn(tmp), r2);
+    jit_unget_reg(tmp);
+}
+
+/* Store the double in r1 at the address r0 + i0.  The sum is computed into
+ * a scratch GPR, which is released before returning. */
+static void
+_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_int32_t                tmp = jit_get_reg(jit_class_gpr);
+    addi(rn(tmp), r0, i0);
+    str_d(rn(tmp), r1);
+    jit_unget_reg(tmp);
+}
+
+/* Emit a call to the C library sqrtf().
+ * Work in progress: the argument (r1) and result (r0) moves are still only
+ * placeholders, see the @arg0/@ret comments below.
+ * The displacement d is measured from the current pc in 16-byte units; an
+ * IP-relative BRI_CALL is used when d is within range, BRL_CALL otherwise.
+ * The range test is inclusive on both ends (the former `<' ... `&&' ... `>'
+ * form could never be true, so BRL_CALL was always emitted).
+ * NOTE(review): the bounds look like a +/-16MiB byte range while d is in
+ * 16-byte units; confirm against the BRI_CALL encoder. */
+static void
+_sqrtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         d;
+    /* @arg0 = r1 */
+    sync();
+    d = ((jit_word_t)sqrtf - _jit->pc.w) >> 4;
+    if (d >= -16777216 && d <= 16777215)
+       BRI_CALL(0, d);
+    else
+       /* FIXME displacement likely wrong (in either case) */
+       BRL_CALL(0, d);
+    /* r0 = @ret */
+}
+
+/* Emit a call to the C library sqrt().
+ * Work in progress: the argument (r1) and result (r0) moves are still only
+ * placeholders, see the @arg0/@ret comments below.
+ * The displacement d is measured from the current pc in 16-byte units; an
+ * IP-relative BRI_CALL is used when d is within range, BRL_CALL otherwise.
+ * The range test is inclusive on both ends (the former `<' ... `&&' ... `>'
+ * form could never be true, so BRL_CALL was always emitted).
+ * NOTE(review): the bounds look like a +/-16MiB byte range while d is in
+ * 16-byte units; confirm against the BRI_CALL encoder. */
+static void
+_sqrtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         d;
+    /* @arg0 = r1 */
+    sync();
+    d = ((jit_word_t)sqrt - _jit->pc.w) >> 4;
+    if (d >= -16777216 && d <= 16777215)
+       BRI_CALL(0, d);
+    else
+       /* FIXME displacement likely wrong (in either case) */
+       BRL_CALL(0, d);
+    /* r0 = @ret */
+}
+
+/* Branch to i0 when r0 < r1.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();                            /* NOTE(review): presumably starts a fresh bundle -- confirm */
+    w = _jit->pc.w;
+    FCMP_LT(PR_6, PR_7, r0, r1);
+    BRI_COND((i0 - w) >> 4, PR_6);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(lt)
+dbopi(lt)
+
+/* Branch to i0 when r0 <= r1.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();                            /* NOTE(review): presumably starts a fresh bundle -- confirm */
+    w = _jit->pc.w;
+    FCMP_LE(PR_6, PR_7, r0, r1);
+    BRI_COND((i0 - w) >> 4, PR_6);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(le)
+dbopi(le)
+
+/* Branch to i0 when r0 == r1.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_beqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();                            /* NOTE(review): presumably starts a fresh bundle -- confirm */
+    w = _jit->pc.w;
+    FCMP_EQ(PR_6, PR_7, r0, r1);
+    BRI_COND((i0 - w) >> 4, PR_6);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(eq)
+dbopi(eq)
+
+/* Branch to i0 when r0 >= r1, tested as r1 <= r0 (operands swapped).
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();                            /* NOTE(review): presumably starts a fresh bundle -- confirm */
+    w = _jit->pc.w;
+    FCMP_LE(PR_6, PR_7, r1, r0);
+    BRI_COND((i0 - w) >> 4, PR_6);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(ge)
+dbopi(ge)
+
+/* Branch to i0 when r0 > r1, tested as r1 < r0 (operands swapped).
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();                            /* NOTE(review): presumably starts a fresh bundle -- confirm */
+    w = _jit->pc.w;
+    FCMP_LT(PR_6, PR_7, r1, r0);
+    BRI_COND((i0 - w) >> 4, PR_6);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(gt)
+dbopi(gt)
+
+/* Branch to i0 when r0 == r1 does not hold: the branch is predicated on
+ * PR_7, the complement of the FCMP_EQ result in PR_6.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bner_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();                            /* NOTE(review): presumably starts a fresh bundle -- confirm */
+    w = _jit->pc.w;
+    FCMP_EQ(PR_6, PR_7, r0, r1);
+    BRI_COND((i0 - w) >> 4, PR_7);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(ne)
+dbopi(ne)
+
+/* unlt: branch to i0 unless r1 <= r0; the branch is predicated on PR_7,
+ * the complement of the FCMP_LE result.
+ * NOTE(review): relies on PR_7 being set for unordered operands -- confirm.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bunltr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();
+    w = _jit->pc.w;
+    FCMP_LE(PR_6, PR_7, r1, r0);
+    BRI_COND((i0 - w) >> 4, PR_7);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(unlt)
+dbopi(unlt)
+
+/* unle: branch to i0 unless r1 < r0; the branch is predicated on PR_7,
+ * the complement of the FCMP_LT result.
+ * NOTE(review): relies on PR_7 being set for unordered operands -- confirm.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bunler_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();
+    w = _jit->pc.w;
+    FCMP_LT(PR_6, PR_7, r1, r0);
+    BRI_COND((i0 - w) >> 4, PR_7);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(unle)
+dbopi(unle)
+
+/* uneq: branch to i0 when r0/r1 are unordered or equal.
+ * Three steps separated by sync(): an unordered test that jumps forward to
+ * the unconditional branch (L1), an equality test that jumps past it (L2)
+ * when the operands differ, and the unconditional branch to i0 itself.
+ * NOTE(review): the hard-coded displacements of 2 assume each sync() starts
+ * a new 16-byte unit -- confirm.
+ * Returns the address recorded right before the unconditional branch, which
+ * is the only instruction here whose target depends on i0. */
+static jit_word_t
+_buneqr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();
+    FCMP_UNORD(PR_8, PR_9, r0, r1);
+    /* junord L1 */
+    BRI_COND(2, PR_8);
+    sync();
+    FCMP_EQ(PR_6, PR_7, r0, r1);
+    /* jne L2 */
+    BRI_COND(2, PR_7);
+    sync();
+    w = _jit->pc.w;
+    /* L1: */
+    BRI((i0 - w) >> 4);                /* unconditional jump to patch */
+    sync();
+    /* L2: */
+    return (w);
+}
+fbopi(uneq)
+dbopi(uneq)
+
+/* unge: branch to i0 unless r0 < r1; the branch is predicated on PR_7,
+ * the complement of the FCMP_LT result.
+ * NOTE(review): relies on PR_7 being set for unordered operands -- confirm.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bunger_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();
+    w = _jit->pc.w;
+    FCMP_LT(PR_6, PR_7, r0, r1);
+    BRI_COND((i0 - w) >> 4, PR_7);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(unge)
+dbopi(unge)
+
+/* ungt: branch to i0 unless r0 <= r1; the branch is predicated on PR_7,
+ * the complement of the FCMP_LE result.
+ * NOTE(review): relies on PR_7 being set for unordered operands -- confirm.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bungtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();
+    w = _jit->pc.w;
+    FCMP_LE(PR_6, PR_7, r0, r1);
+    BRI_COND((i0 - w) >> 4, PR_7);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(ungt)
+dbopi(ungt)
+
+/* ltgt: branch to i0 when r0/r1 are ordered and different.
+ * Three steps separated by sync(): an equality test and an unordered test
+ * that each jump forward past the branch (L1), followed by the
+ * unconditional branch to i0, reached only when neither test held.
+ * The unordered skip is predicated on PR_6, the predicate FCMP_UNORD sets
+ * for unordered operands; skipping on PR_7 (ordered) would bypass the
+ * branch for every ordered pair and take it only for unordered ones.
+ * NOTE(review): the hard-coded displacements 3 and 2 assume each sync()
+ * starts a new 16-byte unit -- confirm.
+ * Returns the address recorded right before the unconditional branch, which
+ * is the only instruction here whose target depends on i0. */
+static jit_word_t
+_bltgtr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();
+    FCMP_EQ(PR_8, PR_9, r0, r1);
+    /* jeq L1 */
+    BRI_COND(3, PR_8);
+    sync();
+    FCMP_UNORD(PR_6, PR_7, r0, r1);
+    /* junord L1 */
+    BRI_COND(2, PR_6);
+    sync();
+    w = _jit->pc.w;
+    BRI((i0 - w) >> 4);                /* unconditional jump to patch */
+    /* L1 */
+    sync();
+    return (w);
+}
+fbopi(ltgt)
+dbopi(ltgt)
+
+/* ord: branch to i0 when r0/r1 are not unordered; the branch is predicated
+ * on PR_7, the complement of the FCMP_UNORD result in PR_6.
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();
+    w = _jit->pc.w;
+    FCMP_UNORD(PR_6, PR_7, r0, r1);
+    BRI_COND((i0 - w) >> 4, PR_7);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(ord)
+dbopi(ord)
+
+/* unord: branch to i0 when FCMP_UNORD reports r0/r1 as unordered (PR_6).
+ * Returns the code address recorded right before the compare is emitted;
+ * the branch displacement is computed relative to that same address. */
+static jit_word_t
+_bunordr_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+{
+    jit_word_t         w;
+    sync();
+    w = _jit->pc.w;
+    FCMP_UNORD(PR_6, PR_7, r0, r1);
+    BRI_COND((i0 - w) >> 4, PR_6);     /* displacement in 16-byte units */
+    return (w);
+}
+fbopi(unord)
+dbopi(unord)
+#endif
diff --git a/lib/jit_mips.c b/lib/jit_ia64.c
similarity index 61%
copy from lib/jit_mips.c
copy to lib/jit_ia64.c
index 952c1ba..e54b13d 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_ia64.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2012  Free Software Foundation, Inc.
+ * Copyright (C) 2013  Free Software Foundation, Inc.
  *
  * This is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -15,88 +15,201 @@
  *     Paulo Cesar Pereira de Andrade
  */
 
-#if defined(__linux__)
-#  include <sys/cachectl.h>
-#endif
+#include <lightning.h>
+#include <lightning/jit_private.h>
 
 #define rc(value)                      jit_class_##value
 #define rn(reg)                        (jit_regno(_rvs[jit_regno(reg)].spec))
 
-/* initial, mipsel 32 bits code only */
+#define stack_framesize                        16
 
 /*
  * Prototypes
  */
-#  define patch(instr, node)           _patch(_jit, instr, node)
+#define patch(instr, node)             _patch(_jit, instr, node)
 static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
 
 #define PROTO                          1
-#  include "jit_mips-cpu.c"
-#  include "jit_mips-fpu.c"
+#  include "jit_ia64-cpu.c"
+#  include "jit_ia64-fpu.c"
 #undef PROTO
 
 /*
  * Initialization
  */
 jit_register_t         _rvs[] = {
-    { rc(gpr) | 0x01,                  "at" },
-    { rc(gpr) | 0x02,                  "v0" },
-    { rc(gpr) | 0x03,                  "v1" },
-    { rc(gpr) | 0x08,                  "t0" },
-    { rc(gpr) | 0x09,                  "t1" },
-    { rc(gpr) | 0x0a,                  "t2" },
-    { rc(gpr) | 0x0b,                  "t3" },
-    { rc(gpr) | 0x0c,                  "t4" },
-    { rc(gpr) | 0x0d,                  "t5" },
-    { rc(gpr) | 0x0e,                  "t6" },
-    { rc(gpr) | 0x0f,                  "t7" },
-    { rc(gpr) | 0x18,                  "t8" },
-    { rc(gpr) | 0x19,                  "t9" },
-    { rc(sav) | rc(gpr) | 0x10,                "s0" },
-    { rc(sav) | rc(gpr) | 0x11,                "s1" },
-    { rc(sav) | rc(gpr) | 0x12,                "s2" },
-    { rc(sav) | rc(gpr) | 0x13,                "s3" },
-    { rc(sav) | rc(gpr) | 0x14,                "s4" },
-    { rc(sav) | rc(gpr) | 0x15,                "s5" },
-    { rc(sav) | rc(gpr) | 0x16,                "s6" },
-    { rc(sav) | rc(gpr) | 0x17,                "s7" },
-    { 0x00,                            "zero" },
-    { 0x1a,                            "k0" },
-    { 0x1b,                            "k1" },
-    { rc(sav) | 0x1f,                  "ra" },
-    { rc(sav) | 0x1c,                  "gp" },
-    { rc(sav) | 0x1d,                  "sp" },
-    { rc(sav) | 0x1e,                  "fp" },
-    { rc(arg) | rc(gpr) | 0x07,                "a3" },
-    { rc(arg) | rc(gpr) | 0x06,                "a2" },
-    { rc(arg) | rc(gpr) | 0x05,                "a1" },
-    { rc(arg) | rc(gpr) | 0x04,                "a0" },
-    { rc(fpr) | 0x00,                  "$f0" },
-    { rc(fpr) | 0x02,                  "$f2" },
-    { rc(fpr) | 0x04,                  "$f4" },
-    { rc(fpr) | 0x06,                  "$f6" },
-    { rc(fpr) | 0x08,                  "$f8" },
-    { rc(fpr) | 0x0a,                  "$f10" },
-    { rc(sav) | rc(fpr) | 0x10,                "$f16" },
-    { rc(sav) | rc(fpr) | 0x12,                "$f18" },
-    { rc(sav) | rc(fpr) | 0x14,                "$f20" },
-    { rc(sav) | rc(fpr) | 0x16,                "$f22" },
-    { rc(sav) | rc(fpr) | 0x18,                "$f24" },
-    { rc(sav) | rc(fpr) | 0x1a,                "$f26" },
-    { rc(sav) | rc(fpr) | 0x1c,                "$f28" },
-    { rc(sav) | rc(fpr) | 0x1e,                "$f30" },
-    { rc(arg) | rc(fpr) | 0x0e,                "$f14" },
-    { rc(arg) | rc(fpr) | 0x0c,                "$f12" },
-    { _NOREG,                          "<none>" },
+    /* Always 0 */
+    { 0,                "r0"  },
+    /* Global Pointer */
+    { rc(sav)|1,        "r1"  },
+    /* Used when a register cannot be allocated */
+    { 2,                "r2"  },
+    /* First scratch register */
+    { rc(gpr)|3,        "r3"  },
+    /* Use r4 as lightning fp register */
+    { rc(sav)|4,        "r4"  },
+    /* Do not touch callee-saved registers that are not automatically spilled/reloaded */
+    { rc(sav)|5,        "r5"  },       { rc(sav)|6,            "r6"  },
+    { rc(sav)|7,        "r7"  },
+    /* Do not touch the return register for the sake of simplicity, even
+     * though having JIT_R0 be the same as JIT_RET is usually an optimization */
+    { 8,                "r8"  },
+    /* Return registers, use as temporaries */
+    { rc(gpr)|9,        "r9"  },
+    { rc(gpr)|10,       "r10" },       { rc(gpr)|11,           "r11" },
+    /* Stack pointer */
+    { rc(sav)|12,       "r12" },
+    /* Thread pointer */
+    { rc(sav)|13,       "r13" },
+    /* (Usually) assembly temporaries */
+    { rc(gpr)|31,       "r31" },       { rc(gpr)|30,           "r30" },
+    { rc(gpr)|29,       "r29" },       { rc(gpr)|28,           "r28" },
+    { rc(gpr)|27,       "r27" },       { rc(gpr)|26,           "r26" },
+    { rc(gpr)|25,       "r25" },       { rc(gpr)|24,           "r24" },
+    { rc(gpr)|23,       "r23" },       { rc(gpr)|22,           "r22" },
+    { rc(gpr)|21,       "r21" },       { rc(gpr)|20,           "r20" },
+    { rc(gpr)|19,       "r19" },       { rc(gpr)|18,           "r18" },
+    /* JIT_R4-JIT_R0 */
+    { rc(gpr)|17,       "r17" },       { rc(gpr)|16,           "r16" },
+    { rc(gpr)|15,       "r15" },       { rc(gpr)|14,           "r14" },
+    /* Do not allow allocating r32-r41 as temporaries for the sake of
+     * avoiding the need of extra complexity in the non backend code */
+    { rc(arg)|32,       "r32" },       { rc(arg)|33,           "r33" },
+    { rc(arg)|34,       "r34" },       { rc(arg)|35,           "r35" },
+    { rc(arg)|36,       "r36" },       { rc(arg)|37,           "r37" },
+    { rc(arg)|38,       "r38" },       { rc(arg)|39,           "r39" },
+    /* JIT_V0-JIT_V3 */
+    { rc(gpr)|40,       "r40" },       { rc(gpr)|41,           "r41" },
+    { rc(gpr)|42,       "r42" },       { rc(gpr)|43,           "r43" },
+    /* Temporaries/locals */
+    { rc(gpr)|44,       "r44" },       { rc(gpr)|45,           "r45" },
+    { rc(gpr)|46,       "r46" },       { rc(gpr)|47,           "r47" },
+    { rc(gpr)|48,       "r48" },       { rc(gpr)|49,           "r49" },
+    { rc(gpr)|50,       "r50" },       { rc(gpr)|51,           "r51" },
+    { rc(gpr)|52,       "r52" },       { rc(gpr)|53,           "r53" },
+    { rc(gpr)|54,       "r54" },       { rc(gpr)|55,           "r55" },
+    { rc(gpr)|56,       "r56" },       { rc(gpr)|57,           "r57" },
+    { rc(gpr)|58,       "r58" },       { rc(gpr)|59,           "r59" },
+    { rc(gpr)|60,       "r60" },       { rc(gpr)|61,           "r61" },
+    { rc(gpr)|62,       "r62" },       { rc(gpr)|63,           "r63" },
+    { rc(gpr)|64,       "r64" },       { rc(gpr)|65,           "r65" },
+    { rc(gpr)|66,       "r66" },       { rc(gpr)|67,           "r67" },
+    { rc(gpr)|68,       "r68" },       { rc(gpr)|69,           "r69" },
+    { rc(gpr)|70,       "r70" },       { rc(gpr)|71,           "r71" },
+    { rc(gpr)|72,       "r72" },       { rc(gpr)|73,           "r73" },
+    { rc(gpr)|74,       "r74" },       { rc(gpr)|75,           "r75" },
+    { rc(gpr)|76,       "r76" },       { rc(gpr)|77,           "r77" },
+    { rc(gpr)|78,       "r78" },       { rc(gpr)|79,           "r79" },
+    { rc(gpr)|80,       "r80" },       { rc(gpr)|81,           "r81" },
+    { rc(gpr)|82,       "r82" },       { rc(gpr)|83,           "r83" },
+    { rc(gpr)|84,       "r84" },       { rc(gpr)|85,           "r85" },
+    { rc(gpr)|86,       "r86" },       { rc(gpr)|87,           "r87" },
+    { rc(gpr)|88,       "r88" },       { rc(gpr)|89,           "r89" },
+    { rc(gpr)|90,       "r90" },       { rc(gpr)|91,           "r91" },
+    { rc(gpr)|92,       "r92" },       { rc(gpr)|93,           "r93" },
+    { rc(gpr)|94,       "r94" },       { rc(gpr)|95,           "r95" },
+    { rc(gpr)|96,       "r96" },       { rc(gpr)|97,           "r97" },
+    { rc(gpr)|98,       "r98" },       { rc(gpr)|99,           "r99" },
+    { rc(gpr)|100,      "r100"},       { rc(gpr)|101,          "r101"},
+    { rc(gpr)|102,      "r102"},       { rc(gpr)|103,          "r103"},
+    { rc(gpr)|104,      "r104"},       { rc(gpr)|105,          "r105"},
+    { rc(gpr)|106,      "r106"},       { rc(gpr)|107,          "r107"},
+    { rc(gpr)|108,      "r108"},       { rc(gpr)|109,          "r109"},
+    { rc(gpr)|110,      "r110"},       { rc(gpr)|111,          "r111"},
+    { rc(gpr)|112,      "r112"},       { rc(gpr)|113,          "r113"},
+    { rc(gpr)|114,      "r114"},       { rc(gpr)|115,          "r115"},
+    /* Do not enable these because, no matter what, we want 12 free registers:
+     * 4 for prolog and epilog and 8 for outgoing arguments */
+    { 116,              "r116"},       { 117,                  "r117"},
+    { 118,              "r118"},       { 119,                  "r119"},
+    { 120,              "r120"},       { 121,                  "r121"},
+    { 122,              "r122"},       { 123,                  "r123"},
+    { 124,              "r124"},       { 125,                  "r125"},
+    { 126,              "r126"},       { 127,                  "r127"},
+    /* Always 0.0 */
+    { 0,                "f0"  },
+    /* Always 1.0 */
+    { 1,                "f1"  },
+    /* Do not touch callee-saved registers that are not automatically spilled/reloaded */
+    { rc(sav)|2,        "f2"  },       { rc(sav)|3,            "f3"  },
+    { rc(sav)|4,        "f4"  },       { rc(sav)|5,            "f5"  },
+    /* Scratch */
+    { rc(fpr)|6,        "f6"  },       { rc(fpr)|7,            "f7"  },
+    /* Do not allocate for the sake of simplification */
+    { 8,                "f8"  },
+    /* Scratch - Argument/return registers */
+    { rc(fpr)|9,        "f9"  },
+    { rc(fpr)|10,       "f10" },       { rc(fpr)|11,           "f11" },
+    { rc(fpr)|12,       "f12" },       { rc(fpr)|13,           "f13" },
+    { rc(fpr)|14,       "f14" },       { rc(fpr)|15,           "f15" },
+    /* Do not touch callee-saved registers that are not automatically spilled/reloaded */
+    { rc(sav)|16,       "f16" },       { rc(sav)|17,           "f17" },
+    { rc(sav)|18,       "f18" },       { rc(sav)|19,           "f19" },
+    { rc(sav)|20,       "f20" },       { rc(sav)|21,           "f21" },
+    { rc(sav)|22,       "f22" },       { rc(sav)|23,           "f23" },
+    { rc(sav)|24,       "f24" },       { rc(sav)|25,           "f25" },
+    { rc(sav)|26,       "f26" },       { rc(sav)|27,           "f27" },
+    { rc(sav)|28,       "f28" },       { rc(sav)|29,           "f29" },
+    { rc(sav)|30,       "f30" },       { rc(sav)|31,           "f31" },
+    /* Scratch */
+    { rc(fpr)|32,       "f32" },       { rc(fpr)|33,           "f33" },
+    { rc(fpr)|34,       "f34" },       { rc(fpr)|35,           "f35" },
+    { rc(fpr)|36,       "f36" },       { rc(fpr)|37,           "f37" },
+    { rc(fpr)|38,       "f38" },       { rc(fpr)|39,           "f39" },
+    { rc(fpr)|40,       "f40" },       { rc(fpr)|41,           "f41" },
+    { rc(fpr)|42,       "f42" },       { rc(fpr)|43,           "f43" },
+    { rc(fpr)|44,       "f44" },       { rc(fpr)|45,           "f45" },
+    { rc(fpr)|46,       "f46" },       { rc(fpr)|47,           "f47" },
+    { rc(fpr)|48,       "f48" },       { rc(fpr)|49,           "f49" },
+    { rc(fpr)|50,       "f50" },       { rc(fpr)|51,           "f51" },
+    { rc(fpr)|52,       "f52" },       { rc(fpr)|53,           "f53" },
+    { rc(fpr)|54,       "f54" },       { rc(fpr)|55,           "f55" },
+    { rc(fpr)|56,       "f56" },       { rc(fpr)|57,           "f57" },
+    { rc(fpr)|58,       "f58" },       { rc(fpr)|59,           "f59" },
+    { rc(fpr)|60,       "f60" },       { rc(fpr)|61,           "f61" },
+    { rc(fpr)|62,       "f62" },       { rc(fpr)|63,           "f63" },
+    { rc(fpr)|64,       "f64" },       { rc(fpr)|65,           "f65" },
+    { rc(fpr)|66,       "f66" },       { rc(fpr)|67,           "f67" },
+    { rc(fpr)|68,       "f68" },       { rc(fpr)|69,           "f69" },
+    { rc(fpr)|70,       "f70" },       { rc(fpr)|71,           "f71" },
+    { rc(fpr)|72,       "f72" },       { rc(fpr)|73,           "f73" },
+    { rc(fpr)|74,       "f74" },       { rc(fpr)|75,           "f75" },
+    { rc(fpr)|76,       "f76" },       { rc(fpr)|77,           "f77" },
+    { rc(fpr)|78,       "f78" },       { rc(fpr)|79,           "f79" },
+    { rc(fpr)|80,       "f80" },       { rc(fpr)|81,           "f81" },
+    { rc(fpr)|82,       "f82" },       { rc(fpr)|83,           "f83" },
+    { rc(fpr)|84,       "f84" },       { rc(fpr)|85,           "f85" },
+    { rc(fpr)|86,       "f86" },       { rc(fpr)|87,           "f87" },
+    { rc(fpr)|88,       "f88" },       { rc(fpr)|89,           "f89" },
+    { rc(fpr)|90,       "f90" },       { rc(fpr)|91,           "f91" },
+    { rc(fpr)|92,       "f92" },       { rc(fpr)|93,           "f93" },
+    { rc(fpr)|94,       "f94" },       { rc(fpr)|95,           "f95" },
+    { rc(fpr)|96,       "f96" },       { rc(fpr)|97,           "f97" },
+    { rc(fpr)|98,       "f98" },       { rc(fpr)|99,           "f99" },
+    { rc(fpr)|100,      "f100"},       { rc(fpr)|101,          "f101"},
+    { rc(fpr)|102,      "f102"},       { rc(fpr)|103,          "f103"},
+    { rc(fpr)|104,      "f104"},       { rc(fpr)|105,          "f105"},
+    { rc(fpr)|106,      "f106"},       { rc(fpr)|107,          "f107"},
+    { rc(fpr)|108,      "f108"},       { rc(fpr)|109,          "f109"},
+    { rc(fpr)|110,      "f110"},       { rc(fpr)|111,          "f111"},
+    { rc(fpr)|112,      "f112"},       { rc(fpr)|113,          "f113"},
+    { rc(fpr)|114,      "f114"},       { rc(fpr)|115,          "f115"},
+    { rc(fpr)|116,      "f116"},       { rc(fpr)|117,          "f117"},
+    { rc(fpr)|118,      "f118"},       { rc(fpr)|119,          "f119"},
+#if 0
+    { rc(fpr)|120,      "f120"},       { rc(fpr)|121,          "f121"},
+    { rc(fpr)|122,      "f122"},       { rc(fpr)|123,          "f123"},
+    { rc(fpr)|124,      "f124"},       { rc(fpr)|125,          "f125"},
+    { rc(fpr)|126,      "f126"},       { rc(fpr)|127,          "f127"},
+#endif
+    /* Fake registers to patch in movr and movi arguments */
+    { rc(arg)|120,      "o0"  },       { rc(arg)|121,          "o1"  },
+    { rc(arg)|122,      "o2"  },       { rc(arg)|123,          "o3"  },
+    { rc(arg)|124,      "o4"  },       { rc(arg)|125,          "o5"  },
+    { rc(arg)|126,      "o6"  },       { rc(arg)|127,          "o7"  },
+    { _NOREG,           "<none>" },
 };
-
-/* Could also:
- *     o reserve a register for carry  (overkill)
- *     o use MTLO/MFLO                 (performance hit)
- * So, keep a register allocated after setting carry, and implicitly
- * deallocate it if it can no longer be tracked
- */
-static jit_int32_t     jit_carry;
+int missing_count;
 
 /*
  * Implementation
@@ -110,13 +223,15 @@ void
 _jit_init(jit_state_t *_jit)
 {
     _jitc->reglen = jit_size(_rvs) - 1;
+    jit_regset_new(&_jitc->gprs);
+    jit_regset_new(&_jitc->fprs);
     jit_carry = _NOREG;
 }
 
 void
 _jit_prolog(jit_state_t *_jit)
 {
-    jit_int32_t                 offset;
+    jit_int32_t                offset;
 
     if (_jitc->function)
        jit_epilog();
@@ -132,7 +247,8 @@ _jit_prolog(jit_state_t *_jit)
     _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
     _jitc->function->self.size = stack_framesize;
     _jitc->function->self.argi = _jitc->function->self.argf =
-       _jitc->function->self.aoff = _jitc->function->self.alen = 0;
+       _jitc->function->self.aoff = _jitc->function->self.alen =
+       _jitc->function->self.aoff = 0;
     _jitc->function->self.call = jit_call_default;
     jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
              _jitc->reglen * sizeof(jit_int32_t));
@@ -179,10 +295,7 @@ _jit_ret(jit_state_t *_jit)
 void
 _jit_retr(jit_state_t *_jit, jit_int32_t u)
 {
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    else
-       jit_live(JIT_RET);
+    jit_movr(JIT_RET, u);
     jit_ret();
 }
 
@@ -196,10 +309,7 @@ _jit_reti(jit_state_t *_jit, jit_word_t u)
 void
 _jit_retr_f(jit_state_t *_jit, jit_int32_t u)
 {
-    if (JIT_FRET != u)
-       jit_movr_f(JIT_FRET, u);
-    else
-       jit_live(JIT_FRET);
+    jit_movr_f(JIT_FRET, u);
     jit_ret();
 }
 
@@ -213,10 +323,7 @@ _jit_reti_f(jit_state_t *_jit, jit_float32_t u)
 void
 _jit_retr_d(jit_state_t *_jit, jit_int32_t u)
 {
-    if (JIT_FRET != u)
-       jit_movr_d(JIT_FRET, u);
-    else
-       jit_live(JIT_FRET);
+    jit_movr_d(JIT_FRET, u);
     jit_ret();
 }
 
@@ -227,7 +334,6 @@ _jit_reti_d(jit_state_t *_jit, jit_float64_t u)
     jit_ret();
 }
 
-/* must be called internally only */
 void
 _jit_epilog(jit_state_t *_jit)
 {
@@ -243,18 +349,19 @@ _jit_arg(jit_state_t *_jit)
     jit_int32_t                offset;
 
     assert(_jitc->function);
-    offset = (_jitc->function->self.size - stack_framesize) >> 2;
-    _jitc->function->self.argi = 1;
-    if (offset >= 4)
+    if (_jitc->function->self.argi < 8)
+       offset = _jitc->function->self.argi++;
+    else {
        offset = _jitc->function->self.size;
-    _jitc->function->self.size += sizeof(jit_word_t);
+       _jitc->function->self.size += sizeof(jit_word_t);
+    }
     return (jit_new_node_w(jit_code_arg, offset));
 }
 
 jit_bool_t
 _jit_arg_reg_p(jit_state_t *_jit, jit_int32_t offset)
 {
-    return (offset >= 0 && offset < 4);
+    return (offset >= 0 && offset < 8);
 }
 
 jit_node_t *
@@ -263,310 +370,185 @@ _jit_arg_f(jit_state_t *_jit)
     jit_int32_t                offset;
 
     assert(_jitc->function);
-    offset = (_jitc->function->self.size - stack_framesize) >> 2;
-    if (offset < 4) {
-       if (!_jitc->function->self.argi) {
-           if (offset == 0)
-               offset = 4;
-           else {
-               offset = 6;
-               _jitc->function->self.argi = 1;
-           }
-       }
-    }
-    else
+    if (_jitc->function->self.argi < 8)
+       offset = _jitc->function->self.argi++;
+    else {
        offset = _jitc->function->self.size;
-    _jitc->function->self.size += sizeof(jit_float32_t);
-
+       _jitc->function->self.size += sizeof(jit_word_t);
+    }
     return (jit_new_node_w(jit_code_arg_f, offset));
 }
 
-jit_bool_t
-_jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset)
-{
-    /* 0-3 integer register, 4-7 float register */
-    return (offset >= 0 && offset < 8);
-}
-
 jit_node_t *
 _jit_arg_d(jit_state_t *_jit)
 {
     jit_int32_t                offset;
 
     assert(_jitc->function);
-    if (_jitc->function->self.size & 7) {
-       _jitc->function->self.size += 4;
-       _jitc->function->self.argi = 1;
-    }
-    offset = (_jitc->function->self.size - stack_framesize) >> 2;
-    if (offset < 4) {
-       if (!_jitc->function->self.argi)
-           offset += 4;
-    }
-    else
+    if (_jitc->function->self.argi < 8)
+       offset = _jitc->function->self.argi++;
+    else {
        offset = _jitc->function->self.size;
-    _jitc->function->self.size += sizeof(jit_float64_t);
+       _jitc->function->self.size += sizeof(jit_word_t);
+    }
     return (jit_new_node_w(jit_code_arg_d, offset));
 }
 
-jit_bool_t
-_jit_arg_d_reg_p(jit_state_t *_jit, jit_int32_t offset)
-{
-    return (jit_arg_f_reg_p(offset));
-}
-
 void
 _jit_getarg_c(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4)
-       jit_extr_c(u, _A0 - v->u.w);
+    if (v->u.w < 8)
+       jit_extr_c(u, _R32 + v->u.w);
     else
-       jit_ldxi_c(u, _FP, v->u.w);
+       jit_ldxi_c(u, JIT_FP, v->u.w);
 }
 
 void
 _jit_getarg_uc(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4)
-       jit_extr_uc(u, _A0 - v->u.w);
+    if (v->u.w < 8)
+       jit_extr_uc(u, _R32 + v->u.w);
     else
-       jit_ldxi_uc(u, _FP, v->u.w);
+       jit_ldxi_uc(u, JIT_FP, v->u.w);
 }
 
 void
 _jit_getarg_s(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4)
-       jit_extr_s(u, _A0 - v->u.w);
+    if (v->u.w < 8)
+       jit_extr_s(u, _R32 + v->u.w);
     else
-       jit_ldxi_s(u, _FP, v->u.w);
+       jit_ldxi_s(u, JIT_FP, v->u.w);
 }
 
 void
 _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4)
-       jit_extr_us(u, _A0 - v->u.w);
+    if (v->u.w < 8)
+       jit_extr_us(u, _R32 + v->u.w);
     else
-       jit_ldxi_us(u, _FP, v->u.w);
+       jit_ldxi_us(u, JIT_FP, v->u.w);
 }
 
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4) {
-#if __WORDSIZE == 64
-       jit_extr_i(u, _A0 - v->u.w);
-#else
-       jit_movr(u, _A0 - v->u.w);
-#endif
-    }
+    if (v->u.w < 8)
+       jit_extr_i(u, _R32 + v->u.w);
     else
-       jit_ldxi_i(u, _FP, v->u.w);
+       jit_ldxi_i(u, JIT_FP, v->u.w);
 }
 
-#if __WORDSIZE == 64
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4)
-       jit_extr_ui(u, _A0 - v->u.w);
+    if (v->u.w < 8)
+       jit_extr_ui(u, _R32 + v->u.w);
     else
-       jit_ldxi_ui(u, _FP, v->u.w);
+       jit_ldxi_ui(u, JIT_FP, v->u.w);
 }
 
 void
 _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4)
-       jit_movr(u, _A0 - v->u.w);
+    if (v->u.w < 8)
+       jit_movr(u, _R32 + v->u.w);
     else
-       jit_ldxi_l(u, _FP, v->u.w);
+       jit_ldxi(u, JIT_FP, v->u.w);
 }
-#endif
 
 void
 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4)
-       jit_movr_w_f(u, _A0 - v->u.w);
-    else if (v->u.w < 8)
-       jit_movr_f(u, _F12 - ((v->u.w - 4) >> 1));
+/*
+    if (v->u.w < 8)
+       jit_movr_f(u, _R32 + v->u.w);
     else
-       jit_ldxi_f(u, _FP, v->u.w);
+       jit_ldxi_f(u, JIT_FP, v->u.w);
+*/
 }
 
 void
 _jit_getarg_d(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 4)
-       jit_movr_ww_d(u, _A0 - v->u.w, _A0 - (v->u.w + 1));
-    else if (v->u.w < 8)
-       jit_movr_d(u, _F12 - ((v->u.w - 4) >> 1));
+/*
+    if (v->u.w < 8)
+       jit_movr_d(u, _R32 + v->u.w);
     else
-       jit_ldxi_d(u, _FP, v->u.w);
+       jit_ldxi_d(u, JIT_FP, v->u.w);
+*/
 }
 
 void
 _jit_pushargr(jit_state_t *_jit, jit_int32_t u)
 {
-    jit_word_t         offset;
-
     assert(_jitc->function);
-    offset = _jitc->function->call.size >> 2;
-    _jitc->function->call.argi = 1;
-    if (offset < 4)
-       jit_movr(_A0 - offset, u);
-    else
+    if (_jitc->function->call.argi < 8) {
+       jit_movr(_OUT0 + _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
+    else {
        jit_stxi(_jitc->function->call.size, JIT_SP, u);
-    _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += sizeof(jit_word_t);
+    }
 }
 
 void
 _jit_pushargi(jit_state_t *_jit, jit_word_t u)
 {
-    jit_int32_t                regno;
-    jit_word_t         offset;
-
+    jit_int32_t                 regno;
     assert(_jitc->function);
-    offset = _jitc->function->call.size >> 2;
-    ++_jitc->function->call.argi;
-    if (offset < 4)
-       jit_movi(_A0 - offset, u);
+    if (_jitc->function->call.argi < 8) {
+       jit_movi(_OUT0 + _jitc->function->call.argi, u);
+       ++_jitc->function->call.argi;
+    }
     else {
        regno = jit_get_reg(jit_class_gpr);
        jit_movi(regno, u);
        jit_stxi(_jitc->function->call.size, JIT_SP, regno);
+       _jitc->function->call.size += sizeof(jit_word_t);
        jit_unget_reg(regno);
     }
-    _jitc->function->call.size += sizeof(jit_word_t);
 }
 
 void
 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
 {
-    jit_word_t         offset;
-
     assert(_jitc->function);
-    offset = _jitc->function->call.size >> 2;
-    if (offset < 2 && !_jitc->function->call.argi) {
-       ++_jitc->function->call.argf;
-       jit_movr_f(_F12 - offset, u);
-    }
-    else if (offset < 4) {
-       ++_jitc->function->call.argi;
-       jit_movr_f_w(_A0 - offset, u);
-    }
-    else
-       jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
-    _jitc->function->call.size += sizeof(jit_float32_t);
 }
 
 void
 _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
 {
-    jit_int32_t                regno;
-    jit_word_t         offset;
-
+    jit_int32_t                 regno;
     assert(_jitc->function);
-    offset = _jitc->function->call.size >> 2;
-    if (offset < 2 && !_jitc->function->call.argi) {
-       ++_jitc->function->call.argf;
-       jit_movi_f(_F12 - offset, u);
-    }
-    else if (offset < 4) {
-       ++_jitc->function->call.argi;
-       jit_movi_f_w(_A0 - offset, u);
-    }
-    else {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_f(regno, u);
-       jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
-    }
-    _jitc->function->call.size += sizeof(jit_float32_t);
+    /* FIXME move to OUTn or stack */
+    regno = jit_get_reg(jit_class_fpr);
+    jit_movi_f(regno, u);
+    jit_unget_reg(regno);
 }
 
 void
 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
 {
-    jit_bool_t         adjust;
-    jit_word_t         offset;
-
     assert(_jitc->function);
-    adjust = !!_jitc->function->call.argi;
-    if (_jitc->function->call.size & 7) {
-       _jitc->function->call.size += 4;
-       adjust = 1;
-    }
-    offset = _jitc->function->call.size >> 2;
-    if (offset < 3) {
-       if (adjust) {
-           jit_movr_d_ww(_A0 - offset, _A0 - (offset + 1), u);
-           _jitc->function->call.argi += 2;
-       }
-       else {
-           jit_movr_d(_F12 - (offset >> 1), u);
-           ++_jitc->function->call.argf;
-       }
-    }
-    else
-       jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
-    _jitc->function->call.size += sizeof(jit_float64_t);
 }
 
 void
 _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
 {
-    jit_int32_t                regno;
-    jit_bool_t         adjust;
-    jit_word_t         offset;
-
+    jit_int32_t                 regno;
     assert(_jitc->function);
-    adjust = !!_jitc->function->call.argi;
-    if (_jitc->function->call.size & 7) {
-       _jitc->function->call.size += 4;
-       adjust = 1;
-    }
-    offset = _jitc->function->call.size >> 2;
-    if (offset < 3) {
-       if (adjust) {
-           jit_movi_d_ww(_A0 - offset, _A0 - (offset + 1), u);
-           _jitc->function->call.argi += 2;
-       }
-       else {
-           jit_movi_d(_F12 - (offset >> 1), u);
-           ++_jitc->function->call.argf;
-       }
-    }
-    else {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_d(regno, u);
-       jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
-    }
-    _jitc->function->call.size += sizeof(jit_float64_t);
+    /* FIXME move to OUTn or stack */
+    regno = jit_get_reg(jit_class_fpr);
+    jit_movi_d(regno, u);
+    jit_unget_reg(regno);
 }
 
 jit_bool_t
 _jit_regarg_p(jit_state_t *_jit, jit_node_t *node, jit_int32_t regno)
 {
-    jit_int32_t                spec;
-
-    spec = jit_class(_rvs[regno].spec);
-    if (spec & jit_class_arg) {
-       if (spec & jit_class_gpr) {
-           regno = _A0 - regno;
-           if (regno >= 0 && regno < node->v.w)
-               return (1);
-       }
-       else if (spec & jit_class_fpr) {
-           regno = _F12 - regno;
-           if (regno >= 0 && regno < node->w.w)
-               return (1);
-       }
-    }
-
+    /* Argument registers are allocated from the pool of unused registers */
     return (0);
 }
 
@@ -574,14 +556,12 @@ void
 _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_node_t         *call;
-
     assert(_jitc->function);
     if (_jitc->function->self.alen < _jitc->function->call.size)
        _jitc->function->self.alen = _jitc->function->call.size;
-    jit_movr(_T9, r0);
-    call = jit_callr(_T9);
-    call->v.w = _jitc->function->self.argi;
-    call->w.w = _jitc->function->self.argf;
+    call = jit_callr(r0);
+    call->v.w = _jitc->function->call.argi;
+    call->w.w = _jitc->function->call.argf;
     _jitc->function->call.argi = _jitc->function->call.argf =
        _jitc->function->call.size = 0;
     _jitc->prepare = 0;
@@ -590,16 +570,10 @@ _jit_finishr(jit_state_t *_jit, jit_int32_t r0)
 jit_node_t *
 _jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
 {
-    jit_node_t         *call;
     jit_node_t         *node;
-
-    assert(_jitc->function);
-    if (_jitc->function->self.alen < _jitc->function->call.size)
-       _jitc->function->self.alen = _jitc->function->call.size;
-    node = jit_movi(_T9, (jit_word_t)i0);
-    call = jit_callr(_T9);
-    call->v.w = _jitc->function->call.argi;
-    call->w.w = _jitc->function->call.argf;
+    node = jit_calli(i0);
+    node->v.w = _jitc->function->call.argi;
+    node->w.w = _jitc->function->call.argf;
     _jitc->function->call.argi = _jitc->function->call.argf =
        _jitc->function->call.size = 0;
     _jitc->prepare = 0;
@@ -633,15 +607,9 @@ _jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
 void
 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
 {
-#if __WORDSIZE == 32
-    if (r0 != JIT_RET)
-       jit_movr(r0, JIT_RET);
-#else
     jit_extr_i(r0, JIT_RET);
-#endif
 }
 
-#if __WORDSIZE == 64
 void
 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
 {
@@ -651,23 +619,19 @@ _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
 void
 _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
 {
-    if (r0 != JIT_RET)
-       jit_movr(r0, JIT_RET);
+    jit_movr(r0, JIT_RET);
 }
-#endif
 
 void
 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
 {
-    if (r0 != JIT_FRET)
-       jit_movr_f(r0, JIT_FRET);
+    jit_movr_f(r0, JIT_FRET);
 }
 
 void
 _jit_retval_d(jit_state_t *_jit, jit_int32_t r0)
 {
-    if (r0 != JIT_FRET)
-       jit_movr_d(r0, JIT_FRET);
+    jit_movr_d(r0, JIT_FRET);
 }
 
 jit_pointer_t
@@ -682,6 +646,7 @@ _emit_code(jit_state_t *_jit)
        jit_node_t      *node;
        jit_word_t       word;
        jit_int32_t      patch_offset;
+       jit_word_t       prolog_offset;
     } undo;
 
     _jitc->function = NULL;
@@ -691,6 +656,11 @@ _emit_code(jit_state_t *_jit)
     undo.word = 0;
     undo.node = NULL;
     undo.patch_offset = 0;
+
+    undo.prolog_offset = 0;
+    /* code may start with a jump so add an initial function descriptor */
+    il(_jit->pc.w + 16);       /* addr */
+    il(0);                     /* gp */
 #define case_rr(name, type)                                            \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.w), rn(node->v.w));            \
@@ -708,15 +678,15 @@ _emit_code(jit_state_t *_jit)
                name##r##type(rn(node->u.w),                            \
                              rn(node->v.w), rn(node->w.w));            \
                break
-#define case_rrw(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
-               break
 #define case_rrrr(name, type)                                          \
            case jit_code_##name##r##type:                              \
                name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
                              rn(node->v.w), rn(node->w.w));            \
                break
+#define case_rrw(name, type)                                           \
+           case jit_code_##name##i##type:                              \
+               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
+               break
 #define case_rrrw(name, type)                                          \
            case jit_code_##name##i##type:                              \
                name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
@@ -782,9 +752,11 @@ _emit_code(jit_state_t *_jit)
        jit_regarg_set(node, value);
        switch (node->code) {
            case jit_code_note:         case jit_code_name:
+               sync();
                node->u.w = _jit->pc.w;
                break;
            case jit_code_label:
+               sync();
                /* remember label is defined */
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
@@ -797,10 +769,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(addx,);
                case_rrr(sub,);
                case_rrw(sub,);
-               case_rrr(subc,);
-               case_rrw(subc,);
                case_rrr(subx,);
                case_rrw(subx,);
+               case_rrr(subc,);
+               case_rrw(subc,);
                case_rrr(mul,);
                case_rrw(mul,);
                case_rrrr(qmul,);
@@ -811,32 +783,77 @@ _emit_code(jit_state_t *_jit)
                case_rrw(div,);
                case_rrr(div, _u);
                case_rrw(div, _u);
-               case_rrrr(qdiv,);
-               case_rrrw(qdiv,);
-               case_rrrr(qdiv, _u);
-               case_rrrw(qdiv, _u);
                case_rrr(rem,);
                case_rrw(rem,);
                case_rrr(rem, _u);
                case_rrw(rem, _u);
-               case_rrr(lsh,);
-               case_rrw(lsh,);
-               case_rrr(rsh,);
-               case_rrw(rsh,);
-               case_rrr(rsh, _u);
-               case_rrw(rsh, _u);
+               case_rrrr(qdiv,);
+               case_rrrw(qdiv,);
+               case_rrrr(qdiv, _u);
+               case_rrrw(qdiv, _u);
                case_rrr(and,);
                case_rrw(and,);
                case_rrr(or,);
                case_rrw(or,);
                case_rrr(xor,);
                case_rrw(xor,);
+               case_rrr(lsh,);
+               case_rrw(lsh,);
+               case_rrr(rsh,);
+               case_rrw(rsh,);
+               case_rrr(rsh, _u);
+               case_rrw(rsh, _u);
+               case_rr(neg,);
+               case_rr(com,);
+               case_rr(mov,);
+           case jit_code_movi:
+               if (node->flag & jit_flag_node) {
+                   temp = node->v.n;
+                   if (temp->code == jit_code_data ||
+                       (temp->code == jit_code_label &&
+                        (temp->flag & jit_flag_patch)))
+                       movi(rn(node->u.w), temp->u.w);
+                   else {
+                       assert(temp->code == jit_code_label ||
+                              temp->code == jit_code_epilog);
+                       word = movi_p(rn(node->u.w), node->v.w);
+                       patch(word, node);
+                   }
+               }
+               else
+                   movi(rn(node->u.w), node->v.w);
+               break;
+               case_rr(hton,);
+               case_rr(ext, _c);
+               case_rr(ext, _uc);
+               case_rr(ext, _s);
+               case_rr(ext, _us);
+               case_rr(ext, _i);
+               case_rr(ext, _ui);
                case_rr(trunc, _f_i);
                case_rr(trunc, _d_i);
-#if __WORDSIZE == 64
                case_rr(trunc, _f_l);
                case_rr(trunc, _d_l);
-#endif
+               case_rrr(lt,);
+               case_rrw(lt,);
+               case_rrr(lt, _u);
+               case_rrw(lt, _u);
+               case_rrr(le,);
+               case_rrw(le,);
+               case_rrr(le, _u);
+               case_rrw(le, _u);
+               case_rrr(eq,);
+               case_rrw(eq,);
+               case_rrr(ge,);
+               case_rrw(ge,);
+               case_rrr(ge, _u);
+               case_rrw(ge, _u);
+               case_rrr(gt,);
+               case_rrw(gt,);
+               case_rrr(gt, _u);
+               case_rrw(gt, _u);
+               case_rrr(ne,);
+               case_rrw(ne,);
                case_rr(ld, _c);
                case_rw(ld, _c);
                case_rr(ld, _uc);
@@ -847,12 +864,10 @@ _emit_code(jit_state_t *_jit)
                case_rw(ld, _us);
                case_rr(ld, _i);
                case_rw(ld, _i);
-#if __WORDSIZE == 64
                case_rr(ld, _ui);
                case_rw(ld, _ui);
                case_rr(ld, _l);
                case_rw(ld, _l);
-#endif
                case_rrr(ldx, _c);
                case_rrw(ldx, _c);
                case_rrr(ldx, _uc);
@@ -863,77 +878,26 @@ _emit_code(jit_state_t *_jit)
                case_rrw(ldx, _us);
                case_rrr(ldx, _i);
                case_rrw(ldx, _i);
-#if __WORDSIZE == 64
                case_rrr(ldx, _ui);
                case_rrw(ldx, _ui);
                case_rrr(ldx, _l);
                case_rrw(ldx, _l);
-#endif
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
                case_wr(st, _s);
                case_rr(st, _i);
                case_wr(st, _i);
-#if __WORDSIZE == 64
                case_rr(st, _l);
-               case_rw(st, _l);
-#endif
+               case_wr(st, _l);
                case_rrr(stx, _c);
                case_wrr(stx, _c);
                case_rrr(stx, _s);
                case_wrr(stx, _s);
                case_rrr(stx, _i);
                case_wrr(stx, _i);
-#if __WORDSIZE == 64
                case_rrr(stx, _l);
                case_wrr(stx, _l);
-#endif
-               case_rr(hton,);
-               case_rr(ext, _c);
-               case_rr(ext, _uc);
-               case_rr(ext, _s);
-               case_rr(ext, _us);
-               case_rr(mov,);
-           case jit_code_movi:
-               if (node->flag & jit_flag_node) {
-                   temp = node->v.n;
-                   if (temp->code == jit_code_data ||
-                       (temp->code == jit_code_label &&
-                        (temp->flag & jit_flag_patch)))
-                       movi(rn(node->u.w), temp->u.w);
-                   else {
-                       assert(temp->code == jit_code_label ||
-                              temp->code == jit_code_epilog);
-                       word = movi_p(rn(node->u.w), node->v.w);
-                       patch(word, node);
-                   }
-               }
-               else
-                   movi(rn(node->u.w), node->v.w);
-               break;
-               case_rr(neg,);
-               case_rr(com,);
-               case_rrr(lt,);
-               case_rrw(lt,);
-               case_rrr(lt, _u);
-               case_rrw(lt, _u);
-               case_rrr(le,);
-               case_rrw(le,);
-               case_rrr(le, _u);
-               case_rrw(le, _u);
-               case_rrr(eq,);
-               case_rrw(eq,);
-               case_rrr(ge,);
-               case_rrw(ge,);
-               case_rrr(ge, _u);
-               case_rrw(ge, _u);
-               case_rrr(gt,);
-               case_rrw(gt,);
-               case_rrr(gt, _u);
-               case_rrw(gt, _u);
-               case_rrr(ne,);
-               case_rrw(ne,);
                case_brr(blt,);
                case_brw(blt,);
                case_brr(blt, _u);
@@ -954,6 +918,10 @@ _emit_code(jit_state_t *_jit)
                case_brw(bgt, _u);
                case_brr(bne,);
                case_brw(bne,);
+               case_brr(bms,);
+               case_brw(bms,);
+               case_brr(bmc,);
+               case_brw(bmc,);
                case_brr(boadd,);
                case_brw(boadd,);
                case_brr(boadd, _u);
@@ -970,10 +938,6 @@ _emit_code(jit_state_t *_jit)
                case_brw(bxsub,);
                case_brr(bxsub, _u);
                case_brw(bxsub, _u);
-               case_brr(bms,);
-               case_brw(bms,);
-               case_brr(bmc,);
-               case_brw(bmc,);
                case_rrr(add, _f);
                case_rrf(add, _f, 32);
                case_rrr(sub, _f);
@@ -982,10 +946,10 @@ _emit_code(jit_state_t *_jit)
                case_rrf(mul, _f, 32);
                case_rrr(div, _f);
                case_rrf(div, _f, 32);
+               case_rr(ext, _f);
                case_rr(abs, _f);
                case_rr(neg, _f);
                case_rr(sqrt, _f);
-               case_rr(ext, _f);
                case_rr(ld, _f);
                case_rw(ld, _f);
                case_rrr(ldx, _f);
@@ -1064,10 +1028,10 @@ _emit_code(jit_state_t *_jit)
                case_rrf(mul, _d, 64);
                case_rrr(div, _d);
                case_rrf(div, _d, 64);
+               case_rr(ext, _d);
                case_rr(abs, _d);
                case_rr(neg, _d);
                case_rr(sqrt, _d);
-               case_rr(ext, _d);
                case_rr(ld, _d);
                case_rw(ld, _d);
                case_rrr(ldx, _d);
@@ -1142,13 +1106,15 @@ _emit_code(jit_state_t *_jit)
                jmpr(rn(node->u.w));
                break;
            case jit_code_jmpi:
+               if (_jit->pc.uc == _jit->code.ptr + 16)
+                   _jitc->jump = 1;
                temp = node->u.n;
                assert(temp->code == jit_code_label ||
                       temp->code == jit_code_epilog);
                if (temp->flag & jit_flag_patch)
                    jmpi(temp->u.w);
                else {
-                   word = jmpi(_jit->pc.w);
+                   word = jmpi_p(_jit->pc.w);
                    patch(word, node);
                }
                break;
@@ -1168,12 +1134,32 @@ _emit_code(jit_state_t *_jit)
                    calli(node->u.w);
                break;
            case jit_code_prolog:
+               jit_regset_set_ui(&_jitc->gprs, 0);
+               jit_regset_set_ui(&_jitc->fprs, 0);
+               sync();
                _jitc->function = _jitc->functions.ptr + node->w.w;
                undo.node = node;
                undo.word = _jit->pc.w;
                undo.patch_offset = _jitc->patches.offset;
+               undo.prolog_offset = _jitc->prolog.offset;
            restart_function:
                _jitc->again = 0;
+               if (_jitc->jump) {
+                   /* remember prolog to hide offset adjustment for a jump
+                    * to the start of a function, which is expected to be
+                    * common practice as the first jit instruction */
+                   if (_jitc->prolog.offset >= _jitc->prolog.length) {
+                       _jitc->prolog.length += 16;
+                       jit_realloc((jit_pointer_t *)&_jitc->prolog.ptr,
+                                   (_jitc->prolog.length - 16) *
+                                   sizeof(jit_word_t),
+                                   _jitc->prolog.length * sizeof(jit_word_t));
+                   }
+                   _jitc->prolog.ptr[_jitc->prolog.offset++] = _jit->pc.w;
+                   /* function descriptor */
+                   il(_jit->pc.w + 16);        /* addr */
+                   il(0);                      /* gp */
+               }
                prolog(node);
                break;
            case jit_code_epilog:
@@ -1189,35 +1175,21 @@ _emit_code(jit_state_t *_jit)
                    node = undo.node;
                    _jit->pc.w = undo.word;
                    _jitc->patches.offset = undo.patch_offset;
+                   _jitc->prolog.offset = undo.prolog_offset;
+                   _jitc->ioff = 0;
+                   jit_regset_set_ui(&_jitc->gprs, 0);
+                   jit_regset_set_ui(&_jitc->fprs, 0);
                    goto restart_function;
                }
                /* remember label is defined */
+               sync();
                node->flag |= jit_flag_patch;
                node->u.w = _jit->pc.w;
                epilog(node);
+               jit_regset_set_ui(&_jitc->gprs, 0);
+               jit_regset_set_ui(&_jitc->fprs, 0);
                _jitc->function = NULL;
                break;
-           case jit_code_movr_w_f:
-               movr_w_f(rn(node->u.w), rn(node->v.w));
-               break;
-           case jit_code_movr_f_w:
-               movr_f_w(rn(node->u.w), rn(node->v.w));
-               break;
-           case jit_code_movi_f_w:
-               assert(node->flag & jit_flag_data);
-               movi_f_w(rn(node->u.w), (jit_float32_t *)node->v.n->u.w);
-               break;
-           case jit_code_movr_ww_d:
-               movr_ww_d(rn(node->u.w), rn(node->v.w), rn(node->w.w));
-               break;
-           case jit_code_movr_d_ww:
-               movr_d_ww(rn(node->u.w), rn(node->v.w), rn(node->w.w));
-               break;
-           case jit_code_movi_d_ww:
-               assert(node->flag & jit_flag_data);
-               movi_d_ww(rn(node->u.w), rn(node->v.w),
-                         (jit_float64_t *)node->w.n->u.w);
-               break;
            case jit_code_live:
            case jit_code_arg:
            case jit_code_arg_f:                case jit_code_arg_d:
@@ -1243,12 +1215,15 @@ _emit_code(jit_state_t *_jit)
        /* update register live state */
        jit_reglive(node);
     }
+    sync();
 #undef case_brf
 #undef case_brw
 #undef case_brr
 #undef case_wrr
+#undef case_rrrw
 #undef case_rrf
 #undef case_rrw
+#undef case_rrrr
 #undef case_rrr
 #undef case_wr
 #undef case_rw
@@ -1257,57 +1232,60 @@ _emit_code(jit_state_t *_jit)
     for (offset = 0; offset < _jitc->patches.offset; offset++) {
        node = _jitc->patches.ptr[offset].node;
        word = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
-       patch_at(_jitc->patches.ptr[offset].inst, word);
+       patch_at(node, _jitc->patches.ptr[offset].inst, word);
     }
 
-#if defined(__linux__)
-    _flush_cache((char *)_jit->code.ptr, _jit->pc.uc - _jit->code.ptr, ICACHE);
-#endif
+    word = sysconf(_SC_PAGE_SIZE);
+    __clear_cache(_jit->code.ptr, (void *)((_jit->pc.w + word) & -word));
 
     return (_jit->code.ptr);
 }
 
 #define CODE                           1
-#  include "jit_mips-cpu.c"
-#  include "jit_mips-fpu.c"
+#  include "jit_ia64-cpu.c"
+#  include "jit_ia64-fpu.c"
 #undef CODE
 
+/* Use r2, which is reserved, so that no jit_get_reg call is required.
+ * Also note that addil needs a register that fits in 2 bits, so any
+ * register used instead of r2 must be less than r8 (or check for a
+ * smaller immediate, i.e. i0 >= -8192 && i0 <= 8191) */
 void
-_emit_ldxi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+_emit_ldxi(jit_state_t *_jit, jit_gpr_t r0, jit_gpr_t r1, jit_word_t i0)
 {
-#if __WORDSIZE == 32
-    ldxi_i(rn(r0), rn(r1), i0);
-#else
-    ldxi_l(rn(r0), rn(r1), i0);
-#endif
+    assert(i0 >= -2097152 && i0 < 2097151);
+    addi(GR_2, rn(r1), i0);
+    ldr(rn(r0), GR_2);
 }
 
 void
-_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_emit_stxi(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_gpr_t r1)
 {
-#if __WORDSIZE == 32
-    stxi_i(i0, rn(r0), rn(r1));
-#else
-    stxi_l(i0, rn(r0), rn(r1));
-#endif
+    assert(i0 >= -2097152 && i0 < 2097151);
+    addi(GR_2, rn(r0), i0);
+    str(GR_2, rn(r1));
 }
 
 void
-_emit_ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
+_emit_ldxi_d(jit_state_t *_jit, jit_fpr_t r0, jit_gpr_t r1, jit_word_t i0)
 {
-    ldxi_d(rn(r0), rn(r1), i0);
+    assert(i0 >= -2097152 && i0 < 2097151);
+    addi(GR_2, rn(r1), i0);
+    ldr_d(rn(r0), GR_2);
 }
 
 void
-_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
+_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, jit_gpr_t r0, jit_fpr_t r1)
 {
-    stxi_d(i0, rn(r0), rn(r1));
+    assert(i0 >= -2097152 && i0 < 2097151);
+    addi(GR_2, rn(r0), i0);
+    str_d(GR_2, rn(r1));
 }
 
 static void
 _patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
 {
-    jit_int32_t                 flag;
+    jit_int32_t                flag;
 
     assert(node->flag & jit_flag_node);
     if (node->code == jit_code_movi)
diff --git a/lib/jit_mips-cpu.c b/lib/jit_mips-cpu.c
index 25c1a7f..b804f50 100644
--- a/lib/jit_mips-cpu.c
+++ b/lib/jit_mips-cpu.c
@@ -2578,7 +2578,7 @@ _bxsubr(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, 
jit_int32_t r1)
     SUBU(rn(t1), r0, r1);              /* t1 = r0 - r1 */
     SLT(rn(t2), rn(t1), r0);           /* t2 = t1 < r0 */
     SLT(rn(t1), r0, rn(t1));           /* t1 = r0 < t1 */
-    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (r0 == 0) t1 = t2 */
+    MOVZ(rn(t1), rn(t2), rn(t0));      /* if (t0 == 0) t1 = t2 */
     w = _jit->pc.w;
     BEQ(rn(t1), _ZERO_REGNO, ((i0 - w) >> 2) - 1);
     /* delay slot */
diff --git a/lib/jit_mips.c b/lib/jit_mips.c
index 952c1ba..881bef4 100644
--- a/lib/jit_mips.c
+++ b/lib/jit_mips.c
@@ -90,14 +90,6 @@ jit_register_t               _rvs[] = {
     { _NOREG,                          "<none>" },
 };
 
-/* Could also:
- *     o reserve a register for carry  (overkill)
- *     o use MTLO/MFLO                 (performance hit)
- * So, keep a register allocated after setting carry, and implicitly
- * deallocate it if it can no longer be tracked
- */
-static jit_int32_t     jit_carry;
-
 /*
  * Implementation
  */
@@ -110,6 +102,12 @@ void
 _jit_init(jit_state_t *_jit)
 {
     _jitc->reglen = jit_size(_rvs) - 1;
+/* Could also:
+ *     o reserve a register for carry  (overkill)
+ *     o use MTLO/MFLO                 (performance hit)
+ * So, keep a register allocated after setting carry, and implicitly
+ * deallocate it if it can no longer be tracked
+ */
     jit_carry = _NOREG;
 }
 
diff --git a/lib/lightning.c b/lib/lightning.c
index 79327de..2c9b789 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -224,7 +224,15 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec)
                            jit_allocai(sizeof(jit_word_t));
                        _jitc->again = 1;
                    }
+#if DEBUG
+                   /* emit_stxi must not need temporary registers */
+                   assert(!_jitc->getreg);
+                   _jitc->getreg = 1;
+#endif
                    emit_stxi(_jitc->function->regoff[regno], JIT_FP, regno);
+#if DEBUG
+                   _jitc->getreg = 0;
+#endif
                }
                else {
                    if (!_jitc->function->regoff[regno]) {
@@ -232,7 +240,15 @@ _jit_get_reg(jit_state_t *_jit, jit_int32_t regspec)
                            jit_allocai(sizeof(jit_float64_t));
                        _jitc->again = 1;
                    }
+#if DEBUG
+                   /* emit_stxi must not need temporary registers */
+                   assert(!_jitc->getreg);
+                   _jitc->getreg = 1;
+#endif
                    emit_stxi_d(_jitc->function->regoff[regno], JIT_FP, regno);
+#if DEBUG
+                   _jitc->getreg = 0;
+#endif
                }
                jit_regset_setbit(&_jitc->regsav, regno);
            regarg:
@@ -277,10 +293,19 @@ _jit_unget_reg(jit_state_t *_jit, jit_int32_t regno)
     regno = jit_regno(regno);
     if (jit_regset_tstbit(&_jitc->regsav, regno)) {
        if (_jitc->emit) {
+#if DEBUG
+           /* emit_ldxi must not need a temporary register */
+           assert(!_jitc->getreg);
+           _jitc->getreg = 1;
+#endif
            if (jit_class(_rvs[regno].spec) & jit_class_gpr)
                emit_ldxi(regno, JIT_FP, _jitc->function->regoff[regno]);
            else
                emit_ldxi_d(regno, JIT_FP, _jitc->function->regoff[regno]);
+#if DEBUG
+           /* emit_ldxi must not need a temporary register */
+           _jitc->getreg = 0;
+#endif
        }
        else
            jit_load(regno);
@@ -290,6 +315,153 @@ _jit_unget_reg(jit_state_t *_jit, jit_int32_t regno)
     jit_regset_clrbit(&_jitc->regarg, regno);
 }
 
+#if __ia64__
+/* Store in u the bitwise complement of v, one word (rl, rh, fl, fh)
+ * at a time. */
+void
+jit_regset_com(jit_regset_t *u, jit_regset_t *v)
+{
+    u->rl = ~v->rl;
+    u->rh = ~v->rh;
+    u->fl = ~v->fl;
+    u->fh = ~v->fh;
+}
+
+/* Store in u the intersection of v and w: each word of u becomes the
+ * bitwise AND of the matching words of v and w. */
+void
+jit_regset_and(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl & w->rl;
+    u->rh = v->rh & w->rh;
+    u->fl = v->fl & w->fl;
+    u->fh = v->fh & w->fh;
+}
+
+/* Store in u the union of v and w: each word of u becomes the bitwise
+ * inclusive OR of the matching words of v and w. */
+void
+jit_regset_ior(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl | w->rl;
+    u->rh = v->rh | w->rh;
+    u->fl = v->fl | w->fl;
+    u->fh = v->fh | w->fh;
+}
+
+/* Store in u the symmetric difference of v and w: each word of u
+ * becomes the bitwise exclusive OR of the matching words of v and w. */
+void
+jit_regset_xor(jit_regset_t *u, jit_regset_t *v, jit_regset_t *w)
+{
+    u->rl = v->rl ^ w->rl;
+    u->rh = v->rh ^ w->rh;
+    u->fl = v->fl ^ w->fl;
+    u->fh = v->fh ^ w->fh;
+}
+
+/* Copy the four words (rl, rh, fl, fh) of v into u. */
+void
+jit_regset_set(jit_regset_t *u, jit_regset_t *v)
+{
+    u->rl = v->rl;
+    u->rh = v->rh;
+    u->fl = v->fl;
+    u->fh = v->fh;
+}
+
+/* Make u a mask with its low v bits set.  Bits are numbered across the
+ * four words in the order rl, rh, fl, fh, 64 bits per word, so v may
+ * range from 1 to 256.  v == 0 is kept as a special case that sets
+ * every bit of the set.
+ *
+ * A partially filled word is built as ~0ULL >> (64 - n), which has the
+ * low n bits set for 1 <= n <= 64 and never shifts by the full word
+ * width (that would be undefined behavior). */
+void
+jit_regset_set_mask(jit_regset_t *u, jit_int32_t v)
+{
+    assert(v >= 0 && v <= 256);
+    if (v == 0)
+       u->rl = u->rh = u->fl = u->fh = -1LL;
+    else if (v <= 64) {
+       u->rl = ~0ULL >> (64 - v);
+       u->rh = u->fl = u->fh = 0;
+    }
+    else if (v <= 128) {
+       u->rl = -1LL;
+       u->rh = ~0ULL >> (128 - v);
+       u->fl = u->fh = 0;
+    }
+    else if (v <= 192) {
+       u->rl = u->rh = -1LL;
+       u->fl = ~0ULL >> (192 - v);
+       u->fh = 0;
+    }
+    else {
+       /* fh starts at bit 192 of the set */
+       u->rl = u->rh = u->fl = -1LL;
+       u->fh = ~0ULL >> (256 - v);
+    }
+}
+
+/* Compare the set against the value v held in the low word only.
+ * Returns 0 when rl equals v and rh, fl and fh are all zero, and 1
+ * otherwise (i.e. nonzero means "different"). */
+jit_bool_t
+jit_regset_cmp_ui(jit_regset_t *u, jit_word_t v)
+{
+    return (u->rl != v || u->rh != 0 || u->fl != 0 || u->fh != 0);
+}
+
+/* Load v into the low word (rl) of the set and zero the other three
+ * words. */
+void
+jit_regset_set_ui(jit_regset_t *u, jit_word_t v)
+{
+    u->rl = v;
+    u->rh = 0;
+    u->fl = 0;
+    u->fh = 0;
+}
+
+/* Return 1 if any bit is set in any of the four words, 0 if the set
+ * is empty. */
+jit_bool_t
+jit_regset_set_p(jit_regset_t *u)
+{
+    return ((u->rl | u->rh | u->fl | u->fh) != 0);
+}
+
+/* Clear bit number `bit' (0..255) of the set.  Bits 0-63 live in rl,
+ * 64-127 in rh, 128-191 in fl and 192-255 in fh. */
+void
+jit_regset_clrbit(jit_regset_t *set, jit_int32_t bit)
+{
+    /* built from an unsigned one: 1LL << 63 overflows a signed long long */
+    unsigned long long  mask;
+
+    assert(bit >= 0 && bit <= 255);
+    mask = 1ULL << (bit & 63);         /* position inside the word */
+    switch (bit >> 6) {                        /* which of the four words */
+       case 0:         set->rl &= ~mask;       break;
+       case 1:         set->rh &= ~mask;       break;
+       case 2:         set->fl &= ~mask;       break;
+       default:        set->fh &= ~mask;       break;
+    }
+}
+
+/* Set bit number `bit' (0..255) of the set.  Bits 0-63 live in rl,
+ * 64-127 in rh, 128-191 in fl and 192-255 in fh. */
+void
+jit_regset_setbit(jit_regset_t *set, jit_int32_t bit)
+{
+    /* built from an unsigned one: 1LL << 63 overflows a signed long long */
+    unsigned long long  mask;
+
+    assert(bit >= 0 && bit <= 255);
+    mask = 1ULL << (bit & 63);         /* position inside the word */
+    switch (bit >> 6) {                        /* which of the four words */
+       case 0:         set->rl |= mask;        break;
+       case 1:         set->rh |= mask;        break;
+       case 2:         set->fl |= mask;        break;
+       default:        set->fh |= mask;        break;
+    }
+}
+
+/* Return 1 if bit number `bit' (0..255) is set, 0 otherwise.  Bits 0-63
+ * live in rl, 64-127 in rh, 128-191 in fl and 192-255 in fh. */
+jit_bool_t
+jit_regset_tstbit(jit_regset_t *set, jit_int32_t bit)
+{
+    /* built from an unsigned one: 1LL << 63 overflows a signed long long */
+    unsigned long long  mask;
+
+    assert(bit >= 0 && bit <= 255);
+    mask = 1ULL << (bit & 63);         /* position inside the word */
+    switch (bit >> 6) {                        /* which of the four words */
+       case 0:         return ((set->rl & mask) != 0);
+       case 1:         return ((set->rh & mask) != 0);
+       case 2:         return ((set->fl & mask) != 0);
+       default:        return ((set->fh & mask) != 0);
+    }
+}
+
+/* Return the index of the first set bit at position `offset' or above
+ * (offset must be in 0..255), or ULONG_MAX when no such bit exists. */
+unsigned long
+jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)
+{
+    assert(offset >= 0 && offset <= 255);
+    /* jit_regset_tstbit picks the word (rl, rh, fl or fh) for each index */
+    while (offset < 256) {
+       if (jit_regset_tstbit(set, offset))
+           return (offset);
+       ++offset;
+    }
+    return (ULONG_MAX);
+}
+#else
 unsigned long
 jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)
 {
@@ -300,6 +472,7 @@ jit_regset_scan1(jit_regset_t *set, jit_int32_t offset)
     }
     return (ULONG_MAX);
 }
+#endif
 
 void
 _jit_save(jit_state_t *_jit, jit_int32_t reg)
@@ -661,10 +834,15 @@ _jit_clear_state(jit_state_t *_jit)
     jit_free((jit_pointer_t *)&_jitc->data_info.ptr);
 #endif
 
-#if __powerpc64__
+#if __powerpc64__ || __ia64__
     jit_free((jit_pointer_t *)&_jitc->prolog.ptr);
 #endif
 
+#if __ia64__
+    jit_regset_del(&_jitc->gprs);
+    jit_regset_del(&_jitc->fprs);
+#endif
+
     jit_free((jit_pointer_t *)&_jitc);
 }
 
@@ -2689,4 +2867,6 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, 
jit_node_t *link,
 #  include "jit_ppc.c"
 #elif defined(__sparc__)
 #  include "jit_sparc.c"
+#elif defined(__ia64__)
+#  include "jit_ia64.c"
 #endif



reply via email to

[Prev in Thread] Current Thread [Next in Thread]