guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 03/34: First pass at aarch64 assembler port


From: Andy Wingo
Subject: [Guile-commits] 03/34: First pass at aarch64 assembler port
Date: Mon, 20 May 2019 09:55:50 -0400 (EDT)

wingo pushed a commit to branch master
in repository guile.

commit 19e7712358be67e1b1a1d764c36e0b9eb72a5762
Author: Andy Wingo <address@hidden>
Date:   Tue May 14 15:46:19 2019 +0200

    First pass at aarch64 assembler port
---
 lightening/aarch64-cpu.c | 4525 +++++++++++++++++++++++++---------------------
 lightening/aarch64-fpu.c | 1384 +++++++-------
 lightening/aarch64.c     | 1622 ++---------------
 lightening/aarch64.h     |  221 ++-
 4 files changed, 3407 insertions(+), 4345 deletions(-)

diff --git a/lightening/aarch64-cpu.c b/lightening/aarch64-cpu.c
index 98f2dab..0a1f01e 100644
--- a/lightening/aarch64-cpu.c
+++ b/lightening/aarch64-cpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2017, 2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -14,2433 +14,2888 @@
  * License for more details.
  *
  * Authors:
- *     Paulo Cesar Pereira de Andrade
+ *      Paulo Cesar Pereira de Andrade
  */
 
-#if PROTO
+#if __BYTE_ORDER != __LITTLE_ENDIAN
+#error AArch64 requires little-endian host
+#endif
+
 typedef union {
-/* aarch64-opc.c */
-#  define ui                   uint32_t
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
-    /* cond2: condition in truly conditional-executed inst.  */
-    struct {           ui b:  4; } cond2;
-    /* nzcv: flag bit specifier, encoded in the "nzcv" field.  */
-    struct {           ui b:  4; } nzcv;
-    /* defgh: d:e:f:g:h bits in AdvSIMD modified immediate.  */
-    struct { ui _:  5; ui b:  5; } defgh;
-    /* abc: a:b:c bits in AdvSIMD modified immediate.  */
-    struct { ui _: 16; ui b:  3; } abc;
-    /* imm19: e.g. in CBZ.  */
-    struct { ui _:  5; ui b: 19; } imm19;
-    /* immhi: e.g. in ADRP.  */
-    struct { ui _:  5; ui b: 19; } immhi;
-    /* immlo: e.g. in ADRP.  */
-    struct { ui _: 29; ui b:  2; } immlo;
-    /* size: in most AdvSIMD and floating-point instructions.  */
-    struct { ui _: 22; ui b:  2; } size;
-    /* vldst_size: size field in the AdvSIMD load/store inst.  */
-    struct { ui _: 10; ui b:  2; } vldst_size;
-    /* op: in AdvSIMD modified immediate instructions.  */
-    struct { ui _: 29; ui b:  1; } op;
-    /* Q: in most AdvSIMD instructions.  */
-    struct { ui _: 30; ui b:  1; } Q;
-    /* Rt: in load/store instructions.  */
-    struct {           ui b:  5; } Rt;
-    /* Rd: in many integer instructions.  */
-    struct {           ui b:  5; } Rd;
-    /* Rn: in many integer instructions.  */
-    struct { ui _:  5; ui b:  5; } Rn;
-    /* Rt2: in load/store pair instructions.  */
-    struct { ui _: 10; ui b:  5; } Rt2;
-    /* Ra: in fp instructions.  */
-    struct { ui _: 10; ui b:  5; } Ra;
-    /* op2: in the system instructions.  */
-    struct { ui _:  5; ui b:  3; } op2;
-    /* CRm: in the system instructions.  */
-    struct { ui _:  8; ui b:  4; } CRm;
-    /* CRn: in the system instructions.  */
-    struct { ui _: 12; ui b:  4; } CRn;
-    /* op1: in the system instructions.  */
-    struct { ui _: 16; ui b:  3; } op1;
-    /* op0: in the system instructions.  */
-    struct { ui _: 19; ui b:  2; } op0;
-    /* imm3: in add/sub extended reg instructions.  */
-    struct { ui _: 10; ui b:  3; } imm3;
-    /* cond: condition flags as a source operand.  */
-    struct { ui _: 12; ui b:  4; } cond;
-    /* opcode: in advsimd load/store instructions.  */
-    struct { ui _: 12; ui b:  4; } opcode;
-    /* cmode: in advsimd modified immediate instructions.  */
-    struct { ui _: 12; ui b:  4; } cmode;
-    /* asisdlso_opcode: opcode in advsimd ld/st single element.  */
-    struct { ui _: 13; ui b:  3; } asisdlso_opcode;
-    /* len: in advsimd tbl/tbx instructions.  */
-    struct { ui _: 13; ui b:  2; } len;
-    /* Rm: in ld/st reg offset and some integer inst.  */
-    struct { ui _: 16; ui b:  5; } Rm;
-    /* Rs: in load/store exclusive instructions.  */
-    struct { ui _: 16; ui b:  5; } Rs;
-    /* option: in ld/st reg offset + add/sub extended reg inst.  */
-    struct { ui _: 13; ui b:  3; } option;
-    /* S: in load/store reg offset instructions.  */
-    struct { ui _: 12; ui b:  1; } S;
-    /* hw: in move wide constant instructions.  */
-    struct { ui _: 21; ui b:  2; } hw;
-    /* opc: in load/store reg offset instructions.  */
-    struct { ui _: 22; ui b:  2; } opc;
-    /* opc1: in load/store reg offset instructions.  */
-    struct { ui _: 23; ui b:  1; } opc1;
-    /* shift: in add/sub reg/imm shifted instructions.  */
-    struct { ui _: 22; ui b:  2; } shift;
-    /* type: floating point type field in fp data inst.  */
-    struct { ui _: 22; ui b:  2; } type;
-    /* ldst_size: size field in ld/st reg offset inst.  */
-    struct { ui _: 30; ui b:  2; } ldst_size;
-    /* imm6: in add/sub reg shifted instructions.  */
-    struct { ui _: 10; ui b:  6; } imm6;
-    /* imm4: in advsimd ext and advsimd ins instructions.  */
-    struct { ui _: 11; ui b:  4; } imm4;
-    /* imm5: in conditional compare (immediate) instructions.  */
-    struct { ui _: 16; ui b:  5; } imm5;
-    /* imm7: in load/store pair pre/post index instructions.  */
-    struct { ui _: 15; ui b:  7; } imm7;
-    /* imm8: in floating-point scalar move immediate inst.  */
-    struct { ui _: 13; ui b:  8; } imm8;
-    /* imm9: in load/store pre/post index instructions.  */
-    struct { ui _: 12; ui b:  9; } imm9;
-    /* imm12: in ld/st unsigned imm or add/sub shifted inst.  */
-    struct { ui _: 10; ui b: 12; } imm12;
-    /* imm14: in test bit and branch instructions.  */
-    struct { ui _:  5; ui b: 14; } imm14;
-    /* imm16: in exception instructions.  */
-    struct { ui _:  5; ui b: 16; } imm16;
-    /* imm26: in unconditional branch instructions.  */
-    struct {           ui b: 26; } imm26;
-    /* imms: in bitfield and logical immediate instructions.  */
-    struct { ui _: 10; ui b:  6; } imms;
-    /* immr: in bitfield and logical immediate instructions.  */
-    struct { ui _: 16; ui b:  6; } immr;
-    /* immb: in advsimd shift by immediate instructions.  */
-    struct { ui _: 16; ui b:  3; } immb;
-    /* immh: in advsimd shift by immediate instructions.  */
-    struct { ui _: 19; ui b:  4; } immh;
-    /* N: in logical (immediate) instructions.  */
-    struct { ui _: 22; ui b:  1; } N;
-    /* index: in ld/st inst deciding the pre/post-index.  */
-    struct { ui _: 11; ui b:  1; } index;
-    /* index2: in ld/st pair inst deciding the pre/post-index.  */
-    struct { ui _: 24; ui b:  1; } index2;
-    /* sf: in integer data processing instructions.  */
-    struct { ui _: 31; ui b:  1; } sf;
-    /* H: in advsimd scalar x indexed element instructions.  */
-    struct { ui _: 11; ui b:  1; } H;
-    /* L: in advsimd scalar x indexed element instructions.  */
-    struct { ui _: 21; ui b:  1; } L;
-    /* M: in advsimd scalar x indexed element instructions.  */
-    struct { ui _: 20; ui b:  1; } M;
-    /* b5: in the test bit and branch instructions.  */
-    struct { ui _: 31; ui b:  1; } b5;
-    /* b40: in the test bit and branch instructions.  */
-    struct { ui _: 19; ui b:  5; } b40;
-    /* scale: in the fixed-point scalar to fp converting inst.  */
-    struct { ui _: 10; ui b:  6; } scale;
-#  else
-    struct { ui _: 28; ui b:  4; } cond2;
-    struct { ui _: 28; ui b:  4; } nzcv;
-    struct { ui _: 22; ui b:  5; } defgh;
-    struct { ui _: 13; ui b:  3; } abc;
-    struct { ui _:  8; ui b: 19; } imm19;
-    struct { ui _:  8; ui b: 19; } immhi;
-    struct { ui _:  1; ui b: 29; } immlo;
-    struct { ui _:  8; ui b:  2; } size;
-    struct { ui _: 20; ui b:  2; } vldst_size;
-    struct { ui _:  2; ui b:  1; } op;
-    struct { ui _:  1; ui b:  1; } Q;
-    struct { ui _: 27; ui b:  1; } Rt;
-    struct { ui _: 27; ui b:  1; } Rd;
-    struct { ui _: 22; ui b:  5; } Rn;
-    struct { ui _: 17; ui b:  5; } Rt2;
-    struct { ui _: 17; ui b:  5; } Ra;
-    struct { ui _: 24; ui b:  3; } op2;
-    struct { ui _: 20; ui b:  4; } CRm;
-    struct { ui _: 16; ui b:  4; } CRn;
-    struct { ui _: 13; ui b:  3; } op1;
-    struct { ui _: 11; ui b:  2; } op0;
-    struct { ui _: 19; ui b:  3; } imm3;
-    struct { ui _: 16; ui b:  4; } cond;
-    struct { ui _: 16; ui b:  4; } opcode;
-    struct { ui _: 16; ui b:  4; } cmode;
-    struct { ui _: 16; ui b:  3; } asisdlso_opcode;
-    struct { ui _: 17; ui b:  2; } len;
-    struct { ui _: 11; ui b:  5; } Rm;
-    struct { ui _: 11; ui b:  5; } Rs;
-    struct { ui _: 16; ui b:  3; } option;
-    struct { ui _: 19; ui b:  1; } S;
-    struct { ui _:  9; ui b:  2; } hw;
-    struct { ui _:  8; ui b:  2; } opc;
-    struct { ui _:  8; ui b:  1; } opc1;
-    struct { ui _:  8; ui b:  2; } shift;
-    struct { ui _:  8; ui b:  2; } type;
-    struct {           ui b:  2; } ldst_size;
-    struct { ui _: 16; ui b:  6; } imm6;
-    struct { ui _: 17; ui b:  4; } imm4;
-    struct { ui _: 11; ui b:  5; } imm5;
-    struct { ui _: 10; ui b:  7; } imm7;
-    struct { ui _: 11; ui b:  8; } imm8;
-    struct { ui _: 11; ui b:  9; } imm9;
-    struct { ui _: 10; ui b: 12; } imm12;
-    struct { ui _: 13; ui b: 14; } imm14;
-    struct { ui _: 11; ui b: 16; } imm16;
-    struct { ui _:  6; ui b: 26; } imm26;
-    struct { ui _: 16; ui b:  6; } imms;
-    struct { ui _: 10; ui b:  6; } immr;
-    struct { ui _: 13; ui b:  3; } immb;
-    struct { ui _:  9; ui b:  4; } immh;
-    struct { ui _:  9; ui b:  1; } N;
-    struct { ui _: 20; ui b:  1; } index;
-    struct { ui _:  7; ui b:  1; } index2;
-    struct {           ui b:  1; } sf;
-    struct { ui _: 20; ui b:  1; } H;
-    struct { ui _: 10; ui b:  1; } L;
-    struct { ui _: 11; ui b:  1; } M;
-    struct {           ui b:  1; } b5;
-    struct { ui _:  8; ui b:  5; } b40;
-    struct { ui _: 16; ui b:  6; } scale;
-#  endif
-    int32_t            w;
-#  undef ui
+  /* cond2: condition in truly conditional-executed inst.  */
+  struct {                    uint32_t b:  4; } cond2;
+  /* nzcv: flag bit specifier, encoded in the "nzcv" field.  */
+  struct {                    uint32_t b:  4; } nzcv;
+  /* defgh: d:e:f:g:h bits in AdvSIMD modified immediate.  */
+  struct { uint32_t _:  5;    uint32_t b:  5; } defgh;
+  /* abc: a:b:c bits in AdvSIMD modified immediate.  */
+  struct { uint32_t _: 16;    uint32_t b:  3; } abc;
+  /* imm19: e.g. in CBZ.  */
+  struct { uint32_t _:  5;    uint32_t b: 19; } imm19;
+  /* immhi: e.g. in ADRP.  */
+  struct { uint32_t _:  5;    uint32_t b: 19; } immhi;
+  /* immlo: e.g. in ADRP.  */
+  struct { uint32_t _: 29;    uint32_t b:  2; } immlo;
+  /* size: in most AdvSIMD and floating-point instructions.  */
+  struct { uint32_t _: 22;    uint32_t b:  2; } size;
+  /* vldst_size: size field in the AdvSIMD load/store inst.  */
+  struct { uint32_t _: 10;    uint32_t b:  2; } vldst_size;
+  /* op: in AdvSIMD modified immediate instructions.  */
+  struct { uint32_t _: 29;    uint32_t b:  1; } op;
+  /* Q: in most AdvSIMD instructions.  */
+  struct { uint32_t _: 30;    uint32_t b:  1; } Q;
+  /* Rt: in load/store instructions.  */
+  struct {                    uint32_t b:  5; } Rt;
+  /* Rd: in many integer instructions.  */
+  struct {                    uint32_t b:  5; } Rd;
+  /* Rn: in many integer instructions.  */
+  struct { uint32_t _:  5;    uint32_t b:  5; } Rn;
+  /* Rt2: in load/store pair instructions.  */
+  struct { uint32_t _: 10;    uint32_t b:  5; } Rt2;
+  /* Ra: in fp instructions.  */
+  struct { uint32_t _: 10;    uint32_t b:  5; } Ra;
+  /* op2: in the system instructions.  */
+  struct { uint32_t _:  5;    uint32_t b:  3; } op2;
+  /* CRm: in the system instructions.  */
+  struct { uint32_t _:  8;    uint32_t b:  4; } CRm;
+  /* CRn: in the system instructions.  */
+  struct { uint32_t _: 12;    uint32_t b:  4; } CRn;
+  /* op1: in the system instructions.  */
+  struct { uint32_t _: 16;    uint32_t b:  3; } op1;
+  /* op0: in the system instructions.  */
+  struct { uint32_t _: 19;    uint32_t b:  2; } op0;
+  /* imm3: in add/sub extended reg instructions.  */
+  struct { uint32_t _: 10;    uint32_t b:  3; } imm3;
+  /* cond: condition flags as a source operand.  */
+  struct { uint32_t _: 12;    uint32_t b:  4; } cond;
+  /* opcode: in advsimd load/store instructions.  */
+  struct { uint32_t _: 12;    uint32_t b:  4; } opcode;
+  /* cmode: in advsimd modified immediate instructions.  */
+  struct { uint32_t _: 12;    uint32_t b:  4; } cmode;
+  /* asisdlso_opcode: opcode in advsimd ld/st single element.  */
+  struct { uint32_t _: 13;    uint32_t b:  3; } asisdlso_opcode;
+  /* len: in advsimd tbl/tbx instructions.  */
+  struct { uint32_t _: 13;    uint32_t b:  2; } len;
+  /* Rm: in ld/st reg offset and some integer inst.  */
+  struct { uint32_t _: 16;    uint32_t b:  5; } Rm;
+  /* Rs: in load/store exclusive instructions.  */
+  struct { uint32_t _: 16;    uint32_t b:  5; } Rs;
+  /* option: in ld/st reg offset + add/sub extended reg inst.  */
+  struct { uint32_t _: 13;    uint32_t b:  3; } option;
+  /* S: in load/store reg offset instructions.  */
+  struct { uint32_t _: 12;    uint32_t b:  1; } S;
+  /* hw: in move wide constant instructions.  */
+  struct { uint32_t _: 21;    uint32_t b:  2; } hw;
+  /* opc: in load/store reg offset instructions.  */
+  struct { uint32_t _: 22;    uint32_t b:  2; } opc;
+  /* opc1: in load/store reg offset instructions.  */
+  struct { uint32_t _: 23;    uint32_t b:  1; } opc1;
+  /* shift: in add/sub reg/imm shifted instructions.  */
+  struct { uint32_t _: 22;    uint32_t b:  2; } shift;
+  /* type: floating point type field in fp data inst.  */
+  struct { uint32_t _: 22;    uint32_t b:  2; } type;
+  /* ldst_size: size field in ld/st reg offset inst.  */
+  struct { uint32_t _: 30;    uint32_t b:  2; } ldst_size;
+  /* imm6: in add/sub reg shifted instructions.  */
+  struct { uint32_t _: 10;    uint32_t b:  6; } imm6;
+  /* imm4: in advsimd ext and advsimd ins instructions.  */
+  struct { uint32_t _: 11;    uint32_t b:  4; } imm4;
+  /* imm5: in conditional compare (immediate) instructions.  */
+  struct { uint32_t _: 16;    uint32_t b:  5; } imm5;
+  /* imm7: in load/store pair pre/post index instructions.  */
+  struct { uint32_t _: 15;    uint32_t b:  7; } imm7;
+  /* imm8: in floating-point scalar move immediate inst.  */
+  struct { uint32_t _: 13;    uint32_t b:  8; } imm8;
+  /* imm9: in load/store pre/post index instructions.  */
+  struct { uint32_t _: 12;    uint32_t b:  9; } imm9;
+  /* imm12: in ld/st unsigned imm or add/sub shifted inst.  */
+  struct { uint32_t _: 10;    uint32_t b: 12; } imm12;
+  /* imm14: in test bit and branch instructions.  */
+  struct { uint32_t _:  5;    uint32_t b: 14; } imm14;
+  /* imm16: in exception instructions.  */
+  struct { uint32_t _:  5;    uint32_t b: 16; } imm16;
+  /* imm26: in unconditional branch instructions.  */
+  struct {                    uint32_t b: 26; } imm26;
+  /* imms: in bitfield and logical immediate instructions.  */
+  struct { uint32_t _: 10;    uint32_t b:  6; } imms;
+  /* immr: in bitfield and logical immediate instructions.  */
+  struct { uint32_t _: 16;    uint32_t b:  6; } immr;
+  /* immb: in advsimd shift by immediate instructions.  */
+  struct { uint32_t _: 16;    uint32_t b:  3; } immb;
+  /* immh: in advsimd shift by immediate instructions.  */
+  struct { uint32_t _: 19;    uint32_t b:  4; } immh;
+  /* N: in logical (immediate) instructions.  */
+  struct { uint32_t _: 22;    uint32_t b:  1; } N;
+  /* index: in ld/st inst deciding the pre/post-index.  */
+  struct { uint32_t _: 11;    uint32_t b:  1; } index;
+  /* index2: in ld/st pair inst deciding the pre/post-index.  */
+  struct { uint32_t _: 24;    uint32_t b:  1; } index2;
+  /* sf: in integer data processing instructions.  */
+  struct { uint32_t _: 31;    uint32_t b:  1; } sf;
+  /* H: in advsimd scalar x indexed element instructions.  */
+  struct { uint32_t _: 11;    uint32_t b:  1; } H;
+  /* L: in advsimd scalar x indexed element instructions.  */
+  struct { uint32_t _: 21;    uint32_t b:  1; } L;
+  /* M: in advsimd scalar x indexed element instructions.  */
+  struct { uint32_t _: 20;    uint32_t b:  1; } M;
+  /* b5: in the test bit and branch instructions.  */
+  struct { uint32_t _: 31;    uint32_t b:  1; } b5;
+  /* b40: in the test bit and branch instructions.  */
+  struct { uint32_t _: 19;    uint32_t b:  5; } b40;
+  /* scale: in the fixed-point scalar to fp converting inst.  */
+  struct { uint32_t _: 10;    uint32_t b:  6; } scale;
+  int32_t               w;
 } instr_t;
-#  define stack_framesize              160
-#  define ii(i)                                *_jit->pc.ui++ = i
-#  define ldr(r0,r1)                   ldr_l(r0,r1)
-#  define ldxr(r0,r1,r2)               ldxr_l(r0,r1,r2)
-#  define ldxi(r0,r1,i0)               ldxi_l(r0,r1,i0)
-#  define stxi(i0,r0,r1)               stxi_l(i0,r0,r1)
-#  define FP_REGNO                     0x1d
-#  define LR_REGNO                     0x1e
-#  define SP_REGNO                     0x1f
-#  define XZR_REGNO                    0x1f
-#  define WZR_REGNO                    XZR_REGNO
-#  define LSL_12                       0x00400000
-#  define MOVI_LSL_16                  0x00200000
-#  define MOVI_LSL_32                  0x00400000
-#  define MOVI_LSL_48                  0x00600000
-#  define XS                           0x80000000      /* Wn -> Xn */
-#  define DS                           0x00400000      /* Sn -> Dn */
-#  define CC_NE                                0x0
-#  define CC_EQ                                0x1
-#  define CC_CC                                0x2
-#  define CC_LO                                CC_CC
-#  define CC_CS                                0x3
-#  define CC_HS                                CC_CS
-#  define CC_PL                                0x4
-#  define CC_MI                                0x5
-#  define CC_VC                                0x6
-#  define CC_VS                                0x7
-#  define CC_LS                                0x8
-#  define CC_HI                                0x9
-#  define CC_LT                                0xa
-#  define CC_GE                                0xb
-#  define CC_LE                                0xc
-#  define CC_GT                                0xd
-#  define CC_NV                                0xe
-#  define CC_AL                                0xf
+
+static int32_t
+logical_immediate(jit_word_t imm)
+{
+  /* There are 5334 possible immediate values, but to avoid the
+   * need of either too complex code or large lookup tables,
+   * only check for (simply) encodable common/small values */
+  switch (imm) {
+  case -16:       return 0xf3b;
+  case -15:       return 0xf3c;
+  case -13:       return 0xf3d;
+  case -9:        return 0xf3e;
+  case -8:        return 0xf7c;
+  case -7:        return 0xf7d;
+  case -5:        return 0xf7e;
+  case -4:        return 0xfbd;
+  case -3:        return 0xfbe;
+  case -2:        return 0xffe;
+  case 1:         return 0x000;
+  case 2:         return 0xfc0;
+  case 3:         return 0x001;
+  case 4:         return 0xf80;
+  case 6:         return 0xfc1;
+  case 7:         return 0x002;
+  case 8:         return 0xf40;
+  case 12:        return 0xf81;
+  case 14:        return 0xfc2;
+  case 15:        return 0x003;
+  case 16:        return 0xf00;
+  default:        return -1;
+  }
+}
+
+static void
+oxxx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  instr_t     i;
+  ASSERT(!(Rd &       ~0x1f));
+  ASSERT(!(Rn &       ~0x1f));
+  ASSERT(!(Rm &       ~0x1f));
+  ASSERT(!(Op & ~0xffe0fc00));
+  i.w = Op;
+  i.Rd.b = Rd;
+  i.Rn.b = Rn;
+  i.Rm.b = Rm;
+  emit_u32(_jit, i.w);
+}
+
+static void
+oxxi(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Imm12)
+{
+  instr_t     i;
+  ASSERT(!(Rd    &       ~0x1f));
+  ASSERT(!(Rn    &       ~0x1f));
+  ASSERT(!(Imm12 &      ~0xfff));
+  ASSERT(!(Op    & ~0xffe00000));
+  i.w = Op;
+  i.Rd.b = Rd;
+  i.Rn.b = Rn;
+  i.imm12.b = Imm12;
+  emit_u32(_jit, i.w);
+}
+
+static void
+oxx9(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn, int32_t Imm9)
+{
+  instr_t     i;
+  ASSERT(!(Rd   &       ~0x1f));
+  ASSERT(!(Rn   &       ~0x1f));
+  ASSERT(!(Imm9 &      ~0x1ff));
+  ASSERT(!(Op   & ~0xffe00000));
+  i.w = Op;
+  i.Rd.b = Rd;
+  i.Rn.b = Rn;
+  i.imm9.b = Imm9;
+  emit_u32(_jit, i.w);
+}
+
+static uint32_t
+encode_ox19(jit_state_t *_jit, int32_t Op, int32_t Rd)
+{
+  instr_t     i;
+  ASSERT(!(Rd &         ~0x1f));
+  ASSERT(!(Op   & ~0xff000000));
+  i.w = Op;
+  i.Rd.b = Rd;
+  return i.w;
+}
+
+static uint32_t
+encode_oc19(jit_state_t *_jit, int32_t Op, int32_t Cc)
+{
+  instr_t     i;
+  ASSERT(!(Cc &          ~0xf));
+  ASSERT(!(Op   & ~0xff000000));
+  i.w = Op;
+  i.cond2.b = Cc;
+  return i.w;
+}
+
+static uint32_t
+encode_o26(jit_state_t *_jit, int32_t Op)
+{
+  instr_t     i;
+  ASSERT(!(Op   & ~0xfc000000));
+  i.w = Op;
+  return i.w;
+}
+
+static void
+ox_x(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rm)
+{
+  instr_t     i;
+  ASSERT(!(Rd &       ~0x1f));
+  ASSERT(!(Rm &       ~0x1f));
+  ASSERT(!(Op & ~0xffe0ffe0));
+  i.w = Op;
+  i.Rd.b = Rd;
+  i.Rm.b = Rm;
+  emit_u32(_jit, i.w);
+}
+
+static void
+o_xx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn)
+{
+  instr_t     i;
+  ASSERT(!(Rd &       ~0x1f));
+  ASSERT(!(Rn &       ~0x1f));
+  ASSERT(!(Op & ~0xfffffc00));
+  i.w = Op;
+  i.Rd.b = Rd;
+  i.Rn.b = Rn;
+  emit_u32(_jit, i.w);
+}
+
+static void
+oxx_(jit_state_t *_jit, int32_t Op, int32_t Rn, int32_t Rm)
+{
+  instr_t     i;
+  ASSERT(!(Rn &       ~0x1f));
+  ASSERT(!(Rm &       ~0x1f));
+  ASSERT(!(Op & ~0xffc0fc1f));
+  i.w = Op;
+  i.Rn.b = Rn;
+  i.Rm.b = Rm;
+  emit_u32(_jit, i.w);
+}
+
+static void
+o_x_(jit_state_t *_jit, int32_t Op, int32_t Rn)
+{
+  instr_t     i;
+  ASSERT(!(Rn & ~0x1f));
+  ASSERT(!(Op & 0x3e0));
+  i.w = Op;
+  i.Rn.b = Rn;
+  emit_u32(_jit, i.w);
+}
+
+static void
+ox_h(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Imm16)
+{
+  instr_t     i;
+  ASSERT(!(Rd    &       ~0x1f));
+  ASSERT(!(Imm16 &     ~0xffff));
+  ASSERT(!(Op    & ~0xffe00000));
+  i.w = Op;
+  i.Rd.b = Rd;
+  i.imm16.b = Imm16;
+  emit_u32(_jit, i.w);
+}
+
+static void
+oxxrs(jit_state_t *_jit, int32_t Op,
+      int32_t Rd, int32_t Rn, int32_t R, int32_t S)
+{
+  instr_t     i;
+  ASSERT(!(Rd &       ~0x1f));
+  ASSERT(!(Rn &       ~0x1f));
+  ASSERT(!(R  &       ~0x3f));
+  ASSERT(!(S  &       ~0x3f));
+  ASSERT(!(Op & ~0xffc00000));
+  i.w = Op;
+  i.Rd.b = Rd;
+  i.Rn.b = Rn;
+  i.immr.b = R;
+  i.imms.b = S;
+  emit_u32(_jit, i.w);
+}
+
+static void
+oxxxc(jit_state_t *_jit, int32_t Op,
+      int32_t Rd, int32_t Rn, int32_t Rm, int32_t Cc)
+{
+  instr_t     i;
+  ASSERT(!(Rd &       ~0x1f));
+  ASSERT(!(Rn &       ~0x1f));
+  ASSERT(!(Rm &       ~0x1f));
+  ASSERT(!(Cc  &       ~0xf));
+  ASSERT(!(Op & ~0xffc00c00));
+  i.w = Op;
+  i.Rd.b = Rd;
+  i.Rn.b = Rn;
+  i.Rm.b = Rm;
+  i.cond.b = Cc;
+  emit_u32(_jit, i.w);
+}
+
+static void
+oxxx7(jit_state_t *_jit, int32_t Op,
+      int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7)
+{
+  instr_t     i;
+  ASSERT(!(Rt  &       ~0x1f));
+  ASSERT(!(Rt2 &       ~0x1f));
+  ASSERT(!(Rn  &       ~0x1f));
+  ASSERT(Simm7 >= -128 && Simm7 <= 127);
+  ASSERT(!(Op & ~0xffc003e0));
+  i.w = Op;
+  i.Rt.b = Rt;
+  i.Rt2.b = Rt2;
+  i.Rn.b = Rn;
+  i.imm7.b = Simm7;
+  emit_u32(_jit, i.w);
+}
+
+#define stack_framesize               160
+#define FP_REGNO                      0x1d
+#define LR_REGNO                      0x1e
+#define SP_REGNO                      0x1f
+#define XZR_REGNO                     0x1f
+#define WZR_REGNO                     XZR_REGNO
+#define LSL_12                        0x00400000
+#define MOVI_LSL_16                   0x00200000
+#define MOVI_LSL_32                   0x00400000
+#define MOVI_LSL_48                   0x00600000
+#define XS                            0x80000000      /* Wn -> Xn */
+#define DS                            0x00400000      /* Sn -> Dn */
+#define CC_NE                         0x0
+#define CC_EQ                         0x1
+#define CC_CC                         0x2
+#define CC_LO                         CC_CC
+#define CC_CS                         0x3
+#define CC_HS                         CC_CS
+#define CC_PL                         0x4
+#define CC_MI                         0x5
+#define CC_VC                         0x6
+#define CC_VS                         0x7
+#define CC_LS                         0x8
+#define CC_HI                         0x9
+#define CC_LT                         0xa
+#define CC_GE                         0xb
+#define CC_LE                         0xc
+#define CC_GT                         0xd
+#define CC_NV                         0xe
+#define CC_AL                         0xf
 /* Branches need inverted condition */
-#  define BCC_EQ                       0x0
-#  define BCC_NE                       0x1
-#  define BCC_CS                       0x2
-#  define BCC_HS                       BCC_CS
-#  define BCC_CC                       0x3
-#  define BCC_LO                       BCC_CC
-#  define BCC_MI                       0x4
-#  define BCC_PL                       0x5
-#  define BCC_VS                       0x6
-#  define BCC_VC                       0x7
-#  define BCC_HI                       0x8
-#  define BCC_LS                       0x9
-#  define BCC_GE                       0xa
-#  define BCC_LT                       0xb
-#  define BCC_GT                       0xc
-#  define BCC_LE                       0xd
-#  define BCC_AL                       0xe
-#  define BCC_NV                       0xf
+#define BCC_EQ                        0x0
+#define BCC_NE                        0x1
+#define BCC_CS                        0x2
+#define BCC_HS                        BCC_CS
+#define BCC_CC                        0x3
+#define BCC_LO                        BCC_CC
+#define BCC_MI                        0x4
+#define BCC_PL                        0x5
+#define BCC_VS                        0x6
+#define BCC_VC                        0x7
+#define BCC_HI                        0x8
+#define BCC_LS                        0x9
+#define BCC_GE                        0xa
+#define BCC_LT                        0xb
+#define BCC_GT                        0xc
+#define BCC_LE                        0xd
+#define BCC_AL                        0xe
+#define BCC_NV                        0xf
 /* adapted and cut down to only tested and required by lightning,
  * from data in binutils/aarch64-tbl.h */
-#  define A64_ADCS                     0x3a000000
-#  define A64_SBCS                     0x7a000000
-#  define A64_ADDI                     0x11000000
-#  define A64_ADDSI                    0xb1000000
-#  define A64_SUBI                     0x51000000
-#  define A64_SUBSI                    0x71000000
-#  define A64_ADD                      0x0b000000
-#  define A64_ADDS                     0x2b000000
-#  define A64_SUB                      0x4b000000
-#  define A64_NEG                      0x4b0003e0
-#  define A64_SUBS                     0x6b000000
-#  define A64_CMP                      0x6b00001f
-#  define A64_SBFM                     0x93400000
-#  define A64_UBFM                     0x53400000
-#  define A64_UBFX                     0x53000000
-#  define A64_B                                0x14000000
-#  define A64_BL                       0x94000000
-#  define A64_BR                       0xd61f0000
-#  define A64_BLR                      0xd63f0000
-#  define A64_RET                      0xd65f0000
-#  define A64_CBZ                      0x34000000
-#  define A64_CBNZ                     0x35000000
-#  define A64_B_C                      0x54000000
-#  define A64_CSINC                    0x1a800400
-#  define A64_REV                      0xdac00c00
-#  define A64_UDIV                     0x1ac00800
-#  define A64_SDIV                     0x1ac00c00
-#  define A64_LSL                      0x1ac02000
-#  define A64_LSR                      0x1ac02400
-#  define A64_ASR                      0x1ac02800
-#  define A64_MUL                      0x1b007c00
-#  define A64_SMULL                    0x9b207c00
-#  define A64_SMULH                    0x9b407c00
-#  define A64_UMULL                    0x9ba07c00
-#  define A64_UMULH                    0x9bc07c00
-#  define A64_STRBI                    0x39000000
-#  define A64_LDRBI                    0x39400000
-#  define A64_LDRSBI                   0x39800000
-#  define A64_STRI                     0xf9000000
-#  define A64_LDRI                     0xf9400000
-#  define A64_STRHI                    0x79000000
-#  define A64_LDRHI                    0x79400000
-#  define A64_LDRSHI                   0x79800000
-#  define A64_STRWI                    0xb9000000
-#  define A64_LDRWI                    0xb9400000
-#  define A64_LDRSWI                   0xb9800000
-#  define A64_STRB                     0x38206800
-#  define A64_LDRB                     0x38606800
-#  define A64_LDRSB                    0x38e06800
-#  define A64_STR                      0xf8206800
-#  define A64_LDR                      0xf8606800
-#  define A64_STRH                     0x78206800
-#  define A64_LDRH                     0x78606800
-#  define A64_LDRSH                    0x78a06800
-#  define A64_STRW                     0xb8206800
-#  define A64_LDRW                     0xb8606800
-#  define A64_LDRSW                    0xb8a06800
-#  define A64_STURB                    0x38000000
-#  define A64_LDURB                    0x38400000
-#  define A64_LDURSB                   0x38800000
-#  define A64_STUR                     0xf8000000
-#  define A64_LDUR                     0xf8400000
-#  define A64_STURH                    0x78000000
-#  define A64_LDURH                    0x78400000
-#  define A64_LDURSH                   0x78800000
-#  define A64_STURW                    0xb8000000
-#  define A64_LDURW                    0xb8400000
-#  define A64_LDURSW                   0xb8800000
-#  define A64_STP                      0x29000000
-#  define A64_LDP                      0x29400000
-#  define A64_STP_POS                  0x29800000
-#  define A64_LDP_PRE                  0x28c00000
-#  define A64_ANDI                     0x12400000
-#  define A64_ORRI                     0x32400000
-#  define A64_EORI                     0x52400000
-#  define A64_ANDSI                    0x72000000
-#  define A64_AND                      0x0a000000
-#  define A64_ORR                      0x2a000000
-#  define A64_MOV                      0x2a0003e0      /* AKA orr Rd,xzr,Rm */
-#  define A64_MVN                      0x2a2003e0
-#  define A64_UXTW                     0x2a0003e0      /* AKA MOV */
-#  define A64_EOR                      0x4a000000
-#  define A64_ANDS                     0x6a000000
-#  define A64_MOVN                     0x12800000
-#  define A64_MOVZ                     0x52800000
-#  define A64_MOVK                     0x72800000
-#  define SBFM(Rd,Rn,ImmR,ImmS)                oxxrs(A64_SBFM|XS,Rd,Rn,ImmR,ImmS)
-#  define UBFM(Rd,Rn,ImmR,ImmS)                oxxrs(A64_UBFM|XS,Rd,Rn,ImmR,ImmS)
-#  define UBFX(Rd,Rn,ImmR,ImmS)                oxxrs(A64_UBFX,Rd,Rn,ImmR,ImmS)
-#  define CMP(Rn,Rm)                   oxx_(A64_CMP|XS,Rn,Rm)
-#  define CMPI(Rn,Imm12)               oxxi(A64_SUBSI|XS,XZR_REGNO,Rn,Imm12)
-#  define CMPI_12(Rn,Imm12)            oxxi(A64_SUBSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
-#  define CMNI(Rn,Imm12)               oxxi(A64_ADDSI|XS,XZR_REGNO,Rn,Imm12)
-#  define CMNI_12(Rn,Imm12)            oxxi(A64_ADDSI|XS|LSL_12,XZR_REGNO,Rn,Imm12)
-#  define CSINC(Rd,Rn,Rm,Cc)           oxxxc(A64_CSINC|XS,Rd,Rn,Rm,Cc)
-#  define TST(Rn,Rm)                   oxxx(A64_ANDS|XS,XZR_REGNO,Rn,Rm)
-/* actually should use oxxrs but logical_immediate returns proper encoding */
-#  define TSTI(Rn,Imm12)               oxxi(A64_ANDSI,XZR_REGNO,Rn,Imm12)
-#  define MOV(Rd,Rm)                   ox_x(A64_MOV|XS,Rd,Rm)
-#  define MVN(Rd,Rm)                   ox_x(A64_MVN|XS,Rd,Rm)
-#  define NEG(Rd,Rm)                   ox_x(A64_NEG|XS,Rd,Rm)
-#  define MOVN(Rd,Imm16)               ox_h(A64_MOVN|XS,Rd,Imm16)
-#  define MOVN_16(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16)
-#  define MOVN_32(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16)
-#  define MOVN_48(Rd,Imm16)            ox_h(A64_MOVN|XS|MOVI_LSL_48,Rd,Imm16)
-#  define MOVZ(Rd,Imm16)               ox_h(A64_MOVZ|XS,Rd,Imm16)
-#  define MOVZ_16(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_16,Rd,Imm16)
-#  define MOVZ_32(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_32,Rd,Imm16)
-#  define MOVZ_48(Rd,Imm16)            ox_h(A64_MOVZ|XS|MOVI_LSL_48,Rd,Imm16)
-#  define MOVK(Rd,Imm16)               ox_h(A64_MOVK|XS,Rd,Imm16)
-#  define MOVK_16(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_16,Rd,Imm16)
-#  define MOVK_32(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_32,Rd,Imm16)
-#  define MOVK_48(Rd,Imm16)            ox_h(A64_MOVK|XS|MOVI_LSL_48,Rd,Imm16)
-#  define ADD(Rd,Rn,Rm)                        oxxx(A64_ADD|XS,Rd,Rn,Rm)
-#  define ADDI(Rd,Rn,Imm12)            oxxi(A64_ADDI|XS,Rd,Rn,Imm12)
-#  define ADDI_12(Rd,Rn,Imm12)         oxxi(A64_ADDI|XS|LSL_12,Rd,Rn,Imm12)
-#  define MOV_XSP(Rd,Rn)               ADDI(Rd,Rn,0)
-#  define ADDS(Rd,Rn,Rm)               oxxx(A64_ADDS|XS,Rd,Rn,Rm)
-#  define ADDSI(Rd,Rn,Imm12)           oxxi(A64_ADDSI|XS,Rd,Rn,Imm12)
-#  define ADDSI_12(Rd,Rn,Imm12)                oxxi(A64_ADDSI|XS|LSL_12,Rd,Rn,Imm12)
-#  define ADCS(Rd,Rn,Rm)               oxxx(A64_ADCS|XS,Rd,Rn,Rm)
-#  define SUB(Rd,Rn,Rm)                        oxxx(A64_SUB|XS,Rd,Rn,Rm)
-#  define SUBI(Rd,Rn,Imm12)            oxxi(A64_SUBI|XS,Rd,Rn,Imm12)
-#  define SUBI_12(Rd,Rn,Imm12)         oxxi(A64_SUBI|XS|LSL_12,Rd,Rn,Imm12)
-#  define SUBS(Rd,Rn,Rm)               oxxx(A64_SUBS|XS,Rd,Rn,Rm)
-#  define SUBSI(Rd,Rn,Imm12)           oxxi(A64_SUBSI|XS,Rd,Rn,Imm12)
-#  define SUBSI_12(Rd,Rn,Imm12)                oxxi(A64_SUBSI|XS|LSL_12,Rd,Rn,Imm12)
-#  define SBCS(Rd,Rn,Rm)               oxxx(A64_SBCS|XS,Rd,Rn,Rm)
-#  define MUL(Rd,Rn,Rm)                        oxxx(A64_MUL|XS,Rd,Rn,Rm)
-#  define SMULL(Rd,Rn,Rm)              oxxx(A64_SMULL,Rd,Rn,Rm)
-#  define SMULH(Rd,Rn,Rm)              oxxx(A64_SMULH,Rd,Rn,Rm)
-#  define UMULL(Rd,Rn,Rm)              oxxx(A64_UMULL,Rd,Rn,Rm)
-#  define UMULH(Rd,Rn,Rm)              oxxx(A64_UMULH,Rd,Rn,Rm)
-#  define SDIV(Rd,Rn,Rm)               oxxx(A64_SDIV|XS,Rd,Rn,Rm)
-#  define UDIV(Rd,Rn,Rm)               oxxx(A64_UDIV|XS,Rd,Rn,Rm)
-#  define LSL(Rd,Rn,Rm)                        oxxx(A64_LSL|XS,Rd,Rn,Rm)
-#  define LSLI(r0,r1,i0)               UBFM(r0,r1,(64-i0)&63,63-i0)
-#  define ASR(Rd,Rn,Rm)                        oxxx(A64_ASR|XS,Rd,Rn,Rm)
-#  define ASRI(r0,r1,i0)               SBFM(r0,r1,i0,63)
-#  define LSR(Rd,Rn,Rm)                        oxxx(A64_LSR|XS,Rd,Rn,Rm)
-#  define LSRI(r0,r1,i0)               UBFM(r0,r1,i0,63)
-#  define AND(Rd,Rn,Rm)                        oxxx(A64_AND|XS,Rd,Rn,Rm)
-/* actually should use oxxrs but logical_immediate returns proper encoding */
-#  define ANDI(Rd,Rn,Imm12)            oxxi(A64_ANDI|XS,Rd,Rn,Imm12)
-#  define ORR(Rd,Rn,Rm)                        oxxx(A64_ORR|XS,Rd,Rn,Rm)
-/* actually should use oxxrs but logical_immediate returns proper encoding */
-#  define ORRI(Rd,Rn,Imm12)            oxxi(A64_ORRI|XS,Rd,Rn,Imm12)
-#  define EOR(Rd,Rn,Rm)                        oxxx(A64_EOR|XS,Rd,Rn,Rm)
+#define A64_ADCS                      0x3a000000
+#define A64_SBCS                      0x7a000000
+#define A64_ADDI                      0x11000000
+#define A64_ADDSI                     0xb1000000
+#define A64_SUBI                      0x51000000
+#define A64_SUBSI                     0x71000000
+#define A64_ADD                       0x0b000000
+#define A64_ADDS                      0x2b000000
+#define A64_SUB                       0x4b000000
+#define A64_NEG                       0x4b0003e0
+#define A64_SUBS                      0x6b000000
+#define A64_CMP                       0x6b00001f
+#define A64_SBFM                      0x93400000
+#define A64_UBFM                      0x53400000
+#define A64_UBFX                      0x53000000
+#define A64_B                         0x14000000
+#define A64_BL                        0x94000000
+#define A64_BR                        0xd61f0000
+#define A64_BLR                       0xd63f0000
+#define A64_RET                       0xd65f0000
+#define A64_CBZ                       0x34000000
+#define A64_CBNZ                      0x35000000
+#define A64_B_C                       0x54000000
+#define A64_CSINC                     0x1a800400
+#define A64_REV                       0xdac00c00
+#define A64_UDIV                      0x1ac00800
+#define A64_SDIV                      0x1ac00c00
+#define A64_LSL                       0x1ac02000
+#define A64_LSR                       0x1ac02400
+#define A64_ASR                       0x1ac02800
+#define A64_MUL                       0x1b007c00
+#define A64_SMULL                     0x9b207c00
+#define A64_SMULH                     0x9b407c00
+#define A64_UMULL                     0x9ba07c00
+#define A64_UMULH                     0x9bc07c00
+#define A64_STRBI                     0x39000000
+#define A64_LDRBI                     0x39400000
+#define A64_LDRSBI                    0x39800000
+#define A64_STRI                      0xf9000000
+#define A64_LDRI                      0xf9400000
+#define A64_LDRI_LITERAL              0x58000000
+#define A64_STRHI                     0x79000000
+#define A64_LDRHI                     0x79400000
+#define A64_LDRSHI                    0x79800000
+#define A64_STRWI                     0xb9000000
+#define A64_LDRWI                     0xb9400000
+#define A64_LDRSWI                    0xb9800000
+#define A64_STRB                      0x38206800
+#define A64_LDRB                      0x38606800
+#define A64_LDRSB                     0x38e06800
+#define A64_STR                       0xf8206800
+#define A64_LDR                       0xf8606800
+#define A64_STRH                      0x78206800
+#define A64_LDRH                      0x78606800
+#define A64_LDRSH                     0x78a06800
+#define A64_STRW                      0xb8206800
+#define A64_LDRW                      0xb8606800
+#define A64_LDRSW                     0xb8a06800
+#define A64_STURB                     0x38000000
+#define A64_LDURB                     0x38400000
+#define A64_LDURSB                    0x38800000
+#define A64_STUR                      0xf8000000
+#define A64_LDUR                      0xf8400000
+#define A64_STURH                     0x78000000
+#define A64_LDURH                     0x78400000
+#define A64_LDURSH                    0x78800000
+#define A64_STURW                     0xb8000000
+#define A64_LDURW                     0xb8400000
+#define A64_LDURSW                    0xb8800000
+#define A64_STP                       0x29000000
+#define A64_LDP                       0x29400000
+#define A64_STP_POS                   0x29800000
+#define A64_LDP_PRE                   0x28c00000
+#define A64_ANDI                      0x12400000
+#define A64_ORRI                      0x32400000
+#define A64_EORI                      0x52400000
+#define A64_ANDSI                     0x72000000
+#define A64_AND                       0x0a000000
+#define A64_ORR                       0x2a000000
+#define A64_MOV                       0x2a0003e0      /* AKA orr Rd,xzr,Rm */
+#define A64_MVN                       0x2a2003e0
+#define A64_UXTW                      0x2a0003e0      /* AKA MOV */
+#define A64_EOR                       0x4a000000
+#define A64_ANDS                      0x6a000000
+#define A64_MOVN                      0x12800000
+#define A64_MOVZ                      0x52800000
+#define A64_MOVK                      0x72800000
+
+static void
+SBFM(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS) 
+{
+  return oxxrs(_jit, A64_SBFM|XS,Rd,Rn,ImmR,ImmS);
+}
+
+static void
+UBFM(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS) 
+{
+  return oxxrs(_jit, A64_UBFM|XS,Rd,Rn,ImmR,ImmS);
+}
+
+static void
+UBFX(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t ImmR, int32_t ImmS) 
+{
+  return oxxrs(_jit, A64_UBFX,Rd,Rn,ImmR,ImmS);
+}
+
+static void
+CMP(jit_state_t *_jit, int32_t Rn, int32_t Rm) 
+{
+  return oxx_(_jit, A64_CMP|XS,Rn,Rm);
+}
+
+static void
+CMPI(jit_state_t *_jit, int32_t Rn, int32_t Imm12) 
+{
+  return oxxi(_jit, A64_SUBSI|XS,XZR_REGNO,Rn,Imm12);
+}
+
+static void
+CMPI_12(jit_state_t *_jit, int32_t Rn, int32_t Imm12) 
+{
+  return oxxi(_jit, A64_SUBSI|XS|LSL_12,XZR_REGNO,Rn,Imm12);
+}
+
+static void
+CMNI(jit_state_t *_jit, int32_t Rn, int32_t Imm12) 
+{
+  return oxxi(_jit, A64_ADDSI|XS,XZR_REGNO,Rn,Imm12);
+}
+
+static void
+CMNI_12(jit_state_t *_jit, int32_t Rn, int32_t Imm12) 
+{
+  return oxxi(_jit, A64_ADDSI|XS|LSL_12,XZR_REGNO,Rn,Imm12);
+}
+
+static void
+CSINC(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm, int32_t Cc) 
+{
+  return oxxxc(_jit, A64_CSINC|XS,Rd,Rn,Rm,Cc);
+}
+
+static void
+TST(jit_state_t *_jit, int32_t Rn, int32_t Rm) 
+{
+  return oxxx(_jit, A64_ANDS|XS,XZR_REGNO,Rn,Rm);
+}
+
 /* actually should use oxxrs but logical_immediate returns proper encoding */
-#  define EORI(Rd,Rn,Imm12)            oxxi(A64_EORI|XS,Rd,Rn,Imm12)
-#  define SXTB(Rd,Rn)                  SBFM(Rd,Rn,0,7)
-#  define SXTH(Rd,Rn)                  SBFM(Rd,Rn,0,15)
-#  define SXTW(Rd,Rn)                  SBFM(Rd,Rn,0,31)
-#  define UXTB(Rd,Rn)                  UBFX(Rd,Rn,0,7)
-#  define UXTH(Rd,Rn)                  UBFX(Rd,Rn,0,15)
-#  define UXTW(Rd,Rm)                  ox_x(A64_UXTW,Rd,Rm)
-#  define REV(Rd,Rn)                   o_xx(A64_REV,Rd,Rn)
-#  define LDRSB(Rt,Rn,Rm)              oxxx(A64_LDRSB,Rt,Rn,Rm)
-#  define LDRSBI(Rt,Rn,Imm12)          oxxi(A64_LDRSBI,Rt,Rn,Imm12)
-#  define LDURSB(Rt,Rn,Imm9)           oxx9(A64_LDURSB,Rt,Rn,Imm9)
-#  define LDRB(Rt,Rn,Rm)               oxxx(A64_LDRB,Rt,Rn,Rm)
-#  define LDRBI(Rt,Rn,Imm12)           oxxi(A64_LDRBI,Rt,Rn,Imm12)
-#  define LDURB(Rt,Rn,Imm9)            oxx9(A64_LDURB,Rt,Rn,Imm9)
-#  define LDRSH(Rt,Rn,Rm)              oxxx(A64_LDRSH,Rt,Rn,Rm)
-#  define LDRSHI(Rt,Rn,Imm12)          oxxi(A64_LDRSHI,Rt,Rn,Imm12)
-#  define LDURSH(Rt,Rn,Imm9)           oxx9(A64_LDURSH,Rt,Rn,Imm9)
-#  define LDRH(Rt,Rn,Rm)               oxxx(A64_LDRH,Rt,Rn,Rm)
-#  define LDRHI(Rt,Rn,Imm12)           oxxi(A64_LDRHI,Rt,Rn,Imm12)
-#  define LDURH(Rt,Rn,Imm9)            oxx9(A64_LDURH,Rt,Rn,Imm9)
-#  define LDRSW(Rt,Rn,Rm)              oxxx(A64_LDRSW,Rt,Rn,Rm)
-#  define LDRSWI(Rt,Rn,Imm12)          oxxi(A64_LDRSWI,Rt,Rn,Imm12)
-#  define LDURSW(Rt,Rn,Imm9)           oxx9(A64_LDURSW,Rt,Rn,Imm9)
-#  define LDRW(Rt,Rn,Rm)               oxxx(A64_LDRW,Rt,Rn,Rm)
-#  define LDRWI(Rt,Rn,Imm12)           oxxi(A64_LDRWI,Rt,Rn,Imm12)
-#  define LDURW(Rt,Rn,Imm9)            oxx9(A64_LDURW,Rt,Rn,Imm9)
-#  define LDR(Rt,Rn,Rm)                        oxxx(A64_LDR,Rt,Rn,Rm)
-#  define LDRI(Rt,Rn,Imm12)            oxxi(A64_LDRI,Rt,Rn,Imm12)
-#  define LDUR(Rt,Rn,Imm9)             oxx9(A64_LDUR,Rt,Rn,Imm9)
-#  define STRB(Rt,Rn,Rm)               oxxx(A64_STRB,Rt,Rn,Rm)
-#  define STRBI(Rt,Rn,Imm12)           oxxi(A64_STRBI,Rt,Rn,Imm12)
-#  define STURB(Rt,Rn,Imm9)            oxx9(A64_STURB,Rt,Rn,Imm9)
-#  define STRH(Rt,Rn,Rm)               oxxx(A64_STRH,Rt,Rn,Rm)
-#  define STRHI(Rt,Rn,Imm12)           oxxi(A64_STRHI,Rt,Rn,Imm12)
-#  define STURH(Rt,Rn,Imm9)            oxx9(A64_STURH,Rt,Rn,Imm9)
-#  define STRW(Rt,Rn,Rm)               oxxx(A64_STRW,Rt,Rn,Rm)
-#  define STRWI(Rt,Rn,Imm12)           oxxi(A64_STRWI,Rt,Rn,Imm12)
-#  define STURW(Rt,Rn,Imm9)            oxx9(A64_STURW,Rt,Rn,Imm9)
-#  define STR(Rt,Rn,Rm)                        oxxx(A64_STR,Rt,Rn,Rm)
-#  define STRI(Rt,Rn,Imm12)            oxxi(A64_STRI,Rt,Rn,Imm12)
-#  define STUR(Rt,Rn,Imm9)             oxx9(A64_STUR,Rt,Rn,Imm9)
-#  define LDPI(Rt,Rt2,Rn,Simm7)                oxxx7(A64_LDP|XS,Rt,Rt2,Rn,Simm7)
-#  define STPI(Rt,Rt2,Rn,Simm7)                oxxx7(A64_STP|XS,Rt,Rt2,Rn,Simm7)
-#  define LDPI_PRE(Rt,Rt2,Rn,Simm7)    oxxx7(A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7)
-#  define STPI_POS(Rt,Rt2,Rn,Simm7)    oxxx7(A64_STP_POS|XS,Rt,Rt2,Rn,Simm7)
-#  define CSET(Rd,Cc)                  CSINC(Rd,XZR_REGNO,XZR_REGNO,Cc)
-#  define B(Simm26)                    o26(A64_B,Simm26)
-#  define BL(Simm26)                   o26(A64_BL,Simm26)
-#  define BR(Rn)                       o_x_(A64_BR,Rn)
-#  define BLR(Rn)                      o_x_(A64_BLR,Rn)
-#  define RET()                                o_x_(A64_RET,LR_REGNO)
-#  define B_C(Cc,Simm19)               oc19(A64_B_C,Cc,Simm19)
-#  define CBZ(Rd,Simm19)               ox19(A64_CBZ|XS,Rd,Simm19)
-#  define CBNZ(Rd,Simm19)              ox19(A64_CBNZ|XS,Rd,Simm19)
-#  define NOP()                                ii(0xd503201f)
-static int32_t logical_immediate(jit_word_t);
-#  define oxxx(Op,Rd,Rn,Rm)            _oxxx(_jit,Op,Rd,Rn,Rm)
-static void _oxxx(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define oxxi(Op,Rd,Rn,Imm12)         _oxxi(_jit,Op,Rd,Rn,Imm12)
-static void _oxxi(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define oxx9(Op,Rd,Rn,Imm9)          _oxx9(_jit,Op,Rd,Rn,Imm9)
-static void _oxx9(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define ox19(Op,Rd,Simm19)           _ox19(_jit,Op,Rd,Simm19)
-static void _ox19(jit_state_t*,int32_t,int32_t,int32_t);
-#  define oc19(Op,Cc,Simm19)           _oc19(_jit,Op,Cc,Simm19)
-static void _oc19(jit_state_t*,int32_t,int32_t,int32_t);
-#  define o26(Op,Simm26)               _o26(_jit,Op,Simm26)
-static void _oc26(jit_state_t*,int32_t,int32_t);
-#  define ox_x(Op,Rd,Rn)               _ox_x(_jit,Op,Rd,Rn)
-static void _ox_x(jit_state_t*,int32_t,int32_t,int32_t);
-#  define o_xx(Op,Rd,Rn)               _o_xx(_jit,Op,Rd,Rn)
-static void _o_xx(jit_state_t*,int32_t,int32_t,int32_t);
-#  define oxx_(Op,Rn,Rm)               _oxx_(_jit,Op,Rn,Rm)
-static void _oxx_(jit_state_t*,int32_t,int32_t,int32_t);
-#  define o_x_(Op,Rn)                  _o_x_(_jit,Op,Rn)
-static void _o_x_(jit_state_t*,int32_t,int32_t);
-#  define ox_h(Op,Rd,Imm16)            _ox_h(_jit,Op,Rd,Imm16)
-static void _ox_h(jit_state_t*,int32_t,int32_t,int32_t);
-#  define oxxrs(Op,Rd,Rn,R,S)          _oxxrs(_jit,Op,Rd,Rn,R,S)
-static void _oxxrs(jit_state_t*,int32_t,int32_t,
-                  int32_t,int32_t,int32_t);
-#  define oxxxc(Op,Rd,Rn,Rm,Cc)                _oxxxc(_jit,Op,Rd,Rn,Rm,Cc)
-static void _oxxxc(jit_state_t*,int32_t,int32_t,
-                  int32_t,int32_t,int32_t);
-#  define oxxx7(Op,Rt,Rt2,Rn,Simm7)    _oxxx7(_jit,Op,Rt,Rt2,Rn,Simm7)
-static void _oxxx7(jit_state_t*,int32_t,
-                  int32_t,int32_t,int32_t,int32_t);
-#  define nop(i0)                      _nop(_jit,i0)
-static void _nop(jit_state_t*,int32_t);
-#  define addr(r0,r1,r2)               ADD(r0,r1,r2)
-#  define addi(r0,r1,i0)               _addi(_jit,r0,r1,i0)
-static void _addi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define addcr(r0,r1,r2)              ADDS(r0,r1,r2)
-#  define addci(r0,r1,i0)              _addci(_jit,r0,r1,i0)
-static void _addci(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define addxr(r0,r1,r2)              ADCS(r0,r1,r2)
-#  define addxi(r0,r1,i0)              _addxi(_jit,r0,r1,i0)
-static void _addxi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define subr(r0,r1,r2)               SUB(r0,r1,r2)
-#  define subi(r0,r1,i0)               _subi(_jit,r0,r1,i0)
-static void _subi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define subcr(r0,r1,r2)              SUBS(r0,r1,r2)
-#  define subci(r0,r1,i0)              _subci(_jit,r0,r1,i0)
-static void _subci(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define subxr(r0,r1,r2)              SBCS(r0,r1,r2)
-#  define subxi(r0,r1,i0)              _subxi(_jit,r0,r1,i0)
-static void _subxi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define rsbi(r0, r1, i0)             _rsbi(_jit, r0, r1, i0)
-static void _rsbi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define mulr(r0,r1,r2)               MUL(r0,r1,r2)
-#  define muli(r0,r1,i0)               _muli(_jit,r0,r1,i0)
-static void _muli(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define qmulr(r0,r1,r2,r3)           _qmulr(_jit,r0,r1,r2,r3)
-static void _qmulr(jit_state_t*,int32_t,
-                  int32_t,int32_t,int32_t);
-#  define qmuli(r0,r1,r2,i0)           _qmuli(_jit,r0,r1,r2,i0)
-static void _qmuli(jit_state_t*,int32_t,
-                  int32_t,int32_t,jit_word_t);
-#  define qmulr_u(r0,r1,r2,r3)         _qmulr_u(_jit,r0,r1,r2,r3)
-static void _qmulr_u(jit_state_t*,int32_t,
-                    int32_t,int32_t,int32_t);
-#  define qmuli_u(r0,r1,r2,i0)         _qmuli_u(_jit,r0,r1,r2,i0)
-static void _qmuli_u(jit_state_t*,int32_t,
-                    int32_t,int32_t,jit_word_t);
-#  define divr(r0,r1,r2)               SDIV(r0,r1,r2)
-#  define divi(r0,r1,i0)               _divi(_jit,r0,r1,i0)
-static void _divi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define divr_u(r0,r1,r2)             UDIV(r0,r1,r2)
-#  define divi_u(r0,r1,i0)             _divi_u(_jit,r0,r1,i0)
-static void _divi_u(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define qdivr(r0,r1,r2,r3)           _iqdivr(_jit,1,r0,r1,r2,r3)
-#  define qdivr_u(r0,r1,r2,r3)         _iqdivr(_jit,0,r0,r1,r2,r3)
-static void _iqdivr(jit_state_t*,jit_bool_t,
-                   int32_t,int32_t,int32_t,int32_t);
-#  define qdivi(r0,r1,r2,i0)           _qdivi(_jit,r0,r1,r2,i0)
-static void _qdivi(jit_state_t*,int32_t,
-                  int32_t,int32_t,jit_word_t);
-#  define qdivi_u(r0,r1,r2,i0)         _qdivi_u(_jit,r0,r1,r2,i0)
-static void _qdivi_u(jit_state_t*,int32_t,
-                    int32_t,int32_t,jit_word_t);
-#  define remr(r0,r1,r2)               _remr(_jit,r0,r1,r2)
-static void _remr(jit_state_t*,int32_t,int32_t,int32_t);
-#  define remi(r0,r1,i0)               _remi(_jit,r0,r1,i0)
-static void _remi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define remr_u(r0,r1,r2)             _remr_u(_jit,r0,r1,r2)
-static void _remr_u(jit_state_t*,int32_t,int32_t,int32_t);
-#  define remi_u(r0,r1,i0)             _remi_u(_jit,r0,r1,i0)
-static void _remi_u(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define lshr(r0,r1,r2)               LSL(r0,r1,r2)
-#  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
-static void _lshi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define rshr(r0,r1,r2)               ASR(r0,r1,r2)
-#  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
-static void _rshi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define rshr_u(r0,r1,r2)             LSR(r0,r1,r2)
-#  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
-static void _rshi_u(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define negr(r0,r1)                  NEG(r0,r1)
-#  define comr(r0,r1)                  MVN(r0,r1)
-#  define andr(r0,r1,r2)               AND(r0,r1,r2)
-#  define andi(r0,r1,i0)               _andi(_jit,r0,r1,i0)
-static void _andi(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define orr(r0,r1,r2)                        ORR(r0,r1,r2)
-#  define ori(r0,r1,i0)                        _ori(_jit,r0,r1,i0)
-static void _ori(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define xorr(r0,r1,r2)               EOR(r0,r1,r2)
-#  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
-static void _xori(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define ldr_c(r0,r1)                 LDRSBI(r0,r1,0)
-#  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
-static void _ldi_c(jit_state_t*,int32_t,jit_word_t);
-#  define ldr_uc(r0,r1)                        _ldr_uc(_jit,r0,r1)
-static void _ldr_uc(jit_state_t*,int32_t,int32_t);
-#  define ldi_uc(r0,i0)                        _ldi_uc(_jit,r0,i0)
-static void _ldi_uc(jit_state_t*,int32_t,jit_word_t);
-#  define ldr_s(r0,r1)                 LDRSHI(r0,r1,0)
-#  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
-static void _ldi_s(jit_state_t*,int32_t,jit_word_t);
-#  define ldr_us(r0,r1)                        _ldr_us(_jit,r0,r1)
-static void _ldr_us(jit_state_t*,int32_t,int32_t);
-#  define ldi_us(r0,i0)                        _ldi_us(_jit,r0,i0)
-static void _ldi_us(jit_state_t*,int32_t,jit_word_t);
-#  define ldr_i(r0,r1)                 LDRSWI(r0,r1,0)
-#  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
-static void _ldi_i(jit_state_t*,int32_t,jit_word_t);
-#  define ldr_ui(r0,r1)                        _ldr_ui(_jit,r0,r1)
-static void _ldr_ui(jit_state_t*,int32_t,int32_t);
-#  define ldi_ui(r0,i0)                        _ldi_ui(_jit,r0,i0)
-static void _ldi_ui(jit_state_t*,int32_t,jit_word_t);
-#  define ldr_l(r0,r1)                 LDRI(r0,r1,0)
-static void _ldr_l(jit_state_t*,int32_t,int32_t);
-#  define ldi_l(r0,i0)                 _ldi_l(_jit,r0,i0)
-static void _ldi_l(jit_state_t*,int32_t,jit_word_t);
-#  define ldxr_c(r0,r1,r2)             _ldxr_c(_jit,r0,r1,r2)
-static void _ldxr_c(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ldxi_c(r0,r1,i0)             _ldxi_c(_jit,r0,r1,i0)
-static void _ldxi_c(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define ldxr_uc(r0,r1,r2)            _ldxr_uc(_jit,r0,r1,r2)
-static void _ldxr_uc(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
-static void _ldxi_uc(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define ldxr_s(r0,r1,r2)             LDRSH(r0,r1,r2)
-#  define ldxi_s(r0,r1,i0)             _ldxi_s(_jit,r0,r1,i0)
-static void _ldxi_s(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define ldxr_us(r0,r1,r2)            _ldxr_us(_jit,r0,r1,r2)
-static void _ldxr_us(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
-static void _ldxi_us(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define ldxr_i(r0,r1,r2)             LDRSW(r0,r1,r2)
-#  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
-static void _ldxi_i(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define ldxr_ui(r0,r1,r2)            _ldxr_ui(_jit,r0,r1,r2)
-static void _ldxr_ui(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ldxi_ui(r0,r1,i0)            _ldxi_ui(_jit,r0,r1,i0)
-static void _ldxi_ui(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define ldxr_l(r0,r1,r2)             LDR(r0,r1,r2)
-#  define ldxi_l(r0,r1,i0)             _ldxi_l(_jit,r0,r1,i0)
-static void _ldxi_l(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define str_c(r0,r1)                 STRBI(r1,r0,0)
-#  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
-static void _sti_c(jit_state_t*,jit_word_t,int32_t);
-#  define str_s(r0,r1)                 STRHI(r1,r0,0)
-#  define sti_s(i0,r0)                 _sti_s(_jit,i0,r0)
-static void _sti_s(jit_state_t*,jit_word_t,int32_t);
-#  define str_i(r0,r1)                 STRWI(r1,r0,0)
-#  define sti_i(i0,r0)                 _sti_i(_jit,i0,r0)
-static void _sti_i(jit_state_t*,jit_word_t,int32_t);
-#  define str_l(r0,r1)                 STRI(r1,r0,0)
-#  define sti_l(i0,r0)                 _sti_l(_jit,i0,r0)
-static void _sti_l(jit_state_t*,jit_word_t,int32_t);
-#  define stxr_c(r0,r1,r2)             STRB(r2,r1,r0)
-#  define stxi_c(i0,r0,r1)             _stxi_c(_jit,i0,r0,r1)
-static void _stxi_c(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define stxr_s(r0,r1,r2)             STRH(r2,r1,r0)
-#  define stxi_s(i0,r0,r1)             _stxi_s(_jit,i0,r0,r1)
-static void _stxi_s(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define stxr_i(r0,r1,r2)             STRW(r2,r1,r0)
-#  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
-static void _stxi_i(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define stxr_l(r0,r1,r2)             STR(r2,r1,r0)
-#  define stxi_l(i0,r0,r1)             _stxi_l(_jit,i0,r0,r1)
-static void _stxi_l(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  if __BYTE_ORDER == __LITTLE_ENDIAN
-#  define bswapr_us(r0,r1)             _bswapr_us(_jit,r0,r1)
-static void _bswapr_us(jit_state_t*,int32_t,int32_t);
-#  define bswapr_ui(r0,r1)             _bswapr_ui(_jit,r0,r1)
-static void _bswapr_ui(jit_state_t*,int32_t,int32_t);
-#    define bswapr_ul(r0,r1)           REV(r0,r1)
-#  else
-#    define bswapr_us(r0,r1)           extr_us(r0,r1)
-#    define bswapr_ui(r0,r1)           extr_ui(r0,r1)
-#    define bswapr_ul(r0,r1)           movr(r0,r1)
-#  endif
-#  define extr_c(r0,r1)                        SXTB(r0,r1)
-#  define extr_uc(r0,r1)               UXTB(r0,r1)
-#  define extr_s(r0,r1)                        SXTH(r0,r1)
-#  define extr_us(r0,r1)               UXTH(r0,r1)
-#  define extr_i(r0,r1)                        SXTW(r0,r1)
-#  define extr_ui(r0,r1)               UXTW(r0,r1)
-#  define movr(r0,r1)                  _movr(_jit,r0,r1)
-static void _movr(jit_state_t*,int32_t,int32_t);
-#  define movi(r0,i0)                  _movi(_jit,r0,i0)
-static void _movi(jit_state_t*,int32_t,jit_word_t);
-#  define movi_p(r0,i0)                        _movi_p(_jit,r0,i0)
-static jit_word_t _movi_p(jit_state_t*,int32_t,jit_word_t);
-#  define ccr(cc,r0,r1,r2)             _ccr(_jit,cc,r0,r1,r2)
-static void _ccr(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define cci(cc,r0,r1,i0)             _cci(_jit,cc,r0,r1,i0)
-static void _cci(jit_state_t*,int32_t,int32_t,int32_t,jit_word_t);
-#  define ltr(r0,r1,r2)                        ccr(CC_LT,r0,r1,r2)
-#  define lti(r0,r1,i0)                        cci(CC_LT,r0,r1,i0)
-#  define ltr_u(r0,r1,r2)              ccr(CC_CC,r0,r1,r2)
-#  define lti_u(r0,r1,i0)              cci(CC_CC,r0,r1,i0)
-#  define ler(r0,r1,r2)                        ccr(CC_LE,r0,r1,r2)
-#  define lei(r0,r1,i0)                        cci(CC_LE,r0,r1,i0)
-#  define ler_u(r0,r1,r2)              ccr(CC_LS,r0,r1,r2)
-#  define lei_u(r0,r1,i0)              cci(CC_LS,r0,r1,i0)
-#  define eqr(r0,r1,r2)                        ccr(CC_EQ,r0,r1,r2)
-#  define eqi(r0,r1,i0)                        cci(CC_EQ,r0,r1,i0)
-#  define ger(r0,r1,r2)                        ccr(CC_GE,r0,r1,r2)
-#  define gei(r0,r1,i0)                        cci(CC_GE,r0,r1,i0)
-#  define ger_u(r0,r1,r2)              ccr(CC_CS,r0,r1,r2)
-#  define gei_u(r0,r1,i0)              cci(CC_CS,r0,r1,i0)
-#  define gtr(r0,r1,r2)                        ccr(CC_GT,r0,r1,r2)
-#  define gti(r0,r1,i0)                        cci(CC_GT,r0,r1,i0)
-#  define gtr_u(r0,r1,r2)              ccr(CC_HI,r0,r1,r2)
-#  define gti_u(r0,r1,i0)              cci(CC_HI,r0,r1,i0)
-#  define ner(r0,r1,r2)                        ccr(CC_NE,r0,r1,r2)
-#  define nei(r0,r1,i0)                        cci(CC_NE,r0,r1,i0)
-#  define bccr(cc,i0,r0,r1)            _bccr(_jit,cc,i0,r0,r1)
-static jit_word_t
-_bccr(jit_state_t*,int32_t,jit_word_t,int32_t,int32_t);
-#  define bcci(cc,i0,r0,i1)            _bcci(_jit,cc,i0,r0,i1)
-static jit_word_t
-_bcci(jit_state_t*,int32_t,jit_word_t,int32_t,jit_word_t);
-#  define bltr(i0,r0,r1)               bccr(BCC_LT,i0,r0,r1)
-#  define blti(i0,r0,i1)               bcci(BCC_LT,i0,r0,i1)
-#  define bltr_u(i0,r0,r1)             bccr(BCC_CC,i0,r0,r1)
-#  define blti_u(i0,r0,i1)             bcci(BCC_CC,i0,r0,i1)
-#  define bler(i0,r0,r1)               bccr(BCC_LE,i0,r0,r1)
-#  define blei(i0,r0,i1)               bcci(BCC_LE,i0,r0,i1)
-#  define bler_u(i0,r0,r1)             bccr(BCC_LS,i0,r0,r1)
-#  define blei_u(i0,r0,i1)             bcci(BCC_LS,i0,r0,i1)
-#  define beqr(i0,r0,r1)               bccr(BCC_EQ,i0,r0,r1)
-#  define beqi(i0,r0,i1)               _beqi(_jit,i0,r0,i1)
-static jit_word_t _beqi(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define bger(i0,r0,r1)               bccr(BCC_GE,i0,r0,r1)
-#  define bgei(i0,r0,i1)               bcci(BCC_GE,i0,r0,i1)
-#  define bger_u(i0,r0,r1)             bccr(BCC_CS,i0,r0,r1)
-#  define bgei_u(i0,r0,i1)             bcci(BCC_CS,i0,r0,i1)
-#  define bgtr(i0,r0,r1)               bccr(BCC_GT,i0,r0,r1)
-#  define bgti(i0,r0,i1)               bcci(BCC_GT,i0,r0,i1)
-#  define bgtr_u(i0,r0,r1)             bccr(BCC_HI,i0,r0,r1)
-#  define bgti_u(i0,r0,i1)             bcci(BCC_HI,i0,r0,i1)
-#  define bner(i0,r0,r1)               bccr(BCC_NE,i0,r0,r1)
-#  define bnei(i0,r0,i1)               _bnei(_jit,i0,r0,i1)
-static jit_word_t _bnei(jit_state_t*,jit_word_t,int32_t,jit_word_t);
-#  define baddr(cc,i0,r0,r1)           _baddr(_jit,cc,i0,r0,r1)
-static jit_word_t
-_baddr(jit_state_t*,int32_t,jit_word_t,int32_t,int32_t);
-#  define baddi(cc,i0,r0,i1)           _baddi(_jit,cc,i0,r0,i1)
-static jit_word_t
-_baddi(jit_state_t*,int32_t,jit_word_t,int32_t,jit_word_t);
-#  define boaddr(i0,r0,r1)             baddr(BCC_VS,i0,r0,r1)
-#  define boaddi(i0,r0,i1)             baddi(BCC_VS,i0,r0,i1)
-#  define boaddr_u(i0,r0,r1)           baddr(BCC_HS,i0,r0,r1)
-#  define boaddi_u(i0,r0,i1)           baddi(BCC_HS,i0,r0,i1)
-#  define bxaddr(i0,r0,r1)             baddr(BCC_VC,i0,r0,r1)
-#  define bxaddi(i0,r0,i1)             baddi(BCC_VC,i0,r0,i1)
-#  define bxaddr_u(i0,r0,r1)           baddr(BCC_LO,i0,r0,r1)
-#  define bxaddi_u(i0,r0,i1)           baddi(BCC_LO,i0,r0,i1)
-#  define bsubr(cc,i0,r0,r1)           _bsubr(_jit,cc,i0,r0,r1)
-static jit_word_t
-_bsubr(jit_state_t*,int32_t,jit_word_t,int32_t,int32_t);
-#  define bsubi(cc,i0,r0,i1)           _bsubi(_jit,cc,i0,r0,i1)
-static jit_word_t
-_bsubi(jit_state_t*,int32_t,jit_word_t,int32_t,jit_word_t);
-#  define bosubr(i0,r0,r1)             bsubr(BCC_VS,i0,r0,r1)
-#  define bosubi(i0,r0,i1)             bsubi(BCC_VS,i0,r0,i1)
-#  define bosubr_u(i0,r0,r1)           bsubr(BCC_LO,i0,r0,r1)
-#  define bosubi_u(i0,r0,i1)           bsubi(BCC_LO,i0,r0,i1)
-#  define bxsubr(i0,r0,r1)             bsubr(BCC_VC,i0,r0,r1)
-#  define bxsubi(i0,r0,i1)             bsubi(BCC_VC,i0,r0,i1)
-#  define bxsubr_u(i0,r0,r1)           bsubr(BCC_HS,i0,r0,r1)
-#  define bxsubi_u(i0,r0,i1)           bsubi(BCC_HS,i0,r0,i1)
-#  define bmxr(cc,i0,r0,r1)            _bmxr(_jit,cc,i0,r0,r1)
-static jit_word_t
-_bmxr(jit_state_t*,int32_t,jit_word_t,int32_t,int32_t);
-#  define bmxi(cc,i0,r0,r1)            _bmxi(_jit,cc,i0,r0,r1)
-static jit_word_t
-_bmxi(jit_state_t*,int32_t,jit_word_t,int32_t,jit_word_t);
-#  define bmsr(i0,r0,r1)               bmxr(BCC_NE,i0,r0,r1)
-#  define bmsi(i0,r0,i1)               bmxi(BCC_NE,i0,r0,i1)
-#  define bmcr(i0,r0,r1)               bmxr(BCC_EQ,i0,r0,r1)
-#  define bmci(i0,r0,i1)               bmxi(BCC_EQ,i0,r0,i1)
-#  define jmpr(r0)                     BR(r0)
-#  define jmpi(i0)                     _jmpi(_jit,i0)
-static void _jmpi(jit_state_t*,jit_word_t);
-#  define jmpi_p(i0)                   _jmpi_p(_jit,i0)
-static jit_word_t _jmpi_p(jit_state_t*,jit_word_t);
-#  define callr(r0)                    BLR(r0)
-#  define calli(i0)                    _calli(_jit,i0)
-static void _calli(jit_state_t*,jit_word_t);
-#  define calli_p(i0)                  _calli_p(_jit,i0)
-static jit_word_t _calli_p(jit_state_t*,jit_word_t);
-#  define prolog(i0)                   _prolog(_jit,i0)
-static void _prolog(jit_state_t*,jit_node_t*);
-#  define epilog(i0)                   _epilog(_jit,i0)
-static void _epilog(jit_state_t*,jit_node_t*);
-#  define vastart(r0)                  _vastart(_jit, r0)
-static void _vastart(jit_state_t*, int32_t);
-#  define vaarg(r0, r1)                        _vaarg(_jit, r0, r1)
-static void _vaarg(jit_state_t*, int32_t, int32_t);
-#  define patch_at(jump,label)         _patch_at(_jit,jump,label)
-static void _patch_at(jit_state_t*,jit_word_t,jit_word_t);
-#endif
+static void
+TSTI(jit_state_t *_jit, int32_t Rn, int32_t Imm12) 
+{
+  return oxxi(_jit, A64_ANDSI,XZR_REGNO,Rn,Imm12);
+}
 
-#if CODE
-static int32_t
-logical_immediate(jit_word_t imm)
+static void
+MOV(jit_state_t *_jit, int32_t Rd, int32_t Rm) 
 {
-    /* There are 5334 possible immediate values, but to avoid the
-     * need of either too complex code or large lookup tables,
-     * only check for (simply) encodable common/small values */
-    switch (imm) {
-       case -16:       return (0xf3b);
-       case -15:       return (0xf3c);
-       case -13:       return (0xf3d);
-       case -9:        return (0xf3e);
-       case -8:        return (0xf7c);
-       case -7:        return (0xf7d);
-       case -5:        return (0xf7e);
-       case -4:        return (0xfbd);
-       case -3:        return (0xfbe);
-       case -2:        return (0xffe);
-       case 1:         return (0x000);
-       case 2:         return (0xfc0);
-       case 3:         return (0x001);
-       case 4:         return (0xf80);
-       case 6:         return (0xfc1);
-       case 7:         return (0x002);
-       case 8:         return (0xf40);
-       case 12:        return (0xf81);
-       case 14:        return (0xfc2);
-       case 15:        return (0x003);
-       case 16:        return (0xf00);
-       default:        return (-1);
-    }
+  return ox_x(_jit, A64_MOV|XS,Rd,Rm);
 }
 
 static void
-_oxxx(jit_state_t *_jit, int32_t Op,
-      int32_t Rd, int32_t Rn, int32_t Rm)
+MVN(jit_state_t *_jit, int32_t Rd, int32_t Rm) 
 {
-    instr_t    i;
-    assert(!(Rd &       ~0x1f));
-    assert(!(Rn &       ~0x1f));
-    assert(!(Rm &       ~0x1f));
-    assert(!(Op & ~0xffe0fc00));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.Rn.b = Rn;
-    i.Rm.b = Rm;
-    ii(i.w);
+  return ox_x(_jit, A64_MVN|XS,Rd,Rm);
 }
 
 static void
-_oxxi(jit_state_t *_jit, int32_t Op,
-      int32_t Rd, int32_t Rn, int32_t Imm12)
+NEG(jit_state_t *_jit, int32_t Rd, int32_t Rm) 
 {
-    instr_t    i;
-    assert(!(Rd    &       ~0x1f));
-    assert(!(Rn    &       ~0x1f));
-    assert(!(Imm12 &      ~0xfff));
-    assert(!(Op    & ~0xffe00000));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.Rn.b = Rn;
-    i.imm12.b = Imm12;
-    ii(i.w);
+  return ox_x(_jit, A64_NEG|XS,Rd,Rm);
 }
 
 static void
-_oxx9(jit_state_t *_jit, int32_t Op,
-      int32_t Rd, int32_t Rn, int32_t Imm9)
+MOVN(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rd   &       ~0x1f));
-    assert(!(Rn   &       ~0x1f));
-    assert(!(Imm9 &      ~0x1ff));
-    assert(!(Op   & ~0xffe00000));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.Rn.b = Rn;
-    i.imm9.b = Imm9;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVN|XS,Rd,Imm16);
 }
 
 static void
-_ox19(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Simm19)
+MOVN_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rd &         ~0x1f));
-    assert(Simm19 >= -262148 && Simm19 <= 262143);
-    assert(!(Op   & ~0xff000000));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.imm19.b = Simm19;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVN|XS|MOVI_LSL_16,Rd,Imm16);
 }
 
 static void
-_oc19(jit_state_t *_jit, int32_t Op, int32_t Cc, int32_t Simm19)
+MOVN_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Cc &          ~0xf));
-    assert(Simm19 >= -262148 && Simm19 <= 262143);
-    assert(!(Op   & ~0xff000000));
-    i.w = Op;
-    i.cond2.b = Cc;
-    i.imm19.b = Simm19;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVN|XS|MOVI_LSL_32,Rd,Imm16);
 }
 
 static void
-_o26(jit_state_t *_jit, int32_t Op, int32_t Simm26)
+MOVN_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(Simm26 >= -33554432 && Simm26 <= 33554431);
-    assert(!(Op   & ~0xfc000000));
-    i.w = Op;
-    i.imm26.b = Simm26;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVN|XS|MOVI_LSL_48,Rd,Imm16);
 }
 
 static void
-_ox_x(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rm)
+MOVZ(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rd &       ~0x1f));
-    assert(!(Rm &       ~0x1f));
-    assert(!(Op & ~0xffe0ffe0));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.Rm.b = Rm;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVZ|XS,Rd,Imm16);
 }
 
 static void
-_o_xx(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Rn)
+MOVZ_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rd &       ~0x1f));
-    assert(!(Rn &       ~0x1f));
-    assert(!(Op & ~0xfffffc00));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.Rn.b = Rn;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_16,Rd,Imm16);
 }
 
 static void
-_oxx_(jit_state_t *_jit, int32_t Op, int32_t Rn, int32_t Rm)
+MOVZ_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rn &       ~0x1f));
-    assert(!(Rm &       ~0x1f));
-    assert(!(Op & ~0xffc0fc1f));
-    i.w = Op;
-    i.Rn.b = Rn;
-    i.Rm.b = Rm;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_32,Rd,Imm16);
 }
 
 static void
-_o_x_(jit_state_t *_jit, int32_t Op, int32_t Rn)
+MOVZ_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rn & ~0x1f));
-    assert(!(Op & 0x3e0));
-    i.w = Op;
-    i.Rn.b = Rn;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVZ|XS|MOVI_LSL_48,Rd,Imm16);
 }
 
 static void
-_ox_h(jit_state_t *_jit, int32_t Op, int32_t Rd, int32_t Imm16)
+MOVK(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rd    &       ~0x1f));
-    assert(!(Imm16 &     ~0xffff));
-    assert(!(Op    & ~0xffe00000));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.imm16.b = Imm16;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVK|XS,Rd,Imm16);
 }
 
 static void
-_oxxrs(jit_state_t *_jit, int32_t Op,
-       int32_t Rd, int32_t Rn, int32_t R, int32_t S)
+MOVK_16(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rd &       ~0x1f));
-    assert(!(Rn &       ~0x1f));
-    assert(!(R  &       ~0x3f));
-    assert(!(S  &       ~0x3f));
-    assert(!(Op & ~0xffc00000));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.Rn.b = Rn;
-    i.immr.b = R;
-    i.imms.b = S;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVK|XS|MOVI_LSL_16,Rd,Imm16);
 }
 
 static void
-_oxxxc(jit_state_t *_jit, int32_t Op,
-       int32_t Rd, int32_t Rn, int32_t Rm, int32_t Cc)
+MOVK_32(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rd &       ~0x1f));
-    assert(!(Rn &       ~0x1f));
-    assert(!(Rm &       ~0x1f));
-    assert(!(Cc  &       ~0xf));
-    assert(!(Op & ~0xffc00c00));
-    i.w = Op;
-    i.Rd.b = Rd;
-    i.Rn.b = Rn;
-    i.Rm.b = Rm;
-    i.cond.b = Cc;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVK|XS|MOVI_LSL_32,Rd,Imm16);
 }
 
 static void
-_oxxx7(jit_state_t *_jit, int32_t Op,
-       int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7)
+MOVK_48(jit_state_t *_jit, int32_t Rd, int32_t Imm16) 
 {
-    instr_t    i;
-    assert(!(Rt  &       ~0x1f));
-    assert(!(Rt2 &       ~0x1f));
-    assert(!(Rn  &       ~0x1f));
-    assert(Simm7 >= -128 && Simm7 <= 127);
-    assert(!(Op & ~0xffc003e0));
-    i.w = Op;
-    i.Rt.b = Rt;
-    i.Rt2.b = Rt2;
-    i.Rn.b = Rn;
-    i.imm7.b = Simm7;
-    ii(i.w);
+  return ox_h(_jit, A64_MOVK|XS|MOVI_LSL_48,Rd,Imm16);
 }
 
 static void
-_nop(jit_state_t *_jit, int32_t i0)
+ADD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    for (; i0 > 0; i0 -= 4)
-       NOP();
-    assert(i0 == 0);
+  return oxxx(_jit, A64_ADD|XS,Rd,Rn,Rm);
 }
 
 static void
-_addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ADDI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    jit_word_t         is =  i0 >> 12;
-    jit_word_t         in = -i0;
-    jit_word_t         iS =  in >> 12;
-    if (      i0 >= 0 && i0 <= 0xfff)
-       ADDI   (r0, r1, i0);
-    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
-       ADDI_12(r0, r1, is);
-    else if ( in >= 0 && in <= 0xfff)
-       SUBI   (r0, r1, in);
-    else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
-       SUBI_12(r0, r1, iS);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       addr(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxi(_jit, A64_ADDI|XS,Rd,Rn,Imm12);
 }
 
 static void
-_addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-    int32_t            reg;
-    jit_word_t         is =  i0 >> 12;
-    jit_word_t         in = -i0;
-    jit_word_t         iS =  in >> 12;
-    if (      i0 >= 0 && i0 <= 0xfff)
-       ADDSI   (r0, r1, i0);
-    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
-       ADDSI_12(r0, r1, is);
-    else if ( in >= 0 && in <= 0xfff)
-       SUBSI   (r0, r1, in);
-    else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
-       SUBSI_12(r0, r1, iS);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       addcr(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+ADDI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
+{
+  return oxxi(_jit, A64_ADDI|XS|LSL_12,Rd,Rn,Imm12);
 }
 
 static void
-_addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+MOV_XSP(jit_state_t *_jit, int32_t Rd, int32_t Rn) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    addxr(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  return ADDI(_jit, Rd,Rn,0);
 }
 
 static void
-_subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ADDS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    jit_word_t         is = i0 >> 12;
-    if (      i0 >= 0 && i0 <= 0xfff)
-       SUBI   (r0, r1, i0);
-    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
-       SUBI_12(r0, r1, is);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       subr(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxx(_jit, A64_ADDS|XS,Rd,Rn,Rm);
 }
 
 static void
-_subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ADDSI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    jit_word_t         is = i0 >> 12;
-    if (      i0 >= 0 && i0 <= 0xfff)
-       SUBSI   (r0, r1, i0);
-    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
-       SUBSI_12(r0, r1, is);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       subcr(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxi(_jit, A64_ADDSI|XS,Rd,Rn,Imm12);
 }
 
 static void
-_subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ADDSI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    subxr(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  return oxxi(_jit, A64_ADDSI|XS|LSL_12,Rd,Rn,Imm12);
 }
 
 static void
-_rsbi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ADCS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    subi(r0, r1, i0);
-    negr(r0, r0);
+  return oxxx(_jit, A64_ADCS|XS,Rd,Rn,Rm);
 }
 
 static void
-_muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+SUB(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    mulr(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_SUB|XS,Rd,Rn,Rm);
 }
 
 static void
-_qmulr(jit_state_t *_jit, int32_t r0,
-       int32_t r1, int32_t r2, int32_t r3)
+SUBI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    if (r0 == r2 || r0 == r3) {
-       reg = jit_get_reg(jit_class_gpr);
-       mulr(rn(reg), r2, r3);
-    }
-    else
-       mulr(r0, r2, r3);
-    SMULH(r1, r2, r3);
-    if (r0 == r2 || r0 == r3) {
-       movr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxi(_jit, A64_SUBI|XS,Rd,Rn,Imm12);
 }
 
 static void
-_qmuli(jit_state_t *_jit, int32_t r0,
-       int32_t r1, int32_t r2, jit_word_t i0)
+SUBI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    qmulr(r0, r1, r2, rn(reg));
-    jit_unget_reg(reg);
+  return oxxi(_jit, A64_SUBI|XS|LSL_12,Rd,Rn,Imm12);
 }
 
 static void
-_qmulr_u(jit_state_t *_jit, int32_t r0,
-        int32_t r1, int32_t r2, int32_t r3)
+SUBS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    if (r0 == r2 || r0 == r3) {
-       reg = jit_get_reg(jit_class_gpr);
-       mulr(rn(reg), r2, r3);
-    }
-    else
-       mulr(r0, r2, r3);
-    UMULH(r1, r2, r3);
-    if (r0 == r2 || r0 == r3) {
-       movr(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxx(_jit, A64_SUBS|XS,Rd,Rn,Rm);
 }
 
 static void
-_qmuli_u(jit_state_t *_jit, int32_t r0,
-        int32_t r1, int32_t r2, jit_word_t i0)
+SUBSI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    qmulr_u(r0, r1, r2, rn(reg));
-    jit_unget_reg(reg);
+  return oxxi(_jit, A64_SUBSI|XS,Rd,Rn,Imm12);
 }
 
 static void
-_divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+SUBSI_12(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    divr(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  return oxxi(_jit, A64_SUBSI|XS|LSL_12,Rd,Rn,Imm12);
 }
 
 static void
-_divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+SBCS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    divr_u(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_SBCS|XS,Rd,Rn,Rm);
 }
 
 static void
-_iqdivr(jit_state_t *_jit, jit_bool_t sign,
-       int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+MUL(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            sv0, rg0;
-    int32_t            sv1, rg1;
-    if (r0 == r2 || r0 == r3) {
-       sv0 = jit_get_reg(jit_class_gpr);
-       rg0 = rn(sv0);
-    }
-    else
-       rg0 = r0;
-    if (r1 == r2 || r1 == r3) {
-       sv1 = jit_get_reg(jit_class_gpr);
-       rg1 = rn(sv1);
-    }
-    else
-       rg1 = r1;
-    if (sign)
-       divr(rg0, r2, r3);
-    else
-       divr_u(rg0, r2, r3);
-    mulr(rg1, r3, rg0);
-    subr(rg1, r2, rg1);
-    if (rg0 != r0) {
-       movr(r0, rg0);
-       jit_unget_reg(sv0);
-    }
-    if (rg1 != r1) {
-       movr(r1, rg1);
-       jit_unget_reg(sv1);
-    }
+  return oxxx(_jit, A64_MUL|XS,Rd,Rn,Rm);
 }
 
 static void
-_qdivi(jit_state_t *_jit, int32_t r0,
-       int32_t r1, int32_t r2, jit_word_t i0)
+SMULL(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    qdivr(r0, r1, r2, rn(reg));
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_SMULL,Rd,Rn,Rm);
 }
 
 static void
-_qdivi_u(jit_state_t *_jit, int32_t r0,
-        int32_t r1, int32_t r2, jit_word_t i0)
+SMULH(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    qdivr_u(r0, r1, r2, rn(reg));
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_SMULH,Rd,Rn,Rm);
 }
 
 static void
-_remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+UMULL(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    if (r0 == r1 || r0 == r2) {
-       reg = jit_get_reg(jit_class_gpr);
-       divr(rn(reg), r1, r2);
-       mulr(rn(reg), r2, rn(reg));
-       subr(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       divr(r0, r1, r2);
-       mulr(r0, r2, r0);
-       subr(r0, r1, r0);
-    }
+  return oxxx(_jit, A64_UMULL,Rd,Rn,Rm);
 }
 
 static void
-_remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+UMULH(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    remr(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_UMULH,Rd,Rn,Rm);
 }
 
 static void
-_remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+SDIV(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    if (r0 == r1 || r0 == r2) {
-       reg = jit_get_reg(jit_class_gpr);
-       divr_u(rn(reg), r1, r2);
-       mulr(rn(reg), r2, rn(reg));
-       subr(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
-    else {
-       divr_u(r0, r1, r2);
-       mulr(r0, r2, r0);
-       subr(r0, r1, r0);
-    }
+  return oxxx(_jit, A64_SDIV|XS,Rd,Rn,Rm);
 }
 
 static void
-_remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+UDIV(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    remr_u(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_UDIV|XS,Rd,Rn,Rm);
 }
 
 static void
-_lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LSL(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    if (i0 == 0)
-       movr(r0, r1);
-    else {
-       assert(i0 > 0 && i0 < 64);
-       LSLI(r0, r1, i0);
-    }
+  return oxxx(_jit, A64_LSL|XS,Rd,Rn,Rm);
 }
 
 static void
-_rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LSLI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) 
 {
-    if (i0 == 0)
-       movr(r0, r1);
-    else {
-       assert(i0 > 0 && i0 < 64);
-       ASRI(r0, r1, i0);
-    }
+  return UBFM(_jit, r0,r1,(64-i0)&63,63-i0);
 }
 
 static void
-_rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+ASR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    if (i0 == 0)
-       movr(r0, r1);
-    else {
-       assert(i0 > 0 && i0 < 64);
-       LSRI(r0, r1, i0);
-    }
+  return oxxx(_jit, A64_ASR|XS,Rd,Rn,Rm);
 }
 
 static void
-_andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-    int32_t            reg;
-    int32_t            imm;
-    if (i0 == 0)
-       movi(r0, 0);
-    else if (i0 == -1)
-       movr(r0, r1);
-    else {
-       imm = logical_immediate(i0);
-       if (imm != -1)
-           ANDI(r0, r1, imm);
-       else {
-           reg = jit_get_reg(jit_class_gpr);
-           movi(rn(reg), i0);
-           andr(r0, r1, rn(reg));
-           jit_unget_reg(reg);
-       }
-    }
+ASRI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) 
+{
+  return SBFM(_jit, r0,r1,i0,63);
 }
 
 static void
-_ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-    int32_t            reg;
-    int32_t            imm;
-    if (i0 == 0)
-       movr(r0, r1);
-    else if (i0 == -1)
-       movi(r0, -1);
-    else {
-       imm = logical_immediate(i0);
-       if (imm != -1)
-           ORRI(r0, r1, imm);
-       else {
-           reg = jit_get_reg(jit_class_gpr);
-           movi(rn(reg), i0);
-           orr(r0, r1, rn(reg));
-           jit_unget_reg(reg);
-       }
-    }
+LSR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
+{
+  return oxxx(_jit, A64_LSR|XS,Rd,Rn,Rm);
 }
 
 static void
-_xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-    int32_t            reg;
-    int32_t            imm;
-    if (i0 == 0)
-       movr(r0, r1);
-    else if (i0 == -1)
-       comr(r0, r1);
-    else {
-       imm = logical_immediate(i0);
-       if (imm != -1)
-           EORI(r0, r1, imm);
-       else {
-           reg = jit_get_reg(jit_class_gpr);
-           movi(rn(reg), i0);
-           xorr(r0, r1, rn(reg));
-           jit_unget_reg(reg);
-       }
-    }
+LSRI(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0) 
+{
+  return UBFM(_jit, r0,r1,i0,63);
 }
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
 static void
-_bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+AND(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    bswapr_ul(r0, r1);
-    rshi_u(r0, r0, 48);
+  return oxxx(_jit, A64_AND|XS,Rd,Rn,Rm);
 }
 
+/* actually should use oxxrs but logical_immediate returns proper encoding */
 static void
-_bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+ANDI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    bswapr_ul(r0, r1);
-    rshi_u(r0, r0, 32);
+  return oxxi(_jit, A64_ANDI|XS,Rd,Rn,Imm12);
 }
-#endif
 
 static void
-_ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+ORR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    ldr_c(r0, rn(reg));
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_ORR|XS,Rd,Rn,Rm);
 }
 
+/* actually should use oxxrs but logical_immediate returns proper encoding */
 static void
-_ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+ORRI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    LDRBI(r0, r1, 0);
-#if 0
-    extr_uc(r0, r0);
-#endif
+  return oxxi(_jit, A64_ORRI|XS,Rd,Rn,Imm12);
 }
 
 static void
-_ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+EOR(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    ldr_uc(r0, rn(reg));
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_EOR|XS,Rd,Rn,Rm);
 }
 
+/* actually should use oxxrs but logical_immediate returns proper encoding */
 static void
-_ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+EORI(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    ldr_s(r0, rn(reg));
-    jit_unget_reg(reg);
+  return oxxi(_jit, A64_EORI|XS,Rd,Rn,Imm12);
 }
 
 static void
-_ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+SXTB(jit_state_t *_jit, int32_t Rd, int32_t Rn) 
 {
-    LDRHI(r0, r1, 0);
-#if 0
-    extr_us(r0, r0);
-#endif
+  return SBFM(_jit, Rd,Rn,0,7);
 }
 
 static void
-_ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+SXTH(jit_state_t *_jit, int32_t Rd, int32_t Rn) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    ldr_us(r0, rn(reg));
-    jit_unget_reg(reg);
+  return SBFM(_jit, Rd,Rn,0,15);
 }
 
 static void
-_ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+SXTW(jit_state_t *_jit, int32_t Rd, int32_t Rn) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    ldr_i(r0, rn(reg));
-    jit_unget_reg(reg);
+  return SBFM(_jit, Rd,Rn,0,31);
 }
 
 static void
-_ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+UXTB(jit_state_t *_jit, int32_t Rd, int32_t Rn) 
 {
-    LDRWI(r0, r1, 0);
-#if 0
-    extr_ui(r0, r0);
-#endif
+  return UBFX(_jit, Rd,Rn,0,7);
 }
 
 static void
-_ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+UXTH(jit_state_t *_jit, int32_t Rd, int32_t Rn) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    ldr_ui(r0, rn(reg));
-    jit_unget_reg(reg);
+  return UBFX(_jit, Rd,Rn,0,15);
 }
 
 static void
-_ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+UXTW(jit_state_t *_jit, int32_t Rd, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    ldr_l(r0, rn(reg));
-    jit_unget_reg(reg);
+  return ox_x(_jit, A64_UXTW,Rd,Rm);
 }
 
 static void
-_ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+REV(jit_state_t *_jit, int32_t Rd, int32_t Rn) 
 {
-    LDRSB(r0, r1, r2);
-    extr_c(r0, r0);
+  return o_xx(_jit, A64_REV,Rd,Rn);
 }
 
 static void
-_ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LDRSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    if (i0 >= 0 && i0 <= 4095)
-       LDRSBI(r0, r1, i0);
-    else if (i0 > -256 && i0 < 0)
-       LDURSB(r0, r1, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       LDRSB(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
-    extr_c(r0, r0);
+  return oxxx(_jit, A64_LDRSB,Rt,Rn,Rm);
 }
 
 static void
-_ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+LDRSBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    LDRB(r0, r1, r2);
-#if 0
-    extr_uc(r0, r0);
-#endif
+  return oxxi(_jit, A64_LDRSBI,Rt,Rn,Imm12);
 }
 
 static void
-_ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LDURSB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    int32_t            reg;
-    if (i0 >= 0 && i0 <= 4095)
-       LDRBI(r0, r1, i0);
-    else if (i0 > -256 && i0 < 0)
-       LDURB(r0, r1, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_uc(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-#if 0
-    extr_uc(r0, r0);
-#endif
+  return oxx9(_jit, A64_LDURSB,Rt,Rn,Imm9);
 }
 
 static void
-_ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LDRB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    assert(!(i0 & 1));
-    if (i0 >= 0 && i0 <= 8191)
-       LDRSHI(r0, r1, i0 >> 1);
-    else if (i0 > -256 && i0 < 0)
-       LDURSH(r0, r1, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       LDRSH(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxx(_jit, A64_LDRB,Rt,Rn,Rm);
 }
 
 static void
-_ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+LDRBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    LDRH(r0, r1, r2);
-#if 0
-    extr_us(r0, r0);
-#endif
+  return oxxi(_jit, A64_LDRBI,Rt,Rn,Imm12);
 }
 
 static void
-_ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LDURB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    int32_t            reg;
-    assert(!(i0 & 1));
-    if (i0 >= 0 && i0 <= 8191)
-       LDRHI(r0, r1, i0 >> 1);
-    else if (i0 > -256 && i0 < 0)
-       LDURH(r0, r1, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       LDRH(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
-#if 0
-    extr_us(r0, r0);
-#endif
+  return oxx9(_jit, A64_LDURB,Rt,Rn,Imm9);
 }
 
 static void
-_ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LDRSH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    assert(!(i0 & 3));
-    if (i0 >= 0 && i0 <= 16383)
-       LDRSWI(r0, r1, i0 >> 2);
-    else if (i0 > -256 && i0 < 0)
-       LDURSW(r0, r1, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_i(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxx(_jit, A64_LDRSH,Rt,Rn,Rm);
 }
 
 static void
-_ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+LDRSHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    LDRW(r0, r1, r2);
-#if 0
-    extr_ui(r0, r0);
-#endif
+  return oxxi(_jit, A64_LDRSHI,Rt,Rn,Imm12);
 }
 
 static void
-_ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LDURSH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    int32_t            reg;
-    assert(!(i0 & 3));
-    if (i0 >= 0 && i0 <= 16383)
-       LDRWI(r0, r1, i0 >> 2);
-    else if (i0 > -256 && i0 < 0)
-       LDURW(r0, r1, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       LDRW(r0, r1, rn(reg));
-       jit_unget_reg(reg);
-    }
-#if 0
-    extr_ui(r0, r0);
-#endif
+  return oxx9(_jit, A64_LDURSH,Rt,Rn,Imm9);
 }
 
 static void
-_ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+LDRH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    assert(!(i0 & 7));
-    if (i0 >= 0 && i0 <= 32767)
-       LDRI(r0, r1, i0 >> 3);
-    else if (i0 > -256 && i0 < 0)
-       LDUR(r0, r1, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r1, i0);
-       ldr_l(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxx(_jit, A64_LDRH,Rt,Rn,Rm);
 }
 
 static void
-_sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+LDRHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    str_c(rn(reg), r0);
-    jit_unget_reg(reg);
+  return oxxi(_jit, A64_LDRHI,Rt,Rn,Imm12);
 }
 
 static void
-_sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+LDURH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    str_s(rn(reg), r0);
-    jit_unget_reg(reg);
+  return oxx9(_jit, A64_LDURH,Rt,Rn,Imm9);
 }
 
 static void
-_sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+LDRSW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    str_i(rn(reg), r0);
-    jit_unget_reg(reg);
+  return oxxx(_jit, A64_LDRSW,Rt,Rn,Rm);
 }
 
 static void
-_sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+LDRSWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    movi(rn(reg), i0);
-    str_l(rn(reg), r0);
-    jit_unget_reg(reg);
+  return oxxi(_jit, A64_LDRSWI,Rt,Rn,Imm12);
 }
 
 static void
-_stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+LDURSW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    int32_t            reg;
-    if (i0 >= 0 && i0 <= 4095)
-       STRBI(r1, r0, i0);
-    else if (i0 > -256 && i0 < 0)
-       STURB(r1, r0, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r0, i0);
-       str_c(rn(reg), r1);
-       jit_unget_reg(reg);
-    }
+  return oxx9(_jit, A64_LDURSW,Rt,Rn,Imm9);
 }
 
 static void
-_stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+LDRW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    assert(!(i0 & 1));
-    if (i0 >= 0 && i0 <= 8191)
-       STRHI(r1, r0, i0 >> 1);
-    else if (i0 > -256 && i0 < 0)
-       STURH(r1, r0, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r0, i0);
-       str_s(rn(reg), r1);
-       jit_unget_reg(reg);
-    }
+  return oxxx(_jit, A64_LDRW,Rt,Rn,Rm);
 }
 
 static void
-_stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+LDRWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    int32_t            reg;
-    assert(!(i0 & 3));
-    if (i0 >= 0 && i0 <= 16383)
-       STRWI(r1, r0, i0 >> 2);
-    else if (i0 > -256 && i0 < 0)
-       STURW(r1, r0, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r0, i0);
-       str_i(rn(reg), r1);
-       jit_unget_reg(reg);
-    }
+  return oxxi(_jit, A64_LDRWI,Rt,Rn,Imm12);
 }
 
 static void
-_stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+LDURW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    int32_t            reg;
-    assert(!(i0 & 7));
-    if (i0 >= 0 && i0 <= 32767)
-       STRI(r1, r0, i0 >> 3);
-    else if (i0 > -256 && i0 < 0)
-       STUR(r1, r0, i0 & 0x1ff);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       addi(rn(reg), r0, i0);
-       str_l(rn(reg), r1);
-       jit_unget_reg(reg);
-    }
+  return oxx9(_jit, A64_LDURW,Rt,Rn,Imm9);
 }
 
 static void
-_movr(jit_state_t *_jit, int32_t r0, int32_t r1)
-{
-    if (r0 != r1)
-       MOV(r0, r1);
-}
-
-static void
-_movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
-{
-    jit_word_t         n0, ibit, nbit;
-    n0 = ~i0;
-    ibit = nbit = 0;
-    if (i0 & 0x000000000000ffffL)      ibit |= 1;
-    if (i0 & 0x00000000ffff0000L)      ibit |= 2;
-    if (i0 & 0x0000ffff00000000L)      ibit |= 4;
-    if (i0 & 0xffff000000000000L)      ibit |= 8;
-    if (n0 & 0x000000000000ffffL)      nbit |= 1;
-    if (n0 & 0x00000000ffff0000L)      nbit |= 2;
-    if (n0 & 0x0000ffff00000000L)      nbit |= 4;
-    if (n0 & 0xffff000000000000L)      nbit |= 8;
-    switch (ibit) {
-       case 0:
-           MOVZ   (r0,  0);
-           break;
-       case 1:
-           MOVZ   (r0,  i0        & 0xffff);
-           break;
-       case 2:
-           MOVZ_16(r0, (i0 >> 16) & 0xffff);
-           break;
-       case 3:
-           MOVZ   (r0,  i0        & 0xffff);
-           MOVK_16(r0, (i0 >> 16) & 0xffff);
-           break;
-       case 4:
-           MOVZ_32(r0, (i0 >> 32) & 0xffff);
-           break;
-       case 5:
-           MOVZ   (r0,  i0        & 0xffff);
-           MOVK_32(r0, (i0 >> 32) & 0xffff);
-           break;
-       case 6:
-           MOVZ_16(r0, (i0 >> 16) & 0xffff);
-           MOVK_32(r0, (i0 >> 32) & 0xffff);
-           break;
-       case 7:
-           if (nbit == 8)
-               MOVN_48(r0, (n0 >> 48) & 0xffff);
-           else {
-               MOVZ   (r0,  i0        & 0xffff);
-               MOVK_16(r0, (i0 >> 16) & 0xffff);
-               MOVK_32(r0, (i0 >> 32) & 0xffff);
-           }
-           break;
-       case 8:
-           MOVZ_48(r0, (i0 >> 48) & 0xffff);
-           break;
-       case 9:
-           MOVZ   (r0,  i0        & 0xffff);
-           MOVK_48(r0, (i0 >> 48) & 0xffff);
-           break;
-       case 10:
-           MOVZ_16(r0, (i0 >> 16) & 0xffff);
-           MOVK_48(r0, (i0 >> 48) & 0xffff);
-           break;
-       case 11:
-           if (nbit == 4)
-               MOVN_32(r0, (n0 >> 32) & 0xffff);
-           else {
-               MOVZ   (r0,  i0        & 0xffff);
-               MOVK_16(r0, (i0 >> 16) & 0xffff);
-               MOVK_48(r0, (i0 >> 48) & 0xffff);
-           }
-           break;
-       case 12:
-           MOVZ_32(r0, (i0 >> 32) & 0xffff);
-           MOVK_48(r0, (i0 >> 48) & 0xffff);
-           break;
-       case 13:
-           if (nbit == 2)
-               MOVN_16(r0, (n0 >> 16) & 0xffff);
-           else {
-               MOVZ   (r0,  i0        & 0xffff);
-               MOVK_32(r0, (i0 >> 32) & 0xffff);
-               MOVK_48(r0, (i0 >> 48) & 0xffff);
-           }
-           break;
-       case 14:
-           if (nbit == 1)
-               MOVN   (r0, (n0)       & 0xffff);
-           else {
-               MOVZ_16(r0, (i0 >> 16) & 0xffff);
-               MOVK_32(r0, (i0 >> 32) & 0xffff);
-               MOVK_48(r0, (i0 >> 48) & 0xffff);
-           }
-           break;
-       case 15:
-           if (nbit == 0)
-               MOVN   (r0,  0);
-           else if (nbit == 1)
-               MOVN   (r0,  n0        & 0xffff);
-           else if (nbit == 8)
-               MOVN_48(r0, (n0 >> 48) & 0xffff);
-           else {
-               MOVZ   (r0,  i0        & 0xffff);
-               MOVK_16(r0, (i0 >> 16) & 0xffff);
-               MOVK_32(r0, (i0 >> 32) & 0xffff);
-               MOVK_48(r0, (i0 >> 48) & 0xffff);
-           }
-           break;
-       default:
-           abort();
-    }
+LDR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
+{
+  return oxxx(_jit, A64_LDR,Rt,Rn,Rm);
 }
 
-static jit_word_t
-_movi_p(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+static void
+LDRI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    jit_word_t         w;
-    w = _jit->pc.w;
-    MOVZ   (r0,  i0        & 0xffff);
-    MOVK_16(r0, (i0 >> 16) & 0xffff);
-    MOVK_32(r0, (i0 >> 32) & 0xffff);
-    MOVK_48(r0, (i0 >> 48) & 0xffff);
-    return (w);
+  return oxxi(_jit, A64_LDRI,Rt,Rn,Imm12);
 }
 
 static void
-_ccr(jit_state_t *_jit, int32_t cc,
-     int32_t r0, int32_t r1, int32_t r2)
+LDUR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    CMP(r1, r2);
-    CSET(r0, cc);
+  return oxx9(_jit, A64_LDUR,Rt,Rn,Imm9);
 }
 
 static void
-_cci(jit_state_t *_jit, int32_t cc,
-     int32_t r0, int32_t r1, jit_word_t i0)
+STRB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    int32_t            reg;
-    jit_word_t         is =  i0 >> 12;
-    jit_word_t         in = -i0;
-    jit_word_t         iS =  in >> 12;
-    if (      i0 >= 0 && i0 <= 0xfff)
-       CMPI   (r1, i0);
-    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
-       CMPI_12(r1, is);
-    else if ( in >= 0 && in <= 0xfff)
-       CMNI   (r1, in);
-    else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
-       CMNI_12(r1, iS);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       CMP(r1, rn(reg));
-       jit_unget_reg(reg);
-    }
-    CSET(r0, cc);
-}
-
-static jit_word_t
-_bccr(jit_state_t *_jit, int32_t cc,
-      jit_word_t i0, int32_t r0, int32_t r1)
-{
-    jit_word_t         w, d;
-    CMP(r0, r1);
-    w = _jit->pc.w;
-    d = (i0 - w) >> 2;
-    B_C(cc, d);
-    return (w);
-}
-
-static jit_word_t
-_bcci(jit_state_t *_jit, int32_t cc,
-      jit_word_t i0, int32_t r0, jit_word_t i1)
-{
-    int32_t            reg;
-    jit_word_t         w, d;
-    jit_word_t         is =  i1 >> 12;
-    jit_word_t         in = -i1;
-    jit_word_t         iS =  in >> 12;
-    if (      i1 >= 0 && i1 <= 0xfff)
-       CMPI   (r0, i1);
-    else if ((is << 12) == i0 && is >= 0 && is <= 0xfff)
-       CMPI_12(r0, is);
-    else if ( in >= 0 && in <= 0xfff)
-       CMNI   (r0, in);
-    else if ((iS << 12) == is && iS >= 0 && iS <= 0xfff)
-       CMNI_12(r0, iS);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i1);
-       CMP(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    w = _jit->pc.w;
-    d = (i0 - w) >> 2;
-    B_C(cc, d);
-    return (w);
+  return oxxx(_jit, A64_STRB,Rt,Rn,Rm);
 }
 
-static jit_word_t
-_beqi(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static void
+STRBI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    jit_word_t         w;
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       CBZ(r0, (i0 - w) >> 2);
-    }
-    else
-       w = bcci(BCC_EQ, i0, r0, i1);
-    return (w);
+  return oxxi(_jit, A64_STRBI,Rt,Rn,Imm12);
 }
 
-static jit_word_t
-_bnei(jit_state_t *_jit, jit_word_t i0, int32_t r0, jit_word_t i1)
+static void
+STURB(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    jit_word_t         w;
-    if (i1 == 0) {
-       w = _jit->pc.w;
-       CBNZ(r0, (i0 - w) >> 2);
-    }
-    else
-       w = bcci(BCC_NE, i0, r0, i1);
-    return (w);
+  return oxx9(_jit, A64_STURB,Rt,Rn,Imm9);
 }
 
-static jit_word_t
-_baddr(jit_state_t *_jit, int32_t cc,
-       jit_word_t i0, int32_t r0, int32_t r1)
+static void
+STRH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    jit_word_t         w;
-    addcr(r0, r0, r1);
-    w = _jit->pc.w;
-    B_C(cc, (i0 - w) >> 2);
-    return (w);
+  return oxxx(_jit, A64_STRH,Rt,Rn,Rm);
 }
 
-static jit_word_t
-_baddi(jit_state_t *_jit, int32_t cc,
-       jit_word_t i0, int32_t r0, jit_word_t i1)
+static void
+STRHI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    jit_word_t         w;
-    addci(r0, r0, i1);
-    w = _jit->pc.w;
-    B_C(cc, (i0 - w) >> 2);
-    return (w);
+  return oxxi(_jit, A64_STRHI,Rt,Rn,Imm12);
 }
 
-static jit_word_t
-_bsubr(jit_state_t *_jit, int32_t cc,
-       jit_word_t i0, int32_t r0, int32_t r1)
+static void
+STURH(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    jit_word_t         w;
-    subcr(r0, r0, r1);
-    w = _jit->pc.w;
-    B_C(cc, (i0 - w) >> 2);
-    return (w);
+  return oxx9(_jit, A64_STURH,Rt,Rn,Imm9);
 }
 
-static jit_word_t
-_bsubi(jit_state_t *_jit, int32_t cc,
-       jit_word_t i0, int32_t r0, jit_word_t i1)
+static void
+STRW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    jit_word_t         w;
-    subci(r0, r0, i1);
-    w = _jit->pc.w;
-    B_C(cc, (i0 - w) >> 2);
-    return (w);
+  return oxxx(_jit, A64_STRW,Rt,Rn,Rm);
 }
 
-static jit_word_t
-_bmxr(jit_state_t *_jit, int32_t cc,
-      jit_word_t i0, int32_t r0, int32_t r1)
+static void
+STRWI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    jit_word_t         w;
-    TST(r0, r1);
-    w = _jit->pc.w;
-    B_C(cc, (i0 - w) >> 2);
-    return (w);
+  return oxxi(_jit, A64_STRWI,Rt,Rn,Imm12);
 }
 
-static jit_word_t
-_bmxi(jit_state_t *_jit, int32_t cc,
-      jit_word_t i0, int32_t r0, jit_word_t i1)
+static void
+STURW(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    jit_word_t         w;
-    int32_t            reg;
-    int32_t            imm;
-    imm = logical_immediate(i1);
-    if (imm != -1)
-       TSTI(r0, imm);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       TST(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
-    w = _jit->pc.w;
-    B_C(cc, (i0 - w) >> 2);
-    return (w);
+  return oxx9(_jit, A64_STURW,Rt,Rn,Imm9);
 }
 
 static void
-_jmpi(jit_state_t *_jit, jit_word_t i0)
+STR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Rm) 
 {
-    jit_word_t         w;
-    int32_t            reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (w >= -33554432 && w <= 33554431)
-       B(w);
-    else {
-       reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-       movi(rn(reg), i0);
-       jmpr(rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxx(_jit, A64_STR,Rt,Rn,Rm);
 }
 
-static jit_word_t
-_jmpi_p(jit_state_t *_jit, jit_word_t i0)
+static void
+STRI(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm12) 
 {
-    jit_word_t         w;
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr|jit_class_nospill);
-    w = movi_p(rn(reg), i0);
-    jmpr(rn(reg));
-    jit_unget_reg(reg);
-    return (w);
+  return oxxi(_jit, A64_STRI,Rt,Rn,Imm12);
 }
 
 static void
-_calli(jit_state_t *_jit, jit_word_t i0)
+STUR(jit_state_t *_jit, int32_t Rt, int32_t Rn, int32_t Imm9) 
 {
-    jit_word_t         w;
-    int32_t            reg;
-    w = (i0 - _jit->pc.w) >> 2;
-    if (w >= -33554432 && w <= 33554431)
-       BL(w);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), i0);
-       callr(rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxx9(_jit, A64_STUR,Rt,Rn,Imm9);
 }
 
-static jit_word_t
-_calli_p(jit_state_t *_jit, jit_word_t i0)
+static void
+LDPI(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
 {
-    jit_word_t         w;
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    w = movi_p(rn(reg), i0);
-    callr(rn(reg));
-    jit_unget_reg(reg);
-    return (w);
+  return oxxx7(_jit, A64_LDP|XS,Rt,Rt2,Rn,Simm7);
 }
 
-/*
- * prolog and epilog not as "optimized" as one would like, but the
- * problem of overallocating stack space to save callee save registers
- * exists on all ports, and is still a todo to use a variable
- *     stack_framesize
- * value, what would cause needing to patch some calls, most likely
- * the offset of jit_arg* of stack arguments.
- */
 static void
-_prolog(jit_state_t *_jit, jit_node_t *node)
+STPI(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
 {
-    int32_t            reg;
-    if (_jitc->function->define_frame || _jitc->function->assume_frame) {
-       int32_t frame = -_jitc->function->frame;
-       assert(_jitc->function->self.aoff >= frame);
-       if (_jitc->function->assume_frame)
-           return;
-       _jitc->function->self.aoff = frame;
-    }
-    if (_jitc->function->allocar)
-       _jitc->function->self.aoff &= -16;
-    _jitc->function->stack = ((_jitc->function->self.alen -
-                             /* align stack at 16 bytes */
-                             _jitc->function->self.aoff) + 15) & -16;
-    STPI_POS(FP_REGNO, LR_REGNO, SP_REGNO, -(stack_framesize >> 3));
-    MOV_XSP(FP_REGNO, SP_REGNO);
-#define SPILL(L, R, O)                                                 \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
-           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
-               STPI(L, R, SP_REGNO, O);                                \
-           else                                                        \
-               STRI(L, SP_REGNO, O);                                   \
-       }                                                               \
-       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
-           STRI(R, SP_REGNO, O + 1);                                   \
-    } while (0)
-    SPILL(19, 20,  2);
-    SPILL(21, 22,  4);
-    SPILL(23, 24,  6);
-    SPILL(25, 26,  8);
-    SPILL(27, 28, 10);
-#undef SPILL
-#define SPILL(R, O)                                                    \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
-               stxi_d(O, SP_REGNO, R);                                 \
-    } while (0)
-    SPILL( 8,  96);
-    SPILL( 9, 104);
-    SPILL(10, 112);
-    SPILL(11, 120);
-    SPILL(12, 128);
-    SPILL(13, 136);
-    SPILL(14, 144);
-    SPILL(15, 152);
-#undef SPILL
-    if (_jitc->function->stack)
-       subi(SP_REGNO, SP_REGNO, _jitc->function->stack);
-    if (_jitc->function->allocar) {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), _jitc->function->self.aoff);
-       stxi_i(_jitc->function->aoffoff, FP_REGNO, rn(reg));
-       jit_unget_reg(reg);
-    }
+  return oxxx7(_jit, A64_STP|XS,Rt,Rt2,Rn,Simm7);
+}
 
-    if (_jitc->function->self.call & jit_call_varargs) {
-       /* Save gp registers in the save area, if any is a vararg */
-       for (reg = 8 - _jitc->function->vagp / -8;
-            jit_arg_reg_p(reg); ++reg)
-           stxi(_jitc->function->vaoff + offsetof(jit_va_list_t, x0) +
-                reg * 8, FP_REGNO, rn(JIT_RA0 - reg));
-
-       for (reg = 8 - _jitc->function->vafp / -16;
-            jit_arg_f_reg_p(reg); ++reg)
-           /* Save fp registers in the save area, if any is a vararg */
-           /* Note that the full 16 byte register is not saved, because
-            * lightning only handles float and double, and, while
-            * attempting to provide a va_list compatible pointer as
-            * jit_va_start return, does not guarantee it (on all ports). */
-           stxi_d(_jitc->function->vaoff + offsetof(jit_va_list_t, q0) +
-                  reg * 16 + offsetof(jit_qreg_t, l), FP_REGNO, rn(_V0 - reg));
-    }
+static void
+LDPI_PRE(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
+{
+  return oxxx7(_jit, A64_LDP_PRE|XS,Rt,Rt2,Rn,Simm7);
 }
 
 static void
-_epilog(jit_state_t *_jit, jit_node_t *node)
+STPI_POS(jit_state_t *_jit, int32_t Rt, int32_t Rt2, int32_t Rn, int32_t Simm7) 
 {
-    if (_jitc->function->assume_frame)
-       return;
-    if (_jitc->function->stack)
-       MOV_XSP(SP_REGNO, FP_REGNO);
-#define LOAD(L, R, O)                                                  \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _R##L)) {       \
-           if (jit_regset_tstbit(&_jitc->function->regset, _R##R))     \
-               LDPI(L, R, SP_REGNO, O);                                \
-           else                                                        \
-               LDRI(L, SP_REGNO, O);                                   \
-       }                                                               \
-       else if (jit_regset_tstbit(&_jitc->function->regset, _R##R))    \
-           LDRI(R, SP_REGNO, O + 1);                                   \
-    } while (0)
-    LOAD(19, 20,  2);
-    LOAD(21, 22,  4);
-    LOAD(23, 24,  6);
-    LOAD(25, 26,  8);
-    LOAD(27, 28, 10);
-#undef LOAD
-#define LOAD(R, O)                                                     \
-    do {                                                               \
-       if (jit_regset_tstbit(&_jitc->function->regset, _V##R))         \
-               ldxi_d(R, SP_REGNO, O);                                 \
-    } while (0)
-    LOAD( 8,  96);
-    LOAD( 9, 104);
-    LOAD(10, 112);
-    LOAD(11, 120);
-    LOAD(12, 128);
-    LOAD(13, 136);
-    LOAD(14, 144);
-    LOAD(15, 152);
-#undef LOAD
-    LDPI_PRE(FP_REGNO, LR_REGNO, SP_REGNO, stack_framesize >> 3);
-    RET();
+  return oxxx7(_jit, A64_STP_POS|XS,Rt,Rt2,Rn,Simm7);
 }
 
 static void
-_vastart(jit_state_t *_jit, int32_t r0)
+CSET(jit_state_t *_jit, int32_t Rd, int32_t Cc) 
+{
+  return CSINC(_jit, Rd,XZR_REGNO,XZR_REGNO,Cc);
+}
+
+static jit_reloc_t
+B(jit_state_t *_jit)
+{
+  return emit_jmp(_jit, encode_o26(_jit, A64_B));
+}
+
+static jit_reloc_t
+BL(jit_state_t *_jit)
 {
-    int32_t            reg;
+  return emit_jmp(_jit, encode_o26(_jit, A64_BL));
+}
 
-    assert(_jitc->function->self.call & jit_call_varargs);
+static void
+BR(jit_state_t *_jit, int32_t Rn) 
+{
+  return o_x_(_jit, A64_BR,Rn);
+}
 
-    /* Return jit_va_list_t in the register argument */
-    addi(r0, FP_REGNO, _jitc->function->vaoff);
+static void
+BLR(jit_state_t *_jit, int32_t Rn)
+{
+  return o_x_(_jit, A64_BLR,Rn);
+}
 
-    reg = jit_get_reg(jit_class_gpr);
+static void
+RET(jit_state_t *_jit)
+{
+  return o_x_(_jit, A64_RET,LR_REGNO);
+}
 
-    /* Initialize stack pointer to the first stack argument. */
-    addi(rn(reg), FP_REGNO, _jitc->function->self.size);
-    stxi(offsetof(jit_va_list_t, stack), r0, rn(reg));
+static jit_reloc_t
+B_C(jit_state_t *_jit, int32_t Cc) 
+{
+  return emit_jcc(_jit, encode_oc19(_jit, A64_B_C, Cc));
+}
 
-    /* Initialize gp top pointer to the first stack argument. */
-    addi(rn(reg), r0, va_gp_top_offset);
-    stxi(offsetof(jit_va_list_t, gptop), r0, rn(reg));
+static jit_reloc_t
+CBZ(jit_state_t *_jit, int32_t Rd) 
+{
+  return emit_jcc(_jit, encode_ox19(_jit, A64_CBZ|XS,Rd));
+}
 
-    /* Initialize fp top pointer to the first stack argument. */
-    addi(rn(reg), r0, va_fp_top_offset);
-    stxi(offsetof(jit_va_list_t, fptop), r0, rn(reg));
+static jit_reloc_t
+CBNZ(jit_state_t *_jit, int32_t Rd) 
+{
+  return emit_jcc(_jit, encode_ox19(_jit, A64_CBNZ|XS,Rd));
+}
 
-    /* Initialize gp offset in the save area. */
-    movi(rn(reg), _jitc->function->vagp);
-    stxi_i(offsetof(jit_va_list_t, gpoff), r0, rn(reg));
+static void
+NOP(jit_state_t *_jit)
+{
+  return emit_u32(_jit, 0xd503201f);
+}
 
-    /* Initialize fp offset in the save area. */
-    movi(rn(reg), _jitc->function->vafp);
-    stxi_i(offsetof(jit_va_list_t, fpoff), r0, rn(reg));
+static jit_reloc_t
+movi_from_pool(jit_state_t *_jit, int32_t Rt)
+{
+  return emit_load_from_pool(_jit, encode_ox19(_jit, A64_LDRI_LITERAL, Rt));
+}
 
-    jit_unget_reg(reg);
+static void
+movr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 != r1)
+    MOV(_jit, r0, r1);
 }
 
 static void
-_vaarg(jit_state_t *_jit, int32_t r0, int32_t r1)
+addr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    jit_word_t         ge_code;
-    jit_word_t         lt_code;
-    int32_t            rg0, rg1;
+  return ADD(_jit,r0,r1,r2);
+}
 
-    assert(_jitc->function->self.call & jit_call_varargs);
+static void
+addcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return ADDS(_jit,r0,r1,r2);
+}
 
-    rg0 = jit_get_reg(jit_class_gpr);
-    rg1 = jit_get_reg(jit_class_gpr);
+static void
+addxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return ADCS(_jit,r0,r1,r2);
+}
 
-    /* Load the gp offset in save area in the first temporary. */
-    ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, gpoff));
+static void
+subr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return SUB(_jit,r0,r1,r2);
+}
 
-    /* Jump over if there are no remaining arguments in the save area. */
-    ge_code = bgei(_jit->pc.w, rn(rg0), 0);
+static void
+subcr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return SUBS(_jit,r0,r1,r2);
+}
 
-    /* Load the gp save pointer in the second temporary. */
-    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, gptop));
+static void
+subxr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return SBCS(_jit,r0,r1,r2);
+}
 
-    /* Load the vararg argument in the first argument. */
-    ldxr(r0, rn(rg1), rn(rg0));
+static void
+mulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return MUL(_jit,r0,r1,r2);
+}
 
-    /* Update the gp offset. */
-    addi(rn(rg0), rn(rg0), 8);
-    stxi_i(offsetof(jit_va_list_t, gpoff), r1, rn(rg0));
+static void
+divr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return SDIV(_jit,r0,r1,r2);
+}
 
-    /* Will only need one temporary register below. */
-    jit_unget_reg(rg1);
+static void
+divr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return UDIV(_jit,r0,r1,r2);
+}
 
-    /* Jump over overflow code. */
-    lt_code = jmpi_p(_jit->pc.w);
+static void
+iqdivr(jit_state_t *_jit, jit_bool_t sign,
+       int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  int32_t rg0, rg1;
+  if (r0 == r2 || r0 == r3) {
+    rg0 = jit_gpr_regno(get_temp_gpr(_jit));
+  } else {
+    rg0 = r0;
+  }
+  if (r1 == r2 || r1 == r3) {
+    rg1 = jit_gpr_regno(get_temp_gpr(_jit));
+  } else {
+    rg1 = r1;
+  }
+  if (sign)
+    divr(_jit, rg0, r2, r3);
+  else
+    divr_u(_jit, rg0, r2, r3);
+  mulr(_jit, rg1, r3, rg0);
+  subr(_jit, rg1, r2, rg1);
+  if (rg0 != r0) {
+    movr(_jit, r0, rg0);
+    unget_temp_gpr(_jit);
+  }
+  if (rg1 != r1) {
+    movr(_jit, r1, rg1);
+    unget_temp_gpr(_jit);
+  }
+}
 
-    /* Where to land if argument is in overflow area. */
-    patch_at(ge_code, _jit->pc.w);
+static void
+qdivr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  return iqdivr(_jit,1,r0,r1,r2,r3);
+}
 
-    /* Load stack pointer. */
-    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, stack));
+static void
+qdivr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  return iqdivr(_jit,0,r0,r1,r2,r3);
+}
 
-    /* Load argument. */
-    ldr(r0, rn(rg0));
+static void
+lshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return LSL(_jit,r0,r1,r2);
+}
 
-    /* Update stack pointer. */
-    addi(rn(rg0), rn(rg0), 8);
-    stxi(offsetof(jit_va_list_t, stack), r1, rn(rg0));
+static void
+rshr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return ASR(_jit,r0,r1,r2);
+}
 
-    /* Where to land if argument is in gp save area. */
-    patch_at(lt_code, _jit->pc.w);
+static void
+rshr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return LSR(_jit,r0,r1,r2);
+}
 
-    jit_unget_reg(rg0);
+static void
+negr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return NEG(_jit,r0,r1);
 }
 
 static void
-_patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
+comr(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    instr_t             i;
-    jit_word_t          d;
-    int32_t             fc, ff, ffc;
-    union {
-       int32_t *i;
-       jit_word_t       w;
-    } u;
-    u.w = instr;
-    i.w = u.i[0];
-    fc  = i.w & 0xfc000000;
-    ff  = i.w & 0xff000000;
-    ffc = i.w & 0xffc00000;
-    if (fc == A64_B || fc == A64_BL) {
-       d = (label - instr) >> 2;
-       assert(d >= -33554432 && d <= 33554431);
-       i.imm26.b = d;
-       u.i[0] = i.w;
-    }
-    else if (ff == A64_B_C || ff == (A64_CBZ|XS) || ff == (A64_CBNZ|XS)) {
-       d = (label - instr) >> 2;
-       assert(d >= -262148 && d <= 262143);
-       i.imm19.b = d;
-       u.i[0] = i.w;
-    }
-    else if (ffc == (A64_MOVZ|XS)) {
-       i.imm16.b = label;
-       u.i[0] = i.w;
-       i.w = u.i[1];
-       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_16));
-       i.imm16.b = label >> 16;
-       u.i[1] = i.w;
-       i.w = u.i[2];
-       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_32));
-       i.imm16.b = label >> 32;
-       u.i[2] = i.w;
-       i.w = u.i[3];
-       assert((i.w & 0xffe00000) == (A64_MOVK|XS|MOVI_LSL_48));
-       i.imm16.b = label >> 48;
-       u.i[3] = i.w;
-    }
-    else
-       abort();
+  return MVN(_jit,r0,r1);
+}
+
+/* andr: forwards (r0, r1, r2), unchanged and in order, to the AND emitter. */
+static void
+andr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  AND(_jit, r0, r1, r2);
+}
+
+/* orr: forwards (r0, r1, r2), unchanged and in order, to the ORR emitter. */
+static void
+orr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ORR(_jit, r0, r1, r2);
+}
+
+/* xorr: forwards (r0, r1, r2), unchanged and in order, to the EOR emitter. */
+static void
+xorr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  EOR(_jit, r0, r1, r2);
+}
+
+/* ldr_c: calls the LDRSBI emitter on (r0, r1) with an immediate of 0. */
+static void
+ldr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  LDRSBI(_jit, r0, r1, 0);
+}
+
+/* ldr_s: calls the LDRSHI emitter on (r0, r1) with an immediate of 0. */
+static void
+ldr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  LDRSHI(_jit, r0, r1, 0);
+}
+
+/* ldr_i: calls the LDRSWI emitter on (r0, r1) with an immediate of 0. */
+static void
+ldr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  LDRSWI(_jit, r0, r1, 0);
+}
+
+/* ldxr_s: forwards (r0, r1, r2), unchanged and in order, to LDRSH. */
+static void
+ldxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDRSH(_jit, r0, r1, r2);
+}
+
+/* ldxr_i: forwards (r0, r1, r2), unchanged and in order, to LDRSW. */
+static void
+ldxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDRSW(_jit, r0, r1, r2);
+}
+
+/*
+ * ldxr_l: forwards to LDR, whose parameters are (Rt, Rn, Rm):
+ * r0 is passed as Rt, r1 as Rn and r2 as Rm.
+ */
+static void
+ldxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDR(_jit, r0, r1, r2);
+}
+
+/*
+ * str_c: calls STRBI with a zero immediate.  STRBI takes (Rt, Rn, Imm12),
+ * so the operands are swapped on the way in: r1 becomes Rt, r0 becomes Rn.
+ */
+static void
+str_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t Rt = r1;
+  const int32_t Rn = r0;
+  STRBI(_jit, Rt, Rn, 0);
+}
+
+/*
+ * str_s: calls STRHI with a zero immediate.  STRHI takes (Rt, Rn, Imm12),
+ * so the operands are swapped on the way in: r1 becomes Rt, r0 becomes Rn.
+ */
+static void
+str_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t Rt = r1;
+  const int32_t Rn = r0;
+  STRHI(_jit, Rt, Rn, 0);
+}
+
+/*
+ * str_i: calls STRWI with a zero immediate.  STRWI takes (Rt, Rn, Imm12),
+ * so the operands are swapped on the way in: r1 becomes Rt, r0 becomes Rn.
+ */
+static void
+str_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t Rt = r1;
+  const int32_t Rn = r0;
+  STRWI(_jit, Rt, Rn, 0);
+}
+
+/*
+ * str_l: calls STRI with a zero immediate.  STRI takes (Rt, Rn, Imm12),
+ * so the operands are swapped on the way in: r1 becomes Rt, r0 becomes Rn.
+ */
+static void
+str_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const int32_t Rt = r1;
+  const int32_t Rn = r0;
+  STRI(_jit, Rt, Rn, 0);
+}
+
+/*
+ * stxr_c: calls STRB, which takes (Rt, Rn, Rm).  The arguments are passed
+ * in reverse: r2 becomes Rt, r1 becomes Rn, r0 becomes Rm.
+ */
+static void
+stxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t Rt = r2;
+  const int32_t Rn = r1;
+  const int32_t Rm = r0;
+  STRB(_jit, Rt, Rn, Rm);
+}
+
+/*
+ * stxr_s: calls STRH, which takes (Rt, Rn, Rm).  The arguments are passed
+ * in reverse: r2 becomes Rt, r1 becomes Rn, r0 becomes Rm.
+ */
+static void
+stxr_s(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t Rt = r2;
+  const int32_t Rn = r1;
+  const int32_t Rm = r0;
+  STRH(_jit, Rt, Rn, Rm);
+}
+
+/*
+ * stxr_i: calls STRW, which takes (Rt, Rn, Rm).  The arguments are passed
+ * in reverse: r2 becomes Rt, r1 becomes Rn, r0 becomes Rm.
+ */
+static void
+stxr_i(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t Rt = r2;
+  const int32_t Rn = r1;
+  const int32_t Rm = r0;
+  STRW(_jit, Rt, Rn, Rm);
+}
+
+/*
+ * stxr_l: calls STR, which takes (Rt, Rn, Rm).  The arguments are passed
+ * in reverse: r2 becomes Rt, r1 becomes Rn, r0 becomes Rm.
+ */
+static void
+stxr_l(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  const int32_t Rt = r2;
+  const int32_t Rn = r1;
+  const int32_t Rm = r0;
+  STR(_jit, Rt, Rn, Rm);
+}
+
+/* bswapr_ul: forwards (r0, r1), unchanged and in order, to the REV emitter. */
+static void
+bswapr_ul(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  REV(_jit, r0, r1);
+}
+
+/* extr_c: forwards (r0, r1), unchanged and in order, to the SXTB emitter. */
+static void
+extr_c(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  SXTB(_jit, r0, r1);
+}
+
+/* extr_uc: forwards (r0, r1), unchanged and in order, to the UXTB emitter. */
+static void
+extr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  UXTB(_jit, r0, r1);
+}
+
+/* extr_s: forwards (r0, r1), unchanged and in order, to the SXTH emitter. */
+static void
+extr_s(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  SXTH(_jit, r0, r1);
+}
+
+/* extr_us: forwards (r0, r1), unchanged and in order, to the UXTH emitter. */
+static void
+extr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  UXTH(_jit, r0, r1);
+}
+
+/* extr_i: forwards (r0, r1), unchanged and in order, to the SXTW emitter. */
+static void
+extr_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  SXTW(_jit, r0, r1);
+}
+
+/* extr_ui: forwards (r0, r1), unchanged and in order, to the UXTW emitter. */
+static void
+extr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  UXTW(_jit, r0, r1);
+}
+
+/*
+ * movi: emit code that loads the immediate i0 into register r0.
+ *
+ * i0 is treated as four 16-bit chunks.  Bit k of `ibit` is set when chunk k
+ * of i0 is nonzero; bit k of `nbit` is set when chunk k of ~i0 is nonzero.
+ * The switch on `ibit` emits one move-wide call per nonzero chunk of i0
+ * (a MOVZ* for the first, MOVK_* for each further one).  Where ~i0 has a
+ * single nonzero chunk, one MOVN* call on that chunk of ~i0 is emitted
+ * instead.
+ *
+ * NOTE(review): the MOVZ/MOVK/MOVN emitters are defined outside this view;
+ * reading them as "zero the rest / keep the rest / write the complement"
+ * follows the A64 move-wide instructions they are named after -- confirm.
+ */
+static void
+movi(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_word_t n0 = ~i0, ibit = 0, nbit = 0;
+  if (i0 & 0x000000000000ffffL)       ibit |= 1;
+  if (i0 & 0x00000000ffff0000L)       ibit |= 2;
+  if (i0 & 0x0000ffff00000000L)       ibit |= 4;
+  if (i0 & 0xffff000000000000L)       ibit |= 8;
+  if (n0 & 0x000000000000ffffL)       nbit |= 1;
+  if (n0 & 0x00000000ffff0000L)       nbit |= 2;
+  if (n0 & 0x0000ffff00000000L)       nbit |= 4;
+  if (n0 & 0xffff000000000000L)       nbit |= 8;
+  switch (ibit) {
+  case 0:
+    MOVZ   (_jit, r0,  0);
+    break;
+  case 1:
+    MOVZ   (_jit, r0,  i0        & 0xffff);
+    break;
+  case 2:
+    MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff);
+    break;
+  case 3:
+    MOVZ   (_jit, r0,  i0        & 0xffff);
+    MOVK_16(_jit, r0, (i0 >> 16) & 0xffff);
+    break;
+  case 4:
+    MOVZ_32(_jit, r0, (i0 >> 32) & 0xffff);
+    break;
+  case 5:
+    MOVZ   (_jit, r0,  i0        & 0xffff);
+    MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+    break;
+  case 6:
+    MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff);
+    MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+    break;
+  case 7:
+    /* Chunk 3 of i0 is zero; nbit == 8 means the low three chunks of ~i0
+     * are all zero, so a single MOVN on chunk 3 is enough.  */
+    if (nbit == 8) {
+      MOVN_48(_jit, r0, (n0 >> 48) & 0xffff);
+    } else {
+      MOVZ   (_jit, r0,  i0        & 0xffff);
+      MOVK_16(_jit, r0, (i0 >> 16) & 0xffff);
+      MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+    }
+    break;
+  case 8:
+    MOVZ_48(_jit, r0, (i0 >> 48) & 0xffff);
+    break;
+  case 9:
+    MOVZ   (_jit, r0,  i0        & 0xffff);
+    MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    break;
+  case 10:
+    MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff);
+    MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    break;
+  case 11:
+    if (nbit == 4) {
+      MOVN_32(_jit, r0, (n0 >> 32) & 0xffff);
+    } else {
+      MOVZ   (_jit, r0,  i0        & 0xffff);
+      MOVK_16(_jit, r0, (i0 >> 16) & 0xffff);
+      MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    }
+    break;
+  case 12:
+    MOVZ_32(_jit, r0, (i0 >> 32) & 0xffff);
+    MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    break;
+  case 13:
+    if (nbit == 2) {
+      MOVN_16(_jit, r0, (n0 >> 16) & 0xffff);
+    } else {
+      MOVZ   (_jit, r0,  i0        & 0xffff);
+      MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+      MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    }
+    break;
+  case 14:
+    if (nbit == 1) {
+      MOVN   (_jit, r0, (n0)       & 0xffff);
+    } else {
+      MOVZ_16(_jit, r0, (i0 >> 16) & 0xffff);
+      MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+      MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    }
+    break;
+  case 15:
+    /* Every chunk of i0 is nonzero.  When ~i0 has at most one nonzero
+     * chunk, one MOVN on that chunk suffices, whichever position it is
+     * in; only otherwise are four instructions needed.  */
+    if (nbit == 0) {
+      MOVN   (_jit, r0,  0);
+    } else if (nbit == 1) {
+      MOVN   (_jit, r0,  n0        & 0xffff);
+    } else if (nbit == 2) {
+      MOVN_16(_jit, r0, (n0 >> 16) & 0xffff);
+    } else if (nbit == 4) {
+      MOVN_32(_jit, r0, (n0 >> 32) & 0xffff);
+    } else if (nbit == 8) {
+      MOVN_48(_jit, r0, (n0 >> 48) & 0xffff);
+    } else {
+      MOVZ   (_jit, r0,  i0        & 0xffff);
+      MOVK_16(_jit, r0, (i0 >> 16) & 0xffff);
+      MOVK_32(_jit, r0, (i0 >> 32) & 0xffff);
+      MOVK_48(_jit, r0, (i0 >> 48) & 0xffff);
+    }
+    break;
+  default:
+    abort();
+  }
+}
+
+/*
+ * ccr: emit CMP on (r1, r2), then CSET on r0 with condition cc.
+ * The order matters: CSET is emitted directly after the CMP.
+ */
+static void
+ccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1, int32_t r2)
+{
+  CMP(_jit, r1, r2);
+  CSET(_jit, r0, cc);
+}
+
+/*
+ * cci: emit a comparison of r1 against the immediate i0, then CSET on r0
+ * with condition cc.
+ *
+ * The comparison uses the first form whose guard accepts i0:
+ *   - CMPI    when i0 is in [0, 0xfff];
+ *   - CMPI_12 when i0 is a multiple of 4096 whose quotient is in [0, 0xfff];
+ *   - CMNI    when -i0 is in [0, 0xfff];
+ *   - CMNI_12 when -i0 is a multiple of 4096 whose quotient is in [0, 0xfff];
+ *   - otherwise i0 is loaded into a temporary register with movi and a
+ *     register CMP is emitted; the temporary is released afterwards.
+ */
+static void
+cci(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t          is =  i0 >> 12;
+  jit_word_t          in = -i0;
+  jit_word_t          iS =  in >> 12;
+  if (      i0 >= 0 && i0 <= 0xfff) {
+    CMPI   (_jit, r1, i0);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i0) {
+    /* Range test first, so a negative value is never left-shifted. */
+    CMPI_12(_jit, r1, is);
+  } else if ( in >= 0 && in <= 0xfff) {
+    CMNI   (_jit, r1, in);
+  } else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in) {
+    /* iS is the shifted form of the negated value `in`, so the round-trip
+     * check must be against `in`; comparing with `is` can never match.  */
+    CMNI_12(_jit, r1, iS);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    CMP(_jit, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+  CSET(_jit, r0, cc);
+}
+
+/* ltr: forwards (r0, r1, r2) to ccr with condition CC_LT. */
+static void
+ltr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ccr(_jit, CC_LT, r0, r1, r2);
+}
+
+/*
+ * lti: forwards (r0, r1, i0) to cci with condition CC_LT.
+ * i0 is jit_word_t, matching cci's immediate parameter, so an immediate
+ * wider than 32 bits is not truncated before cci chooses an encoding.
+ */
+static void
+lti(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  cci(_jit, CC_LT, r0, r1, i0);
+}
+
+/* ltr_u: forwards (r0, r1, r2) to ccr with condition CC_CC. */
+static void
+ltr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ccr(_jit, CC_CC, r0, r1, r2);
+}
+
+/*
+ * lti_u: forwards (r0, r1, i0) to cci with condition CC_CC.
+ * i0 is jit_word_t, matching cci's immediate parameter, so an immediate
+ * wider than 32 bits is not truncated before cci chooses an encoding.
+ */
+static void
+lti_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  cci(_jit, CC_CC, r0, r1, i0);
+}
+
+/* ler: forwards (r0, r1, r2) to ccr with condition CC_LE. */
+static void
+ler(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ccr(_jit, CC_LE, r0, r1, r2);
+}
+
+/*
+ * lei: forwards (r0, r1, i0) to cci with condition CC_LE.
+ * i0 is jit_word_t, matching cci's immediate parameter, so an immediate
+ * wider than 32 bits is not truncated before cci chooses an encoding.
+ */
+static void
+lei(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  cci(_jit, CC_LE, r0, r1, i0);
+}
+
+/* ler_u: forwards (r0, r1, r2) to ccr with condition CC_LS. */
+static void
+ler_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ccr(_jit, CC_LS, r0, r1, r2);
+}
+
+/*
+ * lei_u: forwards (r0, r1, i0) to cci with condition CC_LS.
+ * i0 is jit_word_t, matching cci's immediate parameter, so an immediate
+ * wider than 32 bits is not truncated before cci chooses an encoding.
+ */
+static void
+lei_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  cci(_jit, CC_LS, r0, r1, i0);
+}
+
+/* eqr: forwards (r0, r1, r2) to ccr with condition CC_EQ. */
+static void
+eqr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ccr(_jit, CC_EQ, r0, r1, r2);
+}
+
+/*
+ * eqi: forwards (r0, r1, i0) to cci with condition CC_EQ.
+ * i0 is jit_word_t, matching cci's immediate parameter, so an immediate
+ * wider than 32 bits is not truncated before cci chooses an encoding.
+ */
+static void
+eqi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  cci(_jit, CC_EQ, r0, r1, i0);
+}
+
+/* ger: forwards (r0, r1, r2) to ccr with condition CC_GE. */
+static void
+ger(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  ccr(_jit, CC_GE, r0, r1, r2);
+}
+
+/*
+ * gei: forwards (r0, r1, i0) to cci with condition CC_GE.
+ * i0 is jit_word_t, matching cci's immediate parameter, so an immediate
+ * wider than 32 bits is not truncated before cci chooses an encoding.
+ */
+static void
+gei(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  cci(_jit, CC_GE, r0, r1, i0);
+}
+
+static void
+ger_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return ccr(_jit,CC_CS,r0,r1,r2);
+}
+
+static void
+gei_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  return cci(_jit,CC_CS,r0,r1,i0);
+}
+
+static void
+gtr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return ccr(_jit,CC_GT,r0,r1,r2);
+}
+
+static void
+gti(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  return cci(_jit,CC_GT,r0,r1,i0);
+}
+
+static void
+gtr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return ccr(_jit,CC_HI,r0,r1,r2);
+}
+
+static void
+gti_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  return cci(_jit,CC_HI,r0,r1,i0);
+}
+
+static void
+ner(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  return ccr(_jit,CC_NE,r0,r1,r2);
+}
+
+static void
+nei(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t i0)
+{
+  return cci(_jit,CC_NE,r0,r1,i0);
+}
+
+/*
+ * Emit a compare of r0 against r1 followed by a conditional branch on
+ * `cc`.  Returns the relocation produced by the branch emitter.
+ */
+static jit_reloc_t
+bccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  CMP(_jit, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/*
+ * Emit a compare of r0 against the immediate i1 followed by a
+ * conditional branch on `cc`; returns the branch relocation.
+ *
+ * The compare uses the cheapest form available:
+ *   - CMPI     when i1 fits in 12 bits,
+ *   - CMPI_12  when i1 is a 12-bit value shifted left by 12,
+ *   - CMNI     when -i1 fits in 12 bits,
+ *   - CMNI_12  when -i1 is a 12-bit value shifted left by 12,
+ *   - otherwise i1 is loaded into a temporary register and CMP is used.
+ */
+static jit_reloc_t
+bcci(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1)
+{
+  jit_word_t          is =  i1 >> 12;
+  jit_word_t          in = -i1;
+  jit_word_t          iS =  in >> 12;
+  /* Range checks come before the shifts so that a negative value is
+     never shifted left. */
+  if (      i1 >= 0 && i1 <= 0xfff) {
+    CMPI   (_jit, r0, i1);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i1) {
+    CMPI_12(_jit, r0, is);
+  } else if ( in >= 0 && in <= 0xfff) {
+    CMNI   (_jit, r0, in);
+  } else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in) {
+    /* iS is derived from `in`, so it must round-trip against `in`;
+       testing it against `is` made this branch unreachable. */
+    CMNI_12(_jit, r0, iS);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i1);
+    CMP(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+  return B_C(_jit, cc);
+}
+
+/*
+ * Compare-and-branch wrappers.  Each one forwards to bccr() (register
+ * operand) or bcci() (immediate operand) with the BCC_* condition that
+ * matches its name and returns the relocation of the emitted branch.
+ *
+ * Immediates are declared as jit_word_t, the type bcci() takes, so
+ * that a value that does not fit in 32 bits is not truncated before
+ * the compare is emitted.
+ */
+static jit_reloc_t
+bltr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_LT, r0, r1);
+}
+
+static jit_reloc_t
+blti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bcci(_jit, BCC_LT, r0, i1);
+}
+
+static jit_reloc_t
+bltr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_CC, r0, r1);
+}
+
+static jit_reloc_t
+blti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bcci(_jit, BCC_CC, r0, i1);
+}
+
+static jit_reloc_t
+bler(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_LE, r0, r1);
+}
+
+static jit_reloc_t
+blei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bcci(_jit, BCC_LE, r0, i1);
+}
+
+static jit_reloc_t
+bler_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_LS, r0, r1);
+}
+
+static jit_reloc_t
+blei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bcci(_jit, BCC_LS, r0, i1);
+}
+
+static jit_reloc_t
+beqr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_EQ, r0, r1);
+}
+
+static jit_reloc_t
+bger(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_GE, r0, r1);
+}
+
+static jit_reloc_t
+bgei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bcci(_jit, BCC_GE, r0, i1);
+}
+
+static jit_reloc_t
+bger_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_CS, r0, r1);
+}
+
+static jit_reloc_t
+bgei_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bcci(_jit, BCC_CS, r0, i1);
+}
+
+static jit_reloc_t
+bgtr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_GT, r0, r1);
+}
+
+static jit_reloc_t
+bgti(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bcci(_jit, BCC_GT, r0, i1);
+}
+
+static jit_reloc_t
+bgtr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_HI, r0, r1);
+}
+
+static jit_reloc_t
+bgti_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bcci(_jit, BCC_HI, r0, i1);
+}
+
+static jit_reloc_t
+bner(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bccr(_jit, BCC_NE, r0, r1);
+}
+
+/*
+ * r0 = r1 + i0.
+ *
+ * Uses an immediate form when one fits: ADDI / ADDI_12 for i0 itself
+ * (12 bits, optionally shifted left by 12) or SUBI / SUBI_12 for -i0.
+ * Any other value is loaded into a temporary register and added with
+ * addr().
+ */
+static void
+addi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t          is =  i0 >> 12;
+  jit_word_t          in = -i0;
+  jit_word_t          iS =  in >> 12;
+  /* Range checks come before the shifts so that a negative value is
+     never shifted left. */
+  if (      i0 >= 0 && i0 <= 0xfff) {
+    ADDI   (_jit, r0, r1, i0);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i0) {
+    ADDI_12(_jit, r0, r1, is);
+  } else if ( in >= 0 && in <= 0xfff) {
+    SUBI   (_jit, r0, r1, in);
+  } else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in) {
+    /* iS is derived from `in`, so it must round-trip against `in`;
+       testing it against `is` made this branch unreachable. */
+    SUBI_12(_jit, r0, r1, iS);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    addr(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * r0 = r1 + i0 using the ADDS/SUBS emitters; baddi() branches on the
+ * condition flags right after calling this.
+ *
+ * Uses an immediate form when one fits: ADDSI / ADDSI_12 for i0 itself
+ * (12 bits, optionally shifted left by 12) or SUBSI / SUBSI_12 for
+ * -i0.  Any other value is loaded into a temporary register and added
+ * with addcr().
+ */
+static void
+addci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t          is =  i0 >> 12;
+  jit_word_t          in = -i0;
+  jit_word_t          iS =  in >> 12;
+  /* Range checks come before the shifts so that a negative value is
+     never shifted left. */
+  if (      i0 >= 0 && i0 <= 0xfff) {
+    ADDSI   (_jit, r0, r1, i0);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i0) {
+    ADDSI_12(_jit, r0, r1, is);
+  } else if ( in >= 0 && in <= 0xfff) {
+    SUBSI   (_jit, r0, r1, in);
+  } else if (iS >= 0 && iS <= 0xfff && (iS << 12) == in) {
+    /* iS is derived from `in`, so it must round-trip against `in`;
+       testing it against `is` made this branch unreachable. */
+    SUBSI_12(_jit, r0, r1, iS);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    addcr(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Immediate form of addxr(): i0 is loaded into a temporary register,
+ * which is then passed as the second source operand.
+ */
+static void
+addxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  addxr(_jit, r0, r1, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * r0 = r1 - i0.
+ *
+ * SUBI / SUBI_12 are used when i0 is a 12-bit value, optionally
+ * shifted left by 12; any other value is loaded into a temporary
+ * register and subtracted with subr().
+ */
+static void
+subi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t          is = i0 >> 12;
+  if (      i0 >= 0 && i0 <= 0xfff) {
+    SUBI   (_jit, r0, r1, i0);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i0) {
+    /* Range is checked first so a negative `is` is never shifted
+       left. */
+    SUBI_12(_jit, r0, r1, is);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    subr(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * r0 = r1 - i0 using the SUBS emitters; bsubi() branches on the
+ * condition flags right after calling this.
+ *
+ * SUBSI / SUBSI_12 are used when i0 is a 12-bit value, optionally
+ * shifted left by 12; any other value is loaded into a temporary
+ * register and subtracted with subcr().
+ */
+static void
+subci(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_word_t          is = i0 >> 12;
+  if (      i0 >= 0 && i0 <= 0xfff) {
+    SUBSI   (_jit, r0, r1, i0);
+  } else if (is >= 0 && is <= 0xfff && (is << 12) == i0) {
+    /* Range is checked first so a negative `is` is never shifted
+       left. */
+    SUBSI_12(_jit, r0, r1, is);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    subcr(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Immediate form of subxr(): i0 is loaded into a temporary register,
+ * which is then passed as the second source operand.
+ */
+static void
+subxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  subxr(_jit, r0, r1, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Add r1 to r0 in place with addcr(), then emit a conditional branch
+ * on `cc`.  Returns the branch relocation.
+ */
+static jit_reloc_t
+baddr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  addcr(_jit, r0, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/*
+ * Add the immediate i1 to r0 in place with addci(), then emit a
+ * conditional branch on `cc`.  Returns the branch relocation.
+ */
+static jit_reloc_t
+baddi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch;
+
+  addci(_jit, r0, r0, i1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/*
+ * Add-and-branch wrappers around baddr() / baddi().  The "bo" forms
+ * pass BCC_VS (signed) or BCC_HS (unsigned); the "bx" forms pass the
+ * opposite conditions, BCC_VC and BCC_LO.  Each returns the relocation
+ * of the emitted branch.
+ *
+ * Immediates are declared as jit_word_t, the type baddi() takes, so
+ * that a value that does not fit in 32 bits is not truncated.
+ */
+static jit_reloc_t
+boaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return baddr(_jit, BCC_VS, r0, r1);
+}
+
+static jit_reloc_t
+boaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return baddi(_jit, BCC_VS, r0, i1);
+}
+
+static jit_reloc_t
+boaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return baddr(_jit, BCC_HS, r0, r1);
+}
+
+static jit_reloc_t
+boaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return baddi(_jit, BCC_HS, r0, i1);
+}
+
+static jit_reloc_t
+bxaddr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return baddr(_jit, BCC_VC, r0, r1);
+}
+
+static jit_reloc_t
+bxaddi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return baddi(_jit, BCC_VC, r0, i1);
+}
+
+static jit_reloc_t
+bxaddr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return baddr(_jit, BCC_LO, r0, r1);
+}
+
+static jit_reloc_t
+bxaddi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return baddi(_jit, BCC_LO, r0, i1);
+}
+
+/*
+ * Subtract r1 from r0 in place with subcr(), then emit a conditional
+ * branch on `cc`.  Returns the branch relocation.
+ */
+static jit_reloc_t
+bsubr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  subcr(_jit, r0, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/*
+ * Subtract the immediate i1 from r0 in place with subci(), then emit a
+ * conditional branch on `cc`.  Returns the branch relocation.
+ */
+static jit_reloc_t
+bsubi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1)
+{
+  jit_reloc_t branch;
+
+  subci(_jit, r0, r0, i1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/*
+ * Subtract-and-branch wrappers around bsubr() / bsubi().  The "bo"
+ * forms pass BCC_VS (signed) or BCC_LO (unsigned); the "bx" forms pass
+ * the opposite conditions, BCC_VC and BCC_HS.  Each returns the
+ * relocation of the emitted branch.
+ *
+ * Immediates are declared as jit_word_t, the type bsubi() takes, so
+ * that a value that does not fit in 32 bits is not truncated.
+ */
+static jit_reloc_t
+bosubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bsubr(_jit, BCC_VS, r0, r1);
+}
+
+static jit_reloc_t
+bosubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bsubi(_jit, BCC_VS, r0, i1);
+}
+
+static jit_reloc_t
+bosubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bsubr(_jit, BCC_LO, r0, r1);
+}
+
+static jit_reloc_t
+bosubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bsubi(_jit, BCC_LO, r0, i1);
+}
+
+static jit_reloc_t
+bxsubr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bsubr(_jit, BCC_VC, r0, r1);
+}
+
+static jit_reloc_t
+bxsubi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bsubi(_jit, BCC_VC, r0, i1);
+}
+
+static jit_reloc_t
+bxsubr_u(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bsubr(_jit, BCC_HS, r0, r1);
+}
+
+static jit_reloc_t
+bxsubi_u(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bsubi(_jit, BCC_HS, r0, i1);
+}
+
+/*
+ * Emit TST of r0 against r1 followed by a conditional branch on `cc`.
+ * Returns the branch relocation.
+ */
+static jit_reloc_t
+bmxr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  TST(_jit, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/*
+ * Emit a test of r0 against the mask i1 followed by a conditional
+ * branch on `cc`.  TSTI is used when logical_immediate() can encode the
+ * mask (it returns -1 when it cannot); otherwise the mask is loaded
+ * into a temporary register and TST is used.
+ */
+static jit_reloc_t
+bmxi(jit_state_t *_jit, int32_t cc, int32_t r0, jit_word_t i1)
+{
+  const int32_t encoded = logical_immediate(i1);
+
+  if (encoded == -1) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(scratch), i1);
+    TST(_jit, r0, jit_gpr_regno(scratch));
+    unget_temp_gpr(_jit);
+  } else {
+    TSTI(_jit, r0, encoded);
+  }
+  return B_C(_jit, cc);
+}
+
+/*
+ * Mask-test branches built on bmxr() / bmxi().  bms* pass BCC_NE and
+ * bmc* pass BCC_EQ as the condition applied after the test; each
+ * returns the relocation of the emitted branch.
+ *
+ * Immediates are declared as jit_word_t, the type bmxi() takes, so
+ * that mask bits above bit 31 are not lost.
+ */
+static jit_reloc_t
+bmsr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bmxr(_jit, BCC_NE, r0, r1);
+}
+
+static jit_reloc_t
+bmsi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bmxi(_jit, BCC_NE, r0, i1);
+}
+
+static jit_reloc_t
+bmcr(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return bmxr(_jit, BCC_EQ, r0, r1);
+}
+
+static jit_reloc_t
+bmci(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  return bmxi(_jit, BCC_EQ, r0, i1);
+}
+
+/*
+ * Emit BR through register r0.  Written as a plain call: ISO C does
+ * not allow "return expr;" in a function returning void.
+ */
+static void
+jmpr(jit_state_t *_jit, int32_t r0)
+{
+  BR(_jit, r0);
+}
+
+/* Emit BLR through register r0. */
+static void
+callr(jit_state_t *_jit, int32_t r0)
+{
+  BLR(_jit, r0);
+}
+
+/*
+ * Emit one NOP per 4 of i0.  The trailing assertion requires that the
+ * countdown ends exactly at zero, i.e. that i0 was a multiple of 4.
+ */
+static void
+nop(jit_state_t *_jit, int32_t i0)
+{
+  while (i0 > 0) {
+    NOP(_jit);
+    i0 -= 4;
+  }
+  ASSERT(i0 == 0);
+}
+
+/*
+ * Reverse subtract: r0 = i0 - r1, emitted as r0 = r1 - i0 followed by
+ * a negation of r0.
+ */
+static void
+rsbi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  /* r0 = r1 - i0 */
+  subi(_jit, r0, r1, i0);
+
+  /* r0 = -r0 */
+  negr(_jit, r0, r0);
+}
+
+/*
+ * Immediate form of mulr(): i0 is loaded into a temporary register,
+ * which is then passed as the second source operand.
+ */
+static void
+muli(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  mulr(_jit, r0, r1, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Multiply r2 by r3: r0 receives the mulr() result and r1 the SMULH
+ * result.  When r0 is also one of the inputs, the mulr() result is
+ * held in a temporary register until SMULH has read both inputs.
+ */
+static void
+qmulr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  if (r0 != r2 && r0 != r3) {
+    mulr(_jit, r0, r2, r3);
+    SMULH(_jit, r1, r2, r3);
+  } else {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    mulr(_jit, jit_gpr_regno(scratch), r2, r3);
+    SMULH(_jit, r1, r2, r3);
+    movr(_jit, r0, jit_gpr_regno(scratch));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Immediate form of qmulr(): i0 is loaded into a temporary register,
+ * which is then passed as the second multiplicand.
+ */
+static void
+qmuli(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  qmulr(_jit, r0, r1, r2, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Multiply r2 by r3: r0 receives the mulr() result and r1 the UMULH
+ * result.  When r0 is also one of the inputs, the mulr() result is
+ * held in a temporary register until UMULH has read both inputs.
+ */
+static void
+qmulr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, int32_t r3)
+{
+  if (r0 != r2 && r0 != r3) {
+    mulr(_jit, r0, r2, r3);
+    UMULH(_jit, r1, r2, r3);
+  } else {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    mulr(_jit, jit_gpr_regno(scratch), r2, r3);
+    UMULH(_jit, r1, r2, r3);
+    movr(_jit, r0, jit_gpr_regno(scratch));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Immediate form of qmulr_u(): i0 is loaded into a temporary register,
+ * which is then passed as the second multiplicand.
+ */
+static void
+qmuli_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  qmulr_u(_jit, r0, r1, r2, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Immediate forms of the division emitters.  Each loads i0 into a
+ * temporary register and passes it as the last operand of the
+ * register-register routine of the same name (divr, divr_u, qdivr,
+ * qdivr_u).
+ */
+static void
+divi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  divr(_jit, r0, r1, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+divi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  divr_u(_jit, r0, r1, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+qdivi(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  qdivr(_jit, r0, r1, r2, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+qdivi_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  qdivr_u(_jit, r0, r1, r2, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Remainder: r0 = r1 - r2 * divr(r1, r2).  The quotient is built in r0
+ * itself unless r0 is also one of the inputs, in which case a
+ * temporary register holds the intermediate values.
+ */
+static void
+remr(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 != r2) {
+    divr(_jit, r0, r1, r2);
+    mulr(_jit, r0, r2, r0);
+    subr(_jit, r0, r1, r0);
+  } else {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t scratch_no = jit_gpr_regno(scratch);
+
+    divr(_jit, scratch_no, r1, r2);
+    mulr(_jit, scratch_no, r2, scratch_no);
+    subr(_jit, r0, r1, scratch_no);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Immediate form of remr(): i0 is loaded into a temporary register,
+ * which is then passed as the divisor.
+ */
+static void
+remi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  remr(_jit, r0, r1, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Remainder using divr_u(): r0 = r1 - r2 * divr_u(r1, r2).  The
+ * quotient is built in r0 itself unless r0 is also one of the inputs,
+ * in which case a temporary register holds the intermediate values.
+ */
+static void
+remr_u(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  if (r0 != r1 && r0 != r2) {
+    divr_u(_jit, r0, r1, r2);
+    mulr(_jit, r0, r2, r0);
+    subr(_jit, r0, r1, r0);
+  } else {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    const int32_t scratch_no = jit_gpr_regno(scratch);
+
+    divr_u(_jit, scratch_no, r1, r2);
+    mulr(_jit, scratch_no, r2, scratch_no);
+    subr(_jit, r0, r1, scratch_no);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Immediate form of remr_u(): i0 is loaded into a temporary register,
+ * which is then passed as the divisor.
+ */
+static void
+remi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t scratch = get_temp_gpr(_jit);
+  const int32_t scratch_no = jit_gpr_regno(scratch);
+
+  movi(_jit, scratch_no, i0);
+  remr_u(_jit, r0, r1, scratch_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Shift-by-immediate emitters.  A shift count of 0 becomes a register
+ * move; any other count is asserted to lie in 1..63 and is passed to
+ * LSLI (lshi), ASRI (rshi) or LSRI (rshi_u).
+ */
+static void
+lshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 != 0) {
+    ASSERT(i0 > 0 && i0 < 64);
+    LSLI(_jit, r0, r1, i0);
+    return;
+  }
+  movr(_jit, r0, r1);
+}
+
+static void
+rshi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 != 0) {
+    ASSERT(i0 > 0 && i0 < 64);
+    ASRI(_jit, r0, r1, i0);
+    return;
+  }
+  movr(_jit, r0, r1);
+}
+
+static void
+rshi_u(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 != 0) {
+    ASSERT(i0 > 0 && i0 < 64);
+    LSRI(_jit, r0, r1, i0);
+    return;
+  }
+  movr(_jit, r0, r1);
+}
+
+/*
+ * r0 = r1 & i0.  The masks 0 and -1 are special-cased; otherwise ANDI
+ * is used when logical_immediate() can encode the mask (it returns -1
+ * when it cannot), and a temporary register plus andr() when not.
+ */
+static void
+andi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t encoded;
+
+  /* x & 0 is 0. */
+  if (i0 == 0) {
+    movi(_jit, r0, 0);
+    return;
+  }
+  /* x & -1 is x. */
+  if (i0 == -1) {
+    movr(_jit, r0, r1);
+    return;
+  }
+
+  encoded = logical_immediate(i0);
+  if (encoded == -1) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(scratch), i0);
+    andr(_jit, r0, r1, jit_gpr_regno(scratch));
+    unget_temp_gpr(_jit);
+  } else {
+    ANDI(_jit, r0, r1, encoded);
+  }
+}
+
+/*
+ * r0 = r1 | i0.  The masks 0 and -1 are special-cased; otherwise ORRI
+ * is used when logical_immediate() can encode the mask (it returns -1
+ * when it cannot), and a temporary register plus orr() when not.
+ */
+static void
+ori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t encoded;
+
+  /* x | 0 is x. */
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  /* x | -1 is -1. */
+  if (i0 == -1) {
+    movi(_jit, r0, -1);
+    return;
+  }
+
+  encoded = logical_immediate(i0);
+  if (encoded == -1) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(scratch), i0);
+    orr(_jit, r0, r1, jit_gpr_regno(scratch));
+    unget_temp_gpr(_jit);
+  } else {
+    ORRI(_jit, r0, r1, encoded);
+  }
+}
+
+/*
+ * r0 = r1 ^ i0.  The masks 0 and -1 are special-cased (move and
+ * comr() respectively); otherwise EORI is used when
+ * logical_immediate() can encode the mask (it returns -1 when it
+ * cannot), and a temporary register plus xorr() when not.
+ */
+static void
+xori(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  int32_t encoded;
+
+  /* x ^ 0 is x. */
+  if (i0 == 0) {
+    movr(_jit, r0, r1);
+    return;
+  }
+  /* x ^ -1 is the complement of x. */
+  if (i0 == -1) {
+    comr(_jit, r0, r1);
+    return;
+  }
+
+  encoded = logical_immediate(i0);
+  if (encoded == -1) {
+    jit_gpr_t scratch = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(scratch), i0);
+    xorr(_jit, r0, r1, jit_gpr_regno(scratch));
+    unget_temp_gpr(_jit);
+  } else {
+    EORI(_jit, r0, r1, encoded);
+  }
+}
+
+/*
+ * 16-bit byte swap: run bswapr_ul() on r1 into r0, then shift r0
+ * right (unsigned) by 48.
+ */
+static void
+bswapr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const jit_word_t shift = 48;
+
+  bswapr_ul(_jit, r0, r1);
+  rshi_u(_jit, r0, r0, shift);
+}
+
+/*
+ * 32-bit byte swap: run bswapr_ul() on r1 into r0, then shift r0
+ * right (unsigned) by 32.
+ */
+static void
+bswapr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  const jit_word_t shift = 32;
+
+  bswapr_ul(_jit, r0, r1);
+  rshi_u(_jit, r0, r0, shift);
+}
+
+/*
+ * Loads from a register address (ldr_*) and from an absolute address
+ * (ldi_*).  Every ldi_* routine loads the address i0 into a temporary
+ * register and delegates to the ldr_* routine of the same suffix.  The
+ * ldr_u* and ldr_l routines defined here emit a single load with a
+ * zero offset and nothing after it.
+ */
+static void
+ldi_c(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  ldr_c(_jit, r0, addr_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldr_uc(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  /* No explicit extension is emitted after the load. */
+  LDRBI(_jit, r0, r1, 0);
+}
+
+static void
+ldi_uc(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  ldr_uc(_jit, r0, addr_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldi_s(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  ldr_s(_jit, r0, addr_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldr_us(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  /* No explicit extension is emitted after the load. */
+  LDRHI(_jit, r0, r1, 0);
+}
+
+static void
+ldi_us(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  ldr_us(_jit, r0, addr_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldi_i(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  ldr_i(_jit, r0, addr_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldr_ui(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  /* No explicit extension is emitted after the load. */
+  LDRWI(_jit, r0, r1, 0);
+}
+
+static void
+ldi_ui(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  ldr_ui(_jit, r0, addr_no);
+  unget_temp_gpr(_jit);
+}
+
+static void
+ldr_l(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  LDRI(_jit, r0, r1, 0);
+}
+
+static void
+ldi_l(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  ldr_l(_jit, r0, addr_no);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Byte load from r1 indexed by register r2: LDRSB into r0, followed by
+ * extr_c() on r0.
+ */
+static void
+ldxr_c(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDRSB(_jit, r0, r1, r2);
+  /* Upstream keeps this extension step after the load. */
+  extr_c(_jit, r0, r0);
+}
+
+/*
+ * Byte load from r1 + i0 into r0, followed by extr_c() on r0.
+ *
+ * Offsets 0..4095 use LDRSBI, offsets -255..-1 use LDURSB with the
+ * offset masked to 9 bits, and anything else goes through a temporary
+ * register holding i0 and the register-indexed LDRSB.
+ */
+static void
+ldxi_c(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 < 0 && i0 > -256) {
+    LDURSB(_jit, r0, r1, i0 & 0x1ff);
+  } else if (i0 >= 0 && i0 <= 4095) {
+    LDRSBI(_jit, r0, r1, i0);
+  } else {
+    jit_gpr_t offset_reg = get_temp_gpr(_jit);
+    const int32_t offset_no = jit_gpr_regno(offset_reg);
+
+    movi(_jit, offset_no, i0);
+    LDRSB(_jit, r0, r1, offset_no);
+    unget_temp_gpr(_jit);
+  }
+  extr_c(_jit, r0, r0);
+}
+
+/*
+ * Byte load from r1 indexed by register r2 into r0 using LDRB; no
+ * explicit extension is emitted afterwards.
+ */
+static void
+ldxr_uc(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDRB(_jit, r0, r1, r2);
+}
+
+/*
+ * Byte load from r1 + i0 into r0; no explicit extension is emitted
+ * afterwards.
+ *
+ * Offsets 0..4095 use LDRBI, offsets -255..-1 use LDURB with the
+ * offset masked to 9 bits, and anything else computes r1 + i0 into a
+ * temporary register and loads through ldr_uc().
+ */
+static void
+ldxi_uc(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 < 0 && i0 > -256) {
+    LDURB(_jit, r0, r1, i0 & 0x1ff);
+  } else if (i0 >= 0 && i0 <= 4095) {
+    LDRBI(_jit, r0, r1, i0);
+  } else {
+    jit_gpr_t addr_reg = get_temp_gpr(_jit);
+    const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+    addi(_jit, addr_no, r1, i0);
+    ldr_uc(_jit, r0, addr_no);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Halfword load from r1 + i0 into r0 using the LDRSH family.
+ *
+ * LDRSHI is given i0 >> 1, so it is used only for even offsets in
+ * 0..8191.  Offsets -255..-1 use LDURSH with the offset masked to 9
+ * bits.  Everything else, including odd offsets, loads i0 into a
+ * temporary register and uses the register-indexed LDRSH.  Making
+ * alignment part of the condition (rather than an assertion only)
+ * keeps an odd offset from being encoded as the wrong address when
+ * assertions are compiled out.
+ */
+static void
+ldxi_s(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 >= 0 && i0 <= 8191 && !(i0 & 1)) {
+    LDRSHI(_jit, r0, r1, i0 >> 1);
+  } else if (i0 > -256 && i0 < 0) {
+    LDURSH(_jit, r0, r1, i0 & 0x1ff);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    LDRSH(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Halfword load from r1 indexed by register r2 into r0 using LDRH; no
+ * explicit extension is emitted afterwards.
+ */
+static void
+ldxr_us(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDRH(_jit, r0, r1, r2);
+}
+
+/*
+ * Halfword load from r1 + i0 into r0 using the LDRH family; no
+ * explicit extension is emitted afterwards.
+ *
+ * LDRHI is given i0 >> 1, so it is used only for even offsets in
+ * 0..8191.  Offsets -255..-1 use LDURH with the offset masked to 9
+ * bits.  Everything else, including odd offsets, loads i0 into a
+ * temporary register and uses the register-indexed LDRH.  Making
+ * alignment part of the condition (rather than an assertion only)
+ * keeps an odd offset from being encoded as the wrong address when
+ * assertions are compiled out.
+ */
+static void
+ldxi_us(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 >= 0 && i0 <= 8191 && !(i0 & 1)) {
+    LDRHI(_jit, r0, r1, i0 >> 1);
+  } else if (i0 > -256 && i0 < 0) {
+    LDURH(_jit, r0, r1, i0 & 0x1ff);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    LDRH(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Word load from r1 + i0 into r0 using the LDRSW family.
+ *
+ * LDRSWI is given i0 >> 2, so it is used only for offsets in 0..16383
+ * that are multiples of 4.  Offsets -255..-1 use LDURSW with the
+ * offset masked to 9 bits.  Everything else, including unaligned
+ * offsets, computes r1 + i0 into a temporary register and loads
+ * through ldr_i().  Making alignment part of the condition (rather
+ * than an assertion only) keeps an unaligned offset from being encoded
+ * as the wrong address when assertions are compiled out.
+ */
+static void
+ldxi_i(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 >= 0 && i0 <= 16383 && !(i0 & 3)) {
+    LDRSWI(_jit, r0, r1, i0 >> 2);
+  } else if (i0 > -256 && i0 < 0) {
+    LDURSW(_jit, r0, r1, i0 & 0x1ff);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(reg), r1, i0);
+    ldr_i(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Word load from r1 indexed by register r2 into r0 using LDRW; no
+ * explicit extension is emitted afterwards.
+ */
+static void
+ldxr_ui(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  LDRW(_jit, r0, r1, r2);
+}
+
+/*
+ * Word load from r1 + i0 into r0 using the LDRW family; no explicit
+ * extension is emitted afterwards.
+ *
+ * LDRWI is given i0 >> 2, so it is used only for offsets in 0..16383
+ * that are multiples of 4.  Offsets -255..-1 use LDURW with the offset
+ * masked to 9 bits.  Everything else, including unaligned offsets,
+ * loads i0 into a temporary register and uses the register-indexed
+ * LDRW.  Making alignment part of the condition (rather than an
+ * assertion only) keeps an unaligned offset from being encoded as the
+ * wrong address when assertions are compiled out.
+ */
+static void
+ldxi_ui(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 >= 0 && i0 <= 16383 && !(i0 & 3)) {
+    LDRWI(_jit, r0, r1, i0 >> 2);
+  } else if (i0 > -256 && i0 < 0) {
+    LDURW(_jit, r0, r1, i0 & 0x1ff);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), i0);
+    LDRW(_jit, r0, r1, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * 64-bit load from r1 + i0 into r0.
+ *
+ * LDRI is given i0 >> 3, so it is used only for offsets in 0..32767
+ * that are multiples of 8.  Offsets -255..-1 use LDUR with the offset
+ * masked to 9 bits.  Everything else, including unaligned offsets,
+ * computes r1 + i0 into a temporary register and loads through
+ * ldr_l().  Making alignment part of the condition (rather than an
+ * assertion only) keeps an unaligned offset from being encoded as the
+ * wrong address when assertions are compiled out.
+ */
+static void
+ldxi_l(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  if (i0 >= 0 && i0 <= 32767 && !(i0 & 7)) {
+    LDRI(_jit, r0, r1, i0 >> 3);
+  } else if (i0 > -256 && i0 < 0) {
+    LDUR(_jit, r0, r1, i0 & 0x1ff);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(reg), r1, i0);
+    ldr_l(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Stores of r0 to the absolute address i0.  Each routine loads the
+ * address into a temporary register and delegates to the str_*
+ * routine of the same suffix.
+ */
+static void
+sti_c(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  str_c(_jit, addr_no, r0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+sti_s(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  str_s(_jit, addr_no, r0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+sti_i(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  str_i(_jit, addr_no, r0);
+  unget_temp_gpr(_jit);
+}
+
+static void
+sti_l(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t addr_reg = get_temp_gpr(_jit);
+  const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+  movi(_jit, addr_no, i0);
+  str_l(_jit, addr_no, r0);
+  unget_temp_gpr(_jit);
+}
+
+/*
+ * Byte store of r1 to r0 + i0.
+ *
+ * Offsets 0..4095 use STRBI, offsets -255..-1 use STURB with the
+ * offset masked to 9 bits, and anything else computes r0 + i0 into a
+ * temporary register and stores through str_c().
+ */
+static void
+stxi_c(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 < 0 && i0 > -256) {
+    STURB(_jit, r1, r0, i0 & 0x1ff);
+  } else if (i0 >= 0 && i0 <= 4095) {
+    STRBI(_jit, r1, r0, i0);
+  } else {
+    jit_gpr_t addr_reg = get_temp_gpr(_jit);
+    const int32_t addr_no = jit_gpr_regno(addr_reg);
+
+    addi(_jit, addr_no, r0, i0);
+    str_c(_jit, addr_no, r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Halfword store of r1 to r0 + i0.
+ *
+ * STRHI is given i0 >> 1, so it is used only for even offsets in
+ * 0..8191.  Offsets -255..-1 use STURH with the offset masked to 9
+ * bits.  Everything else, including odd offsets, computes r0 + i0 into
+ * a temporary register and stores through str_s().  Making alignment
+ * part of the condition (rather than an assertion only) keeps an odd
+ * offset from being encoded as the wrong address when assertions are
+ * compiled out.
+ */
+static void
+stxi_s(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 >= 0 && i0 <= 8191 && !(i0 & 1)) {
+    STRHI(_jit, r1, r0, i0 >> 1);
+  } else if (i0 > -256 && i0 < 0) {
+    STURH(_jit, r1, r0, i0 & 0x1ff);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(reg), r0, i0);
+    str_s(_jit, jit_gpr_regno(reg), r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Word store of r1 to r0 + i0.
+ *
+ * STRWI is given i0 >> 2, so it is used only for offsets in 0..16383
+ * that are multiples of 4.  Offsets -255..-1 use STURW with the offset
+ * masked to 9 bits.  Everything else, including unaligned offsets,
+ * computes r0 + i0 into a temporary register and stores through
+ * str_i().  Making alignment part of the condition (rather than an
+ * assertion only) keeps an unaligned offset from being encoded as the
+ * wrong address when assertions are compiled out.
+ */
+static void
+stxi_i(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 >= 0 && i0 <= 16383 && !(i0 & 3)) {
+    STRWI(_jit, r1, r0, i0 >> 2);
+  } else if (i0 > -256 && i0 < 0) {
+    STURW(_jit, r1, r0, i0 & 0x1ff);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(reg), r0, i0);
+    str_i(_jit, jit_gpr_regno(reg), r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * 64-bit store of r1 to r0 + i0.
+ *
+ * STRI is given i0 >> 3, so it is used only for offsets in 0..32767
+ * that are multiples of 8.  Offsets -255..-1 use STUR with the offset
+ * masked to 9 bits.  Everything else, including unaligned offsets,
+ * computes r0 + i0 into a temporary register and stores through
+ * str_l().  Making alignment part of the condition (rather than an
+ * assertion only) keeps an unaligned offset from being encoded as the
+ * wrong address when assertions are compiled out.
+ */
+static void
+stxi_l(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  if (i0 >= 0 && i0 <= 32767 && !(i0 & 7)) {
+    STRI(_jit, r1, r0, i0 >> 3);
+  } else if (i0 > -256 && i0 < 0) {
+    STUR(_jit, r1, r0, i0 & 0x1ff);
+  } else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    addi(_jit, jit_gpr_regno(reg), r0, i0);
+    str_l(_jit, jit_gpr_regno(reg), r1);
+    unget_temp_gpr(_jit);
+  }
+}
+
+/*
+ * Thin wrapper over movi_from_pool() for register r0; hands back the
+ * relocation that call returns.
+ */
+static jit_reloc_t
+mov_addr(jit_state_t *_jit, int32_t r0)
+{
+  jit_reloc_t reloc = movi_from_pool(_jit, r0);
+
+  return reloc;
+}
+
+/*
+ * Branch if r0 equals the immediate i1.  A comparison with zero is
+ * emitted as a single CBZ; any other value goes through bcci() with
+ * BCC_EQ.  Returns the branch relocation.
+ */
+static jit_reloc_t
+beqi(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (i1 != 0)
+    return bcci(_jit, BCC_EQ, r0, i1);
+
+  return CBZ(_jit, r0);
+}
+
+/*
+ * Branch if r0 differs from the immediate i1.  A comparison with zero
+ * is emitted as a single CBNZ; any other value goes through bcci()
+ * with BCC_NE.  Returns the branch relocation.
+ */
+static jit_reloc_t
+bnei(jit_state_t *_jit, int32_t r0, jit_word_t i1)
+{
+  if (i1 != 0)
+    return bcci(_jit, BCC_NE, r0, i1);
+
+  return CBNZ(_jit, r0);
+}
+
+/* Emit B and return its relocation for the caller to patch. */
+static jit_reloc_t
+jmp(jit_state_t *_jit)
+{
+  return B(_jit);
+}
+
+/*
+ * Emit a jump and patch it to the address i0.  Written as a plain
+ * call: ISO C does not allow "return expr;" in a function returning
+ * void.
+ */
+static void
+jmpi(jit_state_t *_jit, jit_word_t i0)
+{
+  jit_patch_there(_jit, jmp(_jit), (void*)i0);
+}
+
+/* Emit BL and return its relocation for the caller to patch. */
+static jit_reloc_t
+call(jit_state_t *_jit)
+{
+  return BL(_jit);
+}
+
+/* Emit a call and patch it to the address i0. */
+static void
+calli(jit_state_t *_jit, jit_word_t i0)
+{
+  jit_patch_there(_jit, call(_jit), (void*)i0);
+}
+
+/* Emit RET. */
+static void
+ret(jit_state_t *_jit)
+{
+  RET(_jit);
+}
+
+/* Move register r into _X0, then emit the return. */
+static void
+retr(jit_state_t *_jit, int32_t r)
+{
+  movr(_jit, jit_gpr_regno(_X0), r);
+  ret(_jit);
+}
+
+/*
+ * Load the immediate i into _X0, then emit the return.  The immediate
+ * is declared as jit_word_t, as in the other immediate emitters that
+ * call movi(), so that a value that does not fit in 32 bits is not
+ * truncated.
+ */
+static void
+reti(jit_state_t *_jit, jit_word_t i)
+{
+  movi(_jit, jit_gpr_regno(_X0), i);
+  ret(_jit);
+}
+
+/*
+ * Fetch a value from _X0 into r0.  Each routine applies the extr_*
+ * conversion of the same suffix; retval_l is a plain register move.
+ */
+static void
+retval_c(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t x0 = jit_gpr_regno(_X0);
+
+  extr_c(_jit, r0, x0);
+}
+
+static void
+retval_uc(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t x0 = jit_gpr_regno(_X0);
+
+  extr_uc(_jit, r0, x0);
+}
+
+static void
+retval_s(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t x0 = jit_gpr_regno(_X0);
+
+  extr_s(_jit, r0, x0);
+}
+
+static void
+retval_us(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t x0 = jit_gpr_regno(_X0);
+
+  extr_us(_jit, r0, x0);
+}
+
+static void
+retval_i(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t x0 = jit_gpr_regno(_X0);
+
+  extr_i(_jit, r0, x0);
+}
+
+static void
+retval_ui(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t x0 = jit_gpr_regno(_X0);
+
+  extr_ui(_jit, r0, x0);
+}
+
+static void
+retval_l(jit_state_t *_jit, int32_t r0)
+{
+  const int32_t x0 = jit_gpr_regno(_X0);
+
+  movr(_jit, r0, x0);
+}
+
+/* Not implemented in this port: calling it aborts the process. */
+static void
+pushr(jit_state_t *_jit, int32_t r0)
+{
+  (void)_jit;
+  (void)r0;
+  abort();
+}
+
+/* Not implemented in this port: calling it aborts the process. */
+static void
+popr(jit_state_t *_jit, int32_t r0)
+{
+  (void)_jit;
+  (void)r0;
+  abort();
 }
-#endif
diff --git a/lightening/aarch64-fpu.c b/lightening/aarch64-fpu.c
index e1ccde6..c55f963 100644
--- a/lightening/aarch64-fpu.c
+++ b/lightening/aarch64-fpu.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -14,901 +14,825 @@
  * License for more details.
  *
  * Authors:
- *     Paulo Cesar Pereira de Andrade
+ *      Paulo Cesar Pereira de Andrade
  */
 
-#if PROTO
-#  define A64_SCVTF                    0x1e220000
-#  define A64_FMOVWV                   0x1e260000
-#  define A64_FMOVVW                   0x1e270000
-#  define A64_FMOVXV                   0x9e260000
-#  define A64_FMOVVX                   0x9e270000
-#  define A64_FCVTZS                   0x1e380000
-#  define A64_FCMPE                    0x1e202010
-#  define A64_FMOV                     0x1e204000
-#  define A64_FABS                     0x1e20c000
-#  define A64_FNEG                     0x1e214000
-#  define A64_FSQRT                    0x1e21c000
-#  define A64_FCVTS                    0x1e224000
-#  define A64_FCVTD                    0x1e22c000
-#  define A64_FMUL                     0x1e200800
-#  define A64_FDIV                     0x1e201800
-#  define A64_FADD                     0x1e202800
-#  define A64_FSUB                     0x1e203800
-#  define FCMPES(Rn,Rm)                        os_vv(A64_FCMPE,0,Rn,Rm)
-#  define FCMPED(Rn,Rm)                        os_vv(A64_FCMPE,1,Rn,Rm)
-#  define FMOVS(Rd,Rn)                 osvv_(A64_FMOV,0,Rd,Rn)
-#  define FMOVD(Rd,Rn)                 osvv_(A64_FMOV,1,Rd,Rn)
-#  define FMOVWS(Rd,Rn)                        osvv_(A64_FMOVWV,0,Rd,Rn)
-#  define FMOVSW(Rd,Rn)                        osvv_(A64_FMOVVW,0,Rd,Rn)
-#  define FMOVXD(Rd,Rn)                        osvv_(A64_FMOVXV,1,Rd,Rn)
-#  define FMOVDX(Rd,Rn)                        osvv_(A64_FMOVVX,1,Rd,Rn)
-#  define FCVT_SD(Rd,Rn)               osvv_(A64_FCVTS,1,Rd,Rn)
-#  define FCVT_DS(Rd,Rn)               osvv_(A64_FCVTD,0,Rd,Rn)
-#  define SCVTFS(Rd,Rn)                        osvv_(A64_SCVTF|XS,0,Rd,Rn)
-#  define SCVTFD(Rd,Rn)                        osvv_(A64_SCVTF|XS,1,Rd,Rn)
-#  define FCVTSZ_WS(Rd,Rn)             osvv_(A64_FCVTZS,0,Rd,Rn)
-#  define FCVTSZ_WD(Rd,Rn)             osvv_(A64_FCVTZS,1,Rd,Rn)
-#  define FCVTSZ_XS(Rd,Rn)             osvv_(A64_FCVTZS|XS,0,Rd,Rn)
-#  define FCVTSZ_XD(Rd,Rn)             osvv_(A64_FCVTZS|XS,1,Rd,Rn)
-#  define FABSS(Rd,Rn)                 osvv_(A64_FABS,0,Rd,Rn)
-#  define FABSD(Rd,Rn)                 osvv_(A64_FABS,1,Rd,Rn)
-#  define FNEGS(Rd,Rn)                 osvv_(A64_FNEG,0,Rd,Rn)
-#  define FNEGD(Rd,Rn)                 osvv_(A64_FNEG,1,Rd,Rn)
-#  define FSQRTS(Rd,Rn)                        osvv_(A64_FSQRT,0,Rd,Rn)
-#  define FSQRTD(Rd,Rn)                        osvv_(A64_FSQRT,1,Rd,Rn)
-#  define FADDS(Rd,Rn,Rm)              osvvv(A64_FADD,0,Rd,Rn,Rm)
-#  define FADDD(Rd,Rn,Rm)              osvvv(A64_FADD,1,Rd,Rn,Rm)
-#  define FSUBS(Rd,Rn,Rm)              osvvv(A64_FSUB,0,Rd,Rn,Rm)
-#  define FSUBD(Rd,Rn,Rm)              osvvv(A64_FSUB,1,Rd,Rn,Rm)
-#  define FMULS(Rd,Rn,Rm)              osvvv(A64_FMUL,0,Rd,Rn,Rm)
-#  define FMULD(Rd,Rn,Rm)              osvvv(A64_FMUL,1,Rd,Rn,Rm)
-#  define FDIVS(Rd,Rn,Rm)              osvvv(A64_FDIV,0,Rd,Rn,Rm)
-#  define FDIVD(Rd,Rn,Rm)              osvvv(A64_FDIV,1,Rd,Rn,Rm)
-#  define osvvv(Op,Sz,Rd,Rn,Rm)                _osvvv(_jit,Op,Sz,Rd,Rn,Rm)
-static void _osvvv(jit_state_t*,int32_t,int32_t,
-                  int32_t,int32_t,int32_t);
-#  define osvv_(Op,Sz,Rd,Rn)           _osvv_(_jit,Op,Sz,Rd,Rn)
-static void _osvv_(jit_state_t*,int32_t,
-                  int32_t,int32_t,int32_t);
-#  define os_vv(Op,Sz,Rn,Rm)           _os_vv(_jit,Op,Sz,Rn,Rm)
-static void _os_vv(jit_state_t*,int32_t,
-                  int32_t,int32_t,int32_t);
-#  define truncr_f_i(r0,r1)            _truncr_f_i(_jit,r0,r1)
-static void _truncr_f_i(jit_state_t*,int32_t,int32_t);
-#  define truncr_f_l(r0,r1)            FCVTSZ_XS(r0,r1)
-#  define truncr_d_i(r0,r1)            _truncr_d_i(_jit,r0,r1)
-static void _truncr_d_i(jit_state_t*,int32_t,int32_t);
-#  define truncr_d_l(r0,r1)            FCVTSZ_XD(r0,r1)
-#  define addr_f(r0,r1,r2)             FADDS(r0,r1,r2)
-#  define addi_f(r0,r1,i0)             _addi_f(_jit,r0,r1,i0)
-static void _addi_f(jit_state_t*,int32_t,int32_t,jit_float32_t);
-#  define subr_f(r0,r1,r2)             FSUBS(r0,r1,r2)
-#  define subi_f(r0,r1,i0)             _subi_f(_jit,r0,r1,i0)
-static void _subi_f(jit_state_t*,int32_t,int32_t,jit_float32_t);
-#  define rsbr_f(r0, r1, r2)           subr_f(r0, r2, r1)
-#  define rsbi_f(r0, r1, i0)           _rsbi_f(_jit, r0, r1, i0)
-static void _rsbi_f(jit_state_t*,int32_t,int32_t,jit_float32_t);
-#  define mulr_f(r0,r1,r2)             FMULS(r0,r1,r2)
-#  define muli_f(r0,r1,i0)             _muli_f(_jit,r0,r1,i0)
-static void _muli_f(jit_state_t*,int32_t,int32_t,jit_float32_t);
-#  define divr_f(r0,r1,r2)             FDIVS(r0,r1,r2)
-#  define divi_f(r0,r1,i0)             _divi_f(_jit,r0,r1,i0)
-static void _divi_f(jit_state_t*,int32_t,int32_t,jit_float32_t);
-#  define absr_f(r0,r1)                        FABSS(r0,r1)
-#  define negr_f(r0,r1)                        FNEGS(r0,r1)
-#  define sqrtr_f(r0,r1)               FSQRTS(r0,r1)
-#  define extr_f(r0,r1)                        SCVTFS(r0,r1)
-#  define ldr_f(r0,r1)                 _ldr_f(_jit,r0,r1)
-static void _ldr_f(jit_state_t*,int32_t,int32_t);
-#  define ldi_f(r0,i0)                 _ldi_f(_jit,r0,i0)
-static void _ldi_f(jit_state_t*,int32_t,jit_word_t);
-#  define ldxr_f(r0,r1,r2)             _ldxr_f(_jit,r0,r1,r2)
-static void _ldxr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ldxi_f(r0,r1,i0)             _ldxi_f(_jit,r0,r1,i0)
-static void _ldxi_f(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define str_f(r0,r1)                 _str_f(_jit,r0,r1)
-static void _str_f(jit_state_t*,int32_t,int32_t);
-#  define sti_f(i0,r0)                 _sti_f(_jit,i0,r0)
-static void _sti_f(jit_state_t*,jit_word_t,int32_t);
-#  define stxr_f(r0,r1,r2)             _stxr_f(_jit,r0,r1,r2)
-static void _stxr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define stxi_f(i0,r0,r1)             _stxi_f(_jit,i0,r0,r1)
-static void _stxi_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define movr_f(r0,r1)                        _movr_f(_jit,r0,r1)
-static void _movr_f(jit_state_t*,int32_t,int32_t);
-#  define movi_f(r0,i0)                        _movi_f(_jit,r0,i0)
-static void _movi_f(jit_state_t*,int32_t,jit_float32_t);
-#  define extr_d_f(r0,r1)              FCVT_SD(r0,r1)
-#  define fccr(cc,r0,r1,r2)            _fccr(_jit,cc,r0,r1,r2)
-static void _fccr(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define fcci(cc,r0,r1,i0)            _fcci(_jit,cc,r0,r1,i0)
-static void _fcci(jit_state_t*,
-                 int32_t,int32_t,int32_t,jit_float32_t);
-#  define ltr_f(r0,r1,r2)              fccr(CC_MI,r0,r1,r2)
-#  define lti_f(r0,r1,i0)              fcci(CC_MI,r0,r1,i0)
-#  define ler_f(r0,r1,r2)              fccr(CC_LS,r0,r1,r2)
-#  define lei_f(r0,r1,i0)              fcci(CC_LS,r0,r1,i0)
-#  define eqr_f(r0,r1,r2)              fccr(CC_EQ,r0,r1,r2)
-#  define eqi_f(r0,r1,i0)              fcci(CC_EQ,r0,r1,i0)
-#  define ger_f(r0,r1,r2)              fccr(CC_GE,r0,r1,r2)
-#  define gei_f(r0,r1,i0)              fcci(CC_GE,r0,r1,i0)
-#  define gtr_f(r0,r1,r2)              fccr(CC_GT,r0,r1,r2)
-#  define gti_f(r0,r1,i0)              fcci(CC_GT,r0,r1,i0)
-#  define ner_f(r0,r1,r2)              fccr(CC_NE,r0,r1,r2)
-#  define nei_f(r0,r1,i0)              fcci(CC_NE,r0,r1,i0)
-#  define unltr_f(r0,r1,r2)            fccr(CC_LT,r0,r1,r2)
-#  define unlti_f(r0,r1,i0)            fcci(CC_LT,r0,r1,i0)
-#  define unler_f(r0,r1,r2)            fccr(CC_LE,r0,r1,r2)
-#  define unlei_f(r0,r1,i0)            fcci(CC_LE,r0,r1,i0)
-#  define uneqr_f(r0,r1,r2)            _uneqr_f(_jit,r0,r1,r2)
-static void _uneqr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define uneqi_f(r0,r1,i0)            _uneqi_f(_jit,r0,r1,i0)
-static void _uneqi_f(jit_state_t*,int32_t,int32_t,jit_float32_t);
-#  define unger_f(r0,r1,r2)            fccr(CC_PL,r0,r1,r2)
-#  define ungei_f(r0,r1,i0)            fcci(CC_PL,r0,r1,i0)
-#  define ungtr_f(r0,r1,r2)            fccr(CC_HI,r0,r1,r2)
-#  define ungti_f(r0,r1,i0)            fcci(CC_HI,r0,r1,i0)
-#  define ltgtr_f(r0,r1,r2)            _ltgtr_f(_jit,r0,r1,r2)
-static void _ltgtr_f(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ltgti_f(r0,r1,i0)            _ltgti_f(_jit,r0,r1,i0)
-static void _ltgti_f(jit_state_t*,int32_t,int32_t,jit_float32_t);
-#  define ordr_f(r0,r1,r2)             fccr(CC_VC,r0,r1,r2)
-#  define ordi_f(r0,r1,i0)             fcci(CC_VC,r0,r1,i0)
-#  define unordr_f(r0,r1,r2)           fccr(CC_VS,r0,r1,r2)
-#  define unordi_f(r0,r1,i0)           fcci(CC_VS,r0,r1,i0)
-#define fbccr(cc,i0,r0,r1)             _fbccr(_jit,cc,i0,r0,r1)
-static jit_word_t
-_fbccr(jit_state_t*,int32_t,jit_word_t,int32_t,int32_t);
-#define fbcci(cc,i0,r0,i1)             _fbcci(_jit,cc,i0,r0,i1)
-static jit_word_t
-_fbcci(jit_state_t*,int32_t,jit_word_t,int32_t,jit_float32_t);
-#  define bltr_f(i0,r0,r1)             fbccr(BCC_MI,i0,r0,r1)
-#  define blti_f(i0,r0,i1)             fbcci(BCC_MI,i0,r0,i1)
-#  define bler_f(i0,r0,r1)             fbccr(BCC_LS,i0,r0,r1)
-#  define blei_f(i0,r0,i1)             fbcci(BCC_LS,i0,r0,i1)
-#  define beqr_f(i0,r0,r1)             fbccr(BCC_EQ,i0,r0,r1)
-#  define beqi_f(i0,r0,i1)             fbcci(BCC_EQ,i0,r0,i1)
-#  define bger_f(i0,r0,r1)             fbccr(BCC_GE,i0,r0,r1)
-#  define bgei_f(i0,r0,i1)             fbcci(BCC_GE,i0,r0,i1)
-#  define bgtr_f(i0,r0,r1)             fbccr(BCC_GT,i0,r0,r1)
-#  define bgti_f(i0,r0,i1)             fbcci(BCC_GT,i0,r0,i1)
-#  define bner_f(i0,r0,r1)             fbccr(BCC_NE,i0,r0,r1)
-#  define bnei_f(i0,r0,i1)             fbcci(BCC_NE,i0,r0,i1)
-#  define bunltr_f(i0,r0,r1)           fbccr(BCC_LT,i0,r0,r1)
-#  define bunlti_f(i0,r0,i1)           fbcci(BCC_LT,i0,r0,i1)
-#  define bunler_f(i0,r0,r1)           fbccr(BCC_LE,i0,r0,r1)
-#  define bunlei_f(i0,r0,i1)           fbcci(BCC_LE,i0,r0,i1)
-#  define buneqr_f(i0,r0,r1)           _buneqr_f(_jit,i0,r0,r1)
-static jit_word_t _buneqr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define buneqi_f(i0,r0,i1)           _buneqi_f(_jit,i0,r0,i1)
-static jit_word_t _buneqi_f(jit_state_t*,jit_word_t,int32_t,jit_float32_t);
-#  define bunger_f(i0,r0,r1)           fbccr(BCC_PL,i0,r0,r1)
-#  define bungei_f(i0,r0,i1)           fbcci(BCC_PL,i0,r0,i1)
-#  define bungtr_f(i0,r0,r1)           fbccr(BCC_HI,i0,r0,r1)
-#  define bungti_f(i0,r0,i1)           fbcci(BCC_HI,i0,r0,i1)
-#  define bltgtr_f(i0,r0,r1)           _bltgtr_f(_jit,i0,r0,r1)
-static jit_word_t _bltgtr_f(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bltgti_f(i0,r0,i1)           _bltgti_f(_jit,i0,r0,i1)
-static jit_word_t _bltgti_f(jit_state_t*,jit_word_t,int32_t,jit_float32_t);
-#  define bordr_f(i0,r0,r1)            fbccr(BCC_VC,i0,r0,r1)
-#  define bordi_f(i0,r0,i1)            fbcci(BCC_VC,i0,r0,i1)
-#  define bunordr_f(i0,r0,r1)          fbccr(BCC_VS,i0,r0,r1)
-#  define bunordi_f(i0,r0,i1)          fbcci(BCC_VS,i0,r0,i1)
-#  define addr_d(r0,r1,r2)             FADDD(r0,r1,r2)
-#  define addi_d(r0,r1,i0)             _addi_d(_jit,r0,r1,i0)
-static void _addi_d(jit_state_t*,int32_t,int32_t,jit_float64_t);
-#  define subr_d(r0,r1,r2)             FSUBD(r0,r1,r2)
-#  define subi_d(r0,r1,i0)             _subi_d(_jit,r0,r1,i0)
-static void _subi_d(jit_state_t*,int32_t,int32_t,jit_float64_t);
-#  define rsbr_d(r0, r1, r2)           subr_d(r0, r2, r1)
-#  define rsbi_d(r0, r1, i0)           _rsbi_d(_jit, r0, r1, i0)
-static void _rsbi_d(jit_state_t*,int32_t,int32_t,jit_float64_t);
-#  define mulr_d(r0,r1,r2)             FMULD(r0,r1,r2)
-#  define muli_d(r0,r1,i0)             _muli_d(_jit,r0,r1,i0)
-static void _muli_d(jit_state_t*,int32_t,int32_t,jit_float64_t);
-#  define divr_d(r0,r1,r2)             FDIVD(r0,r1,r2)
-#  define divi_d(r0,r1,i0)             _divi_d(_jit,r0,r1,i0)
-static void _divi_d(jit_state_t*,int32_t,int32_t,jit_float64_t);
-#  define absr_d(r0,r1)                        FABSD(r0,r1)
-#  define negr_d(r0,r1)                        FNEGD(r0,r1)
-#  define sqrtr_d(r0,r1)               FSQRTD(r0,r1)
-#  define extr_d(r0,r1)                        SCVTFD(r0,r1)
-#  define ldr_d(r0,r1)                 _ldr_d(_jit,r0,r1)
-static void _ldr_d(jit_state_t*,int32_t,int32_t);
-#  define ldi_d(r0,i0)                 _ldi_d(_jit,r0,i0)
-static void _ldi_d(jit_state_t*,int32_t,jit_word_t);
-#  define ldxr_d(r0,r1,r2)             _ldxr_d(_jit,r0,r1,r2)
-static void _ldxr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ldxi_d(r0,r1,i0)             _ldxi_d(_jit,r0,r1,i0)
-static void _ldxi_d(jit_state_t*,int32_t,int32_t,jit_word_t);
-#  define str_d(r0,r1)                 _str_d(_jit,r0,r1)
-static void _str_d(jit_state_t*,int32_t,int32_t);
-#  define sti_d(i0,r0)                 _sti_d(_jit,i0,r0)
-static void _sti_d(jit_state_t*,jit_word_t,int32_t);
-#  define stxr_d(r0,r1,r2)             _stxr_d(_jit,r0,r1,r2)
-static void _stxr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define stxi_d(i0,r0,r1)             _stxi_d(_jit,i0,r0,r1)
-static void _stxi_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define movr_d(r0,r1)                        _movr_d(_jit,r0,r1)
-static void _movr_d(jit_state_t*,int32_t,int32_t);
-#  define movi_d(r0,i0)                        _movi_d(_jit,r0,i0)
-static void _movi_d(jit_state_t*,int32_t,jit_float64_t);
-#  define extr_f_d(r0,r1)              FCVT_DS(r0,r1)
-#  define dccr(cc,r0,r1,r2)            _dccr(_jit,cc,r0,r1,r2)
-static void _dccr(jit_state_t*,int32_t,int32_t,int32_t,int32_t);
-#  define dcci(cc,r0,r1,i0)            _dcci(_jit,cc,r0,r1,i0)
-static void _dcci(jit_state_t*,
-                 int32_t,int32_t,int32_t,jit_float64_t);
-#  define ltr_d(r0,r1,r2)              dccr(CC_MI,r0,r1,r2)
-#  define lti_d(r0,r1,i0)              dcci(CC_MI,r0,r1,i0)
-#  define ler_d(r0,r1,r2)              dccr(CC_LS,r0,r1,r2)
-#  define lei_d(r0,r1,i0)              dcci(CC_LS,r0,r1,i0)
-#  define eqr_d(r0,r1,r2)              dccr(CC_EQ,r0,r1,r2)
-#  define eqi_d(r0,r1,i0)              dcci(CC_EQ,r0,r1,i0)
-#  define ger_d(r0,r1,r2)              dccr(CC_GE,r0,r1,r2)
-#  define gei_d(r0,r1,i0)              dcci(CC_GE,r0,r1,i0)
-#  define gtr_d(r0,r1,r2)              dccr(CC_GT,r0,r1,r2)
-#  define gti_d(r0,r1,i0)              dcci(CC_GT,r0,r1,i0)
-#  define ner_d(r0,r1,r2)              dccr(CC_NE,r0,r1,r2)
-#  define nei_d(r0,r1,i0)              dcci(CC_NE,r0,r1,i0)
-#  define unltr_d(r0,r1,r2)            dccr(CC_LT,r0,r1,r2)
-#  define unlti_d(r0,r1,i0)            dcci(CC_LT,r0,r1,i0)
-#  define unler_d(r0,r1,r2)            dccr(CC_LE,r0,r1,r2)
-#  define unlei_d(r0,r1,i0)            dcci(CC_LE,r0,r1,i0)
-#  define uneqr_d(r0,r1,r2)            _uneqr_d(_jit,r0,r1,r2)
-static void _uneqr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define uneqi_d(r0,r1,i0)            _uneqi_d(_jit,r0,r1,i0)
-static void _uneqi_d(jit_state_t*,int32_t,int32_t,jit_float64_t);
-#  define unger_d(r0,r1,r2)            dccr(CC_PL,r0,r1,r2)
-#  define ungei_d(r0,r1,i0)            dcci(CC_PL,r0,r1,i0)
-#  define ungtr_d(r0,r1,r2)            dccr(CC_HI,r0,r1,r2)
-#  define ungti_d(r0,r1,i0)            dcci(CC_HI,r0,r1,i0)
-#  define ltgtr_d(r0,r1,r2)            _ltgtr_d(_jit,r0,r1,r2)
-static void _ltgtr_d(jit_state_t*,int32_t,int32_t,int32_t);
-#  define ltgti_d(r0,r1,i0)            _ltgti_d(_jit,r0,r1,i0)
-static void _ltgti_d(jit_state_t*,int32_t,int32_t,jit_float64_t);
-#  define ordr_d(r0,r1,r2)             dccr(CC_VC,r0,r1,r2)
-#  define ordi_d(r0,r1,i0)             dcci(CC_VC,r0,r1,i0)
-#  define unordr_d(r0,r1,r2)           dccr(CC_VS,r0,r1,r2)
-#  define unordi_d(r0,r1,i0)           dcci(CC_VS,r0,r1,i0)
-#define dbccr(cc,i0,r0,r1)             _dbccr(_jit,cc,i0,r0,r1)
-static jit_word_t
-_dbccr(jit_state_t*,int32_t,jit_word_t,int32_t,int32_t);
-#define dbcci(cc,i0,r0,i1)             _dbcci(_jit,cc,i0,r0,i1)
-static jit_word_t
-_dbcci(jit_state_t*,int32_t,jit_word_t,int32_t,jit_float64_t);
-#  define bltr_d(i0,r0,r1)             dbccr(BCC_MI,i0,r0,r1)
-#  define blti_d(i0,r0,i1)             dbcci(BCC_MI,i0,r0,i1)
-#  define bler_d(i0,r0,r1)             dbccr(BCC_LS,i0,r0,r1)
-#  define blei_d(i0,r0,i1)             dbcci(BCC_LS,i0,r0,i1)
-#  define beqr_d(i0,r0,r1)             dbccr(BCC_EQ,i0,r0,r1)
-#  define beqi_d(i0,r0,i1)             dbcci(BCC_EQ,i0,r0,i1)
-#  define bger_d(i0,r0,r1)             dbccr(BCC_GE,i0,r0,r1)
-#  define bgei_d(i0,r0,i1)             dbcci(BCC_GE,i0,r0,i1)
-#  define bgtr_d(i0,r0,r1)             dbccr(BCC_GT,i0,r0,r1)
-#  define bgti_d(i0,r0,i1)             dbcci(BCC_GT,i0,r0,i1)
-#  define bner_d(i0,r0,r1)             dbccr(BCC_NE,i0,r0,r1)
-#  define bnei_d(i0,r0,i1)             dbcci(BCC_NE,i0,r0,i1)
-#  define bunltr_d(i0,r0,r1)           dbccr(BCC_LT,i0,r0,r1)
-#  define bunlti_d(i0,r0,i1)           dbcci(BCC_LT,i0,r0,i1)
-#  define bunler_d(i0,r0,r1)           dbccr(BCC_LE,i0,r0,r1)
-#  define bunlei_d(i0,r0,i1)           dbcci(BCC_LE,i0,r0,i1)
-#  define buneqr_d(i0,r0,r1)           _buneqr_d(_jit,i0,r0,r1)
-static jit_word_t _buneqr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define buneqi_d(i0,r0,i1)           _buneqi_d(_jit,i0,r0,i1)
-static jit_word_t _buneqi_d(jit_state_t*,jit_word_t,int32_t,jit_float64_t);
-#  define bunger_d(i0,r0,r1)           dbccr(BCC_PL,i0,r0,r1)
-#  define bungei_d(i0,r0,i1)           dbcci(BCC_PL,i0,r0,i1)
-#  define bungtr_d(i0,r0,r1)           dbccr(BCC_HI,i0,r0,r1)
-#  define bungti_d(i0,r0,i1)           dbcci(BCC_HI,i0,r0,i1)
-#  define bltgtr_d(i0,r0,r1)           _bltgtr_d(_jit,i0,r0,r1)
-static jit_word_t _bltgtr_d(jit_state_t*,jit_word_t,int32_t,int32_t);
-#  define bltgti_d(i0,r0,i1)           _bltgti_d(_jit,i0,r0,i1)
-static jit_word_t _bltgti_d(jit_state_t*,jit_word_t,int32_t,jit_float64_t);
-#  define bordr_d(i0,r0,r1)            dbccr(BCC_VC,i0,r0,r1)
-#  define bordi_d(i0,r0,i1)            dbcci(BCC_VC,i0,r0,i1)
-#  define bunordr_d(i0,r0,r1)          dbccr(BCC_VS,i0,r0,r1)
-#  define bunordi_d(i0,r0,i1)          dbcci(BCC_VS,i0,r0,i1)
-#  define vaarg_d(r0, r1)              _vaarg_d(_jit, r0, r1)
-static void _vaarg_d(jit_state_t*, int32_t, int32_t);
-#endif
-
-#if CODE
-static void
-_osvvv(jit_state_t *_jit, int32_t Op, int32_t Sz,
-       int32_t Rd, int32_t Rn, int32_t Rm)
-{
-    instr_t    i;
-    assert(!(Rd &       ~0x1f));
-    assert(!(Rn &       ~0x1f));
-    assert(!(Rm &       ~0x1f));
-    assert(!(Sz &        ~0x3));
-    assert(!(Op & ~0xffe0fc00));
-    i.w = Op;
-    i.size.b = Sz;
-    i.Rd.b = Rd;
-    i.Rn.b = Rn;
-    i.Rm.b = Rm;
-    ii(i.w);
-}
-
-static void
-_osvv_(jit_state_t *_jit, int32_t Op,
-       int32_t Sz, int32_t Rd, int32_t Rn)
-{
-    instr_t    i;
-    assert(!(Rd &       ~0x1f));
-    assert(!(Rn &       ~0x1f));
-    assert(!(Sz &        ~0x3));
-    assert(!(Op & ~0xfffffc00));
-    i.w = Op;
-    i.size.b = Sz;
-    i.Rd.b = Rd;
-    i.Rn.b = Rn;
-    ii(i.w);
-}
-
-static void
-_os_vv(jit_state_t *_jit, int32_t Op,
-       int32_t Sz, int32_t Rn, int32_t Rm)
-{
-    instr_t    i;
-    assert(!(Rn &       ~0x1f));
-    assert(!(Rm &       ~0x1f));
-    assert(!(Sz &        ~0x3));
-    assert(!(Op & ~0xff20fc1f));
-    i.w = Op;
-    i.size.b = Sz;
-    i.Rn.b = Rn;
-    i.Rm.b = Rm;
-    ii(i.w);
-}
-
-#define fopi(name)                                                     \
-static void                                                            \
-_##name##i_f(jit_state_t *_jit,                                                
\
-            int32_t r0, int32_t r1, jit_float32_t i0)          \
-{                                                                      \
-    int32_t            reg = jit_get_reg(jit_class_fpr);               \
-    movi_f(rn(reg), i0);                                               \
-    name##r_f(r0, r1, rn(reg));                                                
\
-    jit_unget_reg(reg);                                                        
\
-}
-#define dopi(name)                                                     \
-static void                                                            \
-_##name##i_d(jit_state_t *_jit,                                                
\
-            int32_t r0, int32_t r1, jit_float64_t i0)          \
-{                                                                      \
-    int32_t            reg = jit_get_reg(jit_class_fpr);               \
-    movi_d(rn(reg), i0);                                               \
-    name##r_d(r0, r1, rn(reg));                                                
\
-    jit_unget_reg(reg);                                                        
\
-}
-#define fbopi(name)                                                    \
-static jit_word_t                                                      \
-_b##name##i_f(jit_state_t *_jit,                                       \
-             jit_word_t i0, int32_t r0, jit_float32_t i1)              \
-{                                                                      \
-    jit_word_t         word;                                           \
-    int32_t            reg = jit_get_reg(jit_class_fpr|                \
-                                         jit_class_nospill);           \
-    movi_f(rn(reg), i1);                                               \
-    word = b##name##r_f(i0, r0, rn(reg));                              \
-    jit_unget_reg(reg);                                                        
\
-    return (word);                                                     \
+/* Build and emit one 32-bit instruction word with three register
+   operands: start from the base opcode Op, then fill in the size field
+   and the Rd, Rn and Rm fields.  Every operand is range-checked first:
+   registers must fit in 5 bits, Sz in 2 bits, and Op may only have bits
+   set inside the 0xffe0fc00 mask.  */
+static void
+osvvv(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rd, int32_t Rn,
+      int32_t Rm)
+{
+  ASSERT(!(Rd &       ~0x1f));
+  ASSERT(!(Rn &       ~0x1f));
+  ASSERT(!(Rm &       ~0x1f));
+  ASSERT(!(Sz &        ~0x3));
+  ASSERT(!(Op & ~0xffe0fc00));
+
+  /* Seed the whole word with the opcode, then overlay the fields.  */
+  instr_t insn = { .w = Op };
+  insn.size.b = Sz;
+  insn.Rd.b = Rd;
+  insn.Rn.b = Rn;
+  insn.Rm.b = Rm;
+
+  emit_u32(_jit, insn.w);
+}
+
+/* Build and emit one 32-bit instruction word with a destination and a
+   single source register: start from the base opcode Op, then fill in
+   the size field and the Rd and Rn fields.  Registers must fit in
+   5 bits, Sz in 2 bits, and Op may only have bits set inside the
+   0xfffffc00 mask.  */
+static void
+osvv_(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rd, int32_t Rn)
+{
+  ASSERT(!(Rd &       ~0x1f));
+  ASSERT(!(Rn &       ~0x1f));
+  ASSERT(!(Sz &        ~0x3));
+  ASSERT(!(Op & ~0xfffffc00));
+
+  /* Seed the whole word with the opcode, then overlay the fields.  */
+  instr_t insn = { .w = Op };
+  insn.size.b = Sz;
+  insn.Rd.b = Rd;
+  insn.Rn.b = Rn;
+
+  emit_u32(_jit, insn.w);
+}
+
+/* Build and emit one 32-bit instruction word with two source registers
+   and no destination: start from the base opcode Op, then fill in the
+   size field and the Rn and Rm fields.  Registers must fit in 5 bits,
+   Sz in 2 bits, and Op may only have bits set inside the 0xff20fc1f
+   mask.  */
+static void
+os_vv(jit_state_t *_jit, int32_t Op, int32_t Sz, int32_t Rn, int32_t Rm)
+{
+  ASSERT(!(Rn &       ~0x1f));
+  ASSERT(!(Rm &       ~0x1f));
+  ASSERT(!(Sz &        ~0x3));
+  ASSERT(!(Op & ~0xff20fc1f));
+
+  /* Seed the whole word with the opcode, then overlay the fields.  */
+  instr_t insn = { .w = Op };
+  insn.size.b = Sz;
+  insn.Rn.b = Rn;
+  insn.Rm.b = Rm;
+
+  emit_u32(_jit, insn.w);
+}
+
+/* Base opcode words for the floating-point and integer<->FP conversion
+   instructions emitted below.  The size and register fields are left
+   zero here and are filled in by osvv_, osvvv and os_vv.  */
+#define A64_SCVTF                     0x1e220000
+#define A64_FMOVWV                    0x1e260000
+#define A64_FMOVVW                    0x1e270000
+#define A64_FMOVXV                    0x9e260000
+#define A64_FMOVVX                    0x9e270000
+#define A64_FCVTZS                    0x1e380000
+#define A64_FCMPE                     0x1e202010
+#define A64_FMOV                      0x1e204000
+#define A64_FABS                      0x1e20c000
+#define A64_FNEG                      0x1e214000
+#define A64_FSQRT                     0x1e21c000
+#define A64_FCVTS                     0x1e224000
+#define A64_FCVTD                     0x1e22c000
+#define A64_FMUL                      0x1e200800
+#define A64_FDIV                      0x1e201800
+#define A64_FADD                      0x1e202800
+#define A64_FSUB                      0x1e203800
+
+/* FCMPE Sn, Sm: single-precision compare of Rn with Rm (size field 0);
+   the result is consumed through the condition flags.  */
+static void
+FCMPES(jit_state_t *_jit, int32_t Rn, int32_t Rm)
+{
+  os_vv(_jit, A64_FCMPE, 0, Rn, Rm);
+}
+
+/* FCMPE Dn, Dm: double-precision compare of Rn with Rm (size field 1);
+   the result is consumed through the condition flags.  */
+static void
+FCMPED(jit_state_t *_jit, int32_t Rn, int32_t Rm)
+{
+  os_vv(_jit, A64_FCMPE, 1, Rn, Rm);
+}
+
+/* FMOV Sd, Sn: single-precision FP register-to-register move.  */
+static void
+FMOVS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FMOV, 0, Rd, Rn);
+}
+
+/* FMOV Dd, Dn: double-precision FP register-to-register move.  */
+static void
+FMOVD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FMOV, 1, Rd, Rn);
+}
+
+/* FMOV Wd, Sn: copy the raw 32 bits of FP register Rn into
+   general-purpose register Rd (no numeric conversion).  */
+static void
+FMOVWS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FMOVWV, 0, Rd, Rn);
+}
+
+/* FMOV Sd, Wn: copy the raw 32 bits of general-purpose register Rn
+   into FP register Rd (no numeric conversion).  */
+static void
+FMOVSW(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FMOVVW, 0, Rd, Rn);
+}
+
+/* FMOV Xd, Dn: copy the raw 64 bits of FP register Rn into
+   general-purpose register Rd (no numeric conversion).  */
+static void
+FMOVXD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FMOVXV, 1, Rd, Rn);
+}
+
+/* FMOV Dd, Xn: copy the raw 64 bits of general-purpose register Rn
+   into FP register Rd (no numeric conversion).  */
+static void
+FMOVDX(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FMOVVX, 1, Rd, Rn);
+}
+
+/* FCVT Sd, Dn: convert the double in Rn to single precision in Rd.
+   The size field (1) describes the source operand.  */
+static void
+FCVT_SD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FCVTS, 1, Rd, Rn);
+}
+
+/* FCVT Dd, Sn: convert the single in Rn to double precision in Rd.
+   The size field (0) describes the source operand.  */
+static void
+FCVT_DS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FCVTD, 0, Rd, Rn);
+}
+
+/* SCVTF to single precision: convert the signed integer in
+   general-purpose register Rn to a float in Rd.  The opcode is OR'ed
+   with XS (defined elsewhere; presumably selects the 64-bit integer
+   source -- confirm against its definition).  */
+static void
+SCVTFS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_SCVTF|XS, 0, Rd, Rn);
+}
+
+/* SCVTF to double precision: convert the signed integer in
+   general-purpose register Rn to a double in Rd.  The opcode is OR'ed
+   with XS (defined elsewhere; presumably selects the 64-bit integer
+   source -- confirm against its definition).  */
+static void
+SCVTFD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_SCVTF|XS, 1, Rd, Rn);
+}
+
+/* FCVTZS from single precision, without XS: convert the float in Rn to
+   a signed integer in general-purpose register Rd, rounding toward
+   zero.  */
+static void
+FCVTSZ_WS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FCVTZS, 0, Rd, Rn);
+}
+
+/* FCVTZS from double precision, without XS: convert the double in Rn
+   to a signed integer in general-purpose register Rd, rounding toward
+   zero.  */
+static void
+FCVTSZ_WD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FCVTZS, 1, Rd, Rn);
+}
+
+/* FCVTZS from single precision, with XS OR'ed in: convert the float in
+   Rn to a signed integer in general-purpose register Rd, rounding
+   toward zero.  Used by truncr_f_l.  */
+static void
+FCVTSZ_XS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FCVTZS|XS, 0, Rd, Rn);
+}
+
+/* FCVTZS from double precision, with XS OR'ed in: convert the double
+   in Rn to a signed integer in general-purpose register Rd, rounding
+   toward zero.  Used by truncr_d_l.  */
+static void
+FCVTSZ_XD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FCVTZS|XS, 1, Rd, Rn);
+}
+
+/* FABS Sd, Sn: single-precision absolute value of Rn into Rd.  */
+static void
+FABSS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FABS, 0, Rd, Rn);
+}
+
+/* FABS Dd, Dn: double-precision absolute value of Rn into Rd.  */
+static void
+FABSD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FABS, 1, Rd, Rn);
+}
+
+/* FNEG Sd, Sn: single-precision negation of Rn into Rd.  */
+static void
+FNEGS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FNEG, 0, Rd, Rn);
+}
+
+/* FNEG Dd, Dn: double-precision negation of Rn into Rd.  */
+static void
+FNEGD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FNEG, 1, Rd, Rn);
+}
+
+/* FSQRT Sd, Sn: single-precision square root of Rn into Rd.  */
+static void
+FSQRTS(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FSQRT, 0, Rd, Rn);
+}
+
+/* FSQRT Dd, Dn: double-precision square root of Rn into Rd.  */
+static void
+FSQRTD(jit_state_t *_jit, int32_t Rd, int32_t Rn)
+{
+  osvv_(_jit, A64_FSQRT, 1, Rd, Rn);
+}
+
+/* FADD Sd, Sn, Sm: single-precision Rd = Rn + Rm.  */
+static void
+FADDS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  osvvv(_jit, A64_FADD, 0, Rd, Rn, Rm);
 }
-#define dbopi(name)                                                    \
-static jit_word_t                                                      \
-_b##name##i_d(jit_state_t *_jit,                                       \
-             jit_word_t i0, int32_t r0, jit_float64_t i1)              \
-{                                                                      \
-    jit_word_t         word;                                           \
-    int32_t            reg = jit_get_reg(jit_class_fpr|                \
-                                         jit_class_nospill);           \
-    movi_d(rn(reg), i1);                                               \
-    word = b##name##r_d(i0, r0, rn(reg));                              \
-    jit_unget_reg(reg);                                                        
\
-    return (word);                                                     \
+
+/* FADD Dd, Dn, Dm: double-precision Rd = Rn + Rm.  */
+static void
+FADDD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  osvvv(_jit, A64_FADD, 1, Rd, Rn, Rm);
 }
 
 static void
-_truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+FSUBS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
 {
-    FCVTSZ_WS(r0, r1);
-    extr_i(r0, r0);
+  osvvv(_jit, A64_FSUB, 0, Rd, Rn, Rm);
 }
 
 static void
-_truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
+FSUBD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
 {
-    FCVTSZ_WD(r0, r1);
-    extr_i(r0, r0);
+  osvvv(_jit, A64_FSUB, 1, Rd, Rn, Rm);
 }
 
-fopi(add)
-fopi(sub)
-fopi(rsb)
-fopi(mul)
-fopi(div)
+/* FMUL Sd, Sn, Sm: single-precision Rd = Rn * Rm.  */
+static void
+FMULS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
+{
+  osvvv(_jit, A64_FMUL, 0, Rd, Rn, Rm);
+}
 
 static void
-_ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+FMULD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    ldr_i(rn(reg), r1);
-    FMOVSW(r0, rn(reg));
-    jit_unget_reg(reg);
+  osvvv(_jit, A64_FMUL, 1, Rd, Rn, Rm);
 }
 
 static void
-_ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+FDIVS(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    ldi_i(rn(reg), i0);
-    FMOVSW(r0, rn(reg));
-    jit_unget_reg(reg);
+  osvvv(_jit, A64_FDIV, 0, Rd, Rn, Rm);
 }
 
 static void
-_ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+FDIVD(jit_state_t *_jit, int32_t Rd, int32_t Rn, int32_t Rm)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    ldxr_i(rn(reg), r1, r2);
-    FMOVSW(r0, rn(reg));
-    jit_unget_reg(reg);
+  osvvv(_jit, A64_FDIV, 1, Rd, Rn, Rm);
 }
 
 static void
-_ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+truncr_f_l(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    ldxi_i(rn(reg), r1, i0);
-    FMOVSW(r0, rn(reg));
-    jit_unget_reg(reg);
+  FCVTSZ_XS(_jit, r0, r1);
 }
 
 static void
-_str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+truncr_d_l(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    FMOVWS(rn(reg), r1);
-    str_i(r0, rn(reg));
-    jit_unget_reg(reg);
+  FCVTSZ_XD(_jit, r0, r1);
 }
 
 static void
-_sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+addr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    FMOVWS(rn(reg), r0);
-    sti_i(i0, rn(reg));
-    jit_unget_reg(reg);
+  FADDS(_jit, r0, r1, r2);
 }
 
 static void
-_stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+subr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    FMOVWS(rn(reg), r2);
-    stxr_i(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  FSUBS(_jit, r0, r1, r2);
 }
 
 static void
-_stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+mulr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    FMOVWS(rn(reg), r1);
-    stxi_i(i0, r0, rn(reg));
-    jit_unget_reg(reg);
+  FMULS(_jit, r0, r1, r2);
 }
 
 static void
-_movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+divr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    if (r0 != r1)
-       FMOVS(r0, r1);
+  FDIVS(_jit, r0, r1, r2);
 }
 
 static void
-_movi_f(jit_state_t *_jit, int32_t r0, jit_float32_t i0)
+absr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    union {
-       int32_t i;
-       jit_float32_t   f;
-    } u;
-    int32_t            reg;
-    u.f = i0;
-    if (u.i == 0)
-       FMOVSW(r0, WZR_REGNO);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       /* prevent generating unused top 32 bits */
-       movi(rn(reg), ((jit_word_t)u.i) & 0xffffffff);
-       FMOVSW(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  FABSS(_jit, r0, r1);
 }
 
 static void
-_fccr(jit_state_t *_jit, int32_t cc,
-      int32_t r0, int32_t r1, int32_t r2)
+negr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    FCMPES(r1, r2);
-    CSET(r0, cc);
+  FNEGS(_jit, r0, r1);
 }
 
 static void
-_fcci(jit_state_t *_jit, int32_t cc,
-      int32_t r0, int32_t r1, jit_float32_t i0)
+sqrtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  FSQRTS(_jit, r0, r1);
+}
+
+/* Convert the signed integer in general-purpose register r1 to a
+   single-precision float in r0 (emits SCVTFS).  */
+static void
+extr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  SCVTFS(_jit, r0, r1);
+}
+
+/* Convert the double in r1 to a single-precision float in r0 (emits
+   FCVT_SD).  */
+static void
+extr_d_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  FCVT_SD(_jit, r0, r1);
+}
+
+/* Shared body of the single-precision compare-and-branch operations:
+   compare r0 with r1 via FCMPES, then emit a conditional branch on
+   condition cc.  Returns the branch's relocation so the caller can
+   patch in the target.  */
+static jit_reloc_t
+fbccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  jit_reloc_t branch;
+
+  FCMPES(_jit, r0, r1);
+  branch = B_C(_jit, cc);
+  return branch;
+}
+
+/* Branch if r0 < r1 (single precision, condition MI); not taken when
+   the operands are unordered.  Returns the relocation to patch.  */
+static jit_reloc_t
+bltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_MI, r0, r1);
+}
+
+/* Branch if r0 <= r1 (single precision, condition LS); not taken when
+   the operands are unordered.  Returns the relocation to patch.  */
+static jit_reloc_t
+bler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_LS, r0, r1);
+}
+
+/* Branch if r0 == r1 (single precision, condition EQ); not taken when
+   the operands are unordered.  Returns the relocation to patch.  */
+static jit_reloc_t
+beqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_EQ, r0, r1);
+}
+
+static jit_reloc_t
+bger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_GE,r0, r1);
+}
+
+static jit_reloc_t
+bgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_GT,r0, r1);
+}
+
+static jit_reloc_t
+bner_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_NE,r0, r1);
+}
+
+static jit_reloc_t
+bunltr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_LT,r0, r1);
+}
+
+static jit_reloc_t
+bunler_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_LE,r0, r1);
+}
+
+static jit_reloc_t
+bunger_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_PL,r0, r1);
+}
+
+static jit_reloc_t
+bungtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_HI,r0, r1);
+}
+
+static jit_reloc_t
+bordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return fbccr(_jit, BCC_VC,r0, r1);
+}
+
+static jit_reloc_t
+bunordr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_fpr);
-    movi_f(rn(reg), i0);
-    fccr(cc, r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  return fbccr(_jit, BCC_VS, r0, r1);
 }
 
 static void
-_uneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+addr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    jit_word_t         w;
-    FCMPES(r1, r2);
-    CSET(r0, CC_VS);
-    w = _jit->pc.w;
-    B_C(BCC_VS, 1);            /* unordered satisfies condition */
-    CSET(r0, CC_EQ);           /* equal satisfies condition */
-    patch_at(w, _jit->pc.w);
+  FADDD(_jit, r0, r1, r2);
 }
-fopi(uneq)
 
 static void
-_ltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+subr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    jit_word_t         w;
-    FCMPES(r1, r2);
-    CSET(r0, CC_VC);           /* set to 1 if ordered */
-    w = _jit->pc.w;
-    B_C(BCC_VS, 1);            /* unordered does not satisfy condition */
-    CSET(r0, CC_NE);           /* set to 1 if not equal */
-    patch_at(w, _jit->pc.w);
+  FSUBD(_jit, r0, r1, r2);
 }
-fopi(ltgt)
 
-static jit_word_t
-_fbccr(jit_state_t *_jit, int32_t cc,
-       jit_word_t i0, int32_t r0, int32_t r1)
+static void
+mulr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    jit_word_t         w, d;
-    FCMPES(r0, r1);
-    w = _jit->pc.w;
-    d = (i0 - w) >> 2;
-    B_C(cc, d);
-    return (w);
+  FMULD(_jit, r0, r1, r2);
 }
 
-static jit_word_t
-_fbcci(jit_state_t *_jit, int32_t cc,
-       jit_word_t i0, int32_t r0, jit_float32_t i1)
+static void
+divr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    jit_word_t         w;
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
-    movi_f(rn(reg), i1);
-    w = fbccr(cc, i0, r0, rn(reg));
-    jit_unget_reg(reg);
-    return (w);
+  FDIVD(_jit, r0, r1, r2);
 }
 
-static jit_word_t
-_buneqr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+absr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_word_t         u, v, w;
-    FCMPES(r0, r1);
-    u = _jit->pc.w;
-    B_C(BCC_VS, 1);            /* unordered satisfies condition */
-    v = _jit->pc.w;
-    B_C(BCC_NE, 1);            /* not equal (or unordered) does not satisfy */
-    patch_at(u, _jit->pc.w);
-    w = _jit->pc.w;
-    B((i0 - w) >> 2);
-    patch_at(v, _jit->pc.w);
-    return (w);
+  FABSD(_jit, r0, r1);
 }
-fbopi(uneq)
 
-static jit_word_t
-_bltgtr_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+negr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_word_t         u, v, w;
-    FCMPES(r0, r1);
-    u = _jit->pc.w;
-    B_C(BCC_VS, 2);            /* jump over if unordered */
-    v = _jit->pc.w;
-    B_C(BCC_EQ, 1);            /* jump over if equal */
-    w = _jit->pc.w;
-    B((i0 - w) >> 2);
-    patch_at(u, _jit->pc.w);
-    patch_at(v, _jit->pc.w);
-    return (w);
+  FNEGD(_jit, r0, r1);
 }
-fbopi(ltgt)
 
-dopi(add)
-dopi(sub)
-dopi(rsb)
-dopi(mul)
-dopi(div)
+static void
+sqrtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  FSQRTD(_jit, r0, r1);
+}
 
 static void
-_ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+extr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    ldr_l(rn(reg), r1);
-    FMOVDX(r0, rn(reg));
-    jit_unget_reg(reg);
+  SCVTFD(_jit, r0, r1);
 }
 
 static void
-_ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
+extr_f_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  FCVT_DS(_jit, r0, r1);
+}
+
+static jit_reloc_t
+dbccr(jit_state_t *_jit, int32_t cc, int32_t r0, int32_t r1)
+{
+  FCMPED(_jit, r0, r1);
+  return B_C(_jit, cc);
+}
+
+static jit_reloc_t
+bltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_MI, r0, r1);
+}
+
+static jit_reloc_t
+bler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_LS, r0, r1);
+}
+
+static jit_reloc_t
+beqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_EQ, r0, r1);
+}
+
+static jit_reloc_t
+bger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_GE, r0, r1);
+}
+
+static jit_reloc_t
+bgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_GT, r0, r1);
+}
+
+static jit_reloc_t
+bner_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_NE, r0, r1);
+}
+
+static jit_reloc_t
+bunltr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_LT, r0, r1);
+}
+
+static jit_reloc_t
+bunler_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_LE, r0, r1);
+}
+
+static jit_reloc_t
+bunger_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_PL, r0, r1);
+}
+
+static jit_reloc_t
+bungtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_HI, r0, r1);
+}
+
+static jit_reloc_t
+bordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  return dbccr(_jit, BCC_VC, r0, r1);
+}
+
+static jit_reloc_t
+bunordr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    ldi_l(rn(reg), i0);
-    FMOVDX(r0, rn(reg));
-    jit_unget_reg(reg);
+  return dbccr(_jit, BCC_VS, r0, r1);
 }
 
+
 static void
-_ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+truncr_f_i(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    ldxr_l(rn(reg), r1, r2);
-    FMOVDX(r0, rn(reg));
-    jit_unget_reg(reg);
+  FCVTSZ_WS(_jit, r0, r1);
+  extr_i(_jit, r0, r0);
 }
 
 static void
-_ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+truncr_d_i(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    ldxi_l(rn(reg), r1, i0);
-    FMOVDX(r0, rn(reg));
-    jit_unget_reg(reg);
+  FCVTSZ_WD(_jit, r0, r1);
+  extr_i(_jit, r0, r0);
 }
 
 static void
-_str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    FMOVXD(rn(reg), r1);
-    str_l(r0, rn(reg));
-    jit_unget_reg(reg);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  ldr_i(_jit, jit_gpr_regno(reg), r1);
+  FMOVSW(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+ldi_f(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    FMOVXD(rn(reg), r0);
-    sti_l(i0, rn(reg));
-    jit_unget_reg(reg);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  ldi_i(_jit, jit_gpr_regno(reg), i0);
+  FMOVSW(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+ldxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    FMOVXD(rn(reg), r2);
-    stxr_l(r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  ldxr_i(_jit, jit_gpr_regno(reg), r1, r2);
+  FMOVSW(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+ldxi_f(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_gpr);
-    FMOVXD(rn(reg), r1);
-    stxi_l(i0, r0, rn(reg));
-    jit_unget_reg(reg);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  ldxi_i(_jit, jit_gpr_regno(reg), r1, i0);
+  FMOVSW(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+str_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    if (r0 != r1)
-       FMOVD(r0, r1);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  FMOVWS(_jit, jit_gpr_regno(reg), r1);
+  str_i(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_movi_d(jit_state_t *_jit, int32_t r0, jit_float64_t i0)
+sti_f(jit_state_t *_jit, jit_word_t i0, int32_t r0)
 {
-    union {
-       int64_t l;
-       jit_float64_t   d;
-    } u;
-    int32_t            reg;
-    u.d = i0;
-    if (u.l == 0)
-       FMOVDX(r0, XZR_REGNO);
-    else {
-       reg = jit_get_reg(jit_class_gpr);
-       movi(rn(reg), u.l);
-       FMOVDX(r0, rn(reg));
-       jit_unget_reg(reg);
-    }
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  FMOVWS(_jit, jit_gpr_regno(reg), r0);
+  sti_i(_jit, i0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_dccr(jit_state_t *_jit, int32_t cc,
-      int32_t r0, int32_t r1, int32_t r2)
+stxr_f(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    FCMPED(r1, r2);
-    CSET(r0, cc);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  FMOVWS(_jit, jit_gpr_regno(reg), r2);
+  stxr_i(_jit, r0, r1, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_dcci(jit_state_t *_jit, int32_t cc,
-      int32_t r0, int32_t r1, jit_float64_t i0)
+stxi_f(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
 {
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_fpr);
-    movi_d(rn(reg), i0);
-    dccr(cc, r0, r1, rn(reg));
-    jit_unget_reg(reg);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  FMOVWS(_jit, jit_gpr_regno(reg), r1);
+  stxi_i(_jit, i0, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
 
 static void
-_uneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+movr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_word_t         w;
-    FCMPED(r1, r2);
-    CSET(r0, CC_VS);
-    w = _jit->pc.w;
-    B_C(BCC_VS, 1);            /* unordered satisfies condition */
-    CSET(r0, CC_EQ);           /* equal satisfies condition */
-    patch_at(w, _jit->pc.w);
+  if (r0 != r1)
+    FMOVS(_jit, r0, r1);
 }
-dopi(uneq)
 
 static void
-_ltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+movi_f(jit_state_t *_jit, int32_t r0, float i0)
 {
-    jit_word_t         w;
-    FCMPED(r1, r2);
-    CSET(r0, CC_VC);           /* set to 1 if ordered */
-    w = _jit->pc.w;
-    B_C(BCC_VS, 1);            /* unordered does not satisfy condition */
-    CSET(r0, CC_NE);           /* set to 1 if not equal */
-    patch_at(w, _jit->pc.w);
+  union {
+    int32_t i;
+    float   f;
+  } u;
+  u.f = i0;
+  if (u.i == 0)
+    FMOVSW(_jit, r0, WZR_REGNO);
+  else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    /* prevent generating unused top 32 bits */
+    movi(_jit, jit_gpr_regno(reg), ((jit_word_t)u.i) & 0xffffffff);
+    FMOVSW(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
 }
-dopi(ltgt)
 
-static jit_word_t
-_dbccr(jit_state_t *_jit, int32_t cc,
-       jit_word_t i0, int32_t r0, int32_t r1)
+static jit_reloc_t
+buneqr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_word_t         w, d;
-    FCMPED(r0, r1);
-    w = _jit->pc.w;
-    d = (i0 - w) >> 2;
-    B_C(cc, d);
-    return (w);
+  FCMPES(_jit, r0, r1);
+  jit_reloc_t unordered = B_C(_jit, BCC_VS); /* unordered satisfies condition */
+  jit_reloc_t neq = B_C(_jit, BCC_NE); /* not equal (or unordered) does not satisfy */
+  jit_patch_here(_jit, unordered);
+  jit_reloc_t ret = B(_jit);
+  jit_patch_here(_jit, neq);
+  return ret;
 }
 
-static jit_word_t
-_dbcci(jit_state_t *_jit, int32_t cc,
-       jit_word_t i0, int32_t r0, jit_float64_t i1)
+static jit_reloc_t
+bltgtr_f(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_word_t         w;
-    int32_t            reg;
-    reg = jit_get_reg(jit_class_fpr|jit_class_nospill);
-    movi_d(rn(reg), i1);
-    w = dbccr(cc, i0, r0, rn(reg));
-    jit_unget_reg(reg);
-    return (w);
+  FCMPES(_jit, r0, r1);
+  jit_reloc_t unordered = B_C(_jit, BCC_VS); /* jump over if unordered */
+  jit_reloc_t eq = B_C(_jit, BCC_EQ); /* jump over if equal */
+  jit_reloc_t ret = B(_jit);
+  jit_patch_here(_jit, unordered);
+  jit_patch_here(_jit, eq);
+  return ret;
 }
 
-static jit_word_t
-_buneqr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+ldr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
 {
-    jit_word_t         u, v, w;
-    FCMPED(r0, r1);
-    u = _jit->pc.w;
-    B_C(BCC_VS, 1);            /* unordered satisfies condition */
-    v = _jit->pc.w;
-    B_C(BCC_NE, 1);            /* not equal (or unordered) does not satisfy */
-    patch_at(u, _jit->pc.w);
-    w = _jit->pc.w;
-    B((i0 - w) >> 2);
-    patch_at(v, _jit->pc.w);
-    return (w);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  ldr_l(_jit, jit_gpr_regno(reg), r1);
+  FMOVDX(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
-dbopi(uneq)
 
-static jit_word_t
-_bltgtr_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static void
+ldi_d(jit_state_t *_jit, int32_t r0, jit_word_t i0)
 {
-    jit_word_t         u, v, w;
-    FCMPED(r0, r1);
-    u = _jit->pc.w;
-    B_C(BCC_VS, 2);            /* jump over if unordered */
-    v = _jit->pc.w;
-    B_C(BCC_EQ, 1);            /* jump over if equal */
-    w = _jit->pc.w;
-    B((i0 - w) >> 2);
-    patch_at(u, _jit->pc.w);
-    patch_at(v, _jit->pc.w);
-    return (w);
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  ldi_l(_jit, jit_gpr_regno(reg), i0);
+  FMOVDX(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
 }
-dbopi(ltgt)
 
 static void
-_vaarg_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+ldxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
 {
-    jit_word_t         ge_code;
-    jit_word_t         lt_code;
-    int32_t            rg0, rg1;
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  ldxr_l(_jit, jit_gpr_regno(reg), r1, r2);
+  FMOVDX(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
+}
 
-    assert(_jitc->function->self.call & jit_call_varargs);
+static void
+ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  ldxi_l(_jit, jit_gpr_regno(reg), r1, i0);
+  FMOVDX(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
+}
 
-    rg0 = jit_get_reg(jit_class_gpr);
-    rg1 = jit_get_reg(jit_class_gpr);
+static void
+str_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  FMOVXD(_jit, jit_gpr_regno(reg), r1);
+  str_l(_jit, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
+}
 
-    /* Load the fp offset in save area in the first temporary. */
-    ldxi_i(rn(rg0), r1, offsetof(jit_va_list_t, fpoff));
+static void
+sti_d(jit_state_t *_jit, jit_word_t i0, int32_t r0)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  FMOVXD(_jit, jit_gpr_regno(reg), r0);
+  sti_l(_jit, i0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
+}
 
-    /* Jump over if there are no remaining arguments in the save area. */
-    ge_code = bgei(_jit->pc.w, rn(rg0), 0);
+static void
+stxr_d(jit_state_t *_jit, int32_t r0, int32_t r1, int32_t r2)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  FMOVXD(_jit, jit_gpr_regno(reg), r2);
+  stxr_l(_jit, r0, r1, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
+}
 
-    /* Load the gp save pointer in the second temporary. */
-    ldxi(rn(rg1), r1, offsetof(jit_va_list_t, fptop));
+static void
+stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+{
+  jit_gpr_t reg = get_temp_gpr(_jit);
+  FMOVXD(_jit, jit_gpr_regno(reg), r1);
+  stxi_l(_jit, i0, r0, jit_gpr_regno(reg));
+  unget_temp_gpr(_jit);
+}
 
-    /* Load the vararg argument in the first argument. */
-    ldxr_d(r0, rn(rg1), rn(rg0));
+static void
+movr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  if (r0 != r1)
+    FMOVD(_jit, r0, r1);
+}
 
-    /* Update the fp offset. */
-    addi(rn(rg0), rn(rg0), 16);
-    stxi_i(offsetof(jit_va_list_t, fpoff), r1, rn(rg0));
+static void
+movi_d(jit_state_t *_jit, int32_t r0, double i0)
+{
+  union {
+    int64_t l;
+    double   d;
+  } u;
+  u.d = i0;
+  if (u.l == 0)
+    FMOVDX(_jit, r0, XZR_REGNO);
+  else {
+    jit_gpr_t reg = get_temp_gpr(_jit);
+    movi(_jit, jit_gpr_regno(reg), u.l);
+    FMOVDX(_jit, r0, jit_gpr_regno(reg));
+    unget_temp_gpr(_jit);
+  }
+}
 
-    /* Will only need one temporary register below. */
-    jit_unget_reg(rg1);
+static jit_reloc_t
+buneqr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  FCMPED(_jit, r0, r1);
+  jit_reloc_t unordered = B_C(_jit, BCC_VS); /* unordered satisfies condition */
+  jit_reloc_t neq = B_C(_jit, BCC_NE); /* not equal (or unordered) does not satisfy */
+  jit_patch_here(_jit, unordered);
+  jit_reloc_t ret = B(_jit);
+  jit_patch_here(_jit, neq);
+  return ret;
+}
 
-    /* Jump over overflow code. */
-    lt_code = jmpi_p(_jit->pc.w);
+static jit_reloc_t
+bltgtr_d(jit_state_t *_jit, int32_t r0, int32_t r1)
+{
+  FCMPED(_jit, r0, r1);
+  jit_reloc_t unordered = B_C(_jit, BCC_VS); /* jump over if unordered */
+  jit_reloc_t eq = B_C(_jit, BCC_EQ); /* jump over if equal */
+  jit_reloc_t ret = B(_jit);
+  jit_patch_here(_jit, unordered);
+  jit_patch_here(_jit, eq);
+  return ret;
+}
 
-    /* Where to land if argument is in overflow area. */
-    patch_at(ge_code, _jit->pc.w);
+static void
+retr_d(jit_state_t *_jit, int32_t r)
+{
+  movr_d(_jit, jit_fpr_regno(_D0), r);
+  ret(_jit);
+}
 
-    /* Load stack pointer. */
-    ldxi(rn(rg0), r1, offsetof(jit_va_list_t, stack));
+static void
+retr_f(jit_state_t *_jit, int32_t r)
+{
+  movr_f(_jit, jit_fpr_regno(_D0), r);
+  ret(_jit);
+}
 
-    /* Load argument. */
-    ldr_d(r0, rn(rg0));
+static void
+retval_f(jit_state_t *_jit, int32_t r0)
+{
+  movr_f(_jit, r0, jit_fpr_regno(_D0));
+}
 
-    /* Update stack pointer. */
-    addi(rn(rg0), rn(rg0), 8);
-    stxi(offsetof(jit_va_list_t, stack), r1, rn(rg0));
+static void
+retval_d(jit_state_t *_jit, int32_t r0)
+{
+  movr_d(_jit, r0, jit_fpr_regno(_D0));
+}
 
-    /* Where to land if argument is in gp save area. */
-    patch_at(lt_code, _jit->pc.w);
+static void
+pushr_d(jit_state_t *_jit, int32_t r0)
+{
+  abort();
+}
 
-    jit_unget_reg(rg0);
+static void
+popr_d(jit_state_t *_jit, int32_t r0)
+{
+  abort();
 }
-#endif
diff --git a/lightening/aarch64.c b/lightening/aarch64.c
index 65efb74..b678908 100644
--- a/lightening/aarch64.c
+++ b/lightening/aarch64.c
@@ -17,1572 +17,164 @@
  *     Paulo Cesar Pereira de Andrade
  */
 
-# define JIT_RA0                _R0
-# define JIT_FA0                _V0
-# define JIT_SP         _SP
-# define JIT_RET                _R0
-# define JIT_FRET               _V0
-
-#define jit_arg_reg_p(i)               ((i) >= 0 && (i) < 8)
-#define jit_arg_f_reg_p(i)             ((i) >= 0 && (i) < 8)
-
-typedef struct jit_qreg {
-    jit_float64_t      l;
-    jit_float64_t      h;
-} jit_qreg_t;
-
-#define va_gp_top_offset               offsetof(jit_va_list_t, q0)
-#define va_fp_top_offset               sizeof(jit_va_list_t)
-typedef struct jit_va_list {
-    jit_pointer_t      stack;
-    jit_pointer_t      gptop;
-    jit_pointer_t      fptop;
-    int32_t            gpoff;
-    int32_t            fpoff;
-
-    int64_t            x0;
-    int64_t            x1;
-    int64_t            x2;
-    int64_t            x3;
-    int64_t            x4;
-    int64_t            x5;
-    int64_t            x6;
-    int64_t            x7;
-
-    jit_qreg_t         q0;
-    jit_qreg_t         q1;
-    jit_qreg_t         q2;
-    jit_qreg_t         q3;
-    jit_qreg_t         q4;
-    jit_qreg_t         q5;
-    jit_qreg_t         q6;
-    jit_qreg_t         q7;
-} jit_va_list_t;
-
-/*
- * Prototypes
- */
-#define patch(instr, node)             _patch(_jit, instr, node)
-static void _patch(jit_state_t*,jit_word_t,jit_node_t*);
-
 /* libgcc */
 extern void __clear_cache(void *, void *);
 
-#define PROTO                          1
-#  include "aarch64-cpu.c"
-#  include "aarch64-fpu.c"
-#undef PROTO
-
-/*
- * Initialization
- */
-static const jit_register_t _rvs[] = {
-    { rc(gpr) | 0x08,                  "x8" },
-    { rc(gpr) | 0x12,                  "x18" },
-    { rc(gpr) | 0x11,                  "x17" },
-    { rc(gpr) | 0x10,                  "x16" },
-    { rc(gpr) | 0x09,                  "x9" },
-    { rc(gpr) | 0x0a,                  "x10" },
-    { rc(gpr) | 0x0b,                  "x11" },
-    { rc(gpr) | 0x0c,                  "x12" },
-    { rc(gpr) | 0x0d,                  "x13" },
-    { rc(gpr) | 0x0e,                  "x14" },
-    { rc(gpr) | 0x0f,                  "x15" },
-    { rc(sav) | rc(gpr) | 0x13,                "x19" },
-    { rc(sav) | rc(gpr) | 0x14,                "x20" },
-    { rc(sav) | rc(gpr) | 0x15,                "x21" },
-    { rc(sav) | rc(gpr) | 0x16,                "x22" },
-    { rc(sav) | rc(gpr) | 0x17,                "x23" },
-    { rc(sav) | rc(gpr) | 0x18,                "x24" },
-    { rc(sav) | rc(gpr) | 0x19,                "x25" },
-    { rc(sav) | rc(gpr) | 0x1a,                "x26" },
-    { rc(sav) | rc(gpr) | 0x1b,                "x27" },
-    { rc(sav) | rc(gpr) | 0x1c,                "x28" },
-    { 0x1f,                            "sp" },
-    { 0x1e,                            "lr" },
-    { 0x1d,                            "fp" },
-    { rc(arg) | rc(gpr) | 0x07,                "x7" },
-    { rc(arg) | rc(gpr) | 0x06,                "x6" },
-    { rc(arg) | rc(gpr) | 0x05,                "x5" },
-    { rc(arg) | rc(gpr) | 0x04,                "x4" },
-    { rc(arg) | rc(gpr) | 0x03,                "x3" },
-    { rc(arg) | rc(gpr) | 0x02,                "x2" },
-    { rc(arg) | rc(gpr) | 0x01,                "x1" },
-    { rc(arg) | rc(gpr) | 0x00,                "x0" },
-    { rc(fpr) | 0x1f,                  "v31" },
-    { rc(fpr) | 0x1e,                  "v30" },
-    { rc(fpr) | 0x1d,                  "v29" },
-    { rc(fpr) | 0x1c,                  "v28" },
-    { rc(fpr) | 0x1b,                  "v27" },
-    { rc(fpr) | 0x1a,                  "v26" },
-    { rc(fpr) | 0x19,                  "v25" },
-    { rc(fpr) | 0x18,                  "v24" },
-    { rc(fpr) | 0x17,                  "v23" },
-    { rc(fpr) | 0x16,                  "v22" },
-    { rc(fpr) | 0x15,                  "v21" },
-    { rc(fpr) | 0x14,                  "v20" },
-    { rc(fpr) | 0x13,                  "v19" },
-    { rc(fpr) | 0x12,                  "v18" },
-    { rc(fpr) | 0x11,                  "v17" },
-    { rc(fpr) | 0x10,                  "v16" },
-    { rc(sav) | rc(fpr) | 0x08,                "v8" },
-    { rc(sav) | rc(fpr) | 0x09,                "v9" },
-    { rc(sav) | rc(fpr) | 0x0a,                "v10" },
-    { rc(sav) | rc(fpr) | 0x0b,                "v11" },
-    { rc(sav) | rc(fpr) | 0x0c,                "v12" },
-    { rc(sav) | rc(fpr) | 0x0d,                "v13" },
-    { rc(sav) | rc(fpr) | 0x0e,                "v14" },
-    { rc(sav) | rc(fpr) | 0x0f,                "v15" },
-    { rc(arg) | rc(fpr) | 0x07,                "v7" },
-    { rc(arg) | rc(fpr) | 0x06,                "v6" },
-    { rc(arg) | rc(fpr) | 0x05,                "v5" },
-    { rc(arg) | rc(fpr) | 0x04,                "v4" },
-    { rc(arg) | rc(fpr) | 0x03,                "v3" },
-    { rc(arg) | rc(fpr) | 0x02,                "v2" },
-    { rc(arg) | rc(fpr) | 0x01,                "v1" },
-    { rc(arg) | rc(fpr) | 0x00,                "v0" },
-    { _NOREG,                          "<none>" },
-};
-
-/*
- * Implementation
- */
-void
-jit_get_cpu(void)
-{
-}
-
-void
-_jit_init(jit_state_t *_jit)
-{
-    _jitc->reglen = jit_size(_rvs) - 1;
-}
-
-void
-_jit_prolog(jit_state_t *_jit)
-{
-    int32_t             offset;
-
-    if (_jitc->function)
-       jit_epilog();
-    assert(jit_regset_cmp_ui(&_jitc->regarg, 0) == 0);
-    jit_regset_set_ui(&_jitc->regsav, 0);
-    offset = _jitc->functions.offset;
-    if (offset >= _jitc->functions.length) {
-       jit_realloc((jit_pointer_t *)&_jitc->functions.ptr,
-                   _jitc->functions.length * sizeof(jit_function_t),
-                   (_jitc->functions.length + 16) * sizeof(jit_function_t));
-       _jitc->functions.length += 16;
-    }
-    _jitc->function = _jitc->functions.ptr + _jitc->functions.offset++;
-    _jitc->function->self.size = stack_framesize;
-    _jitc->function->self.argi = _jitc->function->self.argf =
-       _jitc->function->self.alen = 0;
-    _jitc->function->self.aoff = 0;
-    _jitc->function->self.call = jit_call_default;
-    jit_alloc((jit_pointer_t *)&_jitc->function->regoff,
-             _jitc->reglen * sizeof(int32_t));
-
-    /* _no_link here does not mean the jit_link() call can be removed
-     * by rewriting as:
-     * _jitc->function->prolog = jit_new_node(jit_code_prolog);
-     */
-    _jitc->function->prolog = jit_new_node_no_link(jit_code_prolog);
-    jit_link(_jitc->function->prolog);
-    _jitc->function->prolog->w.w = offset;
-    _jitc->function->epilog = jit_new_node_no_link(jit_code_epilog);
-    /* u:      label value
-     * v:      offset in blocks vector
-     * w:      offset in functions vector
-     */
-    _jitc->function->epilog->w.w = offset;
-
-    jit_regset_new(&_jitc->function->regset);
-}
-
-int32_t
-_jit_allocai(jit_state_t *_jit, int32_t length)
-{
-    assert(_jitc->function);
-    switch (length) {
-       case 0: case 1:                                         break;
-       case 2:         _jitc->function->self.aoff &= -2;       break;
-       case 3: case 4: _jitc->function->self.aoff &= -4;       break;
-       default:        _jitc->function->self.aoff &= -8;       break;
-    }
-    _jitc->function->self.aoff -= length;
-    if (!_jitc->realize) {
-       jit_inc_synth_ww(allocai, _jitc->function->self.aoff, length);
-       jit_dec_synth();
-    }
-    return (_jitc->function->self.aoff);
-}
-
-void
-_jit_allocar(jit_state_t *_jit, int32_t u, int32_t v)
-{
-    int32_t             r0, r1;
-    assert(_jitc->function);
-    jit_inc_synth_ww(allocar, u, v);
-    if (!_jitc->function->allocar) {
-       _jitc->function->aoffoff = jit_allocai(sizeof(int32_t));
-       _jitc->function->allocar = 1;
-    }
-    r0 = jit_get_reg(jit_class_gpr);
-    jit_negr(r0, v);
-    jit_andi(r0, r0, -16);
-    jit_ldxi_i(u, JIT_FP, _jitc->function->aoffoff);
-    jit_addr(u, u, r0);
-    /* Cannot "addr sp, sp, reg" because in this context "sp" is "[w|x]zr",
-     * the zero register */
-#if 0
-    jit_addr(JIT_SP, JIT_SP, r0);
-#else
-    r1 = jit_get_reg(jit_class_gpr);
-    /* note that "mov r1, sp" does not work, but the proper encoding
-     * can be triggered before actually emiting with "add r1, sp, 0" */
-    jit_addi(r1, JIT_SP, 0);
-    jit_addr(r1, r1, r0);
-    jit_addi(JIT_SP, r1, 0);
-    jit_unget_reg(r1);
-#endif
-    jit_stxi_i(_jitc->function->aoffoff, JIT_FP, u);
-    jit_unget_reg(r0);
-    jit_dec_synth();
-}
-
-void
-_jit_ret(jit_state_t *_jit)
-{
-    jit_node_t         *instr;
-    assert(_jitc->function);
-    jit_inc_synth(ret);
-    /* jump to epilog */
-    instr = jit_jmpi();
-    jit_patch_at(instr, _jitc->function->epilog);
-    jit_dec_synth();
-}
-
-void
-_jit_retr(jit_state_t *_jit, int32_t u)
-{
-    jit_inc_synth_w(retr, u);
-    if (JIT_RET != u)
-       jit_movr(JIT_RET, u);
-    jit_live(JIT_RET);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_reti(jit_state_t *_jit, jit_word_t u)
-{
-    jit_inc_synth_w(reti, u);
-    jit_movi(JIT_RET, u);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_retr_f(jit_state_t *_jit, int32_t u)
-{
-    jit_inc_synth_w(retr_f, u);
-    if (u != JIT_FRET)
-       jit_movr_f(JIT_FRET, u);
-    else
-       jit_live(JIT_FRET);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_reti_f(jit_state_t *_jit, jit_float32_t u)
-{
-    jit_inc_synth_f(reti_f, u);
-    jit_movi_f(JIT_FRET, u);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_retr_d(jit_state_t *_jit, int32_t u)
-{
-    jit_inc_synth_w(retr_d, u);
-    if (u != JIT_FRET)
-       jit_movr_d(JIT_FRET, u);
-    else
-       jit_live(JIT_FRET);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_reti_d(jit_state_t *_jit, jit_float64_t u)
-{
-    jit_inc_synth_d(reti_d, u);
-    jit_movi_d(JIT_FRET, u);
-    jit_ret();
-    jit_dec_synth();
-}
-
-void
-_jit_epilog(jit_state_t *_jit)
-{
-    assert(_jitc->function);
-    assert(_jitc->function->epilog->next == NULL);
-    jit_link(_jitc->function->epilog);
-    _jitc->function = NULL;
-}
-
-jit_bool_t
-_jit_arg_register_p(jit_state_t *_jit, jit_node_t *u)
-{
-    if (u->code == jit_code_arg)
-       return (jit_arg_reg_p(u->u.w));
-    assert(u->code == jit_code_arg_f || u->code == jit_code_arg_d);
-    return (jit_arg_f_reg_p(u->u.w));
-}
-
-void
-_jit_ellipsis(jit_state_t *_jit)
-{
-    jit_inc_synth(ellipsis);
-    if (_jitc->prepare) {
-       jit_link_prepare();
-       assert(!(_jitc->function->call.call & jit_call_varargs));
-       _jitc->function->call.call |= jit_call_varargs;
-    }
-    else {
-       jit_link_prolog();
-       assert(!(_jitc->function->self.call & jit_call_varargs));
-       _jitc->function->self.call |= jit_call_varargs;
-
-       /* Allocate va_list like object in the stack,
-        * with enough space to save all argument
-        * registers, and use fixed offsets for them. */
-       _jitc->function->vaoff = jit_allocai(sizeof(jit_va_list_t));
-
-       /* Initialize gp offset in save area. */
-       if (jit_arg_reg_p(_jitc->function->self.argi))
-           _jitc->function->vagp = (8 - _jitc->function->self.argi) * -8;
-       else
-           _jitc->function->vagp = 0;
-
-       /* Initialize fp offset in save area. */
-       if (jit_arg_f_reg_p(_jitc->function->self.argf))
-           _jitc->function->vafp = (8 - _jitc->function->self.argf) * -16;
-       else
-           _jitc->function->vafp = 0;
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_va_push(jit_state_t *_jit, int32_t u)
-{
-    jit_inc_synth_w(va_push, u);
-    jit_pushargr(u);
-    jit_dec_synth();
-}
-
-jit_node_t *
-_jit_arg(jit_state_t *_jit)
-{
-    jit_node_t         *node;
-    int32_t             offset;
-    assert(_jitc->function);
-    assert(!(_jitc->function->self.call & jit_call_varargs));
-    if (jit_arg_reg_p(_jitc->function->self.argi))
-       offset = _jitc->function->self.argi++;
-    else {
-       offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_word_t);
-    }
-    node = jit_new_node_ww(jit_code_arg, offset,
-                          ++_jitc->function->self.argn);
-    jit_link_prolog();
-    return (node);
-}
-
-jit_node_t *
-_jit_arg_f(jit_state_t *_jit)
-{
-    jit_node_t         *node;
-    int32_t             offset;
-    assert(_jitc->function);
-    assert(!(_jitc->function->self.call & jit_call_varargs));
-    if (jit_arg_f_reg_p(_jitc->function->self.argf))
-       offset = _jitc->function->self.argf++;
-    else {
-       offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_word_t);
-    }
-    node = jit_new_node_ww(jit_code_arg_f, offset,
-                          ++_jitc->function->self.argn);
-    jit_link_prolog();
-    return (node);
-}
-
-jit_node_t *
-_jit_arg_d(jit_state_t *_jit)
-{
-    jit_node_t         *node;
-    int32_t             offset;
-    assert(_jitc->function);
-    assert(!(_jitc->function->self.call & jit_call_varargs));
-    if (jit_arg_f_reg_p(_jitc->function->self.argf))
-       offset = _jitc->function->self.argf++;
-    else {
-       offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_word_t);
-    }
-    node = jit_new_node_ww(jit_code_arg_d, offset,
-                          ++_jitc->function->self.argn);
-    jit_link_prolog();
-    return (node);
-}
-
-void
-_jit_getarg_c(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_c, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_c(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_c(u, JIT_FP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_uc(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_uc, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_uc(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_uc(u, JIT_FP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_s(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_s, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_s(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_s(u, JIT_FP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_us(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_us, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_us(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_us(u, JIT_FP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_i(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_i, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_i(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_i(u, JIT_FP, v->u.w);
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_ui(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_ui, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_extr_ui(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_ui(u, JIT_FP, v->u.w);
-    jit_dec_synth();
-}
 
-void
-_jit_getarg_l(jit_state_t *_jit, int32_t u, jit_node_t *v)
+/* Return the signed BITS-wide field that starts at bit BASE of the
+   instruction word at LOC.  The word is shifted left as an unsigned
+   value, so no signed left shift (undefined once bits reach the sign
+   bit) takes place; the right shift of the resulting int32_t then
+   sign-extends the field, relying on an arithmetic right shift just
+   as the previous expression did.  */
+static inline int32_t
+read_offset(uint32_t *loc, uint8_t bits, uint8_t base)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(getarg_l, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr(u, JIT_RA0 - v->u.w);
-    else
-       jit_ldxi_l(u, JIT_FP, v->u.w);
-    jit_dec_synth();
+  return ((int32_t) (*loc << (32 - bits - base))) >> (32 - bits);
 }
 
-void
-_jit_putargr(jit_state_t *_jit, int32_t u, jit_node_t *v)
+/* Return nonzero if DIFF fits in a signed two's-complement field of
+   BITS bits, i.e. -2^(BITS-1) <= DIFF < 2^(BITS-1).  The bound is
+   built from a positive ptrdiff_t because left-shifting -1 is
+   undefined behavior.  */
+static inline int
+in_signed_range(ptrdiff_t diff, uint8_t bits)
 {
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargr, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr(JIT_RA0 - v->u.w, u);
-    else
-       jit_stxi(v->u.w, JIT_FP, u);
-    jit_dec_synth();
+  const ptrdiff_t limit = (ptrdiff_t) 1 << (bits - 1);
+  return -limit <= diff && diff < limit;
 }
 
-void
-_jit_putargi(jit_state_t *_jit, jit_word_t u, jit_node_t *v)
-{
-    int32_t            regno;
-    assert(v->code == jit_code_arg);
-    jit_inc_synth_wp(putargi, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movi(JIT_RA0 - v->u.w, u);
-    else {
-       regno = jit_get_reg(jit_class_gpr);
-       jit_movi(regno, u);
-       jit_stxi(v->u.w, JIT_FP, regno);
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
-
-void
-_jit_getarg_f(jit_state_t *_jit, int32_t u, jit_node_t *v)
+/* Store OFFSET into the BITS-wide field starting at bit BASE of the
+   instruction word at LOC.  The field must currently be zero and
+   OFFSET must fit in BITS signed bits (both asserted).  Returns the
+   offset that was stored: the function is declared int32_t and its
+   result is returned by the write_<name>_offset wrappers, so falling
+   off the end without a value was undefined behavior.  */
+static inline int32_t
+write_offset(uint32_t *loc, uint8_t bits, uint8_t base, ptrdiff_t offset)
 {
-    assert(v->code == jit_code_arg_f);
-    jit_inc_synth_wp(getarg_f, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr_f(u, JIT_FA0 - v->u.w);
-    else
-       jit_ldxi_f(u, JIT_FP, v->u.w);
-    jit_dec_synth();
+  ASSERT(read_offset(loc, bits, base) == 0);
+  ASSERT(in_signed_range(offset, bits));
+  /* Mask in unsigned arithmetic so the truncation to BITS bits is well defined. */
+  *loc |= (((uint32_t) offset) & (((uint32_t) 1 << bits) - 1)) << base;
+  return (int32_t) offset;
 }
 
-void
-_jit_putargr_f(jit_state_t *_jit, int32_t u, jit_node_t *v)
-{
-    assert(v->code == jit_code_arg_f);
-    jit_inc_synth_wp(putargr_f, u, v);
-    if (jit_arg_f_reg_p(v->u.w))
-       jit_movr_f(JIT_FA0 - v->u.w, u);
-    else
-       jit_stxi_f(v->u.w, JIT_FP, u);
-    jit_dec_synth();
-}
+#define DEFINE_PATCHABLE_INSTRUCTION(name, bits, base, RELOC, rsh)      \
+  static const uint8_t name##_offset_bits = bits; /* field width */     \
+  static const uint8_t name##_offset_base = base; /* lowest field bit */ \
+  static int32_t /* decode the offset field at LOC via read_offset */   \
+  read_##name##_offset(uint32_t *loc)                                   \
+  {                                                                     \
+    return read_offset(loc, name##_offset_bits, name##_offset_base);    \
+  }                                                                     \
+  static int /* nonzero if DIFF passes in_signed_range for the field */ \
+  in_##name##_range(ptrdiff_t diff)                                     \
+  {                                                                     \
+    return in_signed_range(diff, name##_offset_bits);                   \
+  }                                                                     \
+  static int32_t /* store DIFF into the field at LOC via write_offset */ \
+  write_##name##_offset(uint32_t *loc, ptrdiff_t diff)                  \
+  {                                                                     \
+    return write_offset(loc, name##_offset_bits, name##_offset_base, diff); \
+  }                                                                     \
+  static jit_reloc_t /* emit INST and return the relocation made for it */ \
+  emit_##name(jit_state_t *_jit, uint32_t inst)                         \
+  {                                                                     \
+    jit_reloc_t ret = jit_reloc (_jit, JIT_RELOC_##RELOC, 0,            \
+                                 _jit->pc.uc, _jit->pc.uc, rsh);        \
+    add_pending_literal(_jit, ret, name##_offset_bits);                 \
+    emit_u32(_jit, inst); /* reloc is created before the word is emitted */ \
+    return ret;                                                         \
+  }
 
-void
-_jit_putargi_f(jit_state_t *_jit, jit_float32_t u, jit_node_t *v)
-{
-    int32_t            regno;
-    assert(v->code == jit_code_arg_f);
-    jit_inc_synth_fp(putargi_f, u, v);
-    if (jit_arg_f_reg_p(v->u.w))
-       jit_movi_f(JIT_FA0 - v->u.w, u);
-    else {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_f(regno, u);
-       jit_stxi_f(v->u.w, JIT_FP, regno);
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
+/* Each instruction kind must be tagged with its own relocation kind:
+   jmp (26-bit field at bit 0) with JMP_WITH_VENEER and jcc (19-bit
+   field at bit 5) with JCC_WITH_VENEER.  The two were swapped.  */
+DEFINE_PATCHABLE_INSTRUCTION(jmp, 26, 0, JMP_WITH_VENEER, 2);
+DEFINE_PATCHABLE_INSTRUCTION(jcc, 19, 5, JCC_WITH_VENEER, 2);
+DEFINE_PATCHABLE_INSTRUCTION(load_from_pool, 19, 5, LOAD_FROM_POOL, 2);
 
-void
-_jit_getarg_d(jit_state_t *_jit, int32_t u, jit_node_t *v)
+struct veneer   /* two instruction words followed by a 64-bit address */
 {
-    assert(v->code == jit_code_arg_d);
-    jit_inc_synth_wp(getarg_d, u, v);
-    if (jit_arg_f_reg_p(v->u.w))
-       jit_movr_d(u, JIT_FA0 - v->u.w);
-    else
-       jit_ldxi_d(u, JIT_FP, v->u.w);
-    jit_dec_synth();
-}
+  uint32_t ldr;    /* presumably the load of `addr` into a register -- confirm */
+  uint32_t br;     /* presumably the branch through that register -- confirm */
+  uint64_t addr;   /* target address; the field patch_veneer writes */
+};
 
-void
-_jit_putargr_d(jit_state_t *_jit, int32_t u, jit_node_t *v)
+/* Point the veneer located at LOC at ADDR by storing ADDR in its
+   address slot.  The veneer pointer must come from LOC; it was
+   previously initialised from itself, leaving LOC unused and the
+   store going through an indeterminate pointer.  */
+static void
+patch_veneer(uint32_t *loc, jit_pointer_t addr)
 {
-    assert(v->code == jit_code_arg_d);
-    jit_inc_synth_wp(putargr_d, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movr_d(JIT_FA0 - v->u.w, u);
-    else
-       jit_stxi_d(v->u.w, JIT_FP, u);
-    jit_dec_synth();
+  struct veneer *v = (struct veneer*) loc;
+  v->addr = (uint64_t) addr;
 }
 
-void
-_jit_putargi_d(jit_state_t *_jit, jit_float64_t u, jit_node_t *v)
-{
-    int32_t            regno;
-    assert(v->code == jit_code_arg_d);
-    jit_inc_synth_dp(putargi_d, u, v);
-    if (jit_arg_reg_p(v->u.w))
-       jit_movi_d(JIT_FA0 - v->u.w, u);
-    else {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_d(regno, u);
-       jit_stxi_d(v->u.w, JIT_FP, regno);
-       jit_unget_reg(regno);
-    }
-    jit_dec_synth();
-}
+#include "aarch64-cpu.c"
+#include "aarch64-fpu.c"
 
-void
-_jit_pushargr(jit_state_t *_jit, int32_t u)
-{
-    assert(_jitc->function);
-    jit_inc_synth_w(pushargr, u);
-    jit_link_prepare();
-    if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movr(JIT_RA0 - _jitc->function->call.argi, u);
-       ++_jitc->function->call.argi;
-    }
-    else {
-       jit_stxi(_jitc->function->call.size, JIT_SP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-    jit_dec_synth();
-}
+static const jit_gpr_t abi_gpr_args[] = {   /* handed out in this order by next_abi_arg */
+  _X0, _X1, _X2, _X3, _X4, _X5, _X6, _X7
+};
 
-void
-_jit_pushargi(jit_state_t *_jit, jit_word_t u)
-{
-    int32_t             regno;
-    assert(_jitc->function);
-    jit_inc_synth_w(pushargi, u);
-    jit_link_prepare();
-    if (jit_arg_reg_p(_jitc->function->call.argi)) {
-       jit_movi(JIT_RA0 - _jitc->function->call.argi, u);
-       ++_jitc->function->call.argi;
-    }
-    else {
-       regno = jit_get_reg(jit_class_gpr);
-       jit_movi(regno, u);
-       jit_stxi(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-    jit_dec_synth();
-}
+static const jit_fpr_t abi_fpr_args[] = {   /* handed out in this order by next_abi_arg */
+  _D0, _D1, _D2, _D3, _D4, _D5, _D6, _D7
+};
 
-void
-_jit_pushargr_f(jit_state_t *_jit, int32_t u)
-{
-    assert(_jitc->function);
-    jit_inc_synth_w(pushargr_f, u);
-    jit_link_prepare();
-    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
-       jit_movr_f(JIT_FA0 - _jitc->function->call.argf, u);
-       ++_jitc->function->call.argf;
-    }
-    else {
-       jit_stxi_f(_jitc->function->call.size, JIT_SP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-    jit_dec_synth();
-}
+static const int abi_gpr_arg_count = sizeof(abi_gpr_args) / sizeof(abi_gpr_args[0]);
+static const int abi_fpr_arg_count = sizeof(abi_fpr_args) / sizeof(abi_fpr_args[0]);
 
-void
-_jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
+struct abi_arg_iterator   /* cursor used by reset_abi_arg_iterator / next_abi_arg */
 {
-    int32_t            regno;
-    assert(_jitc->function);
-    jit_inc_synth_f(pushargi_f, u);
-    jit_link_prepare();
-    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
-       jit_movi_f(JIT_FA0 - _jitc->function->call.argf, u);
-       ++_jitc->function->call.argf;
-    }
-    else {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_f(regno, u);
-       jit_stxi_f(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-    jit_dec_synth();
-}
+  const jit_operand_t *args;   /* operand array being walked */
+  size_t argc;                 /* number of entries in args */
 
-void
-_jit_pushargr_d(jit_state_t *_jit, int32_t u)
-{
-    assert(_jitc->function);
-    jit_inc_synth_w(pushargr_d, u);
-    jit_link_prepare();
-    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
-       jit_movr_d(JIT_FA0 - _jitc->function->call.argf, u);
-       ++_jitc->function->call.argf;
-    }
-    else {
-       jit_stxi_d(_jitc->function->call.size, JIT_SP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-    jit_dec_synth();
-}
+  size_t arg_idx;              /* index of the next operand in args */
+  size_t gpr_idx;              /* next unused entry of abi_gpr_args */
+  size_t fpr_idx;              /* next unused entry of abi_fpr_args */
+  size_t stack_size;           /* bytes of stack slots handed out so far */
+  size_t stack_padding;        /* zeroed by reset; not updated by next_abi_arg */
+};
 
-void
-_jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
-{
-    int32_t            regno;
-    assert(_jitc->function);
-    jit_inc_synth_d(pushargi_d, u);
-    jit_link_prepare();
-    if (jit_arg_f_reg_p(_jitc->function->call.argf)) {
-       jit_movi_d(JIT_FA0 - _jitc->function->call.argf, u);
-       ++_jitc->function->call.argf;
-    }
-    else {
-       regno = jit_get_reg(jit_class_fpr);
-       jit_movi_d(regno, u);
-       jit_stxi_d(_jitc->function->call.size, JIT_SP, regno);
-       jit_unget_reg(regno);
-       _jitc->function->call.size += sizeof(jit_word_t);
-    }
-    jit_dec_synth();
-}
+static size_t page_size;
 
 jit_bool_t
-_jit_regarg_p(jit_state_t *_jit, jit_node_t *node, int32_t regno)
-{
-    int32_t            spec;
-    spec = jit_class(_rvs[regno].spec);
-    if (spec & jit_class_arg) {
-       regno = JIT_RA0 - regno;
-       if (regno >= 0 && regno < node->v.w)
-           return (1);
-       if (spec & jit_class_fpr) {
-           regno = JIT_FA0 - regno;
-           if (regno >= 0 && regno < node->w.w)
-               return (1);
-       }
-    }
-
-    return (0);
-}
-
-void
-_jit_finishr(jit_state_t *_jit, int32_t r0)
-{
-    jit_node_t         *node;
-    assert(_jitc->function);
-    jit_inc_synth_w(finishr, r0);
-    if (_jitc->function->self.alen < _jitc->function->call.size)
-       _jitc->function->self.alen = _jitc->function->call.size;
-    node = jit_callr(r0);
-    node->v.w = _jitc->function->self.argi;
-    node->w.w = _jitc->function->call.argf;
-    _jitc->function->call.argi = _jitc->function->call.argf =
-       _jitc->function->call.size = 0;
-    _jitc->prepare = 0;
-    jit_dec_synth();
-}
-
-jit_node_t *
-_jit_finishi(jit_state_t *_jit, jit_pointer_t i0)
-{
-    jit_node_t         *node;
-    assert(_jitc->function);
-    jit_inc_synth_w(finishi, (jit_word_t)i0);
-    if (_jitc->function->self.alen < _jitc->function->call.size)
-       _jitc->function->self.alen = _jitc->function->call.size;
-    node = jit_calli(i0);
-    node->v.w = _jitc->function->call.argi;
-    node->w.w = _jitc->function->call.argf;
-    _jitc->function->call.argi = _jitc->function->call.argf =
-       _jitc->function->call.size = 0;
-    _jitc->prepare = 0;
-    jit_dec_synth();
-    return (node);
-}
-
-void
-_jit_retval_c(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_c, r0);
-    jit_extr_c(r0, JIT_RET);
-    jit_dec_synth();
-}
-
-void
-_jit_retval_uc(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_uc, r0);
-    jit_extr_uc(r0, JIT_RET);
-    jit_dec_synth();
-}
-
-void
-_jit_retval_s(jit_state_t *_jit, int32_t r0)
+jit_get_cpu(void)
 {
-    jit_inc_synth_w(retval_s, r0);
-    jit_extr_s(r0, JIT_RET);
-    jit_dec_synth();
+  /* Cache the system page size in page_size.  sysconf() returns a long
+     and reports failure as -1; stored straight into the size_t that
+     would become a huge bogus size, so report failure (0) instead.
+     Returns 1 on success.  */
+  long size = sysconf(_SC_PAGE_SIZE);
+  if (size <= 0)
+    return 0;
+  page_size = (size_t) size;
+  return 1;
 }
 
-void
-_jit_retval_us(jit_state_t *_jit, int32_t r0)
+jit_bool_t   /* per-state initialisation hook; nothing to do here */
+jit_init(jit_state_t *_jit)
 {
-    jit_inc_synth_w(retval_us, r0);
-    jit_extr_us(r0, JIT_RET);
-    jit_dec_synth();
+  return 1;   /* always returns 1; _jit is not used */
 }
 
-void
-_jit_retval_i(jit_state_t *_jit, int32_t r0)
+static size_t   /* size reported for an operand of kind ABI */
+jit_operand_abi_sizeof(enum jit_operand_abi abi)
 {
-    jit_inc_synth_w(retval_i, r0);
-    jit_extr_i(r0, JIT_RET);
-    jit_dec_synth();
+  return 8;   /* the same value for every ABI kind; abi is not inspected */
 }
 
-void
-_jit_retval_ui(jit_state_t *_jit, int32_t r0)
+static size_t   /* initial frame size reported by this backend */
+jit_initial_frame_size (void)
 {
-    jit_inc_synth_w(retval_ui, r0);
-    jit_extr_ui(r0, JIT_RET);
-    jit_dec_synth();
+  return 0;   /* constant zero */
 }
 
-void
-_jit_retval_l(jit_state_t *_jit, int32_t r0)
+static void   /* start ITER over the ARGC operands in ARGS */
+reset_abi_arg_iterator(struct abi_arg_iterator *iter, size_t argc,
+                       const jit_operand_t *args)
 {
-    jit_inc_synth_w(retval_l, r0);
-    if (r0 != JIT_RET)
-       jit_movr(r0, JIT_RET);
-    jit_dec_synth();
+  memset(iter, 0, sizeof *iter);   /* all indices and stack counters start at zero */
+  iter->argc = argc;
+  iter->args = args;
 }
 
-void
-_jit_retval_f(jit_state_t *_jit, int32_t r0)
+static void   /* store the location of the next argument in *ARG and advance ITER */
+next_abi_arg(struct abi_arg_iterator *iter, jit_operand_t *arg)
 {
-    jit_inc_synth_w(retval_f, r0);
-    if (r0 != JIT_FRET)
-       jit_movr_f(r0, JIT_FRET);
-    jit_dec_synth();
+  ASSERT(iter->arg_idx < iter->argc);   /* must not be called past the last argument */
+  enum jit_operand_abi abi = iter->args[iter->arg_idx].abi;
+  if (is_gpr_arg(abi) && iter->gpr_idx < abi_gpr_arg_count) {
+    *arg = jit_operand_gpr (abi, abi_gpr_args[iter->gpr_idx++]);   /* next unused GPR */
+  } else if (is_fpr_arg(abi) && iter->fpr_idx < abi_fpr_arg_count) {
+    *arg = jit_operand_fpr (abi, abi_fpr_args[iter->fpr_idx++]);   /* next unused FPR */
+  } else {   /* no register available for this kind: SP-relative slot */
+    *arg = jit_operand_mem (abi, JIT_SP, iter->stack_size);
+    iter->stack_size += 8;   /* every stack argument advances the offset by 8 bytes */
+  }
+  iter->arg_idx++;
 }
 
 void
-_jit_retval_d(jit_state_t *_jit, int32_t r0)
-{
-    jit_inc_synth_w(retval_d, r0);
-    if (r0 != JIT_FRET)
-       jit_movr_d(r0, JIT_FRET);
-    jit_dec_synth();
-}
-
-jit_pointer_t
-_emit_code(jit_state_t *_jit)
-{
-    jit_node_t         *node;
-    jit_node_t         *temp;
-    jit_word_t          word;
-    jit_word_t          value;
-    int32_t             offset;
-    struct {
-       jit_node_t      *node;
-       uint8_t *data;
-       jit_word_t       word;
-#if DEVEL_DISASSEMBLER
-       jit_word_t       prevw;
-#endif
-       int32_t  const_offset;
-       int32_t  patch_offset;
-    } undo;
-#if DEVEL_DISASSEMBLER
-    jit_word_t          prevw;
-#endif
-
-    _jitc->function = NULL;
-
-    jit_reglive_setup();
-
-    undo.word = 0;
-    undo.node = NULL;
-    undo.const_offset = undo.patch_offset = 0;
-#  define assert_data(node)            /**/
-#define case_rr(name, type)                                            \
-           case jit_code_##name##r##type:                              \
-               name##r##type(rn(node->u.w), rn(node->v.w));            \
-               break
-#define case_rw(name, type)                                            \
-           case jit_code_##name##i##type:                              \
-               name##i##type(rn(node->u.w), node->v.w);                \
-               break
-#define case_wr(name, type)                                            \
-           case jit_code_##name##i##type:                              \
-               name##i##type(node->u.w, rn(node->v.w));                \
-               break
-#define case_rrr(name, type)                                           \
-           case jit_code_##name##r##type:                              \
-               name##r##type(rn(node->u.w),                            \
-                             rn(node->v.w), rn(node->w.w));            \
-               break
-#define case_rrrr(name, type)                                          \
-           case jit_code_##name##r##type:                              \
-               name##r##type(rn(node->u.q.l), rn(node->u.q.h),         \
-                             rn(node->v.w), rn(node->w.w));            \
-               break
-#define case_rrw(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               name##i##type(rn(node->u.w), rn(node->v.w), node->w.w); \
-               break
-#define case_rrrw(name, type)                                          \
-           case jit_code_##name##i##type:                              \
-               name##i##type(rn(node->u.q.l), rn(node->u.q.h),         \
-                             rn(node->v.w), node->w.w);                \
-               break
-#define case_rrf(name)                                                 \
-           case jit_code_##name##i_f:                                  \
-               assert_data(node);                                      \
-               name##i_f(rn(node->u.w), rn(node->v.w), node->w.f);     \
-               break
-#define case_rrd(name)                                                 \
-           case jit_code_##name##i_d:                                  \
-               assert_data(node);                                      \
-               name##i_d(rn(node->u.w), rn(node->v.w), node->w.d);     \
-               break
-#define case_wrr(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               name##i##type(node->u.w, rn(node->v.w), rn(node->w.w)); \
-               break
-#define case_brr(name, type)                                           \
-           case jit_code_##name##r##type:                              \
-               temp = node->u.n;                                       \
-               assert(temp->code == jit_code_label ||                  \
-                      temp->code == jit_code_epilog);                  \
-               if (temp->flag & jit_flag_patch)                        \
-                   name##r##type(temp->u.w, rn(node->v.w),             \
-                                 rn(node->w.w));                       \
-               else {                                                  \
-                   word = name##r##type(_jit->pc.w,                    \
-                                        rn(node->v.w), rn(node->w.w)); \
-                   patch(word, node);                                  \
-               }                                                       \
-               break
-#define case_brw(name, type)                                           \
-           case jit_code_##name##i##type:                              \
-               temp = node->u.n;                                       \
-               assert(temp->code == jit_code_label ||                  \
-                      temp->code == jit_code_epilog);                  \
-               if (temp->flag & jit_flag_patch)                        \
-                   name##i##type(temp->u.w,                            \
-                                 rn(node->v.w), node->w.w);            \
-               else {                                                  \
-                   word = name##i##type(_jit->pc.w,                    \
-                                        rn(node->v.w), node->w.w);     \
-                   patch(word, node);                                  \
-               }                                                       \
-               break;
-#define case_brf(name)                                                 \
-           case jit_code_##name##i_f:                                  \
-               temp = node->u.n;                                       \
-               assert(temp->code == jit_code_label ||                  \
-                      temp->code == jit_code_epilog);                  \
-               if (temp->flag & jit_flag_patch)                        \
-                   name##i_f(temp->u.w, rn(node->v.w), node->w.f);     \
-               else {                                                  \
-                   word = name##i_f(_jit->pc.w, rn(node->v.w),         \
-                                    node->w.f);                        \
-                   patch(word, node);                                  \
-               }                                                       \
-               break
-#define case_brd(name)                                                 \
-           case jit_code_##name##i_d:                                  \
-               temp = node->u.n;                                       \
-               assert(temp->code == jit_code_label ||                  \
-                      temp->code == jit_code_epilog);                  \
-               if (temp->flag & jit_flag_patch)                        \
-                   name##i_d(temp->u.w, rn(node->v.w), node->w.d);     \
-               else {                                                  \
-                   word = name##i_d(_jit->pc.w, rn(node->v.w),         \
-                                    node->w.d);                        \
-                   patch(word, node);                                  \
-               }                                                       \
-               break
-#if DEVEL_DISASSEMBLER
-    prevw = _jit->pc.w;
-#endif
-    for (node = _jitc->head; node; node = node->next) {
-       if (_jit->pc.uc >= _jitc->code.end)
-           return (NULL);
-
-#if DEVEL_DISASSEMBLER
-       node->offset = (jit_uword_t)_jit->pc.w - (jit_uword_t)prevw;
-       prevw = _jit->pc.w;
-#endif
-       value = jit_classify(node->code);
-       jit_regarg_set(node, value);
-       switch (node->code) {
-           case jit_code_align:
-               assert(!(node->u.w & (node->u.w - 1)) &&
-                      node->u.w <= sizeof(jit_word_t));
-               if (node->u.w == sizeof(jit_word_t) &&
-                   (word = _jit->pc.w & (sizeof(jit_word_t) - 1)))
-                   nop(sizeof(jit_word_t) - word);
-               break;
-           case jit_code_note:         case jit_code_name:
-               node->u.w = _jit->pc.w;
-               break;
-           case jit_code_label:
-               /* remember label is defined */
-               node->flag |= jit_flag_patch;
-               node->u.w = _jit->pc.w;
-               break;
-               case_rrr(add,);
-               case_rrw(add,);
-               case_rrr(addc,);
-               case_rrw(addc,);
-               case_rrr(addx,);
-               case_rrw(addx,);
-               case_rrr(sub,);
-               case_rrw(sub,);
-               case_rrr(subc,);
-               case_rrw(subc,);
-               case_rrr(subx,);
-               case_rrw(subx,);
-               case_rrw(rsb,);
-               case_rrr(mul,);
-               case_rrw(mul,);
-               case_rrrr(qmul,);
-               case_rrrw(qmul,);
-               case_rrrr(qmul, _u);
-               case_rrrw(qmul, _u);
-               case_rrr(div,);
-               case_rrw(div,);
-               case_rrr(div, _u);
-               case_rrw(div, _u);
-               case_rrrr(qdiv,);
-               case_rrrw(qdiv,);
-               case_rrrr(qdiv, _u);
-               case_rrrw(qdiv, _u);
-               case_rrr(rem,);
-               case_rrw(rem,);
-               case_rrr(rem, _u);
-               case_rrw(rem, _u);
-               case_rrr(lsh,);
-               case_rrw(lsh,);
-               case_rrr(rsh,);
-               case_rrw(rsh,);
-               case_rrr(rsh, _u);
-               case_rrw(rsh, _u);
-               case_rr(neg,);
-               case_rr(com,);
-               case_rrr(and,);
-               case_rrw(and,);
-               case_rrr(or,);
-               case_rrw(or,);
-               case_rrr(xor,);
-               case_rrw(xor,);
-               case_rr(trunc, _f_i);
-               case_rr(trunc, _d_i);
-               case_rr(trunc, _f_l);
-               case_rr(trunc, _d_l);
-               case_rr(ld, _c);
-               case_rw(ld, _c);
-               case_rr(ld, _uc);
-               case_rw(ld, _uc);
-               case_rr(ld, _s);
-               case_rw(ld, _s);
-               case_rr(ld, _us);
-               case_rw(ld, _us);
-               case_rr(ld, _i);
-               case_rw(ld, _i);
-               case_rr(ld, _ui);
-               case_rw(ld, _ui);
-               case_rr(ld, _l);
-               case_rw(ld, _l);
-               case_rrr(ldx, _c);
-               case_rrw(ldx, _c);
-               case_rrr(ldx, _uc);
-               case_rrw(ldx, _uc);
-               case_rrr(ldx, _s);
-               case_rrw(ldx, _s);
-               case_rrr(ldx, _us);
-               case_rrw(ldx, _us);
-               case_rrr(ldx, _i);
-               case_rrw(ldx, _i);
-               case_rrr(ldx, _ui);
-               case_rrw(ldx, _ui);
-               case_rrr(ldx, _l);
-               case_rrw(ldx, _l);
-               case_rr(st, _c);
-               case_wr(st, _c);
-               case_rr(st, _s);
-               case_wr(st, _s);
-               case_rr(st, _i);
-               case_wr(st, _i);
-               case_rr(st, _l);
-               case_wr(st, _l);
-               case_rrr(stx, _c);
-               case_wrr(stx, _c);
-               case_rrr(stx, _s);
-               case_wrr(stx, _s);
-               case_rrr(stx, _i);
-               case_wrr(stx, _i);
-               case_rrr(stx, _l);
-               case_wrr(stx, _l);
-               case_rr(hton, _us);
-               case_rr(hton, _ui);
-               case_rr(hton, _ul);
-               case_rr(ext, _c);
-               case_rr(ext, _uc);
-               case_rr(ext, _s);
-               case_rr(ext, _us);
-               case_rr(ext, _i);
-               case_rr(ext, _ui);
-               case_rr(mov,);
-           case jit_code_movi:
-               if (node->flag & jit_flag_node) {
-                   temp = node->v.n;
-                   if (temp->code == jit_code_data ||
-                       (temp->code == jit_code_label &&
-                        (temp->flag & jit_flag_patch)))
-                       movi(rn(node->u.w), temp->u.w);
-                   else {
-                       assert(temp->code == jit_code_label ||
-                              temp->code == jit_code_epilog);
-                       word = movi_p(rn(node->u.w), temp->u.w);
-                       patch(word, node);
-                   }
-               }
-               else
-                   movi(rn(node->u.w), node->v.w);
-               break;
-               case_rrr(lt,);
-               case_rrw(lt,);
-               case_rrr(lt, _u);
-               case_rrw(lt, _u);
-               case_rrr(le,);
-               case_rrw(le,);
-               case_rrr(le, _u);
-               case_rrw(le, _u);
-               case_rrr(eq,);
-               case_rrw(eq,);
-               case_rrr(ge,);
-               case_rrw(ge,);
-               case_rrr(ge, _u);
-               case_rrw(ge, _u);
-               case_rrr(gt,);
-               case_rrw(gt,);
-               case_rrr(gt, _u);
-               case_rrw(gt, _u);
-               case_rrr(ne,);
-               case_rrw(ne,);
-               case_brr(blt,);
-               case_brw(blt,);
-               case_brr(blt, _u);
-               case_brw(blt, _u);
-               case_brr(ble,);
-               case_brw(ble,);
-               case_brr(ble, _u);
-               case_brw(ble, _u);
-               case_brr(beq,);
-               case_brw(beq,);
-               case_brr(bge,);
-               case_brw(bge,);
-               case_brr(bge, _u);
-               case_brw(bge, _u);
-               case_brr(bgt,);
-               case_brw(bgt,);
-               case_brr(bgt, _u);
-               case_brw(bgt, _u);
-               case_brr(bne,);
-               case_brw(bne,);
-               case_brr(boadd,);
-               case_brw(boadd,);
-               case_brr(boadd, _u);
-               case_brw(boadd, _u);
-               case_brr(bxadd,);
-               case_brw(bxadd,);
-               case_brr(bxadd, _u);
-               case_brw(bxadd, _u);
-               case_brr(bosub,);
-               case_brw(bosub,);
-               case_brr(bosub, _u);
-               case_brw(bosub, _u);
-               case_brr(bxsub,);
-               case_brw(bxsub,);
-               case_brr(bxsub, _u);
-               case_brw(bxsub, _u);
-               case_brr(bms,);
-               case_brw(bms,);
-               case_brr(bmc,);
-               case_brw(bmc,);
-               case_rrr(add, _f);
-               case_rrf(add);
-               case_rrr(sub, _f);
-               case_rrf(sub);
-               case_rrf(rsb);
-               case_rrr(mul, _f);
-               case_rrf(mul);
-               case_rrr(div, _f);
-               case_rrf(div);
-               case_rr(abs, _f);
-               case_rr(neg, _f);
-               case_rr(sqrt, _f);
-               case_rr(ext, _f);
-               case_rr(ld, _f);
-               case_rw(ld, _f);
-               case_rrr(ldx, _f);
-               case_rrw(ldx, _f);
-               case_rr(st, _f);
-               case_wr(st, _f);
-               case_rrr(stx, _f);
-               case_wrr(stx, _f);
-               case_rr(mov, _f);
-           case jit_code_movi_f:
-               assert_data(node);
-               movi_f(rn(node->u.w), node->v.f);
-               break;
-               case_rr(ext, _d_f);
-               case_rrr(lt, _f);
-               case_rrf(lt);
-               case_rrr(le, _f);
-               case_rrf(le);
-               case_rrr(eq, _f);
-               case_rrf(eq);
-               case_rrr(ge, _f);
-               case_rrf(ge);
-               case_rrr(gt, _f);
-               case_rrf(gt);
-               case_rrr(ne, _f);
-               case_rrf(ne);
-               case_rrr(unlt, _f);
-               case_rrf(unlt);
-               case_rrr(unle, _f);
-               case_rrf(unle);
-               case_rrr(uneq, _f);
-               case_rrf(uneq);
-               case_rrr(unge, _f);
-               case_rrf(unge);
-               case_rrr(ungt, _f);
-               case_rrf(ungt);
-               case_rrr(ltgt, _f);
-               case_rrf(ltgt);
-               case_rrr(ord, _f);
-               case_rrf(ord);
-               case_rrr(unord, _f);
-               case_rrf(unord);
-               case_brr(blt, _f);
-               case_brf(blt);
-               case_brr(ble, _f);
-               case_brf(ble);
-               case_brr(beq, _f);
-               case_brf(beq);
-               case_brr(bge, _f);
-               case_brf(bge);
-               case_brr(bgt, _f);
-               case_brf(bgt);
-               case_brr(bne, _f);
-               case_brf(bne);
-               case_brr(bunlt, _f);
-               case_brf(bunlt);
-               case_brr(bunle, _f);
-               case_brf(bunle);
-               case_brr(buneq, _f);
-               case_brf(buneq);
-               case_brr(bunge, _f);
-               case_brf(bunge);
-               case_brr(bungt, _f);
-               case_brf(bungt);
-               case_brr(bltgt, _f);
-               case_brf(bltgt);
-               case_brr(bord, _f);
-               case_brf(bord);
-               case_brr(bunord, _f);
-               case_brf(bunord);
-               case_rrr(add, _d);
-               case_rrd(add);
-               case_rrr(sub, _d);
-               case_rrd(sub);
-               case_rrd(rsb);
-               case_rrr(mul, _d);
-               case_rrd(mul);
-               case_rrr(div, _d);
-               case_rrd(div);
-               case_rr(abs, _d);
-               case_rr(neg, _d);
-               case_rr(sqrt, _d);
-               case_rr(ext, _d);
-               case_rr(ld, _d);
-               case_rw(ld, _d);
-               case_rrr(ldx, _d);
-               case_rrw(ldx, _d);
-               case_rr(st, _d);
-               case_wr(st, _d);
-               case_rrr(stx, _d);
-               case_wrr(stx, _d);
-               case_rr(mov, _d);
-           case jit_code_movi_d:
-               assert_data(node);
-               movi_d(rn(node->u.w), node->v.d);
-               break;
-               case_rr(ext, _f_d);
-               case_rrr(lt, _d);
-               case_rrd(lt);
-               case_rrr(le, _d);
-               case_rrd(le);
-               case_rrr(eq, _d);
-               case_rrd(eq);
-               case_rrr(ge, _d);
-               case_rrd(ge);
-               case_rrr(gt, _d);
-               case_rrd(gt);
-               case_rrr(ne, _d);
-               case_rrd(ne);
-               case_rrr(unlt, _d);
-               case_rrd(unlt);
-               case_rrr(unle, _d);
-               case_rrd(unle);
-               case_rrr(uneq, _d);
-               case_rrd(uneq);
-               case_rrr(unge, _d);
-               case_rrd(unge);
-               case_rrr(ungt, _d);
-               case_rrd(ungt);
-               case_rrr(ltgt, _d);
-               case_rrd(ltgt);
-               case_rrr(ord, _d);
-               case_rrd(ord);
-               case_rrr(unord, _d);
-               case_rrd(unord);
-               case_brr(blt, _d);
-               case_brd(blt);
-               case_brr(ble, _d);
-               case_brd(ble);
-               case_brr(beq, _d);
-               case_brd(beq);
-               case_brr(bge, _d);
-               case_brd(bge);
-               case_brr(bgt, _d);
-               case_brd(bgt);
-               case_brr(bne, _d);
-               case_brd(bne);
-               case_brr(bunlt, _d);
-               case_brd(bunlt);
-               case_brr(bunle, _d);
-               case_brd(bunle);
-               case_brr(buneq, _d);
-               case_brd(buneq);
-               case_brr(bunge, _d);
-               case_brd(bunge);
-               case_brr(bungt, _d);
-               case_brd(bungt);
-               case_brr(bltgt, _d);
-               case_brd(bltgt);
-               case_brr(bord, _d);
-               case_brd(bord);
-               case_brr(bunord, _d);
-               case_brd(bunord);
-           case jit_code_jmpr:
-               jmpr(rn(node->u.w));
-               break;
-           case jit_code_jmpi:
-               if (node->flag & jit_flag_node) {
-                   temp = node->u.n;
-                   assert(temp->code == jit_code_label ||
-                          temp->code == jit_code_epilog);
-                   if (temp->flag & jit_flag_patch)
-                       jmpi(temp->u.w);
-                   else {
-                       word = jmpi_p(_jit->pc.w);
-                       patch(word, node);
-                   }
-               }
-               else
-                   jmpi(node->u.w);
-               break;
-           case jit_code_callr:
-               callr(rn(node->u.w));
-               break;
-           case jit_code_calli:
-               if (node->flag & jit_flag_node) {
-                   temp = node->u.n;
-                   assert(temp->code == jit_code_label ||
-                          temp->code == jit_code_epilog);
-                   if (temp->flag & jit_flag_patch)
-                       calli(temp->u.w);
-                   else {
-                       word = calli_p(_jit->pc.w);
-                       patch(word, node);
-                   }
-               }
-               else
-                   calli(node->u.w);
-               break;
-           case jit_code_prolog:
-               _jitc->function = _jitc->functions.ptr + node->w.w;
-               undo.node = node;
-               undo.word = _jit->pc.w;
-#if DEVEL_DISASSEMBLER
-               undo.prevw = prevw;
-#endif
-               undo.patch_offset = _jitc->patches.offset;
-           restart_function:
-               _jitc->again = 0;
-               prolog(node);
-               break;
-           case jit_code_epilog:
-               assert(_jitc->function == _jitc->functions.ptr + node->w.w);
-               if (_jitc->again) {
-                   for (temp = undo.node->next;
-                        temp != node; temp = temp->next) {
-                       if (temp->code == jit_code_label ||
-                           temp->code == jit_code_epilog)
-                           temp->flag &= ~jit_flag_patch;
-                   }
-                   temp->flag &= ~jit_flag_patch;
-                   node = undo.node;
-                   _jit->pc.w = undo.word;
-#if DEVEL_DISASSEMBLER
-                   prevw = undo.prevw;
-#endif
-                   _jitc->patches.offset = undo.patch_offset;
-                   goto restart_function;
-               }
-               /* remember label is defined */
-               node->flag |= jit_flag_patch;
-               node->u.w = _jit->pc.w;
-               epilog(node);
-               _jitc->function = NULL;
-               break;
-           case jit_code_va_start:
-               vastart(rn(node->u.w));
-               break;
-           case jit_code_va_arg:
-               vaarg(rn(node->u.w), rn(node->v.w));
-               break;
-           case jit_code_va_arg_d:
-               vaarg_d(rn(node->u.w), rn(node->v.w));
-               break;
-           case jit_code_live:                 case jit_code_ellipsis:
-           case jit_code_va_push:
-           case jit_code_allocai:              case jit_code_allocar:
-           case jit_code_arg:
-           case jit_code_arg_f:                case jit_code_arg_d:
-           case jit_code_va_end:
-           case jit_code_ret:
-           case jit_code_retr:                 case jit_code_reti:
-           case jit_code_retr_f:               case jit_code_reti_f:
-           case jit_code_retr_d:               case jit_code_reti_d:
-           case jit_code_getarg_c:             case jit_code_getarg_uc:
-           case jit_code_getarg_s:             case jit_code_getarg_us:
-           case jit_code_getarg_i:             case jit_code_getarg_ui:
-           case jit_code_getarg_l:
-           case jit_code_getarg_f:             case jit_code_getarg_d:
-           case jit_code_putargr:              case jit_code_putargi:
-           case jit_code_putargr_f:            case jit_code_putargi_f:
-           case jit_code_putargr_d:            case jit_code_putargi_d:
-           case jit_code_pushargr:             case jit_code_pushargi:
-           case jit_code_pushargr_f:           case jit_code_pushargi_f:
-           case jit_code_pushargr_d:           case jit_code_pushargi_d:
-           case jit_code_retval_c:             case jit_code_retval_uc:
-           case jit_code_retval_s:             case jit_code_retval_us:
-           case jit_code_retval_i:
-           case jit_code_retval_ui:            case jit_code_retval_l:
-           case jit_code_retval_f:             case jit_code_retval_d:
-           case jit_code_prepare:
-           case jit_code_finishr:              case jit_code_finishi:
-               break;
-           default:
-               abort();
-       }
-       jit_regarg_clr(node, value);
-       assert(_jitc->regarg == 0 && _jitc->synth == 0);
-       /* update register live state */
-       jit_reglive(node);
-    }
-#undef case_brw
-#undef case_brr
-#undef case_wrr
-#undef case_rrw
-#undef case_rrr
-#undef case_wr
-#undef case_rw
-#undef case_rr
-
-    for (offset = 0; offset < _jitc->patches.offset; offset++) {
-       node = _jitc->patches.ptr[offset].node;
-       word = _jitc->patches.ptr[offset].inst;
-       value = node->code == jit_code_movi ? node->v.n->u.w : node->u.n->u.w;
-       patch_at(word, value);
-    }
-
-    jit_flush(_jit->code.ptr, _jit->pc.uc);
-
-    return (_jit->code.ptr);
-}
-
-#define CODE                           1
-#  include "aarch64-cpu.c"
-#  include "aarch64-fpu.c"
-#undef CODE
-
-void
 jit_flush(void *fptr, void *tptr)
 {
-#if defined(__GNUC__)
-    jit_word_t         f, t, s;
-
-    s = sysconf(_SC_PAGE_SIZE);
-    f = (jit_word_t)fptr & -s;
-    t = (((jit_word_t)tptr) + s - 1) & -s;
-    __clear_cache((void *)f, (void *)t);
-#endif
+  jit_word_t f = (jit_word_t)fptr & -page_size;
+  jit_word_t t = (((jit_word_t)tptr) + page_size - 1) & -page_size;
+  __clear_cache((void *)f, (void *)t);
 }
 
-void
-_emit_ldxi(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-    ldxi(rn(r0), rn(r1), i0);
-}
-
-void
-_emit_stxi(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
+static inline size_t
+jit_stack_alignment(void)
 {
-    stxi(i0, rn(r0), rn(r1));
-}
-
-void
-_emit_ldxi_d(jit_state_t *_jit, int32_t r0, int32_t r1, jit_word_t i0)
-{
-    ldxi_d(rn(r0), rn(r1), i0);
-}
-
-void
-_emit_stxi_d(jit_state_t *_jit, jit_word_t i0, int32_t r0, int32_t r1)
-{
-    stxi_d(i0, rn(r0), rn(r1));
-}
-
-static void
-_patch(jit_state_t *_jit, jit_word_t instr, jit_node_t *node)
-{
-    int32_t             flag;
-
-    assert(node->flag & jit_flag_node);
-    if (node->code == jit_code_movi)
-       flag = node->v.n->flag;
-    else
-       flag = node->u.n->flag;
-    assert(!(flag & jit_flag_patch));
-    if (_jitc->patches.offset >= _jitc->patches.length) {
-       jit_realloc((jit_pointer_t *)&_jitc->patches.ptr,
-                   _jitc->patches.length * sizeof(jit_patch_t),
-                   (_jitc->patches.length + 1024) * sizeof(jit_patch_t));
-       _jitc->patches.length += 1024;
-    }
-    _jitc->patches.ptr[_jitc->patches.offset].inst = instr;
-    _jitc->patches.ptr[_jitc->patches.offset].node = node;
-    ++_jitc->patches.offset;
+  return 16;
 }
diff --git a/lightening/aarch64.h b/lightening/aarch64.h
index 65723a6..3916d0d 100644
--- a/lightening/aarch64.h
+++ b/lightening/aarch64.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (C) 2013-2017  Free Software Foundation, Inc.
+ * Copyright (C) 2013-2017, 2019  Free Software Foundation, Inc.
  *
  * This file is part of GNU lightning.
  *
@@ -20,70 +20,161 @@
 #ifndef _jit_aarch64_h
 #define _jit_aarch64_h
 
-#define JIT_HASH_CONSTS                0
-#define JIT_NUM_OPERANDS       3
 
-/*
- * Types
- */
-#define JIT_FP                 _R29
-typedef enum {
-#define jit_r(i)               (_R9 + (i))
-#define jit_r_num()            7
-#define jit_v(i)               (_R19 + (i))
-#define jit_v_num()            10
-#define jit_f(i)               (_V8 + (i))
-#define jit_f_num()            8
-#define JIT_R0                 _R9
-#define JIT_R1                 _R10
-#define JIT_R2                 _R11
-#define JIT_R3                 _R12
-#define JIT_R4                 _R13
-#define JIT_R5                 _R14
-#define JIT_R6                 _R15
-    _R8,                               /* indirect result */
-    _R18,                              /* platform register */
-    _R17,                              /* IP1 */
-    _R16,                              /* IP0 */
-    _R9,       _R10,   _R11,   _R12,   /* temporaries */
-    _R13,      _R14,   _R15,
-#define JIT_V0                 _R19
-#define JIT_V1                 _R20
-#define JIT_V2                 _R21
-#define JIT_V3                 _R22
-#define JIT_V4                 _R23
-#define JIT_V5                 _R24
-#define JIT_V6                 _R25
-#define JIT_V7                 _R26
-#define JIT_V8                 _R27
-#define JIT_V9                 _R28
-    _R19,      _R20,   _R21,   _R22,   /* callee save */
-    _R23,      _R24,   _R25,   _R26,
-    _R27,      _R28,
-    _SP,                               /* stack pointer */
-    _R30,                              /* link register */
-    _R29,                              /* frame pointer */
-    _R7,       _R6,    _R5,    _R4,
-    _R3,       _R2,    _R1,    _R0,
-#define JIT_F0                 _V8
-#define JIT_F1                 _V9
-#define JIT_F2                 _V10
-#define JIT_F3                 _V11
-#define JIT_F4                 _V12
-#define JIT_F5                 _V13
-#define JIT_F6                 _V14
-#define JIT_F7                 _V15
-    _V31,      _V30,   _V29,   _V28,   /* temporaries */
-    _V27,      _V26,   _V25,   _V24,
-    _V23,      _V22,   _V21,   _V20,
-    _V19,      _V18,   _V17,   _V16,
-    /* callee save */
-    _V8,       _V9,    _V10,   _V11,
-    _V12,      _V13,   _V14,   _V15,
-    _V7,       _V6,    _V5,    _V4,    /* arguments */
-    _V3,       _V2,    _V1,    _V0,
-    _NOREG,
-#define JIT_NOREG              _NOREG
-} jit_reg_t;
+#define JIT_NEEDS_LITERAL_POOL 1
+
+#define _X0     JIT_GPR(0)
+#define _X1     JIT_GPR(1)
+#define _X2     JIT_GPR(2)
+#define _X3     JIT_GPR(3)
+#define _X4     JIT_GPR(4)
+#define _X5     JIT_GPR(5)
+#define _X6     JIT_GPR(6)
+#define _X7     JIT_GPR(7)
+#define _X8     JIT_GPR(8)
+#define _X9     JIT_GPR(9)
+#define _X10    JIT_GPR(10)
+#define _X11    JIT_GPR(11)
+#define _X12    JIT_GPR(12)
+#define _X13    JIT_GPR(13)
+#define _X14    JIT_GPR(14)
+#define _X15    JIT_GPR(15)
+#define _X16    JIT_GPR(16)
+#define _X17    JIT_GPR(17)
+#define _X18    JIT_GPR(18)
+#define _X19    JIT_GPR(19)
+#define _X20    JIT_GPR(20)
+#define _X21    JIT_GPR(21)
+#define _X22    JIT_GPR(22)
+#define _X23    JIT_GPR(23)
+#define _X24    JIT_GPR(24)
+#define _X25    JIT_GPR(25)
+#define _X26    JIT_GPR(26)
+#define _X27    JIT_GPR(27)
+#define _X28    JIT_GPR(28)
+#define _X29    JIT_GPR(29)
+#define _X30    JIT_GPR(30)
+#define _X31    JIT_GPR(31)
+
+#define _D0     JIT_FPR(0)
+#define _D1     JIT_FPR(1)
+#define _D2     JIT_FPR(2)
+#define _D3     JIT_FPR(3)
+#define _D4     JIT_FPR(4)
+#define _D5     JIT_FPR(5)
+#define _D6     JIT_FPR(6)
+#define _D7     JIT_FPR(7)
+#define _D8     JIT_FPR(8)
+#define _D9     JIT_FPR(9)
+#define _D10    JIT_FPR(10)
+#define _D11    JIT_FPR(11)
+#define _D12    JIT_FPR(12)
+#define _D13    JIT_FPR(13)
+#define _D14    JIT_FPR(14)
+#define _D15    JIT_FPR(15)
+#define _D16    JIT_FPR(16)
+#define _D17    JIT_FPR(17)
+#define _D18    JIT_FPR(18)
+#define _D19    JIT_FPR(19)
+#define _D20    JIT_FPR(20)
+#define _D21    JIT_FPR(21)
+#define _D22    JIT_FPR(22)
+#define _D23    JIT_FPR(23)
+#define _D24    JIT_FPR(24)
+#define _D25    JIT_FPR(25)
+#define _D26    JIT_FPR(26)
+#define _D27    JIT_FPR(27)
+#define _D28    JIT_FPR(28)
+#define _D29    JIT_FPR(29)
+#define _D30    JIT_FPR(30)
+#define _D31    JIT_FPR(31)
+
+static inline jit_bool_t
+jit_gpr_is_callee_save (jit_gpr_t reg)
+{
+  // x19 to x28 are callee-save, and x29 is the frame pointer.
+  return 19 <= jit_gpr_regno (reg) && jit_gpr_regno (reg) <= 29;
+}
+
+static inline jit_bool_t
+jit_fpr_is_callee_save (jit_fpr_t reg)
+{
+  // v8 to v15 are callee-save.
+  return 8 <= jit_fpr_regno (reg) && jit_fpr_regno (reg) <= 15;
+}
+
+#define JIT_R0    _X0
+#define JIT_R1    _X1
+#define JIT_R2    _X2
+#define JIT_R3    _X3
+#define JIT_R4    _X4
+#define JIT_R5    _X5
+#define JIT_R6    _X6
+#define JIT_R7    _X7
+#define JIT_R8    _X8
+#define JIT_R9    _X9
+#define JIT_R10   _X10
+#define JIT_R11   _X11
+#define JIT_R12   _X12
+#define JIT_R13   _X13
+#define JIT_R14   _X14
+#define JIT_R15   _X15
+#define JIT_R16   _X16
+#define JIT_R17   _X17
+#define JIT_RTMP  _X18
+#define JIT_V0    _X19
+#define JIT_V1    _X20
+#define JIT_V2    _X21
+#define JIT_V3    _X22
+#define JIT_V4    _X23
+#define JIT_V5    _X24
+#define JIT_V6    _X25
+#define JIT_V7    _X26
+#define JIT_V8    _X27
+#define JIT_V9    _X28
+// x29 is frame pointer
+// x30 is link register
+// x31 is stack pointer
+
+#define JIT_SP    _X31
+
+#define JIT_F0  _D0
+#define JIT_F1  _D1
+#define JIT_F2  _D2
+#define JIT_F3  _D3
+#define JIT_F4  _D4
+#define JIT_F5  _D5
+#define JIT_F6  _D6
+#define JIT_F7  _D7
+#define JIT_F8  _D16
+#define JIT_F9  _D17
+#define JIT_F10 _D18
+#define JIT_F11 _D19
+#define JIT_F12 _D20
+#define JIT_F13 _D21
+#define JIT_F14 _D22
+#define JIT_F15 _D23
+#define JIT_F16 _D24
+#define JIT_F17 _D25
+#define JIT_F18 _D26
+#define JIT_F19 _D27
+#define JIT_F20 _D28
+#define JIT_F21 _D29
+#define JIT_F22 _D30
+#define JIT_FTMP _D31
+
+#define JIT_VF0  _D8
+#define JIT_VF1  _D9
+#define JIT_VF2 _D10
+#define JIT_VF3 _D11
+#define JIT_VF4 _D12
+#define JIT_VF5 _D13
+#define JIT_VF6 _D14
+#define JIT_VF7 _D15
+
+#define _FP _X29
+#define _LR _X30
+#define _SP _X31
+
 
 #endif /* _jit_aarch64_h */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]