guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 347/437: S390: Add support for 32 bit.


From: Andy Wingo
Subject: [Guile-commits] 347/437: S390: Add support for 32 bit.
Date: Mon, 2 Jul 2018 05:14:52 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit 32c4f90a2b45c9166ad1d9d3973b688358c73f7a
Author: pcpa <address@hidden>
Date:   Thu Jan 8 10:45:30 2015 -0200

    S390: Add support for 32 bit.
    
            * include/lightning.h, include/lightning/jit_private.h,
            include/lightning/jit_s390x.h, lib/jit_disasm.c,
            lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c,
            lib/jit_s390x.c, lib/jit_size.c, lib/lightning.c:
        Add support for generating jit for s390 32 bit. This change
        also removed %f15 from the list of temporary fpr registers;
        it was not being used, but if it were, it would corrupt the
        stack frame because the spill address would overwrite gpr
        offsets.
---
 ChangeLog                       |  12 +
 include/lightning.h             |   2 +-
 include/lightning/jit_private.h |   2 +-
 include/lightning/jit_s390x.h   |   6 +-
 lib/jit_disasm.c                |   6 +-
 lib/jit_s390x-cpu.c             | 526 ++++++++++++++++++++++++++++++----------
 lib/jit_s390x-fpu.c             |  58 +++--
 lib/jit_s390x-sz.c              | 356 +++++++++++++++++++++++++++
 lib/jit_s390x.c                 |  77 ++++--
 lib/jit_size.c                  |   2 +-
 lib/lightning.c                 |   2 +-
 11 files changed, 876 insertions(+), 173 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index ae72602..f293b91 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,15 @@
+2015-01-08 Paulo Andrade <address@hidden>
+
+        * include/lightning.h, include/lightning/jit_private.h,
+        include/lightning/jit_s390x.h, lib/jit_disasm.c,
+        lib/jit_s390x-cpu.c, lib/jit_s390x-fpu.c, lib/jit_s390x-sz.c,
+        lib/jit_s390x.c, lib/jit_size.c, lib/lightning.c:
+       Add support for generating jit for s390 32 bit. This change
+       also removed %f15 from the list of temporaries fpr registers;
+       it was not being used, but if were, it would corrupt the
+       stack frame because the spill address would overwrite grp
+       offsets.
+
 2014-12-26 Paulo Andrade <address@hidden>
 
        * lib/jit_ppc-cpu.c, lib/jit_ppc.c: Correct some endianess issues
diff --git a/include/lightning.h b/include/lightning.h
index a2eac57..6cdd360 100644
--- a/include/lightning.h
+++ b/include/lightning.h
@@ -140,7 +140,7 @@ typedef jit_int32_t         jit_fpr_t;
 #  include <lightning/jit_hppa.h>
 #elif defined(__aarch64__)
 #  include <lightning/jit_aarch64.h>
-#elif defined(__s390x__)
+#elif defined(__s390__) || defined(__s390x__)
 #  include <lightning/jit_s390x.h>
 #elif defined(__alpha__)
 #  include <lightning/jit_alpha.h>
diff --git a/include/lightning/jit_private.h b/include/lightning/jit_private.h
index 84bed83..32aa31c 100644
--- a/include/lightning/jit_private.h
+++ b/include/lightning/jit_private.h
@@ -115,7 +115,7 @@ typedef jit_uint64_t                jit_regset_t;
 #  define JIT_RET              _R0
 #  define JIT_FRET             _V0
 typedef jit_uint64_t           jit_regset_t;
-#elif defined(__s390x__)
+#elif defined(__s390__) || defined(__s390x__)
 #  define JIT_SP               _R15
 #  define JIT_RET              _R2
 #  define JIT_FRET             _F0
diff --git a/include/lightning/jit_s390x.h b/include/lightning/jit_s390x.h
index fc18f42..196a02f 100644
--- a/include/lightning/jit_s390x.h
+++ b/include/lightning/jit_s390x.h
@@ -33,7 +33,11 @@ typedef enum {
 #define jit_r_num()            3
 #define jit_v(i)               (_R11 + ((i) << 1))
 #define jit_v_num()            3
-#define jit_arg_f_reg_p(i)     ((i) >= 0 && (i) <= 4)
+#if __WORDSIZE == 32
+#  define jit_arg_f_reg_p(i)   ((i) >= 0 && (i) <= 2)
+#else
+#  define jit_arg_f_reg_p(i)   ((i) >= 0 && (i) <= 4)
+#endif
 #define jit_f(i)               (_F8 + (i))
 #define jit_f_num()            6
 #define JIT_R0                 _R12
diff --git a/lib/jit_disasm.c b/lib/jit_disasm.c
index 2f41067..01158de 100644
--- a/lib/jit_disasm.c
+++ b/lib/jit_disasm.c
@@ -100,9 +100,13 @@ jit_init_debug(const char *progname)
 #  if defined(__sparc__)
     disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG;
 #  endif
-#  if defined(__s390x__)
+#  if defined(__s390__) || defined(__s390x__)
     disasm_info.arch = bfd_arch_s390;
+#    if __WORDSIZE == 32
+    disasm_info.mach = bfd_mach_s390_31;
+#    else
     disasm_info.mach = bfd_mach_s390_64;
+#    endif
     disasm_info.endian = disasm_info.display_endian = BFD_ENDIAN_BIG;
     disasm_info.disassembler_options = "zarch";
 #  endif
diff --git a/lib/jit_s390x-cpu.c b/lib/jit_s390x-cpu.c
index ef5d844..0d13d70 100644
--- a/lib/jit_s390x-cpu.c
+++ b/lib/jit_s390x-cpu.c
@@ -18,10 +18,19 @@
  */
 
 #if PROTO
-#  define ldxi(r0,r1,i0)               ldxi_l(r0,r1,i0)
-#  define stxi(i0,r0,r1)               stxi_l(i0,r0,r1)
+#  if __WORDSIZE == 32
+#    define ldxi(r0,r1,i0)             ldxi_i(r0,r1,i0)
+#    define stxi(i0,r0,r1)             stxi_i(i0,r0,r1)
+#  else
+#    define ldxi(r0,r1,i0)             ldxi_l(r0,r1,i0)
+#    define stxi(i0,r0,r1)             stxi_l(i0,r0,r1)
+#  endif
 #  define is(i)                                *_jit->pc.us++ = i
-#  define stack_framesize              160
+#  if __WORDSIZE == 32
+#    define stack_framesize            96
+#  else
+#    define stack_framesize            160
+#  endif
 #  define _R0_REGNO                    0
 #  define _R1_REGNO                    1
 #  define _R7_REGNO                    7
@@ -34,8 +43,12 @@
 #  define x16(i0)                      ((i0) & 0xffff)
 #  define s20_p(i0)                    ((i0) >= -524288 && (i0) <= 524287)
 #  define x20(i0)                      ((i0) & 0xfffff)
-#  define s32_p(i0)                                                    \
+#  if __WORDSIZE == 32
+#    define s32_p(i0)                  1
+#  else
+#    define s32_p(i0)                                                  \
     ((i0) >= -2147483648L && (i0) < 2147483647L)
+#  endif
 
 /*
        Condition Code          Instruction     (Mask) Bit Mask Value
@@ -144,6 +157,45 @@ static void _SSE(jit_state_t*,_ui,_ui,_ui,_ui,_ui);
 #  undef _ui
 #  define nop(c)                       _nop(_jit,c)
 static void _nop(jit_state_t*,jit_int32_t);
+#  if __WORDSIZE == 32
+#    define ADD_(r0,r1)                        AR(r0,r1)
+#    define ADDI_(r0,i0)               AHI(r0,i0)
+#    define ADDC_(r0,r1)               ALR(r0,r1)
+#    define ADDX_(r0,r1)               ALCR(r0,r1)
+#    define AND_(r0,r1)                        NR(r0,r1)
+#    define CMP_(r0,r1)                        CR(r0,r1)
+#    define CMPU_(r0,r1)               CLR(r0,r1)
+#    define DIVREM_(r0,r1)             DR(r0,r1)
+#    define DIVREMU_(r0,r1)            DLR(r0,r1)
+#    define OR_(r0,r1)                 OR(r0,r1)
+#    define MUL_(r0,r1)                        MSR(r0,r1)
+#    define MULI_(r0,i0)               MHI(r0,i0)
+#    define MULU_(r0,r1)               MLR(r0,r1)
+#    define SUB_(r0,r1)                        SR(r0,r1)
+#    define SUBC_(r0,r1)               SLR(r0,r1)
+#    define SUBX_(r0,r1)               SLBR(r0,r1)
+#    define TEST_(r0,r1)               LTR(r0,r1)
+#    define XOR_(r0,r1)                        XR(r0,r1)
+#  else
+#    define ADD_(r0,r1)                        AGR(r0,r1)
+#    define ADDI_(r0,i0)               AGHI(r0,i0)
+#    define ADDC_(r0,r1)               ALGR(r0,r1)
+#    define ADDX_(r0,r1)               ALCGR(r0,r1)
+#    define AND_(r0,r1)                        NGR(r0,r1)
+#    define CMP_(r0,r1)                        CGR(r0,r1)
+#    define CMPU_(r0,r1)               CLGR(r0,r1)
+#    define DIVREM_(r0,r1)             DSGR(r0,r1)
+#    define DIVREMU_(r0,r1)            DLGR(r0,r1)
+#    define MUL_(r0,r1)                        MSGR(r0,r1)
+#    define MULI_(r0,i0)               MGHI(r0,i0)
+#    define MULU_(r0,r1)               MLGR(r0,r1)
+#    define OR_(r0,r1)                 OGR(r0,r1)
+#    define SUB_(r0,r1)                        SGR(r0,r1)
+#    define SUBC_(r0,r1)               SLGR(r0,r1)
+#    define SUBX_(r0,r1)               SLBGR(r0,r1)
+#    define TEST_(r0,r1)               LTGR(r0,r1)
+#    define XOR_(r0,r1)                        XGR(r0,r1)
+#  endif
 /****************************************************************
  * General Instructions                                                *
  ****************************************************************/
@@ -551,7 +603,7 @@ static void _nop(jit_state_t*,jit_int32_t);
 #  define STH(R1,D2,X2,B2)             RX_(0x40,R1,X2,B2,D2)
 #  define STHY(R1,D2,X2,B2)            RXY_(0xE3,R1,X2,B2,D2,0x70)
 /* STORE MULTIPLE */
-#  define STM(R1,R3,D2,B2)             RS_(0x90,R1,R3,D2,B2)
+#  define STM(R1,R3,D2,B2)             RS_(0x90,R1,R3,B2,D2)
 #  define STMY(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x90)
 #  define STMG(R1,R3,D2,B2)            RSY_(0xEB,R1,R3,B2,D2,0x24)
 /* STORE MULTIPLE HIGH */
@@ -980,16 +1032,35 @@ static void _qdivr_u(jit_state_t*,jit_int32_t,
 #  define qdivi_u(r0,r1,r2,i0)         _qdivi_u(_jit,r0,r1,r2,i0)
 static void _qdivi_u(jit_state_t*,jit_int32_t,
                     jit_int32_t,jit_int32_t,jit_word_t);
-#  define lshr(r0,r1,r2)               SLLG(r0,r1,0,r2)
+#  if __WORDSIZE == 32
+#    define lshr(r0,r1,r2)             _lshr(_jit,r0,r1,r2)
+static void _lshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  else
+#    define lshr(r0,r1,r2)             SLLG(r0,r1,0,r2)
+#  endif
 #  define lshi(r0,r1,i0)               _lshi(_jit,r0,r1,i0)
 static void _lshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define rshr(r0,r1,r2)               SRAG(r0,r1,0,r2)
+#  if __WORDSIZE == 32
+#    define rshr(r0,r1,r2)             _rshr(_jit,r0,r1,r2)
+static void _rshr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  else
+#    define rshr(r0,r1,r2)             SRAG(r0,r1,0,r2)
+#  endif
 #  define rshi(r0,r1,i0)               _rshi(_jit,r0,r1,i0)
 static void _rshi(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define rshr_u(r0,r1,r2)             SRLG(r0,r1,0,r2)
+#  if __WORDSIZE == 32
+#    define rshr_u(r0,r1,r2)           _rshr_u(_jit,r0,r1,r2)
+static void _rshr_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
+#  else
+#    define rshr_u(r0,r1,r2)           SRLG(r0,r1,0,r2)
+#  endif
 #  define rshi_u(r0,r1,i0)             _rshi_u(_jit,r0,r1,i0)
 static void _rshi_u(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define negr(r0,r1)                  LCGR(r0,r1)
+#  if __WORDSIZE == 32
+#    define negr(r0,r1)                        LCR(r0,r1)
+#  else
+#    define negr(r0,r1)                        LCGR(r0,r1)
+#  endif
 #  define comr(r0,r1)                  _comr(_jit,r0,r1)
 static void _comr(jit_state_t*,jit_int32_t,jit_int32_t);
 #  define andr(r0,r1,r2)               _andr(_jit,r0,r1,r2)
@@ -1005,14 +1076,20 @@ static void _xorr(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define xori(r0,r1,i0)               _xori(_jit,r0,r1,i0)
 static void _xori(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
 #  define htonr_us(r0,r1)              extr_us(r0,r1)
-#  define htonr_ui(r0,r1)              extr_ui(r0,r1)
-#  define htonr_ul(r0,r1)              movr(r0,r1)
+#  if __WORDSIZE == 32
+#    define htonr_ui(r0,r1)            movr(r0,r1)
+#  else
+#    define htonr_ui(r0,r1)            extr_ui(r0,r1)
+#    define htonr_ul(r0,r1)            movr(r0,r1)
+#  endif
 #  define extr_c(r0,r1)                        LGBR(r0,r1)
 #  define extr_uc(r0,r1)               LLGCR(r0,r1)
 #  define extr_s(r0,r1)                        LGHR(r0,r1)
 #  define extr_us(r0,r1)               LLGHR(r0,r1)
-#  define extr_i(r0,r1)                        LGFR(r0,r1)
-#  define extr_ui(r0,r1)               LLGFR(r0,r1)
+#  if __WORDSIZE == 64
+#    define extr_i(r0,r1)              LGFR(r0,r1)
+#    define extr_ui(r0,r1)             LLGFR(r0,r1)
+#  endif
 #  define ldr_c(r0,r1)                 LGB(r0,0,0,r1)
 #  define ldi_c(r0,i0)                 _ldi_c(_jit,r0,i0)
 static void _ldi_c(jit_state_t*,jit_int32_t,jit_word_t);
@@ -1027,7 +1104,11 @@ static void _ldi_uc(jit_state_t*,jit_int32_t,jit_word_t);
 static void _ldxr_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_uc(r0,r1,i0)            _ldxi_uc(_jit,r0,r1,i0)
 static void _ldxi_uc(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldr_s(r0,r1)                 LGH(r0,0,0,r1)
+#  if __WORDSIZE == 32
+#    define ldr_s(r0,r1)               LH(r0,0,0,r1)
+#  else
+#    define ldr_s(r0,r1)               LGH(r0,0,0,r1)
+#  endif
 #  define ldi_s(r0,i0)                 _ldi_s(_jit,r0,i0)
 static void _ldi_s(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldxr_s(r0,r1,r2)             _ldxr_s(_jit,r0,r1,r2)
@@ -1041,27 +1122,33 @@ static void _ldi_us(jit_state_t*,jit_int32_t,jit_word_t);
 static void _ldxr_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_us(r0,r1,i0)            _ldxi_us(_jit,r0,r1,i0)
 static void _ldxi_us(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldr_i(r0,r1)                 LGF(r0,0,0,r1)
+#  if __WORDSIZE == 32
+#    define ldr_i(r0,r1)               LLGF(r0,0,0,r1)
+#  else
+#    define ldr_i(r0,r1)               LGF(r0,0,0,r1)
+#  endif
 #  define ldi_i(r0,i0)                 _ldi_i(_jit,r0,i0)
 static void _ldi_i(jit_state_t*,jit_int32_t,jit_word_t);
 #  define ldxr_i(r0,r1,r2)             _ldxr_i(_jit,r0,r1,r2)
 static void _ldxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define ldxi_i(r0,r1,i0)             _ldxi_i(_jit,r0,r1,i0)
 static void _ldxi_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldr_ui(r0,r1)                        LLGF(r0,0,0,r1)
-#  define ldi_ui(r0,i0)                        _ldi_ui(_jit,r0,i0)
+#  if __WORDSIZE == 64
+#    define ldr_ui(r0,r1)              LLGF(r0,0,0,r1)
+#    define ldi_ui(r0,i0)              _ldi_ui(_jit,r0,i0)
 static void _ldi_ui(jit_state_t*,jit_int32_t,jit_word_t);
-#  define ldxr_ui(r0,r1,r2)            _ldxr_ui(_jit,r0,r1,r2)
+#    define ldxr_ui(r0,r1,r2)          _ldxr_ui(_jit,r0,r1,r2)
 static void _ldxr_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-#  define ldxi_ui(r0,r1,i0)            _ldxi_ui(_jit,r0,r1,i0)
+#    define ldxi_ui(r0,r1,i0)          _ldxi_ui(_jit,r0,r1,i0)
 static void _ldxi_ui(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
-#  define ldr_l(r0,r1)                 LG(r0,0,0,r1)
-#  define ldi_l(r0,i0)                 _ldi_l(_jit,r0,i0)
+#    define ldr_l(r0,r1)               LG(r0,0,0,r1)
+#    define ldi_l(r0,i0)               _ldi_l(_jit,r0,i0)
 static void _ldi_l(jit_state_t*,jit_int32_t,jit_word_t);
-#  define ldxr_l(r0,r1,r2)             _ldxr_l(_jit,r0,r1,r2)
+#    define ldxr_l(r0,r1,r2)           _ldxr_l(_jit,r0,r1,r2)
 static void _ldxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
-#  define ldxi_l(r0,r1,i0)             _ldxi_l(_jit,r0,r1,i0)
+#    define ldxi_l(r0,r1,i0)           _ldxi_l(_jit,r0,r1,i0)
 static void _ldxi_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_word_t);
+#  endif
 #  define str_c(r0,r1)                 STC(r1,0,0,r0)
 #  define sti_c(i0,r0)                 _sti_c(_jit,i0,r0)
 static void _sti_c(jit_state_t*,jit_word_t,jit_int32_t);
@@ -1083,13 +1170,15 @@ static void _sti_i(jit_state_t*,jit_word_t,jit_int32_t);
 static void _stxr_i(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define stxi_i(i0,r0,r1)             _stxi_i(_jit,i0,r0,r1)
 static void _stxi_i(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
-#  define str_l(r0,r1)                 STG(r1,0,0,r0)
-#  define sti_l(i0,r0)                 _sti_l(_jit,i0,r0)
+#  if __WORDSIZE == 64
+#    define str_l(r0,r1)               STG(r1,0,0,r0)
+#    define sti_l(i0,r0)               _sti_l(_jit,i0,r0)
 static void _sti_l(jit_state_t*,jit_word_t,jit_int32_t);
 #  define stxr_l(r0,r1,r2)             _stxr_l(_jit,r0,r1,r2)
 static void _stxr_l(jit_state_t*,jit_int32_t,jit_int32_t,jit_int32_t);
 #  define stxi_l(i0,r0,r1)             _stxi_l(_jit,i0,r0,r1)
 static void _stxi_l(jit_state_t*,jit_word_t,jit_int32_t,jit_int32_t);
+#  endif
 #  define ltr(r0,r1,r2)                        crr(CC_L,r0,r1,r2)
 #  define lti(r0,r1,i0)                        cri(CC_L,r0,r1,i0)
 #  define ltr_u(r0,r1,r2)              crr_u(CC_L,r0,r1,r2)
@@ -1855,8 +1944,13 @@ _xdivr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                regno;
     regno = jit_get_reg_pair();
+#if __WORDSIZE == 32
+    movr(rn(regno), r0);
+    SRDA(rn(regno), 32, 0);
+#else
     movr(rn(regno) + 1, r0);
-    DSGR(rn(regno), r1);
+#endif
+    DIVREM_(rn(regno), r1);
     jit_unget_reg_pair(regno);
     return (regno);
 }
@@ -1866,9 +1960,14 @@ _xdivr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
     jit_int32_t                regno;
     regno = jit_get_reg_pair();
+#if __WORDSIZE == 32
+    movr(rn(regno), r0);
+    SRDL(rn(regno), 32, 0);
+#else
     movr(rn(regno) + 1, r0);
-    LGHI(rn(regno), 0);
-    DLGR(rn(regno), r1);
+#endif
+    movi(rn(regno), 0);
+    DIVREMU_(rn(regno), r1);
     jit_unget_reg_pair(regno);
     return (regno);
 }
@@ -1876,12 +1975,18 @@ _xdivr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 static jit_int32_t
 _xdivi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
-    /* overlap */
-    jit_int32_t                regno;
+    jit_int32_t                imm, regno;
     regno = jit_get_reg_pair();
+    imm = jit_get_reg(jit_class_gpr);
+#if __WORDSIZE == 32
+    movr(rn(regno), r0);
+    SRDA(rn(regno), 32, 0);
+#else
     movr(rn(regno) + 1, r0);
-    movi(rn(regno), i0);
-    DSGR(rn(regno), rn(regno));
+#endif
+    movi(rn(imm), i0);
+    DIVREM_(rn(regno), rn(imm));
+    jit_unget_reg(imm);
     jit_unget_reg_pair(regno);
     return (regno);
 }
@@ -1893,10 +1998,15 @@ _xdivi_u(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
     jit_int32_t                imm, regno;
     regno = jit_get_reg_pair();
     imm = jit_get_reg(jit_class_gpr);
+#if __WORDSIZE == 32
+    movr(rn(regno), r0);
+    SRDL(rn(regno), 32, 0);
+#else
     movr(rn(regno) + 1, r0);
-    LGHI(rn(regno), 0);
+#endif
+    movi(rn(regno), 0);
     movi(rn(imm), i0);
-    DLGR(rn(regno), rn(imm));
+    DIVREMU_(rn(regno), rn(imm));
     jit_unget_reg(imm);
     jit_unget_reg_pair(regno);
     return (regno);
@@ -1914,11 +2024,11 @@ _crr(jit_state_t *_jit, jit_int32_t cc,
     }
     else
        rg = r0;
-    LGHI(rg, 1);
-    CGR(r1, r2);
+    movi(rg, 1);
+    CMP_(r1, r2);
     w = _jit->pc.w;
     BRC(cc, 0);
-    LGHI(rg, 0);
+    movi(rg, 0);
     patch_at(w, _jit->pc.w);
     if (r0 == r1 || r0 == r2) {
        movr(r0, rg);
@@ -1949,11 +2059,11 @@ _crr_u(jit_state_t *_jit, jit_int32_t cc,
     }
     else
        rg = r0;
-    LGHI(rg, 1);
-    CLGR(r1, r2);
+    movi(rg, 1);
+    CMPU_(r1, r2);
     w = _jit->pc.w;
     BRC(cc, 0);
-    LGHI(rg, 0);
+    movi(rg, 0);
     patch_at(w, _jit->pc.w);
     if (r0 == r1 || r0 == r2) {
        movr(r0, rg);
@@ -1977,7 +2087,7 @@ _brr(jit_state_t *_jit, jit_int32_t cc,
      jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d;
-    CGR(r0, r1);
+    CMP_(r0, r1);
     d = (i0 - _jit->pc.w) >> 1;
     if (s16_p(d))
        BRC(cc, x16(d));
@@ -1992,7 +2102,7 @@ _brr_p(jit_state_t *_jit, jit_int32_t cc,
        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-    CGR(r0, r1);
+    CMP_(r0, r1);
     w = _jit->pc.w;
     BRCL(cc, 0);
     return (w);
@@ -2027,7 +2137,7 @@ _brr_u(jit_state_t *_jit, jit_int32_t cc,
        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d;
-    CLGR(r0, r1);
+    CMPU_(r0, r1);
     d = (i0 - _jit->pc.w) >> 1;
     if (s16_p(d))
        BRC(cc, x16(d));
@@ -2042,7 +2152,7 @@ _brr_u_p(jit_state_t *_jit, jit_int32_t cc,
         jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         w;
-    CLGR(r0, r1);
+    CMPU_(r0, r1);
     w = _jit->pc.w;
     BRCL(cc, 0);
     return (w);
@@ -2077,8 +2187,8 @@ _baddr(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d;
-    if (s)             AGR(r0, r1);
-    else               ALGR(r0, r1);
+    if (s)             addr(r0, r0, r1);
+    else               addcr(r0, r0, r1);
     d = (i0 - _jit->pc.w) >> 1;
     if (s16_p(d))
        BRC(c, x16(d));
@@ -2104,8 +2214,8 @@ _baddr_p(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
         jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    if (s)             AGR(r0, r1);
-    else               ALGR(r0, r1);
+    if (s)             addr(r0, r0, r1);
+    else               addcr(r0, r0, r1);
     d = (i0 - _jit->pc.w) >> 1;
     w = _jit->pc.w;
     BRCL(c, d);
@@ -2130,8 +2240,8 @@ _bsubr(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
        jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d;
-    if (s)             SGR(r0, r1);
-    else               SLGR(r0, r1);
+    if (s)             subr(r0, r0, r1);
+    else               subcr(r0, r0, r1);
     d = (i0 - _jit->pc.w) >> 1;
     if (s16_p(d))
        BRC(c, x16(d));
@@ -2157,8 +2267,8 @@ _bsubr_p(jit_state_t *_jit, jit_int32_t c, jit_bool_t s,
         jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
 {
     jit_word_t         d, w;
-    if (s)             SGR(r0, r1);
-    else               SLGR(r0, r1);
+    if (s)             subr(r0, r0, r1);
+    else               subcr(r0, r0, r1);
     d = (i0 - _jit->pc.w) >> 1;
     w = _jit->pc.w;
     BRCL(c, d);
@@ -2186,8 +2296,8 @@ _bmxr(jit_state_t *_jit, jit_int32_t cc,
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     movr(rn(reg), r0);
-    NGR(rn(reg), r1);
-    LTGR(rn(reg), rn(reg));
+    andr(rn(reg), rn(reg), r1);
+    TEST_(rn(reg), rn(reg));
     jit_unget_reg(reg);
     d = (i0 - _jit->pc.w) >> 1;
     if (s16_p(d))
@@ -2206,8 +2316,8 @@ _bmxr_p(jit_state_t *_jit, jit_int32_t cc,
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     movr(rn(reg), r0);
-    NGR(rn(reg), r1);
-    LTGR(rn(reg), rn(reg));
+    andr(rn(reg), rn(reg), r1);
+    TEST_(rn(reg), rn(reg));
     jit_unget_reg(reg);
     w = _jit->pc.w;
     BRCL(cc, 0);
@@ -2222,8 +2332,8 @@ _bmxi(jit_state_t *_jit, jit_int32_t cc,
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     movi(rn(reg), i1);
-    NGR(rn(reg), r0);
-    LTGR(rn(reg), rn(reg));
+    andr(rn(reg), rn(reg), r0);
+    TEST_(rn(reg), rn(reg));
     jit_unget_reg(reg);
     d = (i0 - _jit->pc.w) >> 1;
     if (s16_p(d))
@@ -2242,8 +2352,8 @@ _bmxi_p(jit_state_t *_jit, jit_int32_t cc,
     jit_int32_t                reg;
     reg = jit_get_reg(jit_class_gpr);
     movi(rn(reg), i1);
-    NGR(rn(reg), r0);
-    LTGR(rn(reg), rn(reg));
+    andr(rn(reg), rn(reg), r0);
+    TEST_(rn(reg), rn(reg));
     jit_unget_reg(reg);
     w = _jit->pc.w;
     BRCL(cc, 0);
@@ -2253,22 +2363,45 @@ _bmxi_p(jit_state_t *_jit, jit_int32_t cc,
 static void
 _movr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
 {
+#if __WORDSIZE == 32
+    if (r0 != r1)
+       LR(r0, r1);
+#else
     if (r0 != r1)
        LGR(r0, r1);
+#endif
 }
 
 static void
 _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_word_t         d;
+#if __WORDSIZE == 64
     jit_int32_t                bits;
+#endif
     d = (i0 - _jit->pc.w) >> 1;
-    if (s16_p(i0))                     LGHI(r0, x16(i0));
+    if (s16_p(i0)) {
+#if __WORDSIZE == 32
+       LHI(r0, x16(i0));
+#else
+       LGHI(r0, x16(i0));
+#endif
+    }
     /* easy way of loading a large amount of 32 bit values and
      * usually address of constants */
-    else if (!(i0 & 1) && s32_p(d))
+    else if (!(i0 & 1) &&
+#if __WORDSIZE == 32
+            i0 > 0
+#else
+            s32_p(d)
+#endif
+            )
        LARL(r0, d);
     else {
+#if __WORDSIZE == 32
+       LHI(r0, x16(i0));
+       IILH(r0, x16((jit_uword_t)i0 >> 16));
+#else
        bits = 0;
        if (i0 &             0xffffL)   bits |= 1;
        if (i0 &         0xffff0000L)   bits |= 2;
@@ -2279,6 +2412,7 @@ _movi(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
        if (bits & 2)                   IILH(r0, x16((jit_uword_t)i0 >> 16));
        if (bits & 4)                   IIHL(r0, x16((jit_uword_t)i0 >> 32));
        if (bits & 8)                   IIHH(r0, x16((jit_uword_t)i0 >> 48));
+#endif
     }
 }
 
@@ -2287,10 +2421,16 @@ _movi_p(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
     jit_word_t         w;
     w = _jit->pc.w;
+#if __WORDSIZE == 32
+    LHI(r0, x16(i0));
+#else
     IILL(r0, x16(i0));
+#endif
     IILH(r0, x16((jit_uword_t)i0 >> 16));
+#if __WORDSIZE == 64
     IIHL(r0, x16((jit_uword_t)i0 >> 32));
     IIHH(r0, x16((jit_uword_t)i0 >> 48));
+#endif
     return (w);
 }
 
@@ -2298,10 +2438,10 @@ static void
 _addr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2)
-       AGR(r0, r1);
+       ADD_(r0, r1);
     else {
        movr(r0, r1);
-       AGR(r0, r2);
+       ADD_(r0, r2);
     }
 }
 
@@ -2310,9 +2450,11 @@ _addi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (r0 == r1 && s16_p(i0))
-       AGHI(r0, x16(i0));
+       ADDI_(r0, x16(i0));
+#if __WORDSIZE == 64
     else if (s20_p(i0))
        LAY(r0, x20(i0), 0, r1);
+#endif
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -2325,10 +2467,10 @@ static void
 _addcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2)
-       ALGR(r0, r1);
+       ADDC_(r0, r1);
     else {
        movr(r0, r1);
-       ALGR(r0, r2);
+       ADDC_(r0, r2);
     }
 }
 
@@ -2346,10 +2488,10 @@ static void
 _addxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2)
-       ALCGR(r0, r1);
+       ADDX_(r0, r1);
     else {
        movr(r0, r1);
-       ALCGR(r0, r2);
+       ADDX_(r0, r2);
     }
 }
 
@@ -2371,12 +2513,12 @@ _subr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        reg = jit_get_reg(jit_class_gpr);
        movr(rn(reg), r2);
        movr(r0, r1);
-       SGR(r0, rn(reg));
+       SUB_(r0, rn(reg));
        jit_unget_reg(reg);
     }
     else {
        movr(r0, r1);
-       SGR(r0, r2);
+       SUB_(r0, r2);
     }
 }
 
@@ -2385,9 +2527,11 @@ _subi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
     if (r0 == r1 && s16_p(-i0))
-       AGHI(r0, x16(-i0));
+       ADDI_(r0, x16(-i0));
+#if __WORDSIZE == 64
     else if (s20_p(-i0))
        LAY(r0, x20(-i0), 0, r1);
+#endif
     else {
        reg = jit_get_reg(jit_class_gpr);
        movi(rn(reg), i0);
@@ -2404,12 +2548,12 @@ _subcr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        reg = jit_get_reg(jit_class_gpr);
        movr(rn(reg), r2);
        movr(r0, r1);
-       SLGR(r0, rn(reg));
+       SUBC_(r0, rn(reg));
        jit_unget_reg(reg);
     }
     else {
        movr(r0, r1);
-       SLGR(r0, r2);
+       SUBC_(r0, r2);
     }
 }
 
@@ -2431,12 +2575,12 @@ _subxr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
        reg = jit_get_reg(jit_class_gpr);
        movr(rn(reg), r2);
        movr(r0, r1);
-       SLBGR(r0, rn(reg));
+       SUBX_(r0, rn(reg));
        jit_unget_reg(reg);
     }
     else {
        movr(r0, r1);
-       SLBGR(r0, r2);
+       SUBX_(r0, r2);
     }
 }
 
@@ -2461,10 +2605,10 @@ static void
 _mulr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2)
-       MSGR(r0, r1);
+       MUL_(r0, r1);
     else {
        movr(r0, r1);
-       MSGR(r0, r2);
+       MUL_(r0, r2);
     }
 }
 
@@ -2474,7 +2618,7 @@ _muli(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     jit_int32_t                reg;
     if (s16_p(i0)) {
        movr(r0, r1);
-       MGHI(r0, x16(i0));
+       MULI_(r0, x16(i0));
     }
     else {
        reg = jit_get_reg(jit_class_gpr);
@@ -2540,7 +2684,7 @@ _qmulr_u(jit_state_t *_jit,
     jit_int32_t                regno;
     regno = jit_get_reg_pair();
     movr(rn(regno) + 1, r2);
-    MLGR(rn(regno), r3);
+    MULU_(rn(regno), r3);
     movr(r0, rn(regno) + 1);
     movr(r1, rn(regno));
     jit_unget_reg_pair(regno);
@@ -2554,7 +2698,7 @@ _qmuli_u(jit_state_t *_jit,
     regno = jit_get_reg_pair();
     movr(rn(regno) + 1, r2);
     movi(rn(regno), i0);
-    MLGR(rn(regno), rn(regno));
+    MULU_(rn(regno), rn(regno));
     movr(r0, rn(regno) + 1);
     movr(r1, rn(regno));
     jit_unget_reg_pair(regno);
@@ -2664,6 +2808,25 @@ _qdivi_u(jit_state_t *_jit,
     movr(r1, rn(regno));
 }
 
+#  if __WORDSIZE == 32
+static void
+_lshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (r0 == r2) {
+       reg = jit_get_reg_but_zero(0);
+       movr(rn(reg), r2);
+       movr(r0, r1);
+       SLL(r0, 0, rn(reg));
+       jit_unget_reg_but_zero(reg);
+    }
+    else {
+       movr(r0, r1);
+       SLL(r0, 0, r2);
+    }
+}
+#endif
+
 static void
 _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2674,6 +2837,25 @@ _lshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     jit_unget_reg_but_zero(reg);
 }
 
+#  if __WORDSIZE == 32
+static void
+_rshr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (r0 == r2) {
+       reg = jit_get_reg_but_zero(0);
+       movr(rn(reg), r2);
+       movr(r0, r1);
+       SRA(r0, 0, rn(reg));
+       jit_unget_reg_but_zero(reg);
+    }
+    else {
+       movr(r0, r1);
+       SRA(r0, 0, r2);
+    }
+}
+#endif
+
 static void
 _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2684,6 +2866,25 @@ _rshi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     jit_unget_reg_but_zero(reg);
 }
 
+#  if __WORDSIZE == 32
+static void
+_rshr_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
+{
+    jit_int32_t                reg;
+    if (r0 == r2) {
+       reg = jit_get_reg_but_zero(0);
+       movr(rn(reg), r2);
+       movr(r0, r1);
+       SRL(r0, 0, rn(reg));
+       jit_unget_reg_but_zero(reg);
+    }
+    else {
+       movr(r0, r1);
+       SRL(r0, 0, r2);
+    }
+}
+#endif
+
 static void
 _rshi_u(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
@@ -2701,7 +2902,7 @@ _comr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1)
     reg = jit_get_reg(jit_class_gpr);
     movi(rn(reg), -1);
     movr(r0, r1);
-    XGR(r0, rn(reg));
+    XOR_(r0, rn(reg));
     jit_unget_reg(reg);
 }
 
@@ -2709,10 +2910,10 @@ static void
 _andr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2)
-       NGR(r0, r1);
+       AND_(r0, r1);
     else {
        movr(r0, r1);
-       NGR(r0, r2);
+       AND_(r0, r2);
     }
 }
 
@@ -2722,18 +2923,20 @@ _andi(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     movr(r0, r1);
     NILL(r0, x16(i0));
     NILH(r0, x16((jit_uword_t)i0 >> 16));
+#if __WORDSIZE == 64
     NIHL(r0, x16((jit_uword_t)i0 >> 32));
     NIHH(r0, x16((jit_uword_t)i0 >> 48));
+#endif
 }
 
 static void
 _orr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2)
-       OGR(r0, r1);
+       OR_(r0, r1);
     else {
        movr(r0, r1);
-       OGR(r0, r2);
+       OR_(r0, r2);
     }
 }
 
@@ -2743,18 +2946,20 @@ _ori(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     movr(r0, r1);
     OILL(r0, x16(i0));
     OILH(r0, x16((jit_uword_t)i0 >> 16));
+#if __WORDSIZE == 64
     OIHL(r0, x16((jit_uword_t)i0 >> 32));
     OIHH(r0, x16((jit_uword_t)i0 >> 48));
+#endif
 }
 
 static void
 _xorr(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2)
-       XGR(r0, r1);
+       XOR_(r0, r1);
     else {
        movr(r0, r1);
-       XGR(r0, r2);
+       XOR_(r0, r2);
     }
 }
 
@@ -2779,12 +2984,12 @@ static void
 _ldxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2) {
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_c(r0, r0);
     }
     else {
        movr(r0, r1);
-       AGR(r0, r2);
+       addr(r0, r0, r2);
        ldr_c(r0, r0);
     }
 }
@@ -2793,17 +2998,22 @@ static void
 _ldxi_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (s20_p(i0))
+    if (s20_p(i0)) {
+#if __WORDSIZE == 32
+       LB(r0, x20(i0), 0, r1);
+#else
        LGB(r0, x20(i0), 0, r1);
+#endif
+    }
     else if (r0 != r1) {
        movi(r0, i0);
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_c(r0, r0);
     }
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_c(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
@@ -2820,12 +3030,12 @@ static void
 _ldxr_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2) {
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_uc(r0, r0);
     }
     else {
        movr(r0, r1);
-       AGR(r0, r2);
+       addr(r0, r0, r2);
        ldr_uc(r0, r0);
     }
 }
@@ -2838,13 +3048,13 @@ _ldxi_uc(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        LLGC(r0, x20(i0), 0, r1);
     else if (r0 != r1) {
        movi(r0, i0);
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_uc(r0, r0);
     }
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_uc(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
@@ -2861,12 +3071,12 @@ static void
 _ldxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2) {
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_s(r0, r0);
     }
     else {
        movr(r0, r1);
-       AGR(r0, r2);
+       addr(r0, r0, r2);
        ldr_s(r0, r0);
     }
 }
@@ -2875,17 +3085,27 @@ static void
 _ldxi_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
 {
     jit_int32_t                reg;
-    if (s20_p(i0))
+#if __WORDSIZE == 32
+    if (u12_p(i0))
+       LH(r0, i0, 0, r1);
+    else
+#endif
+    if (s20_p(i0)) {
+#if __WORDSIZE == 32
+       LHY(r0, x20(i0), 0, r1);
+#else
        LGH(r0, x20(i0), 0, r1);
+#endif
+    }
     else if (r0 != r1) {
        movi(r0, i0);
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_s(r0, r0);
     }
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_s(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
@@ -2902,12 +3122,12 @@ static void
 _ldxr_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2) {
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_us(r0, r0);
     }
     else {
        movr(r0, r1);
-       AGR(r0, r2);
+       addr(r0, r0, r2);
        ldr_us(r0, r0);
     }
 }
@@ -2920,13 +3140,13 @@ _ldxi_us(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        LLGH(r0, x20(i0), 0, r1);
     else if (r0 != r1) {
        movi(r0, i0);
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_us(r0, r0);
     }
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_us(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
@@ -2943,12 +3163,12 @@ static void
 _ldxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2) {
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_i(r0, r0);
     }
     else {
        movr(r0, r1);
-       AGR(r0, r2);
+       addr(r0, r0, r2);
        ldr_i(r0, r0);
     }
 }
@@ -2961,18 +3181,19 @@ _ldxi_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        LGF(r0, x20(i0), 0, r1);
     else if (r0 != r1) {
        movi(r0, i0);
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_i(r0, r0);
     }
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_i(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
 }
 
+#if __WORDSIZE == 64
 static void
 _ldi_ui(jit_state_t *_jit, jit_int32_t r0, jit_word_t i0)
 {
@@ -2984,12 +3205,12 @@ static void
 _ldxr_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2) {
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_ui(r0, r0);
     }
     else {
        movr(r0, r1);
-       AGR(r0, r2);
+       addr(r0, r0, r2);
        ldr_ui(r0, r0);
     }
 }
@@ -3002,13 +3223,13 @@ _ldxi_ui(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        LLGF(r0, x20(i0), 0, r1);
     else if (r0 != r1) {
        movi(r0, i0);
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_ui(r0, r0);
     }
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_ui(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
@@ -3025,12 +3246,12 @@ static void
 _ldxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     if (r0 == r2) {
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_l(r0, r0);
     }
     else {
        movr(r0, r1);
-       AGR(r0, r2);
+       addr(r0, r0, r2);
        ldr_l(r0, r0);
     }
 }
@@ -3043,17 +3264,18 @@ _ldxi_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
        LG(r0, x20(i0), 0, r1);
     else if (r0 != r1) {
        movi(r0, i0);
-       AGR(r0, r1);
+       addr(r0, r0, r1);
        ldr_l(r0, r0);
     }
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_l(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
 }
+#endif
 
 static void
 _sti_c(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
@@ -3071,7 +3293,7 @@ _stxr_c(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     reg = jit_get_reg_but_zero(0);
     movr(rn(reg), r0);
-    AGR(rn(reg), r1);
+    addr(rn(reg), rn(reg), r1);
     str_c(rn(reg), r2);
     jit_unget_reg_but_zero(reg);
 }
@@ -3108,7 +3330,7 @@ _stxr_s(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     reg = jit_get_reg_but_zero(0);
     movr(rn(reg), r0);
-    AGR(rn(reg), r1);
+    addr(rn(reg), rn(reg), r1);
     str_s(rn(reg), r2);
     jit_unget_reg_but_zero(reg);
 }
@@ -3145,7 +3367,7 @@ _stxr_i(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     reg = jit_get_reg_but_zero(0);
     movr(rn(reg), r0);
-    AGR(rn(reg), r1);
+    addr(rn(reg), rn(reg), r1);
     str_i(rn(reg), r2);
     jit_unget_reg_but_zero(reg);
 }
@@ -3166,6 +3388,7 @@ _stxi_i(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     }
 }
 
+#if __WORDSIZE == 64
 static void
 _sti_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0)
 {
@@ -3182,7 +3405,7 @@ _stxr_l(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     reg = jit_get_reg_but_zero(0);
     movr(rn(reg), r0);
-    AGR(rn(reg), r1);
+    addr(rn(reg), rn(reg), r1);
     str_l(rn(reg), r2);
     jit_unget_reg_but_zero(reg);
 }
@@ -3200,6 +3423,7 @@ _stxi_l(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
        jit_unget_reg_but_zero(reg);
     }
 }
+#endif
 
 static void
 _jmpi(jit_state_t *_jit, jit_word_t i0)
@@ -3291,25 +3515,40 @@ _prolog(jit_state_t *_jit, jit_node_t *i0)
        if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
            break;
     }
+#if __WORDSIZE == 32
+    offset = regno * 4 + 32;
+    STM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#else
     offset = regno * 8 + 48;
     STMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#endif
 #define SPILL(R, O)                                                    \
     do {                                                               \
        if (jit_regset_tstbit(&_jitc->function->regset, R))             \
            stxi_d(O, _R15_REGNO, rn(R));                               \
     } while (0)
     /* First 4 in low address */
+#if __WORDSIZE == 32
+    SPILL(_F10, 0);
+    SPILL(_F11, 8);
+    SPILL(_F12, 16);
+    SPILL(_F13, 24);
+    /* gpr registers here */
+    SPILL(_F14, 72);
+    SPILL(_F8, 80);
+    SPILL(_F9, 88);
+#else
     SPILL(_F10, 16);
     SPILL(_F11, 24);
     SPILL(_F12, 32);
     SPILL(_F13, 48);
-    /* Last 4 in high address */
-    SPILL(_F14, 128);
-    SPILL(_F15, 136);
+    /* Last 3 in high address */
+    SPILL(_F14, 136);
     SPILL(_F8, 144);
     SPILL(_F9, 152);
+#endif
 #undef SPILL
-    LGR(_R13_REGNO, _R15_REGNO);
+    movr(_R13_REGNO, _R15_REGNO);
     subi(_R15_REGNO, _R15_REGNO, stack_framesize + _jitc->function->stack);
 }
 
@@ -3323,23 +3562,40 @@ _epilog(jit_state_t *_jit, jit_node_t *i0)
        if (jit_regset_tstbit(&_jitc->function->regset, gprs[regno]))
            break;
     }
+#if __WORDSIZE == 32
+    offset = regno * 4 + 32;
+#else
     offset = regno * 8 + 48;
-    LGR(_R15_REGNO, _R13_REGNO);
+#endif
+    movr(_R15_REGNO, _R13_REGNO);
 #define LOAD(R, O)                                                     \
     do {                                                               \
        if (jit_regset_tstbit(&_jitc->function->regset, R))             \
            ldxi_d(rn(R), _R15_REGNO, O);                               \
     } while (0)
+#if __WORDSIZE == 32
+    LOAD(_F10, 0);
+    LOAD(_F11, 8);
+    LOAD(_F12, 16);
+    LOAD(_F13, 24);
+    LOAD(_F14, 72);
+    LOAD(_F8, 80);
+    LOAD(_F9, 88);
+#else
     LOAD(_F10, 16);
     LOAD(_F11, 24);
     LOAD(_F12, 32);
     LOAD(_F13, 48);
-    LOAD(_F14, 128);
-    LOAD(_F15, 136);
+    LOAD(_F14, 136);
     LOAD(_F8, 144);
     LOAD(_F9, 152);
+#endif
 #undef LOAD
+#if __WORDSIZE == 32
+    LM(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#else
     LMG(rn(gprs[regno]), _R15_REGNO, x20(offset), _R15_REGNO);
+#endif
     BR(_R14_REGNO);
 }
 
@@ -3375,14 +3631,23 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
     } i12;
     i0.s = u.s[0];
     /* movi_p */
-    if (i0.b.op == 0xA5) {
+    if (i0.b.op ==
+#if __WORDSIZE == 32
+       0xA7 && i0.b.r3 == 8
+#else
+       0xA5
+#endif
+       ) {
+#if __WORDSIZE == 64
        assert(i0.b.r3 == 3);
+#endif
        i1.b.i2 = (jit_uword_t)label;
        u.s[1] = i1.s;
        i0.s = u.s[2];
        assert(i0.b.op == 0xA5 && i0.b.r3 == 2);
        i1.b.i2 = (jit_uword_t)label >> 16;
        u.s[3] = i1.s;
+#if __WORDSIZE == 64
        i0.s = u.s[4];
        assert(i0.b.op == 0xA5 && i0.b.r3 == 1);
        i1.b.i2 = (jit_uword_t)label >> 32;
@@ -3391,6 +3656,7 @@ _patch_at(jit_state_t *_jit, jit_word_t instr, jit_word_t label)
        assert(i0.b.op == 0xA5 && i0.b.r3 == 0);
        i1.b.i2 = (jit_uword_t)label >> 48;
        u.s[7] = i1.s;
+#endif
     }
     /* BRC */
     else if (i0.b.op == 0xA7) {
diff --git a/lib/jit_s390x-fpu.c b/lib/jit_s390x-fpu.c
index d36f0fb..1abc28c 100644
--- a/lib/jit_s390x-fpu.c
+++ b/lib/jit_s390x-fpu.c
@@ -354,10 +354,17 @@ static void _movi_d(jit_state_t*,jit_int32_t,jit_float64_t*);
 #  define sqrtr_d(r0,r1)               SQDBR(r0,r1)
 #  define truncr_f_i(r0,r1)            CFEBR(r0,RND_ZERO,r1)
 #  define truncr_d_i(r0,r1)            CFDBR(r0,RND_ZERO,r1)
-#  define truncr_f_l(r0,r1)            CGEBR(r0,RND_ZERO,r1)
-#  define truncr_d_l(r0,r1)            CGDBR(r0,RND_ZERO,r1)
-#  define extr_f(r0,r1)                        CEGBR(r0,r1)
-#  define extr_d(r0,r1)                        CDGBR(r0,r1)
+#  if __WORDSIZE == 64
+#    define truncr_f_l(r0,r1)          CGEBR(r0,RND_ZERO,r1)
+#    define truncr_d_l(r0,r1)          CGDBR(r0,RND_ZERO,r1)
+#  endif
+#  if __WORDSIZE == 32
+#    define extr_f(r0,r1)              CEFBR(r0,r1)
+#    define extr_d(r0,r1)              CDFBR(r0,r1)
+#  else
+#    define extr_f(r0,r1)              CEGBR(r0,r1)
+#    define extr_d(r0,r1)              CDGBR(r0,r1)
+#  endif
 #  define extr_d_f(r0,r1)              LEDBR(r0,r1)
 #  define extr_f_d(r0,r1)              LDEBR(r0,r1)
 #  define addr_f(r0,r1,r2)             _addr_f(_jit,r0,r1,r2)
@@ -889,7 +896,11 @@ static void
 _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
 {
     union {
+#if __WORDSIZE == 32
+       jit_int32_t      i[2];
+#else
        jit_int64_t      l;
+#endif
        jit_float64_t    d;
     } data;
     jit_int32_t                 reg;
@@ -899,8 +910,15 @@ _movi_d(jit_state_t *_jit, jit_int32_t r0, jit_float64_t *i0)
     else if (_jitc->no_data) {
        data.d = *i0;
        reg = jit_get_reg_but_zero(0);
+#if __WORDSIZE == 32
+       movi(rn(reg), data.i[0]);
+       stxi_i(-8, _FP_REGNO, rn(reg));
+       movi(rn(reg), data.i[1]);
+       stxi_i(-4, _FP_REGNO, rn(reg));
+#else
        movi(rn(reg), data.l);
        stxi_l(-8, _FP_REGNO, rn(reg));
+#endif
        jit_unget_reg_but_zero(reg);
        ldxi_d(r0, _FP_REGNO, -8);
     }
@@ -1046,7 +1064,7 @@ _ldxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     reg = jit_get_reg_but_zero(0);
     movr(rn(reg), r1);
-    AGR(rn(reg), r2);
+    addr(rn(reg), rn(reg), r2);
     ldr_f(r0, rn(reg));
     jit_unget_reg_but_zero(reg);
 }
@@ -1057,7 +1075,7 @@ _ldxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     reg = jit_get_reg_but_zero(0);
     movr(rn(reg), r1);
-    AGR(rn(reg), r2);
+    addr(rn(reg), rn(reg), r2);
     ldr_d(r0, rn(reg));
     jit_unget_reg_but_zero(reg);
 }
@@ -1073,7 +1091,7 @@ _ldxi_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_f(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
@@ -1090,7 +1108,7 @@ _ldxi_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_word_t i0)
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r1);
+       addr(rn(reg), rn(reg), r1);
        ldr_d(r0, rn(reg));
        jit_unget_reg_but_zero(reg);
     }
@@ -1122,7 +1140,7 @@ _stxr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     reg = jit_get_reg_but_zero(0);
     movr(rn(reg), r0);
-    AGR(rn(reg), r1);
+    addr(rn(reg), rn(reg), r1);
     str_f(rn(reg), r2);
     jit_unget_reg_but_zero(reg);
 }
@@ -1133,7 +1151,7 @@ _stxr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
     jit_int32_t                reg;
     reg = jit_get_reg_but_zero(0);
     movr(rn(reg), r0);
-    AGR(rn(reg), r1);
+    addr(rn(reg), rn(reg), r1);
     str_d(rn(reg), r2);
     jit_unget_reg_but_zero(reg);
 }
@@ -1149,7 +1167,7 @@ _stxi_f(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r0);
+       addr(rn(reg), rn(reg), r0);
        str_f(rn(reg), r1);
        jit_unget_reg_but_zero(reg);
     }
@@ -1166,7 +1184,7 @@ _stxi_d(jit_state_t *_jit, jit_word_t i0, jit_int32_t r0, jit_int32_t r1)
     else {
        reg = jit_get_reg_but_zero(0);
        movi(rn(reg), i0);
-       AGR(rn(reg), r0);
+       addr(rn(reg), rn(reg), r0);
        str_d(rn(reg), r1);
        jit_unget_reg_but_zero(reg);
     }
@@ -1176,13 +1194,13 @@ static void
 _uneqr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         unord, eq;
-    LGHI(r0, 1);                       /* set to one */
+    movi(r0, 1);                       /* set to one */
     CEBR(r1, r2);
     unord = _jit->pc.w;                        /* keep set to one if unord */
     BRC(CC_O, 0);
     eq = _jit->pc.w;
     BRC(CC_E, 0);                      /* keep set to one if eq */
-    LGHI(r0, 0);                       /* set to zero */
+    movi(r0, 0);                       /* set to zero */
     patch_at(unord, _jit->pc.w);
     patch_at(eq, _jit->pc.w);
 }
@@ -1191,13 +1209,13 @@ static void
 _uneqr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         unord, eq;
-    LGHI(r0, 1);                       /* set to one */
+    movi(r0, 1);                       /* set to one */
     CDBR(r1, r2);
     unord = _jit->pc.w;                        /* keep set to one if unord */
     BRC(CC_O, 0);
     eq = _jit->pc.w;
     BRC(CC_E, 0);                      /* keep set to one if eq */
-    LGHI(r0, 0);                       /* set to zero */
+    movi(r0, 0);                       /* set to zero */
     patch_at(unord, _jit->pc.w);
     patch_at(eq, _jit->pc.w);
 }
@@ -1206,13 +1224,13 @@ static void
 _ltgtr_f(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         unord, eq;
-    LGHI(r0, 0);                       /* set to zero */
+    movi(r0, 0);                       /* set to zero */
     CEBR(r1, r2);
     unord = _jit->pc.w;                        /* keep set to zero if unord */
     BRC(CC_O, 0);
     eq = _jit->pc.w;
     BRC(CC_E, 0);                      /* keep set to zero if eq */
-    LGHI(r0, 1);                       /* set to one */
+    movi(r0, 1);                       /* set to one */
     patch_at(unord, _jit->pc.w);
     patch_at(eq, _jit->pc.w);
 }
@@ -1221,13 +1239,13 @@ static void
 _ltgtr_d(jit_state_t *_jit, jit_int32_t r0, jit_int32_t r1, jit_int32_t r2)
 {
     jit_word_t         unord, eq;
-    LGHI(r0, 0);                       /* set to zero */
+    movi(r0, 0);                       /* set to zero */
     CDBR(r1, r2);
     unord = _jit->pc.w;                        /* keep set to zero if unord */
     BRC(CC_O, 0);
     eq = _jit->pc.w;
     BRC(CC_E, 0);                      /* keep set to zero if eq */
-    LGHI(r0, 1);                       /* set to one */
+    movi(r0, 1);                       /* set to one */
     patch_at(unord, _jit->pc.w);
     patch_at(eq, _jit->pc.w);
 }
diff --git a/lib/jit_s390x-sz.c b/lib/jit_s390x-sz.c
index 397a59b..4b7ec5e 100644
--- a/lib/jit_s390x-sz.c
+++ b/lib/jit_s390x-sz.c
@@ -1,4 +1,360 @@
 
+#if __WORDSIZE == 32
+#define JIT_INSTR_MAX 50
+    0,  /* data */
+    0,  /* live */
+    2,  /* align */
+    0,  /* save */
+    0,  /* load */
+    0,  /* #name */
+    0,  /* #note */
+    2,  /* label */
+    38, /* prolog */
+    0,  /* arg */
+    4,  /* addr */
+    12, /* addi */
+    4,  /* addcr */
+    10, /* addci */
+    6,  /* addxr */
+    10, /* addxi */
+    6,  /* subr */
+    12, /* subi */
+    6,  /* subcr */
+    10, /* subci */
+    8,  /* subxr */
+    10, /* subxi */
+    14, /* rsbi */
+    6,  /* mulr */
+    14, /* muli */
+    46, /* qmulr */
+    50, /* qmuli */
+    10, /* qmulr_u */
+    18, /* qmuli_u */
+    10, /* divr */
+    18, /* divi */
+    16, /* divr_u */
+    24, /* divi_u */
+    12, /* qdivr */
+    16, /* qdivi */
+    18, /* qdivr_u */
+    22, /* qdivi_u */
+    10, /* remr */
+    18, /* remi */
+    16, /* remr_u */
+    24, /* remi_u */
+    4,  /* andr */
+    10, /* andi */
+    4,  /* orr */
+    10, /* ori */
+    4,  /* xorr */
+    12, /* xori */
+    8,  /* lshr */
+    10, /* lshi */
+    8,  /* rshr */
+    10, /* rshi */
+    8,  /* rshr_u */
+    10, /* rshi_u */
+    2,  /* negr */
+    8,  /* comr */
+    16, /* ltr */
+    20, /* lti */
+    16, /* ltr_u */
+    20, /* lti_u */
+    16, /* ler */
+    20, /* lei */
+    16, /* ler_u */
+    20, /* lei_u */
+    16, /* eqr */
+    20, /* eqi */
+    16, /* ger */
+    20, /* gei */
+    16, /* ger_u */
+    20, /* gei_u */
+    16, /* gtr */
+    20, /* gti */
+    16, /* gtr_u */
+    20, /* gti_u */
+    16, /* ner */
+    20, /* nei */
+    2,  /* movr */
+    8,  /* movi */
+    4,  /* extr_c */
+    4,  /* extr_uc */
+    4,  /* extr_s */
+    4,  /* extr_us */
+    0,  /* extr_i */
+    0,  /* extr_ui */
+    4,  /* htonr_us */
+    2,  /* htonr_ui */
+    0,  /* htonr_ul */
+    6,  /* ldr_c */
+    12, /* ldi_c */
+    6,  /* ldr_uc */
+    14, /* ldi_uc */
+    6,  /* ldr_s */
+    12, /* ldi_s */
+    6,  /* ldr_us */
+    12, /* ldi_us */
+    6,  /* ldr_i */
+    12, /* ldi_i */
+    0,  /* ldr_ui */
+    0,  /* ldi_ui */
+    0,  /* ldr_l */
+    0,  /* ldi_l */
+    10, /* ldxr_c */
+    16, /* ldxi_c */
+    10, /* ldxr_uc */
+    16, /* ldxi_uc */
+    10, /* ldxr_s */
+    16, /* ldxi_s */
+    10, /* ldxr_us */
+    16, /* ldxi_us */
+    10, /* ldxr_i */
+    16, /* ldxi_i */
+    0,  /* ldxr_ui */
+    0,  /* ldxi_ui */
+    0,  /* ldxr_l */
+    0,  /* ldxi_l */
+    4,  /* str_c */
+    12, /* sti_c */
+    4,  /* str_s */
+    10, /* sti_s */
+    4,  /* str_i */
+    10, /* sti_i */
+    0,  /* str_l */
+    0,  /* sti_l */
+    8,  /* stxr_c */
+    16, /* stxi_c */
+    8,  /* stxr_s */
+    16, /* stxi_s */
+    8,  /* stxr_i */
+    16, /* stxi_i */
+    0,  /* stxr_l */
+    0,  /* stxi_l */
+    8,  /* bltr */
+    12, /* blti */
+    8,  /* bltr_u */
+    12, /* blti_u */
+    8,  /* bler */
+    12, /* blei */
+    8,  /* bler_u */
+    12, /* blei_u */
+    8,  /* beqr */
+    16, /* beqi */
+    8,  /* bger */
+    12, /* bgei */
+    8,  /* bger_u */
+    12, /* bgei_u */
+    8,  /* bgtr */
+    12, /* bgti */
+    8,  /* bgtr_u */
+    12, /* bgti_u */
+    8,  /* bner */
+    16, /* bnei */
+    12, /* bmsr */
+    14, /* bmsi */
+    12, /* bmcr */
+    14, /* bmci */
+    8,  /* boaddr */
+    12, /* boaddi */
+    8,  /* boaddr_u */
+    12, /* boaddi_u */
+    8,  /* bxaddr */
+    12, /* bxaddi */
+    8,  /* bxaddr_u */
+    12, /* bxaddi_u */
+    8,  /* bosubr */
+    12, /* bosubi */
+    8,  /* bosubr_u */
+    12, /* bosubi_u */
+    8,  /* bxsubr */
+    12, /* bxsubi */
+    8,  /* bxsubr_u */
+    12, /* bxsubi_u */
+    2,  /* jmpr */
+    10, /* jmpi */
+    2,  /* callr */
+    10, /* calli */
+    36, /* epilog */
+    0,  /* arg_f */
+    6,  /* addr_f */
+    24, /* addi_f */
+    8,  /* subr_f */
+    24, /* subi_f */
+    28, /* rsbi_f */
+    6,  /* mulr_f */
+    24, /* muli_f */
+    8,  /* divr_f */
+    24, /* divi_f */
+    4,  /* negr_f */
+    4,  /* absr_f */
+    4,  /* sqrtr_f */
+    16, /* ltr_f */
+    36, /* lti_f */
+    16, /* ler_f */
+    36, /* lei_f */
+    16, /* eqr_f */
+    36, /* eqi_f */
+    16, /* ger_f */
+    36, /* gei_f */
+    16, /* gtr_f */
+    36, /* gti_f */
+    16, /* ner_f */
+    36, /* nei_f */
+    16, /* unltr_f */
+    36, /* unlti_f */
+    16, /* unler_f */
+    36, /* unlei_f */
+    20, /* uneqr_f */
+    40, /* uneqi_f */
+    16, /* unger_f */
+    36, /* ungei_f */
+    16, /* ungtr_f */
+    36, /* ungti_f */
+    20, /* ltgtr_f */
+    40, /* ltgti_f */
+    16, /* ordr_f */
+    36, /* ordi_f */
+    16, /* unordr_f */
+    36, /* unordi_f */
+    4,  /* truncr_f_i */
+    0,  /* truncr_f_l */
+    4,  /* extr_f */
+    4,  /* extr_d_f */
+    2,  /* movr_f */
+    20, /* movi_f */
+    4,  /* ldr_f */
+    10, /* ldi_f */
+    8,  /* ldxr_f */
+    14, /* ldxi_f */
+    4,  /* str_f */
+    10, /* sti_f */
+    8,  /* stxr_f */
+    14, /* stxi_f */
+    10, /* bltr_f */
+    28, /* blti_f */
+    10, /* bler_f */
+    30, /* blei_f */
+    10, /* beqr_f */
+    30, /* beqi_f */
+    10, /* bger_f */
+    30, /* bgei_f */
+    10, /* bgtr_f */
+    30, /* bgti_f */
+    10, /* bner_f */
+    30, /* bnei_f */
+    10, /* bunltr_f */
+    28, /* bunlti_f */
+    10, /* bunler_f */
+    28, /* bunlei_f */
+    18, /* buneqr_f */
+    36, /* buneqi_f */
+    10, /* bunger_f */
+    30, /* bungei_f */
+    10, /* bungtr_f */
+    30, /* bungti_f */
+    18, /* bltgtr_f */
+    38, /* bltgti_f */
+    10, /* bordr_f */
+    30, /* bordi_f */
+    10, /* bunordr_f */
+    28, /* bunordi_f */
+    0,  /* arg_d */
+    6,  /* addr_d */
+    34, /* addi_d */
+    8,  /* subr_d */
+    34, /* subi_d */
+    38, /* rsbi_d */
+    6,  /* mulr_d */
+    34, /* muli_d */
+    8,  /* divr_d */
+    34, /* divi_d */
+    4,  /* negr_d */
+    4,  /* absr_d */
+    4,  /* sqrtr_d */
+    16, /* ltr_d */
+    46, /* lti_d */
+    16, /* ler_d */
+    46, /* lei_d */
+    16, /* eqr_d */
+    46, /* eqi_d */
+    16, /* ger_d */
+    46, /* gei_d */
+    16, /* gtr_d */
+    46, /* gti_d */
+    16, /* ner_d */
+    46, /* nei_d */
+    16, /* unltr_d */
+    46, /* unlti_d */
+    16, /* unler_d */
+    46, /* unlei_d */
+    20, /* uneqr_d */
+    50, /* uneqi_d */
+    16, /* unger_d */
+    46, /* ungei_d */
+    16, /* ungtr_d */
+    46, /* ungti_d */
+    20, /* ltgtr_d */
+    50, /* ltgti_d */
+    16, /* ordr_d */
+    46, /* ordi_d */
+    16, /* unordr_d */
+    46, /* unordi_d */
+    4,  /* truncr_d_i */
+    0,  /* truncr_d_l */
+    4,  /* extr_d */
+    4,  /* extr_f_d */
+    2,  /* movr_d */
+    30, /* movi_d */
+    4,  /* ldr_d */
+    10, /* ldi_d */
+    8,  /* ldxr_d */
+    14, /* ldxi_d */
+    4,  /* str_d */
+    10, /* sti_d */
+    8,  /* stxr_d */
+    14, /* stxi_d */
+    10, /* bltr_d */
+    38, /* blti_d */
+    10, /* bler_d */
+    38, /* blei_d */
+    10, /* beqr_d */
+    40, /* beqi_d */
+    10, /* bger_d */
+    40, /* bgei_d */
+    10, /* bgtr_d */
+    40, /* bgti_d */
+    10, /* bner_d */
+    40, /* bnei_d */
+    10, /* bunltr_d */
+    38, /* bunlti_d */
+    10, /* bunler_d */
+    38, /* bunlei_d */
+    18, /* buneqr_d */
+    46, /* buneqi_d */
+    10, /* bunger_d */
+    40, /* bungei_d */
+    10, /* bungtr_d */
+    40, /* bungti_d */
+    18, /* bltgtr_d */
+    48, /* bltgti_d */
+    10, /* bordr_d */
+    40, /* bordi_d */
+    10, /* bunordr_d */
+    38, /* bunordi_d */
+    0,  /* movr_w_f */
+    0,  /* movr_ww_d */
+    0,  /* movr_w_d */
+    0,  /* movr_f_w */
+    0,  /* movi_f_w */
+    0,  /* movr_d_ww */
+    0,  /* movi_d_ww */
+    0,  /* movr_d_w */
+    0,  /* movi_d_w */
+    0,  /* x86_retval_f */
+    0,  /* x86_retval_d */
+#endif /* __WORDSIZE */
+
 #if __WORDSIZE == 64
 #define JIT_INSTR_MAX 68
     0, /* data */
diff --git a/lib/jit_s390x.c b/lib/jit_s390x.c
index d79e963..3e45078 100644
--- a/lib/jit_s390x.c
+++ b/lib/jit_s390x.c
@@ -22,6 +22,11 @@
 
 #define rc(value)                      jit_class_##value
 #define rn(reg)                                (jit_regno(_rvs[jit_regno(reg)].spec))
+#if __WORDSIZE == 32
+#  define NUM_FLOAT_REG_ARGS           2
+#else
+#  define NUM_FLOAT_REG_ARGS           4
+#endif
 
 /*
  * Prototypes
@@ -69,7 +74,8 @@ jit_register_t                _rvs[] = {
     { rc(fpr) | 0x5,                   "%f5" },
     { rc(fpr) | 0x7,                   "%f7" },
     { rc(fpr) | rc(sav) | 0xe,         "%f14" },
-    { rc(fpr) | rc(sav) | 0xf,         "%f15" },
+    /* Do not use as temporary to simplify stack layout */
+    { 0xf,                             "%f15" },
     { rc(fpr) | rc(sav) | 0x8,         "%f8" },
     { rc(fpr) | rc(sav) | 0x9,         "%f9" },
     { rc(fpr) | rc(sav) | 0xa,         "%f10" },
@@ -239,7 +245,7 @@ _jit_arg_f(jit_state_t *_jit)
 {
     jit_int32_t                offset;
     assert(_jitc->function);
-    if (_jitc->function->self.argf < 4)
+    if (_jitc->function->self.argf < NUM_FLOAT_REG_ARGS)
        offset = _jitc->function->self.argf++;
     else {
        offset = _jitc->function->self.size;
@@ -251,7 +257,7 @@ _jit_arg_f(jit_state_t *_jit)
 jit_bool_t
 _jit_arg_f_reg_p(jit_state_t *_jit, jit_int32_t offset)
 {
-    return (offset >= 0 && offset < 4);
+    return (offset >= 0 && offset < NUM_FLOAT_REG_ARGS);
 }
 
 jit_node_t *
@@ -259,11 +265,11 @@ _jit_arg_d(jit_state_t *_jit)
 {
     jit_int32_t                offset;
     assert(_jitc->function);
-    if (_jitc->function->self.argf < 4)
+    if (_jitc->function->self.argf < NUM_FLOAT_REG_ARGS)
        offset = _jitc->function->self.argf++;
     else {
        offset = _jitc->function->self.size;
-       _jitc->function->self.size += sizeof(jit_word_t);
+       _jitc->function->self.size += sizeof(jit_float64_t);
     }
     return (jit_new_node_w(jit_code_arg_d, offset));
 }
@@ -317,13 +323,19 @@ _jit_getarg_us(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 void
 _jit_getarg_i(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
-    if (v->u.w < 5)
+    if (v->u.w < 5) {
+#if __WORDSIZE == 32
+       jit_movr(u, _R2 - v->u.w);
+#else
        jit_extr_i(u, _R2 - v->u.w);
+#endif
+    }
     else
        jit_ldxi_i(u, JIT_FP,
                   v->u.w + (__WORDSIZE >> 3) - sizeof(jit_int32_t));
 }
 
+#if __WORDSIZE == 64
 void
 _jit_getarg_ui(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
 {
@@ -342,6 +354,7 @@ _jit_getarg_l(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
     else
        jit_ldxi_l(u, JIT_FP, v->u.w);
 }
+#endif
 
 void
 _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
@@ -350,7 +363,11 @@ _jit_getarg_f(jit_state_t *_jit, jit_int32_t u, jit_node_t *v)
        jit_movr_f(u, _F0 - v->u.w);
     else
        jit_ldxi_f(u, JIT_FP,
-                  v->u.w + (__WORDSIZE >> 3) - sizeof(jit_float32_t));
+                  v->u.w
+#if __WORDSIZE == 64
+                  + (__WORDSIZE >> 3) - sizeof(jit_float32_t)
+#endif
+                  );
 }
 
 void
@@ -398,13 +415,16 @@ void
 _jit_pushargr_f(jit_state_t *_jit, jit_int32_t u)
 {
     assert(_jitc->function);
-    if (_jitc->function->call.argf < 4) {
+    if (_jitc->function->call.argf < NUM_FLOAT_REG_ARGS) {
        jit_movr_f(_F0 - _jitc->function->call.argf, u);
        ++_jitc->function->call.argf;
     }
     else {
-       jit_stxi_f(_jitc->function->call.size + stack_framesize +
-                  (__WORDSIZE >> 3) - sizeof(jit_float32_t), JIT_SP, u);
+       jit_stxi_f(_jitc->function->call.size + stack_framesize
+#if __WORDSIZE == 64
+                  + (__WORDSIZE >> 3) - sizeof(jit_float32_t)
+#endif
+                  , JIT_SP, u);
        _jitc->function->call.size += sizeof(jit_word_t);
     }
 }
@@ -414,15 +434,18 @@ _jit_pushargi_f(jit_state_t *_jit, jit_float32_t u)
 {
     jit_int32_t                regno;
     assert(_jitc->function);
-    if (_jitc->function->call.argf < 4) {
+    if (_jitc->function->call.argf < NUM_FLOAT_REG_ARGS) {
        jit_movi_f(_F0 - _jitc->function->call.argf, u);
        ++_jitc->function->call.argf;
     }
     else {
        regno = jit_get_reg(jit_class_fpr);
        jit_movi_f(regno, u);
-       jit_stxi_f(_jitc->function->call.size + stack_framesize +
-                  (__WORDSIZE >> 3) - sizeof(jit_float32_t), JIT_SP, regno);
+       jit_stxi_f(_jitc->function->call.size + stack_framesize
+#if __WORDSIZE == 64
+                  + (__WORDSIZE >> 3) - sizeof(jit_float32_t)
+#endif
+                  , JIT_SP, regno);
        jit_unget_reg(regno);
        _jitc->function->call.size += sizeof(jit_word_t);
     }
@@ -432,13 +455,13 @@ void
 _jit_pushargr_d(jit_state_t *_jit, jit_int32_t u)
 {
     assert(_jitc->function);
-    if (_jitc->function->call.argf < 4) {
+    if (_jitc->function->call.argf < NUM_FLOAT_REG_ARGS) {
        jit_movr_d(_F0 - _jitc->function->call.argf, u);
        ++_jitc->function->call.argf;
     }
     else {
        jit_stxi_d(_jitc->function->call.size + stack_framesize, JIT_SP, u);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += sizeof(jit_float64_t);
     }
 }
 
@@ -447,7 +470,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
 {
     jit_int32_t                regno;
     assert(_jitc->function);
-    if (_jitc->function->call.argf < 4) {
+    if (_jitc->function->call.argf < NUM_FLOAT_REG_ARGS) {
        jit_movi_d(_F0 - _jitc->function->call.argf, u);
        ++_jitc->function->call.argf;
     }
@@ -456,7 +479,7 @@ _jit_pushargi_d(jit_state_t *_jit, jit_float64_t u)
        jit_movi_d(regno, u);
        jit_stxi_d(_jitc->function->call.size + stack_framesize, JIT_SP, regno);
        jit_unget_reg(regno);
-       _jitc->function->call.size += sizeof(jit_word_t);
+       _jitc->function->call.size += sizeof(jit_float64_t);
     }
 }
 
@@ -536,9 +559,14 @@ _jit_retval_us(jit_state_t *_jit, jit_int32_t r0)
 void
 _jit_retval_i(jit_state_t *_jit, jit_int32_t r0)
 {
+#if __WORDSIZE == 64
     jit_extr_i(r0, JIT_RET);
+#else
+    jit_movr(r0, JIT_RET);
+#endif
 }
 
+#if __WORDSIZE == 64
 void
 _jit_retval_ui(jit_state_t *_jit, jit_int32_t r0)
 {
@@ -550,6 +578,7 @@ _jit_retval_l(jit_state_t *_jit, jit_int32_t r0)
 {
     jit_movr(r0, JIT_RET);
 }
+#endif
 
 void
 _jit_retval_f(jit_state_t *_jit, jit_int32_t r0)
@@ -762,8 +791,10 @@ _emit_code(jit_state_t *_jit)
                case_rrw(xor,);
                case_rr(trunc, _f_i);
                case_rr(trunc, _d_i);
+#if __WORDSIZE == 64
                case_rr(trunc, _f_l);
                case_rr(trunc, _d_l);
+#endif
                case_rr(ld, _c);
                case_rw(ld, _c);
                case_rr(ld, _uc);
@@ -774,10 +805,12 @@ _emit_code(jit_state_t *_jit)
                case_rw(ld, _us);
                case_rr(ld, _i);
                case_rw(ld, _i);
+#if __WORDSIZE == 64
                case_rr(ld, _ui);
                case_rw(ld, _ui);
                case_rr(ld, _l);
                case_rw(ld, _l);
+#endif
                case_rrr(ldx, _c);
                case_rrw(ldx, _c);
                case_rrr(ldx, _uc);
@@ -788,35 +821,45 @@ _emit_code(jit_state_t *_jit)
                case_rrw(ldx, _us);
                case_rrr(ldx, _i);
                case_rrw(ldx, _i);
+#if __WORDSIZE == 64
                case_rrr(ldx, _ui);
                case_rrw(ldx, _ui);
                case_rrr(ldx, _l);
                case_rrw(ldx, _l);
+#endif
                case_rr(st, _c);
                case_wr(st, _c);
                case_rr(st, _s);
                case_wr(st, _s);
                case_rr(st, _i);
                case_wr(st, _i);
+#if __WORDSIZE == 64
                case_rr(st, _l);
                case_wr(st, _l);
+#endif
                case_rrr(stx, _c);
                case_wrr(stx, _c);
                case_rrr(stx, _s);
                case_wrr(stx, _s);
                case_rrr(stx, _i);
                case_wrr(stx, _i);
+#if __WORDSIZE == 64
                case_rrr(stx, _l);
                case_wrr(stx, _l);
+#endif
                case_rr(hton, _us);
                case_rr(hton, _ui);
+#if __WORDSIZE == 64
                case_rr(hton, _ul);
+#endif
                case_rr(ext, _c);
                case_rr(ext, _uc);
                case_rr(ext, _s);
                case_rr(ext, _us);
+#if __WORDSIZE == 64
                case_rr(ext, _i);
                case_rr(ext, _ui);
+#endif
                case_rr(mov,);
            case jit_code_movi:
                if (node->flag & jit_flag_node) {
diff --git a/lib/jit_size.c b/lib/jit_size.c
index b2facc5..522e3b6 100644
--- a/lib/jit_size.c
+++ b/lib/jit_size.c
@@ -46,7 +46,7 @@ static jit_int16_t    _szs[jit_code_last_code + 1] = {
 #    include "jit_hppa-sz.c"
 #  elif defined(__aarch64__)
 #    include "jit_aarch64-sz.c"
-#  elif defined(__s390x__)
+#  elif defined(__s390__) || defined(__s390x__)
 #    include "jit_s390x-sz.c"
 #  elif defined(__alpha__)
 #    include "jit_alpha-sz.c"
diff --git a/lib/lightning.c b/lib/lightning.c
index 5422b77..2795720 100644
--- a/lib/lightning.c
+++ b/lib/lightning.c
@@ -3185,7 +3185,7 @@ _patch_register(jit_state_t *_jit, jit_node_t *node, jit_node_t *link,
 #  include "jit_hppa.c"
 #elif defined(__aarch64__)
 #  include "jit_aarch64.c"
-#elif defined(__s390x__)
+#elif defined(__s390__) || defined(__s390x__)
 #  include "jit_s390x.c"
 #elif defined(__alpha__)
 #  include "jit_alpha.c"



reply via email to

[Prev in Thread] Current Thread [Next in Thread]