>From 0b4542e0500dac7968f769ce64028a91a83db977 Mon Sep 17 00:00:00 2001 From: Paulo Cesar Pereira de Andrade Date: Fri, 20 Aug 2010 16:34:22 -0300 Subject: [PATCH] Experimental x86_64 support for any number of integer and float arguments Tested with calls to both varargs and prototyped C functions; the JIT functions tested use the same calling convention. This should match the System V ABI used at least in Linux i386/x86_64, and allow calling functions, varargs or not, with any number of basic data type arguments. --- lightning/core-common.h | 2 +- lightning/i386/asm.h | 4 ++ lightning/i386/core-64.h | 131 ++++++++++++++++++++++++++++++++++++++-------- lightning/i386/fp-64.h | 52 +++++++++++++----- 4 files changed, 151 insertions(+), 38 deletions(-) diff --git a/lightning/core-common.h b/lightning/core-common.h index c9efa9d..370a529 100644 --- a/lightning/core-common.h +++ b/lightning/core-common.h @@ -462,7 +462,7 @@ typedef union jit_code { #define jit_getarg_s(reg, ofs) jit_extr_s_i ((reg), (ofs)) #define jit_getarg_uc(reg, ofs) jit_extr_uc_ui((reg), (ofs)) #define jit_getarg_ui(reg, ofs) jit_movr_ui ((reg), (ofs)) -#define jit_getarg_ul(reg, ofs) jit_extr_uc_ul((reg), (ofs)) +#define jit_getarg_ul(reg, ofs) jit_movr_ul ((reg), (ofs)) #define jit_getarg_us(reg, ofs) jit_extr_us_ul((reg), (ofs)) #else #define jit_getarg_c(reg, ofs) jit_ldxi_c((reg), JIT_AP, (ofs)); diff --git a/lightning/i386/asm.h b/lightning/i386/asm.h index 2dec4b9..f645cfe 100644 --- a/lightning/i386/asm.h +++ b/lightning/i386/asm.h @@ -94,6 +94,8 @@ typedef _uc jit_insn; #define _MM5 0x65 #define _MM6 0x66 #define _MM7 0x67 +#define _MM8 0x68 +#define _MM9 0x69 #define _XMM0 0x70 #define _XMM1 0x71 @@ -103,6 +105,8 @@ typedef _uc jit_insn; #define _XMM5 0x75 #define _XMM6 0x76 #define _XMM7 0x77 +#define _XMM8 0x78 +#define _XMM9 0x79 #define _ST0 0 #define _ST1 1 diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h index 4c0c5dc..f4cc43b 100644 --- a/lightning/i386/core-64.h +++ 
b/lightning/i386/core-64.h @@ -36,11 +36,17 @@ /* Used to implement ldc, stc, ... */ #define JIT_CAN_16 0 -#define JIT_REXTMP _R9D +#define JIT_REXTMP _R11D + +/* Number of integer argument registers */ +#define JIT_A_NUM 6 + +/* Number of float argument registers */ +#define JIT_FA_NUM 8 #define JIT_R_NUM 3 #define JIT_R(i) ((i) == 0 ? _EAX : _R9D + (i)) -#define JIT_V_NUM 3 +#define JIT_V_NUM 5 #define JIT_V(i) ((i) == 0 ? _EBX : _R11D + (i)) struct jit_local_state { @@ -48,7 +54,10 @@ struct jit_local_state { int nextarg_getfp; int nextarg_putfp; int nextarg_geti; + int nextarg_puti; + int framesize; int argssize; + int fprssize; int alloca_offset; int alloca_slack; }; @@ -123,34 +132,103 @@ struct jit_local_state { #define jit_pushr_i(rs) PUSHQr(rs) #define jit_popr_i(rs) POPQr(rs) -/* A return address is 8 bytes, plus 4 registers = 32 byte, total = 40 bytes. +/* A return address is 8 bytes, plus 6 registers = 48 bytes, total = 56 bytes. The final push of EBX keeps the stack aligned to 16 bytes. */ -#define jit_prolog(n) (_jitl.nextarg_getfp = _jitl.nextarg_geti = 0, _jitl.alloca_offset = 0, \ - PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13), PUSHQr(_EBP), MOVQrr(_ESP, _EBP), PUSHQr(_EBX)) +#define jit_prolog(n) \ + (_jitl.framesize = 56, \ + _jitl.nextarg_getfp = _jitl.nextarg_geti = 0, \ + _jitl.alloca_offset = 0, \ + PUSHQr(_EBX), \ + PUSHQr(_R12), \ + PUSHQr(_R13), \ + PUSHQr(_R14), \ + PUSHQr(_R15), \ + PUSHQr(_EBP), \ + MOVQrr(_ESP, _EBP), \ + PUSHQr(_EBX)) #define jit_calli(sub) (MOVQir((long) (sub), JIT_REXTMP), CALLsr(JIT_REXTMP)) #define jit_callr(reg) CALLsr((reg)) -/* Stack isn't used for arguments: */ -#define jit_prepare_i(ni) (_jitl.argssize = (ni)) - -#define jit_pusharg_i(rs) (--_jitl.argssize, MOVQrr(rs, jit_arg_reg_order[_jitl.argssize])) -#define jit_finish(sub) (MOVQir((long) (sub), JIT_REXTMP), \ - CALLsr(JIT_REXTMP)) +#define jit_prepare_i(ni) \ + (_jitl.argssize = _jitl.nextarg_puti = (ni), \ + _jitl.argssize = _jitl.nextarg_puti > JIT_A_NUM ? 
\ + _jitl.nextarg_puti - JIT_A_NUM : 0) + +#define jit_pusharg_i(rs) \ + (--_jitl.nextarg_puti, \ + _jitl.nextarg_puti >= JIT_A_NUM ? \ + PUSHQr(rs) : \ + MOVQrr(rs, jit_arg_reg_order[_jitl.nextarg_puti])) +#define jit_finish(sub) \ + (MOVBir(_jitl.fprssize < JIT_FA_NUM ? \ + _jitl.fprssize : JIT_FA_NUM, _AL), \ + jit_calli(sub), \ + (_jitl.argssize ? \ + ADDQir(sizeof(long) * _jitl.argssize, JIT_SP) : \ + 0), \ + _jitl.argssize = 0) #define jit_reg_is_arg(reg) ((reg) == _ECX || (reg) == _EDX) -#define jit_finishr(reg) ((jit_reg_is_arg((reg)) ? MOVQrr(reg, JIT_REXTMP) : (void)0), \ - CALLsr(jit_reg_is_arg((reg)) ? JIT_REXTMP : (reg))) +#define jit_finishr(reg) \ + (MOVBir(_jitl.fprssize < JIT_FA_NUM ? \ + _jitl.fprssize : JIT_FA_NUM, _AL), \ + (jit_reg_is_arg((reg)) ? \ + (MOVQrr(reg, JIT_REXTMP), jit_callr(JIT_REXTMP)) : \ + jit_callr(reg)), \ + (_jitl.argssize ? \ + ADDQir(sizeof(long) * _jitl.argssize, JIT_SP) : 0), \ + _jitl.argssize = 0) #define jit_retval_l(rd) ((void)jit_movr_l ((rd), _EAX)) -#define jit_arg_c() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_uc() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_s() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_us() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_i() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_ui() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_l() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_ul() (jit_arg_reg_order[_jitl.nextarg_geti++]) -#define jit_arg_p() (jit_arg_reg_order[_jitl.nextarg_geti++]) +#define jit_arg_i() \ + (_jitl.nextarg_geti < JIT_A_NUM ? 
\ + _jitl.nextarg_geti++ : \ + ((_jitl.framesize += sizeof(long)) - sizeof(long))) +#define jit_arg_c() jit_arg_i() +#define jit_arg_uc() jit_arg_i() +#define jit_arg_s() jit_arg_i() +#define jit_arg_us() jit_arg_i() +#define jit_arg_ui() jit_arg_i() +#define jit_arg_l() jit_arg_i() +#define jit_arg_ul() jit_arg_i() +#define jit_arg_p() jit_arg_i() + +#define jit_getarg_c(reg, ofs) \ + ((ofs) < JIT_A_NUM ? \ + jit_extr_c_i((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_c((reg), JIT_FP, ofs)) +#define jit_getarg_uc(reg, ofs) \ + ((ofs) < JIT_A_NUM ? \ + jit_extr_uc_ui((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_uc((reg), JIT_FP, ofs)) +#define jit_getarg_s(reg, ofs) \ + ((ofs) < JIT_A_NUM ? \ + jit_extr_s_i((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_s((reg), JIT_FP, ofs)) +#define jit_getarg_us(reg, ofs) \ + ((ofs) < JIT_A_NUM ? \ + jit_extr_us_ui((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_us((reg), JIT_FP, ofs)) +#define jit_getarg_i(reg, ofs) \ + ((ofs) < JIT_A_NUM ? \ + jit_movr_i((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_i((reg), JIT_FP, ofs)) +#define jit_getarg_ui(reg, ofs) \ + ((ofs) < JIT_A_NUM ? \ + jit_movr_ui((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_ui((reg), JIT_FP, ofs)) +#define jit_getarg_l(reg, ofs) \ + ((ofs) < JIT_A_NUM ? \ + jit_movr_l((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_l((reg), JIT_FP, ofs)) +#define jit_getarg_ul(reg, ofs) \ + ((ofs) < JIT_A_NUM ? \ + jit_movr_ul((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_ul((reg), JIT_FP, ofs)) +#define jit_getarg_p(reg, ofs) \ + ((ofs) < JIT_A_NUM ? 
\ + jit_movr_p((reg), jit_arg_reg_order[(ofs)]) : \ + jit_ldxi_p((reg), JIT_FP, ofs)) static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D }; @@ -178,7 +256,14 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX, _R8D, _R9D }; #define jit_patch_long_at(jump_pc,v) (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v))) #define jit_patch_short_at(jump_pc,v) (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc))) #define jit_patch_at(jump_pc,v) (_jitl.long_jumps ? jit_patch_long_at((jump_pc)-3, v) : jit_patch_short_at(jump_pc, v)) -#define jit_ret() (LEAVE_(), POPQr(_R13), POPQr(_R12), POPQr(_EBX), RET_()) +#define jit_ret() \ + (LEAVE_(), \ + POPQr(_R15), \ + POPQr(_R14), \ + POPQr(_R13), \ + POPQr(_R12), \ + POPQr(_EBX), \ + RET_()) /* Memory */ diff --git a/lightning/i386/fp-64.h b/lightning/i386/fp-64.h index 9bb2681..17aeec5 100644 --- a/lightning/i386/fp-64.h +++ b/lightning/i386/fp-64.h @@ -35,10 +35,10 @@ #include -#define JIT_FPR_NUM 9 +#define JIT_FPR_NUM 7 #define JIT_FPRET _XMM0 -#define JIT_FPR(i) (_XMM7 + (i)) -#define JIT_FPTMP _XMM6 +#define JIT_FPR(i) (_XMM9 + (i)) +#define JIT_FPTMP _XMM8 /* Either use a temporary register that is finally AND/OR/XORed with RS = RD, or use RD as the temporary register and to the AND/OR/XOR with RS. 
*/ @@ -290,16 +290,40 @@ union jit_double_imm { #define jit_ordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETNPr (jit_reg8((d)))) #define jit_unordr_d(d, s1, s2) (XORLrr ((d), (d)), UCOMISDrr ((s1), (s2)), SETPr (jit_reg8((d)))) -#define jit_prepare_f(num) (_jitl.nextarg_putfp = _XMM0 + (num)) -#define jit_prepare_d(num) (_jitl.nextarg_putfp = _XMM0 + (num)) - -#define jit_arg_f() (_XMM0 + _jitl.nextarg_getfp++) -#define jit_arg_d() (_XMM0 + _jitl.nextarg_getfp++) - -#define jit_getarg_f(rd, ofs) (jit_movr_f ((rd), (ofs))) -#define jit_getarg_d(rd, ofs) (jit_movr_d ((rd), (ofs))) - -#define jit_pusharg_f(rs) (--_jitl.nextarg_putfp, jit_movr_f (_jitl.nextarg_putfp, (rs))) -#define jit_pusharg_d(rs) (--_jitl.nextarg_putfp, jit_movr_d (_jitl.nextarg_putfp, (rs))) +#define jit_prepare_d(num) \ + ((_jitl.nextarg_putfp + (num) > JIT_FA_NUM ? \ + (_jitl.argssize += (_jitl.nextarg_putfp + (num)) - JIT_FA_NUM, \ + _jitl.fprssize = JIT_FA_NUM) : \ + (_jitl.fprssize += (num))), \ + _jitl.nextarg_putfp += (num)) +#define jit_prepare_f(num) jit_prepare_d(num) + +#define jit_arg_d() \ + (_jitl.nextarg_getfp < JIT_FA_NUM ? \ + _jitl.nextarg_getfp++ : \ + ((_jitl.framesize += sizeof(double)) - sizeof(double))) +#define jit_arg_f() jit_arg_d() + +#define jit_getarg_f(reg, ofs) \ + ((ofs) < JIT_FA_NUM ? \ + jit_movr_f((reg), _XMM0 + (ofs)) : \ + (_jitl.framesize -= sizeof(double), \ + jit_ldxi_f((reg), JIT_FP, (ofs)))) +#define jit_getarg_d(reg, ofs) \ + ((ofs) < JIT_FA_NUM ? \ + jit_movr_d((reg), _XMM0 + (ofs)) : \ + (_jitl.framesize -= sizeof(double), \ + jit_ldxi_d((reg), JIT_FP, (ofs)))) + +#define jit_pusharg_f(rs) \ + (--_jitl.nextarg_putfp, \ + _jitl.nextarg_putfp >= JIT_FA_NUM ? \ + (SUBQir(sizeof(double), JIT_SP), jit_str_d(JIT_SP,(rs))) : \ + jit_movr_f(_XMM0 + _jitl.nextarg_putfp, (rs))) +#define jit_pusharg_d(rs) \ + (--_jitl.nextarg_putfp, \ + _jitl.nextarg_putfp >= JIT_FA_NUM ? 
\ + (SUBQir(sizeof(double), JIT_SP), jit_str_d(JIT_SP,(rs))) : \ + jit_movr_d(_XMM0 + _jitl.nextarg_putfp, (rs))) #endif /* __lightning_fp_h */ -- 1.7.2.1