guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 34/437: finish jit_allocai implementation


From: Andy Wingo
Subject: [Guile-commits] 34/437: finish jit_allocai implementation
Date: Mon, 2 Jul 2018 05:13:39 -0400 (EDT)

wingo pushed a commit to branch lightning
in repository guile.

commit be415cc6a517f24d6cf088503a76edb61fc7b3c1
Author: Paolo Bonzini <address@hidden>
Date:   Mon Nov 6 09:06:49 2006 +0000

    finish jit_allocai implementation
    
    2006-11-04  Paolo Bonzini  <address@hidden>
    
        * lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
        * lightning/ppc/funcs.h: Store frame size into _jitl.  Store R1 before
        the STMW, so that the offset is unchanged when we patch the STMW.
        * lightning/i386/core.h: Define JIT_FP to be EBP.
        * lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
        epilog if jit_allocai was used.
        * lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
        epilog if jit_allocai was used.
    
    git-archimport-id: address@hidden/lightning--stable--1.2--patch-36
---
 ChangeLog                  | 17 +++++++++++++++
 NEWS                       |  1 -
 lightning/i386/core-32.h   | 32 ++++++++++++++++++++++-----
 lightning/i386/core-64.h   | 19 ++++++++++++++--
 lightning/i386/core-i386.h |  1 +
 lightning/ppc/core.h       | 21 +++++++++++++++---
 lightning/ppc/funcs.h      | 54 +++++++++++++++++++++++-----------------------
 tests/Makefile.in          | 28 ++++++++++++++++--------
 8 files changed, 126 insertions(+), 47 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index cba89a8..17d6b56 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,20 @@
+2006-11-04  Paolo Bonzini  <address@hidden>
+
+       * lightning/ppc/core.h: Implement jit_allocai, define JIT_FP to be R1.
+       * lightning/ppc/funcs.h: Store frame size into _jitl.  Store R1 before
+       the STMW, so that the offset is unchanged when we patch the STMW.
+       * lightning/i386/core.h: Define JIT_FP to be EBP.
+       * lightning/i386/core-32.h: Implement jit_allocai, put LEAVE in the
+       epilog if jit_allocai was used.
+       * lightning/i386/core-64.h: Implement jit_allocai, put LEAVE in the
+       epilog if jit_allocai was used.
+
+2006-11-04  Ludovic Courtes  <address@hidden>
+
+       * lightning/sparc/core.h: Implement jit_allocai.
+       * tests/allocai.c: New.
+       * tests/Makefile.am: Point to new tests.
+
 2006-11-03  Paolo Bonzini  <address@hidden>
 
        * lightning/ppc/core.h: Fix jit_bms using BNE rather than BGT.
diff --git a/NEWS b/NEWS
index ac0e383..9292f12 100644
--- a/NEWS
+++ b/NEWS
@@ -9,7 +9,6 @@ o   Support for stack-allocated variables.  Because of this,
     backends defining JIT_FP should now rename it to JIT_AP.
     JIT_FP is now a user-visible register used in ldxi/ldxr
     to access stack-allocated variables.
-    [a promise for now, not yet implemented!]
 
 
 ---
diff --git a/lightning/i386/core-32.h b/lightning/i386/core-32.h
index d68f8f6..805af03 100644
--- a/lightning/i386/core-32.h
+++ b/lightning/i386/core-32.h
@@ -41,21 +41,43 @@
 struct jit_local_state {
   int  framesize;
   int  argssize;
+  int  alloca_offset;
+  int  alloca_slack;
 };
 
 #define jit_base_prolog() (PUSHLr(_EBP), MOVLrr(_ESP, _EBP), PUSHLr(_EBX), PUSHLr(_ESI), PUSHLr(_EDI))
-#define jit_prolog(n) (_jitl.framesize = 8, jit_base_prolog())
-
-/* The += allows for stack pollution */
+#define jit_prolog(n) (_jitl.framesize = 8, _jitl.alloca_offset = -12, jit_base_prolog())
+
+/* Used internally.  SLACK is used by the Darwin ABI which keeps the stack
+   aligned to 16-bytes.  */
+
+#define jit_allocai_internal(amount, slack)                              \
+  (((amount) < _jitl.alloca_slack                                        \
+    ? 0                                                                  \
+    : (_jitl.alloca_slack += (amount) + (slack),                         \
+       ((amount) + (slack) == sizeof (int)                               \
+        ? PUSHLr(_EAX)                                                   \
+        : SUBLir((amount) + (slack), _ESP)))),                           \
+   _jitl.alloca_slack -= (amount),                                       \
+   _jitl.alloca_offset -= (amount))
+   
+/* The += in argssize allows for stack pollution */
 
 #ifdef __APPLE__
-  /* Stack must stay 16-byte aligned: */
+/* Stack must stay 16-byte aligned: */
 # define jit_prepare_i(ni)     (((ni & 0x3) \
                                   ? SUBLir(4 * ((((ni) + 3) & ~(0x3)) - (ni)), JIT_SP) \
                                   : (void)0), \
                                  _jitl.argssize += (((ni) + 3) & ~(0x3)))
+
+#define jit_allocai(n)                                         \
+  jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
+
 #else
 # define jit_prepare_i(ni)     (_jitl.argssize += (ni))
+
+#define jit_allocai(n)                                         \
+  jit_allocai_internal ((n), 0)
 #endif
 
 #define jit_pusharg_i(rs)      PUSHLr(rs)
@@ -74,7 +96,7 @@ struct jit_local_state {
 
 #define jit_patch_long_at(jump_pc,v)  (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v) - (jump_pc)))
 #define jit_patch_at(jump_pc,v)  jit_patch_long_at(jump_pc, v)
-#define jit_ret() (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), POPLr(_EBP), RET_())
+#define jit_ret()              (POPLr(_EDI), POPLr(_ESI), POPLr(_EBX), (_jitl.alloca_offset < -12 ? LEAVE_() : POPLr(_EBP)), RET_())
 
 #endif /* __lightning_core_h */
 
diff --git a/lightning/i386/core-64.h b/lightning/i386/core-64.h
index 7680d18..420fbcf 100644
--- a/lightning/i386/core-64.h
+++ b/lightning/i386/core-64.h
@@ -40,8 +40,23 @@ struct jit_local_state {
   int   long_jumps;
   int   nextarg_geti;
   int  argssize;
+  int   alloca_offset;
+  int   alloca_slack;
 };
 
+
+/* Keep the stack 16-byte aligned, the SSE hardware prefers it this way.  */
+#define jit_allocai_internal(amount, slack)                           \
+  (((amount) < _jitl.alloca_slack                                     \
+    ? 0                                                               \
+    : (_jitl.alloca_slack += (amount) + (slack),                      \
+      SUBQir((amount) + (slack), _ESP))),                             \
+   _jitl.alloca_slack -= (amount),                                    \
+   _jitl.alloca_offset -= (amount))
+
+#define jit_allocai(n)                                                \
+  jit_allocai_internal ((n), (_jitl.alloca_slack - (n)) & 15)
+
 /* 3-parameter operation */
 #define jit_qopr_(d, s1, s2, op1d, op2d)                               \
        ( (s2 == d) ? op1d :                                            \
@@ -95,7 +110,7 @@ struct jit_local_state {
 #define jit_popr_l(rs)         POPQr(rs)
 
 #define jit_base_prolog() (PUSHQr(_EBP), MOVQrr(_ESP, _EBP), PUSHQr(_EBX), PUSHQr(_R12), PUSHQr(_R13))
-#define jit_prolog(n) (_jitl.nextarg_geti = 0, jit_base_prolog())
+#define jit_prolog(n) (_jitl.nextarg_geti = 0, _jitl.alloca_offset = -24, jit_base_prolog())
 
 /* Stack isn't used for arguments: */
 #define jit_prepare_i(ni)      (_jitl.argssize = 0)
@@ -154,7 +169,7 @@ static int jit_arg_reg_order[] = { _EDI, _ESI, _EDX, _ECX };
 #define jit_patch_long_at(jump_pc,v)  (*_PSL((jump_pc) - sizeof(long)) = _jit_SL((jit_insn *)(v)))
 #define jit_patch_short_at(jump_pc,v)  (*_PSI((jump_pc) - sizeof(int)) = _jit_SI((jit_insn *)(v) - (jump_pc)))
 #define jit_patch_at(jump_pc,v) (_jitl.long_jumps ? jit_patch_long_at((jump_pc)-3, v) : jit_patch_short_at(jump_pc, v))
-#define jit_ret() (POPQr(_R13), POPQr(_R12), POPQr(_EBX), POPQr(_EBP), RET_())
+#define jit_ret() (POPQr(_R13), POPQr(_R12), POPQr(_EBX), (_jitl.alloca_offset < -24 ? LEAVE_() : POPQr(_EBP)), RET_())
 
 #define _jit_ldi_l(d, is)              MOVQmr((is), 0,    0,    0,  (d))
 #define jit_ldr_l(d, rs)               MOVQmr(0,    (rs), 0,    0,  (d))
diff --git a/lightning/i386/core-i386.h b/lightning/i386/core-i386.h
index 2345467..0e3e97b 100644
--- a/lightning/i386/core-i386.h
+++ b/lightning/i386/core-i386.h
@@ -35,6 +35,7 @@
 #define __lightning_core_i386_h
 
 #define JIT_AP                 _EBP
+#define JIT_FP                 _EBP
 #define JIT_SP                 _ESP
 #define JIT_RET                        _EAX
 
diff --git a/lightning/ppc/core.h b/lightning/ppc/core.h
index cea8022..962aa7b 100644
--- a/lightning/ppc/core.h
+++ b/lightning/ppc/core.h
@@ -42,9 +42,27 @@ struct jit_local_state {
    int nextarg_geti;  /* Next r20-r25 reg. to be read */
    int nextarg_getd;  /* The FP args are picked up from FPR1 -> FPR10 */
    int  nbArgs;        /* Number of arguments for the prolog */
+
+   int  frame_size, slack;
+   jit_insn *stwu;
 };
 
+/* Patch a `stwu' instruction (with immediate operand) so that it decreases
+   r1 by AMOUNT.  AMOUNT should already be rounded so that %sp remains quadword
+   aligned.  */
+#define jit_patch_stwu(amount)                               \
+  (*(_jitl.stwu) &= ~_MASK (16),                               \
+   *(_jitl.stwu) |= _s16 ((amount)))
+
+#define jit_allocai(n)                                                   \
+   (_jitl.frame_size += (n),                                             \
+    ((n) <= _jitl.slack                                                  \
+     ? 0 : jit_patch_stwu (-((_jitl.frame_size + 15) & ~15))),           \
+    _jitl.slack = ((_jitl.frame_size + 15) & ~15) - _jitl.frame_size,    \
+    _jitl.frame_size - (n))
+
 #define JIT_SP                 1
+#define JIT_FP                 1
 #define JIT_RET                        3
 #define JIT_R_NUM              3
 #define JIT_V_NUM              7
@@ -52,9 +70,6 @@ struct jit_local_state {
 #define JIT_V(i)               (31-(i))
 #define JIT_AUX                        JIT_V(JIT_V_NUM)  /* for 32-bit operands & shift counts */
 
-#define jit_pfx_start()   (_jit.jitl.trampolines)
-#define jit_pfx_end()     (_jit.jitl.free)
-
 /* If possible, use the `small' instruction (rd, rs, imm)
  * else load imm into r26 and use the `big' instruction (rd, rs, r26)
  */
diff --git a/lightning/ppc/funcs.h b/lightning/ppc/funcs.h
index 90d84d2..22c277e 100644
--- a/lightning/ppc/funcs.h
+++ b/lightning/ppc/funcs.h
@@ -91,34 +91,23 @@ static void
 _jit_epilog(jit_state *jit)
 {
   int n = _jitl.nbArgs;
-  int frame_size, ofs;
   int first_saved_reg = JIT_AUX - n;
   int num_saved_regs = 32 - first_saved_reg;
-
-  frame_size = 24 + 32 + num_saved_regs * 4;   /* r24..r31 + args                 */
-  frame_size += 15;                    /* the stack must be quad-word     */
-  frame_size &= ~15;                   /* aligned                         */
+  int frame_size = (_jitl.frame_size + 15) & ~15;
 
 #ifdef __APPLE__
-  LWZrm(0, frame_size + 8, 1); /* lwz   r0, x+8(r1)  (ret.addr.)  */
+  LWZrm(0, frame_size + 8, 1);         /* lwz   r0, x+8(r1)  (ret.addr.)  */
 #else
-  LWZrm(0, frame_size + 4, 1); /* lwz   r0, x+4(r1)  (ret.addr.)  */
+  LWZrm(0, frame_size + 4, 1);         /* lwz   r0, x+4(r1)  (ret.addr.)  */
 #endif
   MTLRr(0);                            /* mtspr LR, r0                    */
 
-  ofs = frame_size - num_saved_regs * 4;
-  LMWrm(first_saved_reg, ofs, 1);      /* lmw   rI, ofs(r1)               */
+  LMWrm(first_saved_reg, 24 + 32, 1);  /* lmw   rI, ofs(r1)               */
   ADDIrri(1, 1, frame_size);           /* addi  r1, r1, x                 */
   BLR();                               /* blr                             */
 }
 
 /* Emit a prolog for a function.
-   Upon entrance to the trampoline:
-     - LR      = address where the real code for the function lies
-     - R3-R8   = parameters
-   Upon finishing the trampoline:
-     - R0      = return address for the function
-     - R25-R20 = parameters (order is reversed, 1st argument is R25)
   
    The +32 in frame_size computation is to accound for the parameter area of
    a function frame. 
@@ -126,7 +115,7 @@ _jit_epilog(jit_state *jit)
    On PPC the frame must have space to host the arguments of any callee.
    However, as it currently stands, the argument to jit_trampoline (n) is
    the number of arguments of the caller we generate. Therefore, the
-   callee can overwrite a part of the stack (saved register area when it
+   callee can overwrite a part of the stack (saved register area) when it
    flushes its own parameter on the stack. The addition of a constant 
    offset = 32 is enough to hold eight 4 bytes arguments.  This is less
    than perfect but is a reasonable work around for now. 
@@ -134,8 +123,8 @@ _jit_epilog(jit_state *jit)
 static void
 _jit_prolog(jit_state *jit, int n)
 {
-  int frame_size;
-  int ofs, i;
+  int orig_frame_size, frame_size;
+  int i;
   int first_saved_reg = JIT_AUX - n;
   int num_saved_regs = 32 - first_saved_reg;
 
@@ -143,20 +132,31 @@ _jit_prolog(jit_state *jit, int n)
   _jitl.nextarg_getd = 1;
   _jitl.nbArgs = n;
 
-  frame_size = 24 + 32 + num_saved_regs * 4;   /* r27..r31 + args                 */
-  frame_size += 15;                    /* the stack must be quad-word     */
-  frame_size &= ~15;                   /* aligned                         */
-
   MFLRr(0);
-  STWUrm(1, -frame_size, 1);           /* stwu  r1, -x(r1)                */
 
-  ofs = frame_size - num_saved_regs * 4;
-  STMWrm(first_saved_reg, ofs, 1);             /* stmw  rI, ofs(r1)               */
 #ifdef __APPLE__
-  STWrm(0, frame_size + 8, 1);         /* stw   r0, x+8(r1)               */
+  STWrm(0, 8, 1);                      /* stw   r0, 8(r1)         */
 #else
-  STWrm(0, frame_size + 4, 1);         /* stw   r0, x+4(r1)               */
+  STWrm(0, 4, 1);                      /* stw   r0, 4(r1)         */
 #endif
+
+  /* 0..55 -> frame data
+     56..frame_size -> saved registers
+
+     The STMW instruction is patched by jit_allocai, thus leaving
+     the space for the allocai above the 56 bytes.  jit_allocai is
+     also able to reuse the slack space needed to keep the stack
+     quadword-aligned.  */
+
+  _jitl.frame_size = 24 + 32 + num_saved_regs * 4;     /* r27..r31 + args */
+
+  /* The stack must be quad-word aligned.  */
+  frame_size = (_jitl.frame_size + 15) & ~15;
+  _jitl.slack = frame_size - _jitl.frame_size;
+  _jitl.stwu = _jit.x.pc;
+  STWUrm(1, -frame_size, 1);           /* stwu  r1, -x(r1)        */
+
+  STMWrm(first_saved_reg, 24 + 32, 1);         /* stmw  rI, ofs(r1)       */
   for (i = 0; i < n; i++)
     MRrr(JIT_AUX-1-i, 3+i);            /* save parameters below r24       */
 }
diff --git a/tests/Makefile.in b/tests/Makefile.in
index ae65e72..96613a1 100644
--- a/tests/Makefile.in
+++ b/tests/Makefile.in
@@ -41,7 +41,7 @@ check_PROGRAMS = fibit$(EXEEXT) incr$(EXEEXT) printf$(EXEEXT) \
        printf2$(EXEEXT) rpn$(EXEEXT) fib$(EXEEXT) fibdelay$(EXEEXT) \
        add$(EXEEXT) bp$(EXEEXT) testfp$(EXEEXT) funcfp$(EXEEXT) \
        rpnfp$(EXEEXT) modi$(EXEEXT) ldxi$(EXEEXT) divi$(EXEEXT) \
-       movi$(EXEEXT) ret$(EXEEXT)
+       movi$(EXEEXT) ret$(EXEEXT) allocai$(EXEEXT)
 subdir = tests
 DIST_COMMON = $(srcdir)/Makefile.am $(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
@@ -56,6 +56,11 @@ add_SOURCES = add.c
 add_OBJECTS = add.$(OBJEXT)
 add_LDADD = $(LDADD)
 @address@hidden = $(top_builddir)/opcode/libdisass.a
+allocai_SOURCES = allocai.c
+allocai_OBJECTS = allocai.$(OBJEXT)
+allocai_LDADD = $(LDADD)
address@hidden@allocai_DEPENDENCIES =  \
address@hidden@ $(top_builddir)/opcode/libdisass.a
 bp_SOURCES = bp.c
 bp_OBJECTS = bp.$(OBJEXT)
 bp_LDADD = $(LDADD)
@@ -129,12 +134,12 @@ COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \
        $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS)
 CCLD = $(CC)
 LINK = $(CCLD) $(AM_CFLAGS) $(CFLAGS) $(AM_LDFLAGS) $(LDFLAGS) -o $@
-SOURCES = add.c bp.c divi.c fib.c fibdelay.c fibit.c funcfp.c incr.c \
-       ldxi.c modi.c movi.c printf.c printf2.c ret.c rpn.c rpnfp.c \
-       testfp.c
-DIST_SOURCES = add.c bp.c divi.c fib.c fibdelay.c fibit.c funcfp.c \
-       incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c rpn.c \
-       rpnfp.c testfp.c
+SOURCES = add.c allocai.c bp.c divi.c fib.c fibdelay.c fibit.c \
+       funcfp.c incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c \
+       rpn.c rpnfp.c testfp.c
+DIST_SOURCES = add.c allocai.c bp.c divi.c fib.c fibdelay.c fibit.c \
+       funcfp.c incr.c ldxi.c modi.c movi.c printf.c printf2.c ret.c \
+       rpn.c rpnfp.c testfp.c
 DATA = $(noinst_DATA)
 ETAGS = etags
 CTAGS = ctags
@@ -242,12 +247,13 @@ target_vendor = @target_vendor@
 AM_CPPFLAGS = -I$(top_builddir) -I$(top_srcdir) -I$(top_srcdir)/lightning/$(cpu)
 noinst_DATA = fibit.ok incr.ok printf.ok printf2.ok rpn.ok     \
        fib.ok fibdelay.ok testfp.ok funcfp.ok rpnfp.ok add.ok  \
-       bp.ok modi.ok ldxi.ok divi.ok movi.ok ret.ok
+       bp.ok modi.ok ldxi.ok divi.ok movi.ok ret.ok            \
+       allocai.ok
 
 EXTRA_DIST = $(noinst_DATA) run-test
 @address@hidden = $(top_builddir)/opcode/libdisass.a
 @address@hidden = fib fibit fibdelay incr printf printf2 rpn add bp    \
address@hidden@ testfp funcfp rpnfp modi ldxi divi movi ret
address@hidden@ testfp funcfp rpnfp modi ldxi divi movi ret allocai
 
 @address@hidden = $(srcdir)/run-test
 all: all-am
@@ -289,6 +295,9 @@ clean-checkPROGRAMS:
 add$(EXEEXT): $(add_OBJECTS) $(add_DEPENDENCIES) 
        @rm -f add$(EXEEXT)
        $(LINK) $(add_LDFLAGS) $(add_OBJECTS) $(add_LDADD) $(LIBS)
+allocai$(EXEEXT): $(allocai_OBJECTS) $(allocai_DEPENDENCIES) 
+       @rm -f allocai$(EXEEXT)
+       $(LINK) $(allocai_LDFLAGS) $(allocai_OBJECTS) $(allocai_LDADD) $(LIBS)
 bp$(EXEEXT): $(bp_OBJECTS) $(bp_DEPENDENCIES) 
        @rm -f bp$(EXEEXT)
        $(LINK) $(bp_LDFLAGS) $(bp_OBJECTS) $(bp_LDADD) $(LIBS)
@@ -345,6 +354,7 @@ distclean-compile:
        -rm -f *.tab.c
 
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
address@hidden@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@
 @AMDEP_TRUE@@am__include@ @address@hidden/$(DEPDIR)/address@hidden@



reply via email to

[Prev in Thread] Current Thread [Next in Thread]