guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, string_abstraction2, updated. release_


From: Michael Gran
Subject: [Guile-commits] GNU Guile branch, string_abstraction2, updated. release_1-9-1-77-gefb0428
Date: Sun, 26 Jul 2009 22:05:42 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=efb0428f49104c2734ab70c373f437b270a8d92c

The branch, string_abstraction2 has been updated
       via  efb0428f49104c2734ab70c373f437b270a8d92c (commit)
       via  7a82cbdfeffcb15c17b6bc31dbf5a73a12ec5fa8 (commit)
      from  fd769cec4ce951f74daa3124d9426be11c631737 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit efb0428f49104c2734ab70c373f437b270a8d92c
Author: Michael Gran <address@hidden>
Date:   Sun Jul 26 15:03:47 2009 -0700

    Add wide string functionality to the compiler / VM.
    
    Operations that took variable-length strings now also take a
    character width parameter.  New operation added for char32.
    
            * module/language/assembly/compile-bytecode.scm (write-bytecode):
            add wide-string sizing
    
            * module/language/assembly.scm (object->assembly): add 32-bit
            chars
            (assembly->object): add 32-bit chars
            (byte-length): add wide string sizing
    
            * libguile/vm-i-system.c (make-char32): new instruction
    
            * libguile/vm-i-loader.c (load-string, load-symbol):
            (load-keyword, define): use wide strings
    
            * libguile/vm-engine.h (FETCH_WIDTH): new macro
    
            * libguile/strings.h: declare new func scm_string_width
    
            * libguile/strings.c (scm_string_width): new function

commit 7a82cbdfeffcb15c17b6bc31dbf5a73a12ec5fa8
Author: Michael Gran <address@hidden>
Date:   Sun Jul 26 12:56:46 2009 -0700

    Changes to write format of strings.
    
    In Guile 1.8.x, the write format printed the characters, or used
    escapes for control characters.  This change tries to be an
    extension of that behavior, where characters are printed if they
    can be represented in this encoding and are graphic.  Otherwise
    they are printed as escapes.
    
            * libguile/print.c (iprin1): change write format of strings

-----------------------------------------------------------------------

Summary of changes:
 libguile/print.c                              |  119 +++++++++++++++++--------
 libguile/strings.c                            |   14 +++
 libguile/strings.h                            |    1 +
 libguile/vm-engine.h                          |    1 +
 libguile/vm-i-loader.c                        |   88 ++++++++++++++++---
 libguile/vm-i-system.c                        |   95 +++++++++++---------
 module/language/assembly.scm                  |   23 ++++-
 module/language/assembly/compile-bytecode.scm |   30 +++++--
 8 files changed, 263 insertions(+), 108 deletions(-)

diff --git a/libguile/print.c b/libguile/print.c
index 7b82820..97ef237 100644
--- a/libguile/print.c
+++ b/libguile/print.c
@@ -597,50 +597,91 @@ iprin1 (SCM exp, SCM port, scm_print_state *pstate)
 
              scm_putc ('"', port);
              len = scm_i_string_length (exp);
-             for (i = 0, j = 0; i < len; ++i)
+             for (i = 0; i < len; ++i)
                {
                  scm_t_wchar ch = scm_i_string_ref (exp, i);
-
-                 if (ch == '"' || ch == '\\')
+                  int printed = 0;
+                  
+                  if (ch == ' ' || ch == '\n')
+                    {
+                      scm_putc (ch, port);
+                      printed = 1;
+                    }
+                 else if (ch == '"' || ch == '\\')
                    {
-                     scm_lfwrite_substr (exp, j, i, port);
                      scm_putc ('\\', port);
-                     j = i;
-                   }
-                 else if ((ch < 32 && ch != '\n') || (127 <= ch && ch < 256))
-                   {
-                     scm_lfwrite_substr (exp, j, i, port);
-                     buf[0] = '\\';
-                     buf[1] = 'x';
-                     buf[2] = hex [ch / 16];
-                     buf[3] = hex [ch % 16];
-                     scm_lfwrite (buf, 4, port);
-                     j = i+1;
-                   }
-                 else if (ch > 0xFF && ch <= 0xFFFF)
-                   {
-                     scm_lfwrite_substr (exp, j, i, port);
-                     buf[0] = '\\';
-                     buf[1] = 'u';
-                     buf[2] = hex [(ch & 0xF000) >> 12];
-                     buf[3] = hex [(ch & 0xF00) >> 8];
-                     buf[4] = hex [(ch & 0xF0) >> 4];
-                     buf[5] = hex [(ch & 0xF)];
-                   }
-                 else if (ch > 0xFFFF)
-                   {
-                     scm_lfwrite_substr (exp, j, i, port);
-                     buf[0] = '\\';
-                     buf[1] = 'U';
-                     buf[2] = hex [(ch & 0xF00000) >> 20];
-                     buf[3] = hex [(ch & 0xF0000) >> 16];
-                     buf[4] = hex [(ch & 0xF000) >> 12];
-                     buf[5] = hex [(ch & 0xF00) >> 8];
-                     buf[6] = hex [(ch & 0xF0) >> 4];
-                     buf[7] = hex [(ch & 0xF)];
+                      scm_charprint (ch, port);
+                      printed = 1;
                    }
-               }
-             scm_lfwrite_substr (exp, j, i, port);
+                  else if (uc_is_general_category_withtable (ch, 
UC_CATEGORY_MASK_L
+                                                             | 
UC_CATEGORY_MASK_M 
+                                                             | 
UC_CATEGORY_MASK_N 
+                                                             | 
UC_CATEGORY_MASK_P 
+                                                             | 
UC_CATEGORY_MASK_S))
+                    {
+                      /* Print the character since it is a graphic
+                         character.  */
+                      scm_t_wchar *wbuf;
+                      SCM wstr = scm_i_make_wide_string (1, &wbuf);
+                      char *buf;
+                      size_t len;
+
+                      wbuf[0] = ch;
+
+                      buf = u32_conv_to_encoding (scm_i_get_port_encoding 
(port), 
+                                                  iconveh_error,
+                                                  (scm_t_uint32 *) wbuf        
                , 
+                                                  1   ,
+                                                  NULL,
+                                                  NULL, &len);
+                      if (buf != NULL)
+                        {
+                          /* Character is graphic and representable in
+                             this encoding.  Print it.  */
+                          scm_lfwrite_str (wstr, port);
+                          free (buf);
+                          printed = 1;
+                        }
+                    }
+
+                  if (!printed)
+                    {
+                      /* Character is graphic but unrepresentable in
+                         this port's encoding or is not graphic.  */
+                      if (ch <= 0xFF)
+                        {
+                          buf[0] = '\\';
+                          buf[1] = 'x';
+                          buf[2] = hex [ch / 16];
+                          buf[3] = hex [ch % 16];
+                          scm_lfwrite (buf, 4, port);
+                        }
+                      else if (ch <= 0xFFFF)
+                        {
+                          buf[0] = '\\';
+                          buf[1] = 'u';
+                          buf[2] = hex [(ch & 0xF000) >> 12];
+                          buf[3] = hex [(ch & 0xF00) >> 8];
+                          buf[4] = hex [(ch & 0xF0) >> 4];
+                          buf[5] = hex [(ch & 0xF)];
+                          scm_lfwrite (buf, 6, port);
+                          j = i + 1;
+                        }
+                      else if (ch > 0xFFFF)
+                        {
+                          buf[0] = '\\';
+                          buf[1] = 'U';
+                          buf[2] = hex [(ch & 0xF00000) >> 20];
+                          buf[3] = hex [(ch & 0xF0000) >> 16];
+                          buf[4] = hex [(ch & 0xF000) >> 12];
+                          buf[5] = hex [(ch & 0xF00) >> 8];
+                          buf[6] = hex [(ch & 0xF0) >> 4];
+                          buf[7] = hex [(ch & 0xF)];
+                          scm_lfwrite (buf, 8, port);
+                          j = i + 1;
+                        }
+                    }
+                }
              scm_putc ('"', port);
              scm_remember_upto_here_1 (exp);
            }
diff --git a/libguile/strings.c b/libguile/strings.c
index becae10..8fc9264 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1003,6 +1003,20 @@ SCM_DEFINE (scm_string_length, "string-length", 1, 0, 0,
 }
 #undef FUNC_NAME
 
+SCM_DEFINE (scm_string_width, "string-width", 1, 0, 0, 
+           (SCM string),
+           "Return the bytes used to represent a character in @var{string}."
+            "This will return 1 or 4.")
+#define FUNC_NAME s_scm_string_width
+{
+  SCM_VALIDATE_STRING (1, string);
+  if (!scm_i_is_narrow_string (string))
+    return scm_from_int (4);
+
+  return scm_from_int (1);
+}
+#undef FUNC_NAME
+
 size_t
 scm_c_string_length (SCM string)
 {
diff --git a/libguile/strings.h b/libguile/strings.h
index 3efdb72..e702aa4 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -83,6 +83,7 @@ SCM_API SCM scm_string_p (SCM x);
 SCM_API SCM scm_string (SCM chrs);
 SCM_API SCM scm_make_string (SCM k, SCM chr);
 SCM_API SCM scm_string_length (SCM str);
+SCM_API SCM scm_string_width (SCM str);
 SCM_API SCM scm_string_ref (SCM str, SCM k);
 SCM_API SCM scm_string_set_x (SCM str, SCM k, SCM chr);
 SCM_API SCM scm_substring (SCM str, SCM start, SCM end);
diff --git a/libguile/vm-engine.h b/libguile/vm-engine.h
index d684979..a8ea583 100644
--- a/libguile/vm-engine.h
+++ b/libguile/vm-engine.h
@@ -314,6 +314,7 @@ do {                                                \
 
 #define FETCH()                (*ip++)
 #define FETCH_LENGTH(len) do { len=*ip++; len<<=8; len+=*ip++; len<<=8; 
len+=*ip++; } while (0)
+#define FETCH_WIDTH(width) do { width=*ip++; } while (0)
 
 #undef CLOCK
 #if VM_USE_CLOCK
diff --git a/libguile/vm-i-loader.c b/libguile/vm-i-loader.c
index 35fba9a..02280c0 100644
--- a/libguile/vm-i-loader.c
+++ b/libguile/vm-i-loader.c
@@ -72,35 +72,82 @@ VM_DEFINE_LOADER (61, load_number, "load-number")
 VM_DEFINE_LOADER (62, load_string, "load-string")
 {
   size_t len;
+  int width;
   SCM str;
-  char *buf;
+
   FETCH_LENGTH (len);
+  FETCH_WIDTH (width);
   SYNC_REGISTER ();
-  str = scm_i_make_string(len, &buf);
-  memcpy (buf, (char *)ip, len);
+  if (width == 1)
+    {
+      char *buf;
+      str = scm_i_make_string(len, &buf);
+      memcpy (buf, (char *)ip, len);
+    }
+  else if (width == 4)
+    {
+      scm_t_wchar *wbuf;
+      str = scm_i_make_wide_string(len, &wbuf);
+      memcpy ((char *)wbuf, (char *)ip, len * width);
+    }
+  else
+    SCM_MISC_ERROR ("load-string: invalid character width", SCM_EOL);
   PUSH (str);
-  /* Was: scm_makfromstr (ip, len, 0) */
-  ip += len;
+  ip += len * width;
   NEXT;
 }
 
 VM_DEFINE_LOADER (63, load_symbol, "load-symbol")
 {
   size_t len;
+  int width;
+  SCM str;
   FETCH_LENGTH (len);
+  FETCH_WIDTH (width);
   SYNC_REGISTER ();
-  PUSH (scm_from_locale_symboln ((char *)ip, len));
-  ip += len;
+  if (width == 1)
+    {
+      char *buf;
+      str = scm_i_make_string(len, &buf);
+      memcpy (buf, (char *)ip, len);
+    }
+  else if (width == 4)
+    {
+      scm_t_wchar *wbuf;
+      str = scm_i_make_wide_string(len, &wbuf);
+      memcpy ((char *)wbuf, (char *)ip, len * width);
+    }
+  else
+    SCM_MISC_ERROR ("load-symbol: invalid character width", SCM_EOL);
+  PUSH (scm_string_to_symbol (str));
+  ip += len * width;
   NEXT;
 }
 
 VM_DEFINE_LOADER (64, load_keyword, "load-keyword")
 {
   size_t len;
+  int width;
+  SCM str;
   FETCH_LENGTH (len);
+  FETCH_WIDTH (width);
   SYNC_REGISTER ();
-  PUSH (scm_from_locale_keywordn ((char *)ip, len));
-  ip += len;
+  if (width == 1)
+    {
+      char *buf;
+      str = scm_i_make_string(len, &buf);
+      memcpy (buf, (char *)ip, len);
+    }
+  else if (width == 4)
+    {
+      scm_t_wchar *wbuf;
+      str = scm_i_make_wide_string(len, &wbuf);
+      memcpy ((char *)wbuf, (char *)ip, len * width);
+    }
+  else
+    SCM_MISC_ERROR ("load-keyword: invalid character width", SCM_EOL);
+  PUSH (scm_symbol_to_keyword (scm_string_to_symbol (str)));
+  ip += len * width;
   NEXT;
 }
 
@@ -136,13 +183,28 @@ VM_DEFINE_INSTRUCTION (66, link_now, "link-now", 0, 1, 1)
 
 VM_DEFINE_LOADER (67, define, "define")
 {
-  SCM sym;
+  SCM str, sym;
   size_t len;
-
+  int width;
   FETCH_LENGTH (len);
+  FETCH_WIDTH (width);
   SYNC_REGISTER ();
-  sym = scm_from_locale_symboln ((char *)ip, len);
-  ip += len;
+  if (width == 1)
+    {
+      char *buf;
+      str = scm_i_make_string(len, &buf);
+      memcpy (buf, (char *)ip, len);
+    }
+  else if (width == 4)
+    {
+      scm_t_wchar *wbuf;
+      str = scm_i_make_wide_string(len, &wbuf);
+      memcpy ((char *)wbuf, (char *)ip, len * width);
+    }
+  else
+    SCM_MISC_ERROR ("load define: invalid character width", SCM_EOL);
+  sym = scm_string_to_symbol (str);
+  ip += len * width;
 
   SYNC_REGISTER ();
   PUSH (scm_sym2var (sym, scm_current_module_lookup_closure (), SCM_BOOL_T));
diff --git a/libguile/vm-i-system.c b/libguile/vm-i-system.c
index d55d6e2..1e71b2d 100644
--- a/libguile/vm-i-system.c
+++ b/libguile/vm-i-system.c
@@ -139,7 +139,7 @@ VM_DEFINE_INSTRUCTION (13, make_int16, "make-int16", 2, 0, 
1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (55, make_int64, "make-int64", 8, 0, 1)
+VM_DEFINE_INSTRUCTION (14, make_int64, "make-int64", 8, 0, 1)
 {
   scm_t_uint64 v = 0;
   v += FETCH ();
@@ -154,7 +154,7 @@ VM_DEFINE_INSTRUCTION (55, make_int64, "make-int64", 8, 0, 
1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (56, make_uint64, "make-uint64", 8, 0, 1)
+VM_DEFINE_INSTRUCTION (15, make_uint64, "make-uint64", 8, 0, 1)
 {
   scm_t_uint64 v = 0;
   v += FETCH ();
@@ -169,13 +169,24 @@ VM_DEFINE_INSTRUCTION (56, make_uint64, "make-uint64", 8, 
0, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (14, make_char8, "make-char8", 1, 0, 1)
+VM_DEFINE_INSTRUCTION (16, make_char8, "make-char8", 1, 0, 1)
 {
   PUSH (SCM_MAKE_CHAR (FETCH ()));
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (15, list, "list", 2, -1, 1)
+VM_DEFINE_INSTRUCTION (17, make_char32, "make-char32", 4, 0, 1)
+{
+  scm_t_wchar v = 0;
+  v += FETCH ();
+  v <<= 8; v += FETCH ();
+  v <<= 8; v += FETCH ();
+  v <<= 8; v += FETCH ();
+  PUSH (SCM_MAKE_CHAR (v));
+  NEXT;
+}
+
+VM_DEFINE_INSTRUCTION (18, list, "list", 2, -1, 1)
 {
   unsigned h = FETCH ();
   unsigned l = FETCH ();
@@ -184,7 +195,7 @@ VM_DEFINE_INSTRUCTION (15, list, "list", 2, -1, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (16, vector, "vector", 2, -1, 1)
+VM_DEFINE_INSTRUCTION (19, vector, "vector", 2, -1, 1)
 {
   unsigned h = FETCH ();
   unsigned l = FETCH ();
@@ -202,19 +213,19 @@ VM_DEFINE_INSTRUCTION (16, vector, "vector", 2, -1, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (17, list_mark, "list-mark", 0, 0, 0)
+VM_DEFINE_INSTRUCTION (20, list_mark, "list-mark", 0, 0, 0)
 {
   POP_LIST_MARK ();
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (18, cons_mark, "cons-mark", 0, 0, 0)
+VM_DEFINE_INSTRUCTION (21, cons_mark, "cons-mark", 0, 0, 0)
 {
   POP_CONS_MARK ();
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (19, vector_mark, "vector-mark", 0, 0, 0)
+VM_DEFINE_INSTRUCTION (22, vector_mark, "vector-mark", 0, 0, 0)
 {
   POP_LIST_MARK ();
   SYNC_REGISTER ();
@@ -222,7 +233,7 @@ VM_DEFINE_INSTRUCTION (19, vector_mark, "vector-mark", 0, 
0, 0)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (20, list_break, "list-break", 0, 0, 0)
+VM_DEFINE_INSTRUCTION (23, list_break, "list-break", 0, 0, 0)
 {
   SCM l;
   POP (l);
@@ -250,7 +261,7 @@ VM_DEFINE_INSTRUCTION (20, list_break, "list-break", 0, 0, 
0)
 
 /* ref */
 
-VM_DEFINE_INSTRUCTION (21, object_ref, "object-ref", 1, 0, 1)
+VM_DEFINE_INSTRUCTION (24, object_ref, "object-ref", 1, 0, 1)
 {
   register unsigned objnum = FETCH ();
   CHECK_OBJECT (objnum);
@@ -258,14 +269,14 @@ VM_DEFINE_INSTRUCTION (21, object_ref, "object-ref", 1, 
0, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (22, local_ref, "local-ref", 1, 0, 1)
+VM_DEFINE_INSTRUCTION (25, local_ref, "local-ref", 1, 0, 1)
 {
   PUSH (LOCAL_REF (FETCH ()));
   ASSERT_BOUND (*sp);
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (23, external_ref, "external-ref", 1, 0, 1)
+VM_DEFINE_INSTRUCTION (26, external_ref, "external-ref", 1, 0, 1)
 {
   unsigned int i;
   SCM e = external;
@@ -280,7 +291,7 @@ VM_DEFINE_INSTRUCTION (23, external_ref, "external-ref", 1, 
0, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (24, variable_ref, "variable-ref", 0, 0, 1)
+VM_DEFINE_INSTRUCTION (27, variable_ref, "variable-ref", 0, 0, 1)
 {
   SCM x = *sp;
 
@@ -299,7 +310,7 @@ VM_DEFINE_INSTRUCTION (24, variable_ref, "variable-ref", 0, 
0, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (25, toplevel_ref, "toplevel-ref", 1, 0, 1)
+VM_DEFINE_INSTRUCTION (28, toplevel_ref, "toplevel-ref", 1, 0, 1)
 {
   unsigned objnum = FETCH ();
   SCM what;
@@ -324,14 +335,14 @@ VM_DEFINE_INSTRUCTION (25, toplevel_ref, "toplevel-ref", 
1, 0, 1)
 
 /* set */
 
-VM_DEFINE_INSTRUCTION (26, local_set, "local-set", 1, 1, 0)
+VM_DEFINE_INSTRUCTION (29, local_set, "local-set", 1, 1, 0)
 {
   LOCAL_SET (FETCH (), *sp);
   DROP ();
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (27, external_set, "external-set", 1, 1, 0)
+VM_DEFINE_INSTRUCTION (30, external_set, "external-set", 1, 1, 0)
 {
   unsigned int i;
   SCM e = external;
@@ -346,14 +357,14 @@ VM_DEFINE_INSTRUCTION (27, external_set, "external-set", 
1, 1, 0)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (28, variable_set, "variable-set", 0, 1, 0)
+VM_DEFINE_INSTRUCTION (31, variable_set, "variable-set", 0, 1, 0)
 {
   VARIABLE_SET (sp[0], sp[-1]);
   DROPN (2);
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (29, toplevel_set, "toplevel-set", 1, 1, 0)
+VM_DEFINE_INSTRUCTION (32, toplevel_set, "toplevel-set", 1, 1, 0)
 {
   unsigned objnum = FETCH ();
   SCM what;
@@ -396,7 +407,7 @@ VM_DEFINE_INSTRUCTION (29, toplevel_set, "toplevel-set", 1, 
1, 0)
   NEXT;                                                \
 }
 
-VM_DEFINE_INSTRUCTION (31, br, "br", 2, 0, 0)
+VM_DEFINE_INSTRUCTION (33, br, "br", 2, 0, 0)
 {
   int h = FETCH ();
   int l = FETCH ();
@@ -404,34 +415,34 @@ VM_DEFINE_INSTRUCTION (31, br, "br", 2, 0, 0)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (32, br_if, "br-if", 2, 0, 0)
+VM_DEFINE_INSTRUCTION (34, br_if, "br-if", 2, 0, 0)
 {
   BR (!SCM_FALSEP (*sp));
 }
 
-VM_DEFINE_INSTRUCTION (33, br_if_not, "br-if-not", 2, 0, 0)
+VM_DEFINE_INSTRUCTION (35, br_if_not, "br-if-not", 2, 0, 0)
 {
   BR (SCM_FALSEP (*sp));
 }
 
-VM_DEFINE_INSTRUCTION (34, br_if_eq, "br-if-eq", 2, 0, 0)
+VM_DEFINE_INSTRUCTION (36, br_if_eq, "br-if-eq", 2, 0, 0)
 {
   sp--; /* underflow? */
   BR (SCM_EQ_P (sp[0], sp[1]));
 }
 
-VM_DEFINE_INSTRUCTION (35, br_if_not_eq, "br-if-not-eq", 2, 0, 0)
+VM_DEFINE_INSTRUCTION (37, br_if_not_eq, "br-if-not-eq", 2, 0, 0)
 {
   sp--; /* underflow? */
   BR (!SCM_EQ_P (sp[0], sp[1]));
 }
 
-VM_DEFINE_INSTRUCTION (36, br_if_null, "br-if-null", 2, 0, 0)
+VM_DEFINE_INSTRUCTION (38, br_if_null, "br-if-null", 2, 0, 0)
 {
   BR (SCM_NULLP (*sp));
 }
 
-VM_DEFINE_INSTRUCTION (37, br_if_not_null, "br-if-not-null", 2, 0, 0)
+VM_DEFINE_INSTRUCTION (39, br_if_not_null, "br-if-not-null", 2, 0, 0)
 {
   BR (!SCM_NULLP (*sp));
 }
@@ -441,7 +452,7 @@ VM_DEFINE_INSTRUCTION (37, br_if_not_null, 
"br-if-not-null", 2, 0, 0)
  * Subprogram call
  */
 
-VM_DEFINE_INSTRUCTION (38, make_closure, "make-closure", 0, 1, 1)
+VM_DEFINE_INSTRUCTION (40, make_closure, "make-closure", 0, 1, 1)
 {
   SYNC_BEFORE_GC ();
   SCM_NEWSMOB3 (*sp, scm_tc16_program, SCM_PROGRAM_OBJCODE (*sp),
@@ -449,7 +460,7 @@ VM_DEFINE_INSTRUCTION (38, make_closure, "make-closure", 0, 
1, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (39, call, "call", 1, -1, 1)
+VM_DEFINE_INSTRUCTION (41, call, "call", 1, -1, 1)
 {
   SCM x;
   nargs = FETCH ();
@@ -570,7 +581,7 @@ VM_DEFINE_INSTRUCTION (39, call, "call", 1, -1, 1)
   goto vm_error_wrong_type_apply;
 }
 
-VM_DEFINE_INSTRUCTION (40, goto_args, "goto/args", 1, -1, 1)
+VM_DEFINE_INSTRUCTION (42, goto_args, "goto/args", 1, -1, 1)
 {
   register SCM x;
   nargs = FETCH ();
@@ -764,7 +775,7 @@ VM_DEFINE_INSTRUCTION (40, goto_args, "goto/args", 1, -1, 1)
   goto vm_error_wrong_type_apply;
 }
 
-VM_DEFINE_INSTRUCTION (41, goto_nargs, "goto/nargs", 0, 0, 1)
+VM_DEFINE_INSTRUCTION (43, goto_nargs, "goto/nargs", 0, 0, 1)
 {
   SCM x;
   POP (x);
@@ -773,7 +784,7 @@ VM_DEFINE_INSTRUCTION (41, goto_nargs, "goto/nargs", 0, 0, 
1)
   goto vm_goto_args;
 }
 
-VM_DEFINE_INSTRUCTION (42, call_nargs, "call/nargs", 0, 0, 1)
+VM_DEFINE_INSTRUCTION (44, call_nargs, "call/nargs", 0, 0, 1)
 {
   SCM x;
   POP (x);
@@ -782,7 +793,7 @@ VM_DEFINE_INSTRUCTION (42, call_nargs, "call/nargs", 0, 0, 
1)
   goto vm_call;
 }
 
-VM_DEFINE_INSTRUCTION (43, mv_call, "mv-call", 3, -1, 1)
+VM_DEFINE_INSTRUCTION (45, mv_call, "mv-call", 3, -1, 1)
 {
   SCM x;
   signed short offset;
@@ -843,7 +854,7 @@ VM_DEFINE_INSTRUCTION (43, mv_call, "mv-call", 3, -1, 1)
   goto vm_error_wrong_type_apply;
 }
 
-VM_DEFINE_INSTRUCTION (44, apply, "apply", 1, -1, 1)
+VM_DEFINE_INSTRUCTION (46, apply, "apply", 1, -1, 1)
 {
   int len;
   SCM ls;
@@ -862,7 +873,7 @@ VM_DEFINE_INSTRUCTION (44, apply, "apply", 1, -1, 1)
   goto vm_call;
 }
 
-VM_DEFINE_INSTRUCTION (45, goto_apply, "goto/apply", 1, -1, 1)
+VM_DEFINE_INSTRUCTION (47, goto_apply, "goto/apply", 1, -1, 1)
 {
   int len;
   SCM ls;
@@ -881,7 +892,7 @@ VM_DEFINE_INSTRUCTION (45, goto_apply, "goto/apply", 1, -1, 
1)
   goto vm_goto_args;
 }
 
-VM_DEFINE_INSTRUCTION (46, call_cc, "call/cc", 0, 1, 1)
+VM_DEFINE_INSTRUCTION (48, call_cc, "call/cc", 0, 1, 1)
 {
   int first;
   SCM proc, cont;
@@ -915,7 +926,7 @@ VM_DEFINE_INSTRUCTION (46, call_cc, "call/cc", 0, 1, 1)
     }
 }
 
-VM_DEFINE_INSTRUCTION (47, goto_cc, "goto/cc", 0, 1, 1)
+VM_DEFINE_INSTRUCTION (49, goto_cc, "goto/cc", 0, 1, 1)
 {
   int first;
   SCM proc, cont;
@@ -947,7 +958,7 @@ VM_DEFINE_INSTRUCTION (47, goto_cc, "goto/cc", 0, 1, 1)
     }
 }
 
-VM_DEFINE_INSTRUCTION (48, return, "return", 0, 1, 1)
+VM_DEFINE_INSTRUCTION (50, return, "return", 0, 1, 1)
 {
  vm_return:
   EXIT_HOOK ();
@@ -986,7 +997,7 @@ VM_DEFINE_INSTRUCTION (48, return, "return", 0, 1, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (49, return_values, "return/values", 1, -1, -1)
+VM_DEFINE_INSTRUCTION (51, return_values, "return/values", 1, -1, -1)
 {
   /* nvalues declared at top level, because for some reason gcc seems to think
      that perhaps it might be used without declaration. Fooey to that, I say. 
*/
@@ -1047,7 +1058,7 @@ VM_DEFINE_INSTRUCTION (49, return_values, 
"return/values", 1, -1, -1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (50, return_values_star, "return/values*", 1, -1, -1)
+VM_DEFINE_INSTRUCTION (52, return_values_star, "return/values*", 1, -1, -1)
 {
   SCM l;
 
@@ -1070,7 +1081,7 @@ VM_DEFINE_INSTRUCTION (50, return_values_star, 
"return/values*", 1, -1, -1)
   goto vm_return_values;
 }
 
-VM_DEFINE_INSTRUCTION (51, truncate_values, "truncate-values", 2, -1, -1)
+VM_DEFINE_INSTRUCTION (53, truncate_values, "truncate-values", 2, -1, -1)
 {
   SCM x;
   int nbinds, rest;
@@ -1093,7 +1104,7 @@ VM_DEFINE_INSTRUCTION (51, truncate_values, 
"truncate-values", 2, -1, -1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (52, long_object_ref, "long-object-ref", 2, 0, 1)
+VM_DEFINE_INSTRUCTION (54, long_object_ref, "long-object-ref", 2, 0, 1)
 {
   unsigned int objnum = FETCH ();
   objnum <<= 8;
@@ -1103,7 +1114,7 @@ VM_DEFINE_INSTRUCTION (52, long_object_ref, 
"long-object-ref", 2, 0, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (53, long_toplevel_ref, "long-toplevel-ref", 2, 0, 1)
+VM_DEFINE_INSTRUCTION (55, long_toplevel_ref, "long-toplevel-ref", 2, 0, 1)
 {
   SCM what;
   unsigned int objnum = FETCH ();
@@ -1128,7 +1139,7 @@ VM_DEFINE_INSTRUCTION (53, long_toplevel_ref, 
"long-toplevel-ref", 2, 0, 1)
   NEXT;
 }
 
-VM_DEFINE_INSTRUCTION (54, long_toplevel_set, "long-toplevel-set", 2, 1, 0)
+VM_DEFINE_INSTRUCTION (56, long_toplevel_set, "long-toplevel-set", 2, 1, 0)
 {
   SCM what;
   unsigned int objnum = FETCH ();
diff --git a/module/language/assembly.scm b/module/language/assembly.scm
index 3a0b387..e3769f1 100644
--- a/module/language/assembly.scm
+++ b/module/language/assembly.scm
@@ -34,6 +34,10 @@
 ;; lengths are encoded in 3 bytes
 (define *len-len* 3)
 
+;; the number of bytes per string character is encoded in 1 byte
+(define *width-len* 1)
+
+
 (define (byte-length assembly)
   (pmatch assembly
     (,label (guard (not (pair? label)))
@@ -45,15 +49,15 @@
     ((load-number ,str)
      (+ 1 *len-len* (string-length str)))
     ((load-string ,str)
-     (+ 1 *len-len* (string-length str)))
+     (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
     ((load-symbol ,str)
-     (+ 1 *len-len* (string-length str)))
+     (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
     ((load-keyword ,str)
-     (+ 1 *len-len* (string-length str)))
+     (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
     ((load-array ,bv)
      (+ 1 *len-len* (bytevector-length bv)))
     ((define ,str)
-     (+ 1 *len-len* (string-length str)))
+     (+ 1 *len-len* *width-len* (* (string-width str) (string-length str))))
     ((load-program ,nargs ,nrest ,nlocs ,nexts ,labels ,len ,meta . ,code)
      (+ 1 *program-header-len* len (if meta (1- (byte-length meta)) 0)))
     ((,inst . _) (guard (>= (instruction-length inst) 0))
@@ -124,7 +128,11 @@
                                   (bytevector-s64-set! bv 0 x (endianness big))
                                   bv))))
               (else #f)))
-       ((char? x) `(make-char8 ,(char->integer x)))
+       ((char? x)
+         (cond ((<= (char->integer x) #xff)
+                `(make-char8 ,(char->integer x)))
+               (else
+                `(make-char32 ,(char->integer x)))))
        (else #f)))
 
 (define (assembly->object code)
@@ -149,6 +157,11 @@
       (endianness big)))
     ((make-char8 ,n)
      (integer->char n))
+    ((make-char32 ,n1 ,n2 ,n3 ,n4)
+     (integer->char (+ (* n1 #x1000000)
+                       (* n2 #x10000)
+                       (* n3 #x100)
+                       n4)))
     ((load-string ,s) s)
     ((load-symbol ,s) (string->symbol s))
     ((load-keyword ,s) (symbol->keyword (string->symbol s)))
diff --git a/module/language/assembly/compile-bytecode.scm 
b/module/language/assembly/compile-bytecode.scm
index 4b9f7b7..1f202e4 100644
--- a/module/language/assembly/compile-bytecode.scm
+++ b/module/language/assembly/compile-bytecode.scm
@@ -65,6 +65,12 @@
     (write-byte (logand (ash x -8) 255))
     (write-byte (logand (ash x -16) 255))
     (write-byte (logand (ash x -24) 255)))
+  (define (write-uint32 x) (case byte-order
+                             ((1234) (write-uint32-le x))
+                             ((4321) (write-uint32-be x))
+                             (else (error "unknown endianness" byte-order))))
+  (define (write-wide-string s)
+    (string-for-each (lambda (c) (write-uint32 (char->integer c))) s))
   (define (write-loader-len len)
     (write-byte (ash len -16))
     (write-byte (logand (ash len -8) 255))
@@ -72,6 +78,14 @@
   (define (write-loader str)
     (write-loader-len (string-length str))
     (write-string str))
+  (define (write-sized-loader str)
+    (let ((len (string-length str))
+          (wid (string-width str)))
+      (write-loader-len len)
+      (write-byte wid)
+      (if (= wid 4)
+          (write-wide-string str)
+          (write-string str))))
   (define (write-bytevector bv)
     (write-loader-len (bytevector-length bv))
     ;; Ew!
@@ -80,11 +94,8 @@
     (write-uint16-be (- (assq-ref labels label) (+ (get-addr) 2))))
   
   (let ((inst (car asm))
-        (args (cdr asm))
-        (write-uint32 (case byte-order
-                        ((1234) write-uint32-le)
-                        ((4321) write-uint32-be)
-                        (else (error "unknown endianness" byte-order)))))
+        (args (cdr asm)))
+
     (let ((opcode (instruction->opcode inst))
           (len (instruction-length inst)))
       (write-byte opcode)
@@ -117,11 +128,12 @@
         ((load-unsigned-integer ,str) (write-loader str))
         ((load-integer ,str) (write-loader str))
         ((load-number ,str) (write-loader str))
-        ((load-string ,str) (write-loader str))
-        ((load-symbol ,str) (write-loader str))
-        ((load-keyword ,str) (write-loader str))
+        ((load-string ,str) (write-sized-loader str))
+        ((load-symbol ,str) (write-sized-loader str))
+        ((load-keyword ,str) (write-sized-loader str))
         ((load-array ,bv) (write-bytevector bv))
-        ((define ,str) (write-loader str))
+        ((make-char32 ,x) (write-uint32-be x))
+        ((define ,str) (write-sized-loader str))
         ((br ,l) (write-break l))
         ((br-if ,l) (write-break l))
         ((br-if-not ,l) (write-break l))


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]