guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, wip-threads-and-fork, updated. v2.1.0-14-g0354d10


From: Andy Wingo
Subject: [Guile-commits] GNU Guile branch, wip-threads-and-fork, updated. v2.1.0-14-g0354d10
Date: Thu, 09 Feb 2012 22:16:55 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=0354d101ca35d7e44badf4d2197fc33ea222f389

The branch, wip-threads-and-fork has been updated
       via  0354d101ca35d7e44badf4d2197fc33ea222f389 (commit)
       via  89de25fd60ff3a6f1e070ed957fd8670106bc58a (commit)
      from  c9a0e1bcac797d399072002124af295a4c6a0cb7 (commit)

Those revisions listed above that are new to this repository have
not appeared in any other notification email, so we list those
revisions in full below.

- Log -----------------------------------------------------------------
commit 0354d101ca35d7e44badf4d2197fc33ea222f389
Author: Andy Wingo <address@hidden>
Date:   Thu Feb 9 23:15:25 2012 +0100

    more efficient scm_string_to_utf8, scm_string_to_utf32
    
    * libguile/bytevectors.c (scm_string_to_utf8): More efficient
      implementation.
      (scm_string_to_utf32): Likewise.

commit 89de25fd60ff3a6f1e070ed957fd8670106bc58a
Author: Andy Wingo <address@hidden>
Date:   Thu Feb 9 23:14:11 2012 +0100

    more efficient scm_to_utf8_stringn, scm_to_utf32_stringn
    
    * libguile/strings.c (scm_to_utf8_stringn): More efficient
      implementation than calling scm_to_stringn.
      (scm_to_utf32_stringn): Likewise.

-----------------------------------------------------------------------

Summary of changes:
 libguile/bytevectors.c |   58 ++++++++++++++++++++++++------------------------
 libguile/strings.c     |   56 +++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 81 insertions(+), 33 deletions(-)

diff --git a/libguile/bytevectors.c b/libguile/bytevectors.c
index c18eddd..6ea60f8 100644
--- a/libguile/bytevectors.c
+++ b/libguile/bytevectors.c
@@ -1975,37 +1975,15 @@ SCM_DEFINE (scm_string_to_utf8, "string->utf8",
 #define FUNC_NAME s_scm_string_to_utf8
 {
   SCM utf;
-  uint8_t *c_utf;
-  size_t c_strlen, c_utf_len = 0;
+  scm_t_uint8 *c_utf;
+  size_t c_utf_len = 0;
 
   SCM_VALIDATE_STRING (1, str);
 
-  c_strlen = scm_i_string_length (str);
-  if (scm_i_is_narrow_string (str))
-    {
-      scm_i_lock_iconv ();
-      c_utf = u8_conv_from_encoding ("ISO-8859-1", iconveh_question_mark,
-                                     scm_i_string_chars (str), c_strlen,
-                                     NULL, NULL, &c_utf_len);
-      scm_i_unlock_iconv ();
-    }
-  else
-    {
-      const scm_t_wchar *wbuf = scm_i_string_wide_chars (str);
-      c_utf = u32_to_u8 ((const uint32_t *) wbuf, c_strlen, NULL, &c_utf_len);
-    }
-  if (SCM_UNLIKELY (c_utf == NULL))
-    scm_syserror (FUNC_NAME);
-  else
-    {
-      scm_dynwind_begin (0);
-      scm_dynwind_free (c_utf);
-
-      utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
-      memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
-
-      scm_dynwind_end ();
-    }
+  c_utf = (scm_t_uint8 *) scm_to_utf8_stringn (str, &c_utf_len);
+  utf = make_bytevector (c_utf_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (utf), c_utf, c_utf_len);
+  free (c_utf);
 
   return (utf);
 }
@@ -2022,6 +2000,14 @@ SCM_DEFINE (scm_string_to_utf16, "string->utf16",
 }
 #undef FUNC_NAME
 
+static void
+swap_u32 (scm_t_wchar *vals, size_t len)
+{
+  size_t n;
+  for (n = 0; n < len; n++)
+    vals[n] = bswap_32 (vals[n]);
+}
+
 SCM_DEFINE (scm_string_to_utf32, "string->utf32",
            1, 1, 0,
            (SCM str, SCM endianness),
@@ -2029,7 +2015,21 @@ SCM_DEFINE (scm_string_to_utf32, "string->utf32",
            "encoding of @var{str}.")
 #define FUNC_NAME s_scm_string_to_utf32
 {
-  STRING_TO_UTF (32);
+  SCM bv;
+  scm_t_wchar *wchars;
+  size_t wchar_len, bytes_len;
+
+  wchars = scm_to_utf32_stringn (str, &wchar_len);
+  bytes_len = wchar_len * sizeof (scm_t_wchar);
+  if (!scm_is_eq (SCM_UNBNDP (endianness) ? scm_endianness_big : endianness,
+                  scm_i_native_endianness))
+    swap_u32 (wchars, wchar_len);
+  
+  bv = make_bytevector (bytes_len, SCM_ARRAY_ELEMENT_TYPE_VU8);
+  memcpy (SCM_BYTEVECTOR_CONTENTS (bv), wchars, bytes_len);
+  free (wchars);
+
+  return bv;
 }
 #undef FUNC_NAME
 
diff --git a/libguile/strings.c b/libguile/strings.c
index 5901f9f..62e3429 100644
--- a/libguile/strings.c
+++ b/libguile/strings.c
@@ -1935,10 +1935,47 @@ scm_to_utf8_string (SCM str)
   return scm_to_utf8_stringn (str, NULL);
 }
 
+static size_t
+latin1_u8_strlen (const scm_t_uint8 *str, size_t len)
+{
+  size_t ret, i;
+  for (i = 0, ret = 0; i < len; i++)
+    ret += (str[i] < 128) ? 1 : 2;
+  return ret;
+}
+
+static scm_t_uint8*
+latin1_to_u8 (const scm_t_uint8 *str, size_t latin_len,
+              scm_t_uint8 *u8_result, size_t *u8_lenp)
+{
+  size_t i, n;
+  size_t u8_len = latin1_u8_strlen (str, latin_len);
+
+  if (!(u8_result && u8_lenp && *u8_lenp > u8_len))
+    u8_result = scm_malloc (u8_len + 1);
+  if (u8_lenp)
+    *u8_lenp = u8_len;
+
+  for (i = 0, n = 0; i < latin_len; i++)
+    n += u8_uctomb (u8_result + n, str[i], u8_len - n);
+  if (n != u8_len)
+    abort ();
+  u8_result[n] = 0;
+
+  return u8_result;
+}
+
 char *
 scm_to_utf8_stringn (SCM str, size_t *lenp)
 {
-  return scm_to_stringn (str, lenp, "UTF-8", SCM_FAILED_CONVERSION_ERROR);
+  if (scm_i_is_narrow_string (str))
+    return (char *) latin1_to_u8 ((scm_t_uint8 *) scm_i_string_chars (str),
+                                  scm_i_string_length (str),
+                                  NULL, lenp);
+  else
+    return (char *) u32_to_u8 ((scm_t_uint32*)scm_i_string_wide_chars (str),
+                               scm_i_string_length (str),
+                               NULL, lenp);
 }
 
 scm_t_wchar *
@@ -1956,9 +1993,20 @@ scm_to_utf32_stringn (SCM str, size_t *lenp)
   SCM_VALIDATE_STRING (1, str);
 
   if (scm_i_is_narrow_string (str))
-    result = (scm_t_wchar *)
-      scm_to_stringn (str, lenp, "UTF-32",
-                     SCM_FAILED_CONVERSION_ERROR);
+    {
+      scm_t_uint8 *codepoints;
+      size_t i, len;
+
+      codepoints = (scm_t_uint8*) scm_i_string_chars (str);
+      len = scm_i_string_length (str);
+      if (lenp)
+       *lenp = len;
+
+      result = scm_malloc ((len + 1) * sizeof (scm_t_wchar));
+      for (i = 0; i < len; i++)
+        result[i] = codepoints[i];
+      result[len] = 0;
+    }
   else
     {
       size_t len;


hooks/post-receive
-- 
GNU Guile



reply via email to

[Prev in Thread] Current Thread [Next in Thread]