guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 04/05: Factor out iconv to port-decode-char


From: Andy Wingo
Subject: [Guile-commits] 04/05: Factor out iconv to port-decode-char
Date: Tue, 10 May 2016 10:51:24 +0000 (UTC)

wingo pushed a commit to branch wip-port-refactor
in repository guile.

commit 8ee189980de6d86fa270775b4bc4020352596d98
Author: Andy Wingo <address@hidden>
Date:   Tue May 10 12:45:56 2016 +0200

    Factor out iconv to port-decode-char
    
    * libguile/ports.c (scm_port_decode_char): New helper, exported
      to (ice-9 ports).
      (peek_iconv_codepoint): Use scm_port_decode_char.
---
 libguile/ports.c |  133 ++++++++++++++++++++++++++++++++----------------------
 1 file changed, 80 insertions(+), 53 deletions(-)

diff --git a/libguile/ports.c b/libguile/ports.c
index bbe3867..c81bf9a 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1699,6 +1699,62 @@ peek_latin1_codepoint (SCM port, size_t *len)
   return ret;
 }
 
+SCM_INTERNAL SCM scm_port_decode_char (SCM, SCM, SCM, SCM);
+SCM_DEFINE (scm_port_decode_char, "port-decode-char", 4, 0, 0,
+            (SCM port, SCM bv, SCM start, SCM count),
+            "")
+#define FUNC_NAME s_scm_port_decode_char
+{
+  char *input, *output;
+  scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
+  scm_t_iconv_descriptors *id;
+  size_t c_start, c_count;
+  size_t input_left, output_left, done;
+
+  SCM_VALIDATE_OPINPORT (1, port);
+  SCM_VALIDATE_BYTEVECTOR (2, bv);
+  c_start = scm_to_size_t (start);
+  c_count = scm_to_size_t (count);
+  SCM_ASSERT_RANGE (3, start, c_start <= SCM_BYTEVECTOR_LENGTH (bv));
+  SCM_ASSERT_RANGE (4, count, c_count <= SCM_BYTEVECTOR_LENGTH (bv) - c_start);
+
+  id = scm_i_port_iconv_descriptors (port);
+  input = (char *) SCM_BYTEVECTOR_CONTENTS (bv) + c_start;
+  input_left = c_count;
+  output = (char *) utf8_buf;
+  output_left = sizeof (utf8_buf);
+
+  /* FIXME: locking!  */
+  done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
+
+  if (done == (size_t) -1)
+    {
+      int err = errno;
+      if (err == EINVAL)
+        /* The input byte sequence did not form a complete
+           character.  Read another byte and try again. */
+        return SCM_BOOL_F;
+      else if (scm_is_eq (SCM_PTAB_ENTRY (port)->conversion_strategy,
+                          sym_substitute))
+        return SCM_MAKE_CHAR ('?');
+      else
+        scm_decoding_error ("decode-char", err, "input decoding error", port);
+    }
+
+  {
+    size_t output_size = sizeof (utf8_buf) - output_left;
+    if (output_size == 0)
+      /* iconv consumed some bytes without producing any output.
+         Most likely this means that a Unicode byte-order mark
+         (BOM) was consumed.  In any case, keep going until we get
+         output.  */
+      return SCM_BOOL_F;
+
+    return SCM_MAKE_CHAR (utf8_to_codepoint (utf8_buf, output_size));
+  }
+}
+#undef FUNC_NAME
+
 /* Peek a codepoint from PORT, decoding it through iconv.  On success,
    return the codepoint and set *LEN to the length in bytes.  If there
    was a decoding error and the port conversion strategy was
@@ -1708,75 +1764,46 @@ peek_latin1_codepoint (SCM port, size_t *len)
 static scm_t_wchar
 peek_iconv_codepoint (SCM port, size_t *len)
 {
-  scm_t_iconv_descriptors *id;
-  scm_t_uint8 utf8_buf[SCM_MBCHAR_BUF_SIZE];
   size_t input_size = 0;
+  SCM maybe_char = SCM_BOOL_F;
 
-  for (;;)
+  while (scm_is_false (maybe_char))
     {
-      SCM read_buf;
-      char *input, *output;
-      size_t input_left, output_left, done;
-
-      read_buf = scm_fill_input (port, input_size + 1);
-      id = scm_i_port_iconv_descriptors (port);
+      SCM read_buf = scm_fill_input (port, input_size + 1);
 
       if (scm_port_buffer_can_take (read_buf) <= input_size)
        {
           *len = input_size;
           if (input_size == 0)
             /* Normal EOF.  */
-            return EOF;
+            {
+              /* Make sure iconv descriptors have been opened even if
+                 there were no bytes, to be sure that a decoding error
+                 is signalled if the encoding itself was invalid.  */
+              scm_i_port_iconv_descriptors (port);
+              return EOF;
+            }
 
           /* EOF found in the middle of a multibyte character. */
-          goto decoding_error;
+          if (scm_is_eq (SCM_PTAB_ENTRY (port)->conversion_strategy,
+                         sym_substitute))
+            return '?';
+
+          scm_decoding_error ("peek-char", EILSEQ,
+                              "input decoding error", port);
+          /* Not reached.  */
+          return 0;
        }
 
       input_size++;
-      input = (char *) scm_port_buffer_take_pointer (read_buf);
-      input_left = input_size;
-      output = (char *) utf8_buf;
-      output_left = sizeof (utf8_buf);
-
-      /* FIXME: locking!  */
-      done = iconv (id->input_cd, &input, &input_left, &output, &output_left);
-
-      if (done == (size_t) -1)
-       {
-         int err = errno;
-         if (err == EINVAL)
-            /* The input byte sequence did not form a complete
-               character.  Read another byte and try again. */
-            continue;
-
-          *len = input_size;
-          goto decoding_error;
-       }
-      else
-        {
-          size_t output_size = sizeof (utf8_buf) - output_left;
-          if (output_size == 0)
-            /* iconv consumed some bytes without producing any output.
-               Most likely this means that a Unicode byte-order mark
-               (BOM) was consumed.  In any case, keep going until we get
-               output.  */
-            continue;
-
-          /* iconv generated output.  Convert the UTF8_BUF sequence
-             to a Unicode code point.  */
-          *len = input_size;
-          return utf8_to_codepoint (utf8_buf, output_size);
-        }
+      maybe_char = scm_port_decode_char (port,
+                                         scm_port_buffer_bytevector (read_buf),
+                                         scm_port_buffer_cur (read_buf),
+                                         SCM_I_MAKINUM (input_size));
     }
 
- decoding_error:
-  if (scm_is_eq (SCM_PTAB_ENTRY (port)->conversion_strategy, sym_substitute))
-    return '?';
-
-  scm_decoding_error ("peek-char", EILSEQ, "input decoding error",
-                      port);
-  /* Not reached.  */
-  return 0;
+  *len = input_size;
+  return SCM_CHAR (maybe_char);
 }
 
 /* Peek a codepoint from PORT and return it in *CODEPOINT.  Set *LEN to



reply via email to

[Prev in Thread] Current Thread [Next in Thread]