guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] GNU Guile branch, master, updated. release_1-9-2-164-g0d


From: Michael Gran
Subject: [Guile-commits] GNU Guile branch, master, updated. release_1-9-2-164-g0d05ae7
Date: Tue, 08 Sep 2009 02:13:41 +0000

This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".

http://git.savannah.gnu.org/cgit/guile.git/commit/?id=0d05ae7c4b1eddf6257f99f44eaf5cb7b11191be

The branch, master has been updated
       via  0d05ae7c4b1eddf6257f99f44eaf5cb7b11191be (commit)
       via  7519234547acd3ced5cbe265f0bf1fcd6d6cda06 (commit)
      from  eebff6d7f1055bb59fbad24c23f8db8ce14391d5 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
commit 0d05ae7c4b1eddf6257f99f44eaf5cb7b11191be
Author: Michael Gran <address@hidden>
Date:   Mon Sep 7 18:50:39 2009 -0700

    8-bit locale needed for 8-bit regexp tests
    
    Since the regex library expects 8-bit clean characters and
    an 8-bit locale, tests of 8-bit characters need to occur within
    the context of an 8-bit locale.
    
    * test-suite/tests/regexp.test (regexp-quote tests): wrap them in an
      ISO-8859-1 locale

commit 7519234547acd3ced5cbe265f0bf1fcd6d6cda06
Author: Michael Gran <address@hidden>
Date:   Mon Sep 7 18:42:29 2009 -0700

    Fix broken interaction between readline and Unicode
    
    This requires separate small fixes.
    
    Readline has internal logic to deal with multi-byte characters, so
    it wants bytes, not characters.
    
    scm_c_read gets called by the vm when readline is activated, and it was
    truncating multi-byte characters because soft ports didn't have the
    UCS-4 capability.
    
    Soft ports need the capability to read UCS-4 characters.  Since soft ports
    may have a single byte buffer, full characters need to be stored into the
    pushback buffer.
    
    This broke the optimizations in scm_c_read for using an alternate buffer
    for single-byte-buffered ports, because the opimization wasn't expecting
    anything in the pushback buffer.
    
    * libguile/vports.c (sf_fill_input): store complete chars, not single bytes
    
    * libguile/ports.c (scm_c_read): don't use optimized path for non Latin-1.
      Add debug prints.
    
    * libguile/string.h: make scm_i_from_stringn and scm_i_string_ref public
      so that readline can use them
    
    * guile-readline/readline.c: read bytes, not complete chars, from the
      input port.  Convert output to the output port's locale

-----------------------------------------------------------------------

Summary of changes:
 guile-readline/readline.c    |   11 ++++-
 libguile/ports.c             |   20 +++++++-
 libguile/strings.h           |    4 +-
 libguile/vports.c            |   23 ++++++---
 test-suite/tests/regexp.test |   99 +++++++++++++++++++++---------------------
 5 files changed, 93 insertions(+), 64 deletions(-)

diff --git a/guile-readline/readline.c b/guile-readline/readline.c
index cbf4051..a665415 100644
--- a/guile-readline/readline.c
+++ b/guile-readline/readline.c
@@ -128,6 +128,7 @@ rl_free_line_state ()
 
 static int promptp;
 static SCM input_port;
+static SCM output_port;
 static SCM before_read;
 
 static int
@@ -138,7 +139,7 @@ current_input_getc (FILE *in SCM_UNUSED)
       scm_apply (before_read, SCM_EOL, SCM_EOL);
       promptp = 0;
     }
-  return scm_getc (input_port);
+  return scm_get_byte_or_eof (input_port);
 }
 
 static int in_readline = 0;
@@ -255,7 +256,12 @@ internal_readline (SCM text)
   promptp = 1;
   s = readline (prompt);
   if (s)
-    ret = scm_from_locale_string (s);
+    {
+      scm_t_port *pt = SCM_PTAB_ENTRY (output_port);
+      
+      ret = scm_i_from_stringn (s, strlen (s), pt->encoding, 
+                                SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
+    }
   else 
     ret = SCM_EOF_VAL;
 
@@ -311,6 +317,7 @@ scm_readline_init_ports (SCM inp, SCM outp)
   }
 
   input_port = inp;
+  output_port = outp;
 #ifndef __MINGW32__
   rl_instream = stream_from_fport (inp, "r", s_scm_readline);
   rl_outstream = stream_from_fport (outp, "w", s_scm_readline);
diff --git a/libguile/ports.c b/libguile/ports.c
index b3547f5..35046dd 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1293,7 +1293,7 @@ scm_c_read (SCM port, void *buffer, size_t size)
      requested number of bytes.  (Note that a single scm_fill_input
      call does not guarantee to fill the whole of the port's read
      buffer.) */
-  if (pt->read_buf_size <= 1)
+  if (pt->read_buf_size <= 1 && pt->encoding == NULL)
     {
       /* The port that we are reading from is unbuffered - i.e. does
         not have its own persistent buffer - but we have a buffer,
@@ -1305,7 +1305,14 @@ scm_c_read (SCM port, void *buffer, size_t size)
         We need to make sure that the port's normal (1 byte) buffer
         is reinstated in case one of the scm_fill_input () calls
         throws an exception; we use the scm_dynwind_* API to achieve
-        that. */
+        that. 
+
+         A consequence of this optimization is that the fill_input
+         functions can't unget characters.  That'll push data to the
+         pushback buffer instead of this psb buffer.  */
+#if SCM_DEBUG == 1
+      unsigned char *pback = pt->putback_buf;
+#endif      
       psb.pt = pt;
       psb.buffer = buffer;
       psb.size = size;
@@ -1320,8 +1327,15 @@ scm_c_read (SCM port, void *buffer, size_t size)
          pt->read_buf_size -= (pt->read_end - pt->read_pos);
          pt->read_pos = pt->read_buf = pt->read_end;
        }
+#if SCM_DEBUG == 1
+      if (pback != pt->putback_buf 
+          || pt->read_buf - (unsigned char *) buffer < 0)
+        scm_misc_error (FUNC_NAME, 
+                        "scm_c_read must not call a fill function that pushes "
+                        "back characters onto an unbuffered port", SCM_EOL);
+#endif      
       n_read += pt->read_buf - (unsigned char *) buffer;
-
+      
       /* Reinstate the port's normal buffer. */
       scm_dynwind_end ();
     }
diff --git a/libguile/strings.h b/libguile/strings.h
index c521926..658d64d 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -111,7 +111,7 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM 
end);
 SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
 SCM_API SCM scm_string_append (SCM args);
 
-SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len, 
+SCM_API SCM scm_i_from_stringn (const char *str, size_t len, 
                                      const char *encoding,
                                      scm_t_string_failed_conversion_handler 
                                      handler);
@@ -157,7 +157,7 @@ SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars 
(SCM str);
 SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
 SCM_INTERNAL void scm_i_string_stop_writing (void);
 SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
-SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
+SCM_API scm_t_wchar scm_i_string_ref (SCM str, size_t x);
 SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
 SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char 
*cstr);
 SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
diff --git a/libguile/vports.c b/libguile/vports.c
index cea11c6..e3db60d 100644
--- a/libguile/vports.c
+++ b/libguile/vports.c
@@ -92,19 +92,26 @@ sf_fill_input (SCM port)
 {
   SCM p = SCM_PACK (SCM_STREAM (port));
   SCM ans;
+  scm_t_port *pt;
 
   ans = scm_call_0 (SCM_SIMPLE_VECTOR_REF (p, 3)); /* get char.  */
   if (scm_is_false (ans) || SCM_EOF_OBJECT_P (ans))
     return EOF;
   SCM_ASSERT (SCM_CHARP (ans), ans, SCM_ARG1, "sf_fill_input");
-  {
-    scm_t_port *pt = SCM_PTAB_ENTRY (port);    
-
-    *pt->read_buf = SCM_CHAR (ans);
-    pt->read_pos = pt->read_buf;
-    pt->read_end = pt->read_buf + 1;
-    return *pt->read_buf;
-  }
+  pt = SCM_PTAB_ENTRY (port);    
+
+  if (pt->encoding == NULL)
+    {
+      scm_t_port *pt = SCM_PTAB_ENTRY (port);    
+      
+      *pt->read_buf = SCM_CHAR (ans);
+      pt->read_pos = pt->read_buf;
+      pt->read_end = pt->read_buf + 1;
+      return *pt->read_buf;
+    }
+  else
+    scm_ungetc (SCM_CHAR (ans), port);
+  return SCM_CHAR (ans);
 }
 
 
diff --git a/test-suite/tests/regexp.test b/test-suite/tests/regexp.test
index 7308399..eac6527 100644
--- a/test-suite/tests/regexp.test
+++ b/test-suite/tests/regexp.test
@@ -132,55 +132,56 @@
 
 (with-test-prefix "regexp-quote"
 
-  (pass-if-exception "no args" exception:wrong-num-args
-    (regexp-quote))
-
-  (pass-if-exception "bad string arg" exception:wrong-type-arg
-    (regexp-quote 'blah))
-
-  (let ((lst `((regexp/basic    ,regexp/basic)
-              (regexp/extended ,regexp/extended)))
-       ;; string of all characters, except #\nul which doesn't work because
-       ;; it's the usual end-of-string for the underlying C regexec()
-       (allchars (list->string (map integer->char
-                                    (cdr (iota char-code-limit))))))
-    (for-each
-     (lambda (elem)
-       (let ((name (car  elem))
-            (flag (cadr elem)))
-
-        (with-test-prefix name
-
-          ;; try on each individual character, except #\nul
-          (do ((i 1 (1+ i)))
-              ((>= i char-code-limit))
-            (let* ((c (integer->char i))
-                   (s (string c))
-                   (q (regexp-quote s)))
-              (pass-if (list "char" i c s q)
-                (let ((m (regexp-exec (make-regexp q flag) s)))
-                  (and (= 0 (match:start m))
-                       (= 1 (match:end m)))))))
-
-          ;; try on pattern "aX" where X is each character, except #\nul
-          ;; this exposes things like "?" which are special only when they
-          ;; follow a pattern to repeat or whatever ("a" in this case)
-          (do ((i 1 (1+ i)))
-              ((>= i char-code-limit))
-            (let* ((c (integer->char i))
-                   (s (string #\a c))
-                   (q (regexp-quote s)))
-              (pass-if (list "string \"aX\"" i c s q)
-                (let ((m (regexp-exec (make-regexp q flag) s)))
-                  (and (= 0 (match:start m))
-                       (= 2 (match:end m)))))))
-
-          (pass-if "string of all chars"
-            (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
-                                               flag) allchars)))
-              (and (= 0 (match:start m))
-                   (= (string-length allchars) (match:end m))))))))
-     lst)))
+  (with-locale "en_US.iso88591"
+    (pass-if-exception "no args" exception:wrong-num-args
+      (regexp-quote))
+
+    (pass-if-exception "bad string arg" exception:wrong-type-arg
+      (regexp-quote 'blah))
+
+    (let ((lst `((regexp/basic    ,regexp/basic)
+                (regexp/extended ,regexp/extended)))
+          ;; string of all characters, except #\nul which doesn't work because
+          ;; it's the usual end-of-string for the underlying C regexec()
+          (allchars (list->string (map integer->char
+                                       (cdr (iota char-code-limit))))))
+      (for-each
+       (lambda (elem)
+         (let ((name (car  elem))
+               (flag (cadr elem)))
+
+           (with-test-prefix name
+
+            ;; try on each individual character, except #\nul
+            (do ((i 1 (1+ i)))
+                 ((>= i char-code-limit))
+               (let* ((c (integer->char i))
+                      (s (string c))
+                      (q (regexp-quote s)))
+                 (pass-if (list "char" i (format #f "~s ~s ~s" c s q))
+                   (let ((m (regexp-exec (make-regexp q flag) s)))
+                     (and (= 0 (match:start m))
+                          (= 1 (match:end m)))))))
+
+             ;; try on pattern "aX" where X is each character, except #\nul
+             ;; this exposes things like "?" which are special only when they
+             ;; follow a pattern to repeat or whatever ("a" in this case)
+             (do ((i 1 (1+ i)))
+                 ((>= i char-code-limit))
+               (let* ((c (integer->char i))
+                      (s (string #\a c))
+                      (q (regexp-quote s)))
+                 (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q))
+                   (let ((m (regexp-exec (make-regexp q flag) s)))
+                     (and (= 0 (match:start m))
+                          (= 2 (match:end m)))))))
+
+             (pass-if "string of all chars"
+              (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
+                                                  flag) allchars)))
+                 (and (= 0 (match:start m))
+                      (= (string-length allchars) (match:end m))))))))
+       lst))))
 
 ;;;
 ;;; regexp-substitute


hooks/post-receive
-- 
GNU Guile




reply via email to

[Prev in Thread] Current Thread [Next in Thread]