[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Guile-commits] GNU Guile branch, master, updated. release_1-9-2-164-g0d
From: |
Michael Gran |
Subject: |
[Guile-commits] GNU Guile branch, master, updated. release_1-9-2-164-g0d05ae7 |
Date: |
Tue, 08 Sep 2009 02:13:41 +0000 |
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "GNU Guile".
http://git.savannah.gnu.org/cgit/guile.git/commit/?id=0d05ae7c4b1eddf6257f99f44eaf5cb7b11191be
The branch, master has been updated
via 0d05ae7c4b1eddf6257f99f44eaf5cb7b11191be (commit)
via 7519234547acd3ced5cbe265f0bf1fcd6d6cda06 (commit)
from eebff6d7f1055bb59fbad24c23f8db8ce14391d5 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit 0d05ae7c4b1eddf6257f99f44eaf5cb7b11191be
Author: Michael Gran <address@hidden>
Date: Mon Sep 7 18:50:39 2009 -0700
8-bit locale needed for 8-bit regexp tests
Since the regex library expects 8-bit clean characters and
an 8-bit locale, tests of 8-bit characters need to occur within
the context of an 8-bit locale.
* test-suite/tests/regexp.test (regexp-quote tests): wrap them in an
ISO-8859-1 locale
commit 7519234547acd3ced5cbe265f0bf1fcd6d6cda06
Author: Michael Gran <address@hidden>
Date: Mon Sep 7 18:42:29 2009 -0700
Fix broken interaction between readline and Unicode
This requires separate small fixes.
Readline has internal logic to deal with multi-byte characters, so
it wants bytes, not characters.
scm_c_read gets called by the vm when readline is activated, and it was
truncating multi-byte characters because soft ports didn't have the
UCS-4 capability.
Soft ports need the capability to read UCS-4 characters. Since soft ports
may have a single byte buffer, full characters need to be stored into the
pushback buffer.
This broke the optimizations in scm_c_read for using an alternate buffer
for single-byte-buffered ports, because the opimization wasn't expecting
anything in the pushback buffer.
* libguile/vports.c (sf_fill_input): store complete chars, not single bytes
* libguile/ports.c (scm_c_read): don't use optimized path for non Latin-1.
Add debug prints.
* libguile/string.h: make scm_i_from_stringn and scm_i_string_ref public
so that readline can use them
* guile-readline/readline.c: read bytes, not complete chars, from the
input port. Convert output to the output port's locale
-----------------------------------------------------------------------
Summary of changes:
guile-readline/readline.c | 11 ++++-
libguile/ports.c | 20 +++++++-
libguile/strings.h | 4 +-
libguile/vports.c | 23 ++++++---
test-suite/tests/regexp.test | 99 +++++++++++++++++++++---------------------
5 files changed, 93 insertions(+), 64 deletions(-)
diff --git a/guile-readline/readline.c b/guile-readline/readline.c
index cbf4051..a665415 100644
--- a/guile-readline/readline.c
+++ b/guile-readline/readline.c
@@ -128,6 +128,7 @@ rl_free_line_state ()
static int promptp;
static SCM input_port;
+static SCM output_port;
static SCM before_read;
static int
@@ -138,7 +139,7 @@ current_input_getc (FILE *in SCM_UNUSED)
scm_apply (before_read, SCM_EOL, SCM_EOL);
promptp = 0;
}
- return scm_getc (input_port);
+ return scm_get_byte_or_eof (input_port);
}
static int in_readline = 0;
@@ -255,7 +256,12 @@ internal_readline (SCM text)
promptp = 1;
s = readline (prompt);
if (s)
- ret = scm_from_locale_string (s);
+ {
+ scm_t_port *pt = SCM_PTAB_ENTRY (output_port);
+
+ ret = scm_i_from_stringn (s, strlen (s), pt->encoding,
+ SCM_FAILED_CONVERSION_ESCAPE_SEQUENCE);
+ }
else
ret = SCM_EOF_VAL;
@@ -311,6 +317,7 @@ scm_readline_init_ports (SCM inp, SCM outp)
}
input_port = inp;
+ output_port = outp;
#ifndef __MINGW32__
rl_instream = stream_from_fport (inp, "r", s_scm_readline);
rl_outstream = stream_from_fport (outp, "w", s_scm_readline);
diff --git a/libguile/ports.c b/libguile/ports.c
index b3547f5..35046dd 100644
--- a/libguile/ports.c
+++ b/libguile/ports.c
@@ -1293,7 +1293,7 @@ scm_c_read (SCM port, void *buffer, size_t size)
requested number of bytes. (Note that a single scm_fill_input
call does not guarantee to fill the whole of the port's read
buffer.) */
- if (pt->read_buf_size <= 1)
+ if (pt->read_buf_size <= 1 && pt->encoding == NULL)
{
/* The port that we are reading from is unbuffered - i.e. does
not have its own persistent buffer - but we have a buffer,
@@ -1305,7 +1305,14 @@ scm_c_read (SCM port, void *buffer, size_t size)
We need to make sure that the port's normal (1 byte) buffer
is reinstated in case one of the scm_fill_input () calls
throws an exception; we use the scm_dynwind_* API to achieve
- that. */
+ that.
+
+ A consequence of this optimization is that the fill_input
+ functions can't unget characters. That'll push data to the
+ pushback buffer instead of this psb buffer. */
+#if SCM_DEBUG == 1
+ unsigned char *pback = pt->putback_buf;
+#endif
psb.pt = pt;
psb.buffer = buffer;
psb.size = size;
@@ -1320,8 +1327,15 @@ scm_c_read (SCM port, void *buffer, size_t size)
pt->read_buf_size -= (pt->read_end - pt->read_pos);
pt->read_pos = pt->read_buf = pt->read_end;
}
+#if SCM_DEBUG == 1
+ if (pback != pt->putback_buf
+ || pt->read_buf - (unsigned char *) buffer < 0)
+ scm_misc_error (FUNC_NAME,
+ "scm_c_read must not call a fill function that pushes "
+ "back characters onto an unbuffered port", SCM_EOL);
+#endif
n_read += pt->read_buf - (unsigned char *) buffer;
-
+
/* Reinstate the port's normal buffer. */
scm_dynwind_end ();
}
diff --git a/libguile/strings.h b/libguile/strings.h
index c521926..658d64d 100644
--- a/libguile/strings.h
+++ b/libguile/strings.h
@@ -111,7 +111,7 @@ SCM_API SCM scm_substring_shared (SCM str, SCM start, SCM
end);
SCM_API SCM scm_substring_copy (SCM str, SCM start, SCM end);
SCM_API SCM scm_string_append (SCM args);
-SCM_INTERNAL SCM scm_i_from_stringn (const char *str, size_t len,
+SCM_API SCM scm_i_from_stringn (const char *str, size_t len,
const char *encoding,
scm_t_string_failed_conversion_handler
handler);
@@ -157,7 +157,7 @@ SCM_INTERNAL const scm_t_wchar *scm_i_string_wide_chars
(SCM str);
SCM_INTERNAL SCM scm_i_string_start_writing (SCM str);
SCM_INTERNAL void scm_i_string_stop_writing (void);
SCM_INTERNAL int scm_i_is_narrow_string (SCM str);
-SCM_INTERNAL scm_t_wchar scm_i_string_ref (SCM str, size_t x);
+SCM_API scm_t_wchar scm_i_string_ref (SCM str, size_t x);
SCM_INTERNAL int scm_i_string_contains_char (SCM str, char c);
SCM_INTERNAL int scm_i_string_strcmp (SCM sstr, size_t start_x, const char
*cstr);
SCM_INTERNAL void scm_i_string_set_x (SCM str, size_t p, scm_t_wchar chr);
diff --git a/libguile/vports.c b/libguile/vports.c
index cea11c6..e3db60d 100644
--- a/libguile/vports.c
+++ b/libguile/vports.c
@@ -92,19 +92,26 @@ sf_fill_input (SCM port)
{
SCM p = SCM_PACK (SCM_STREAM (port));
SCM ans;
+ scm_t_port *pt;
ans = scm_call_0 (SCM_SIMPLE_VECTOR_REF (p, 3)); /* get char. */
if (scm_is_false (ans) || SCM_EOF_OBJECT_P (ans))
return EOF;
SCM_ASSERT (SCM_CHARP (ans), ans, SCM_ARG1, "sf_fill_input");
- {
- scm_t_port *pt = SCM_PTAB_ENTRY (port);
-
- *pt->read_buf = SCM_CHAR (ans);
- pt->read_pos = pt->read_buf;
- pt->read_end = pt->read_buf + 1;
- return *pt->read_buf;
- }
+ pt = SCM_PTAB_ENTRY (port);
+
+ if (pt->encoding == NULL)
+ {
+ scm_t_port *pt = SCM_PTAB_ENTRY (port);
+
+ *pt->read_buf = SCM_CHAR (ans);
+ pt->read_pos = pt->read_buf;
+ pt->read_end = pt->read_buf + 1;
+ return *pt->read_buf;
+ }
+ else
+ scm_ungetc (SCM_CHAR (ans), port);
+ return SCM_CHAR (ans);
}
diff --git a/test-suite/tests/regexp.test b/test-suite/tests/regexp.test
index 7308399..eac6527 100644
--- a/test-suite/tests/regexp.test
+++ b/test-suite/tests/regexp.test
@@ -132,55 +132,56 @@
(with-test-prefix "regexp-quote"
- (pass-if-exception "no args" exception:wrong-num-args
- (regexp-quote))
-
- (pass-if-exception "bad string arg" exception:wrong-type-arg
- (regexp-quote 'blah))
-
- (let ((lst `((regexp/basic ,regexp/basic)
- (regexp/extended ,regexp/extended)))
- ;; string of all characters, except #\nul which doesn't work because
- ;; it's the usual end-of-string for the underlying C regexec()
- (allchars (list->string (map integer->char
- (cdr (iota char-code-limit))))))
- (for-each
- (lambda (elem)
- (let ((name (car elem))
- (flag (cadr elem)))
-
- (with-test-prefix name
-
- ;; try on each individual character, except #\nul
- (do ((i 1 (1+ i)))
- ((>= i char-code-limit))
- (let* ((c (integer->char i))
- (s (string c))
- (q (regexp-quote s)))
- (pass-if (list "char" i c s q)
- (let ((m (regexp-exec (make-regexp q flag) s)))
- (and (= 0 (match:start m))
- (= 1 (match:end m)))))))
-
- ;; try on pattern "aX" where X is each character, except #\nul
- ;; this exposes things like "?" which are special only when they
- ;; follow a pattern to repeat or whatever ("a" in this case)
- (do ((i 1 (1+ i)))
- ((>= i char-code-limit))
- (let* ((c (integer->char i))
- (s (string #\a c))
- (q (regexp-quote s)))
- (pass-if (list "string \"aX\"" i c s q)
- (let ((m (regexp-exec (make-regexp q flag) s)))
- (and (= 0 (match:start m))
- (= 2 (match:end m)))))))
-
- (pass-if "string of all chars"
- (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
- flag) allchars)))
- (and (= 0 (match:start m))
- (= (string-length allchars) (match:end m))))))))
- lst)))
+ (with-locale "en_US.iso88591"
+ (pass-if-exception "no args" exception:wrong-num-args
+ (regexp-quote))
+
+ (pass-if-exception "bad string arg" exception:wrong-type-arg
+ (regexp-quote 'blah))
+
+ (let ((lst `((regexp/basic ,regexp/basic)
+ (regexp/extended ,regexp/extended)))
+ ;; string of all characters, except #\nul which doesn't work because
+ ;; it's the usual end-of-string for the underlying C regexec()
+ (allchars (list->string (map integer->char
+ (cdr (iota char-code-limit))))))
+ (for-each
+ (lambda (elem)
+ (let ((name (car elem))
+ (flag (cadr elem)))
+
+ (with-test-prefix name
+
+ ;; try on each individual character, except #\nul
+ (do ((i 1 (1+ i)))
+ ((>= i char-code-limit))
+ (let* ((c (integer->char i))
+ (s (string c))
+ (q (regexp-quote s)))
+ (pass-if (list "char" i (format #f "~s ~s ~s" c s q))
+ (let ((m (regexp-exec (make-regexp q flag) s)))
+ (and (= 0 (match:start m))
+ (= 1 (match:end m)))))))
+
+ ;; try on pattern "aX" where X is each character, except #\nul
+ ;; this exposes things like "?" which are special only when they
+ ;; follow a pattern to repeat or whatever ("a" in this case)
+ (do ((i 1 (1+ i)))
+ ((>= i char-code-limit))
+ (let* ((c (integer->char i))
+ (s (string #\a c))
+ (q (regexp-quote s)))
+ (pass-if (list "string \"aX\"" i (format #f "~s ~s ~s" c s q))
+ (let ((m (regexp-exec (make-regexp q flag) s)))
+ (and (= 0 (match:start m))
+ (= 2 (match:end m)))))))
+
+ (pass-if "string of all chars"
+ (let ((m (regexp-exec (make-regexp (regexp-quote allchars)
+ flag) allchars)))
+ (and (= 0 (match:start m))
+ (= (string-length allchars) (match:end m))))))))
+ lst))))
;;;
;;; regexp-substitute
hooks/post-receive
--
GNU Guile
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Guile-commits] GNU Guile branch, master, updated. release_1-9-2-164-g0d05ae7,
Michael Gran <=