guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] branch master updated: Revert "Handle CRLF and Unicode l


From: Andy Wingo
Subject: [Guile-commits] branch master updated: Revert "Handle CRLF and Unicode line endings in read-line"
Date: Fri, 12 Mar 2021 16:08:25 -0500

This is an automated email from the git hooks/post-receive script.

wingo pushed a commit to branch master
in repository guile.

The following commit(s) were added to refs/heads/master by this push:
     new e30ee90  Revert "Handle CRLF and Unicode line endings in read-line"
e30ee90 is described below

commit e30ee9047845751615ca6726e22b67aa9b38e187
Author: Andy Wingo <wingo@pobox.com>
AuthorDate: Fri Mar 12 22:06:56 2021 +0100

    Revert "Handle CRLF and Unicode line endings in read-line"
    
    This reverts commit 0f983e3db0c43ad7c89f57ea84f792ede373ba0c.
    
    After discussing with Mike we are going to punt the read-line changes
    for now.  Open the port in O_TEXT mode if you want to chomp the CR in
    CFLF sequences.
---
 NEWS                               |  6 ---
 doc/ref/api-io.texi                | 10 ++---
 libguile/rdelim.c                  | 47 +++++------------------
 module/ice-9/suspendable-ports.scm | 77 ++------------------------------------
 module/web/http.scm                | 14 ++++---
 test-suite/tests/rdelim.test       | 42 +--------------------
 6 files changed, 25 insertions(+), 171 deletions(-)

diff --git a/NEWS b/NEWS
index 2ac599a..ec3d096 100644
--- a/NEWS
+++ b/NEWS
@@ -166,12 +166,6 @@ See the newly reorganized "Foreign Function Interface", 
for details.
 These new interfaces replace `dynamic-link', `dynamic-pointer' and
 similar, which will eventually be deprecated.
 
-** `read-line'
-
-This now accepts return + newline and the Unicode line separator and
-paragraph separator as line separators, as well as the newline and <eof>
-line separators it handled before.
-
 ** `read-syntax'
 
 See "Annotated Scheme Read" in the manual.
diff --git a/doc/ref/api-io.texi b/doc/ref/api-io.texi
index 2345f04..777f282 100644
--- a/doc/ref/api-io.texi
+++ b/doc/ref/api-io.texi
@@ -755,10 +755,8 @@ a specified set of characters.
 
 @deffn {Scheme Procedure} read-line [port] [handle-delim]
 Return a line of text from @var{port} if specified, otherwise from the
-value returned by @code{(current-input-port)}.  Under Unix, a line of
-text is terminated by the first end-of-line character or by end-of-file.
-The end-of-line characters handled are newline, carriage return plus
-newline, or the Unicode line or paragraph separators.
+value returned by @code{(current-input-port)}.  Under Unix, a line of text
+is terminated by the first end-of-line character or by end-of-file.
 
 If @var{handle-delim} is specified, it should be one of the following
 symbols:
@@ -773,9 +771,7 @@ Append the terminating delimiter (if any) to the returned 
string.
 Push the terminating delimiter (if any) back on to the port.
 @item split
 Return a pair containing the string read from the port and the
-terminating delimiter or end-of-file object.  The delimiter will either
-be a single character for newline or the Unicode line or paragraph
-separators, or it will be the string @code{"\r\n"}.
+terminating delimiter or end-of-file object.
 @end table
 @end deffn
 
diff --git a/libguile/rdelim.c b/libguile/rdelim.c
index c1b9202..4a0b209 100644
--- a/libguile/rdelim.c
+++ b/libguile/rdelim.c
@@ -112,11 +112,10 @@ SCM_DEFINE (scm_read_delimited_x, "%read-delimited!", 3, 
3, 0,
 
 SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0, 
             (SCM port),
-           "Read a line from @var{port}, allocating storage as necessary.\n"
-           "The terminator (if any) is removed from the string,\n"
+           "Read a newline-terminated line from @var{port}, allocating storage 
as\n"
+           "necessary.  The newline terminator (if any) is removed from the 
string,\n"
            "and a pair consisting of the line and its delimiter is returned.  
The\n"
-           "delimiter may be either a newline, return + newline, the Unicode\n"
-            "line or paragraph separators, or the @var{eof-object}; if\n"
+           "delimiter may be either a newline or the @var{eof-object}; if\n"
            "@code{%read-line} is called at the end of file, it returns the 
pair\n"
            "@code{(#<eof> . #<eof>)}.")
 #define FUNC_NAME s_scm_read_line
@@ -128,7 +127,6 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
   SCM line, strings, result;
   scm_t_wchar buf[LINE_BUFFER_SIZE], delim;
   size_t index;
-  int cr = 0;
 
   if (SCM_UNBNDP (port))
     port = scm_current_input_port ();
@@ -154,24 +152,12 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
          buf[index] = scm_getc (port);
          switch (buf[index])
            {
+           case EOF:
            case '\n':
              delim = buf[index];
-              break;
-
-            case EOF:
-            case 0x2028:        /* U+2028 LINE SEPARATOR */
-            case 0x2029:        /* U+2029 PARAGRAPH SEPARATOR */
-              cr = 0;
-              delim = buf[index];
-              break;
-
-            case '\r':
-              cr = 1;
-              index ++;
-              break;
+             break;
 
            default:
-              cr = 0;
              index++;
            }
        }
@@ -179,33 +165,20 @@ SCM_DEFINE (scm_read_line, "%read-line", 0, 1, 0,
   while (delim == 0);
 
   if (SCM_LIKELY (scm_is_false (strings)))
-    {
-      /* The fast path.  */
-      if (cr)
-        line = scm_from_utf32_stringn (buf, index - 1);
-      else
-        line = scm_from_utf32_stringn (buf, index);
-    }
+    /* The fast path.  */
+    line = scm_from_utf32_stringn (buf, index);
   else
     {
       /* Aggregate the intermediary results.  */
-      if (cr)
-        strings = scm_cons (scm_from_utf32_stringn (buf, index - 1), strings);
-      else
-        strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
+      strings = scm_cons (scm_from_utf32_stringn (buf, index), strings);
       line = scm_string_concatenate (scm_reverse (strings));
     }
 
   if (delim == EOF && scm_i_string_length (line) == 0)
     result = scm_cons (SCM_EOF_VAL, SCM_EOF_VAL);
   else
-    {
-      if (cr)
-        result = scm_cons (line, scm_from_latin1_string("\r\n"));
-      else
-        result = scm_cons (line,
-                           delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
-    }
+    result = scm_cons (line,
+                      delim == EOF ? SCM_EOF_VAL : SCM_MAKE_CHAR (delim));
 
   return result;
 #undef LINE_BUFFER_SIZE
diff --git a/module/ice-9/suspendable-ports.scm 
b/module/ice-9/suspendable-ports.scm
index ba8d225..f5f005c 100644
--- a/module/ice-9/suspendable-ports.scm
+++ b/module/ice-9/suspendable-ports.scm
@@ -1,5 +1,5 @@
 ;;; Ports, implemented in Scheme
-;;; Copyright (C) 2016, 2018, 2021 Free Software Foundation, Inc.
+;;; Copyright (C) 2016, 2019 Free Software Foundation, Inc.
 ;;;
 ;;; This library is free software: you can redistribute it and/or modify
 ;;; it under the terms of the GNU Lesser General Public License as
@@ -689,81 +689,10 @@
 
 (define* (read-line #:optional (port (current-input-port))
                     (handle-delim 'trim))
-  (let* ((line/delim   (%read-line port))
-        (line          (car line/delim))
-        (delim         (cdr line/delim)))
-    (case handle-delim
-      ((trim) line)
-      ((split) line/delim)
-      ((concat) (if (and (string? line) (char? delim))
-                   (string-append line (string delim))
-                   line))
-      ((peek) (if (char? delim)
-                 (unread-char delim port))
-             line)
-      (else
-       (error "unexpected handle-delim value: " handle-delim)))))
+  (read-delimited "\n" port handle-delim))
 
 (define* (%read-line port)
-  (let ((LINE_BUFFER_SIZE 256))
-    (let ((strings #f)
-          (result #f)
-          (buf (make-string LINE_BUFFER_SIZE #\nul))
-          (delim #f)
-          (index 0)
-          (cr #f)
-          (go #t))
-      (cond
-       ((not (input-port? port))
-        (error "Not an input port." port))
-       (else
-        (while go
-               (cond
-                ((>= index LINE_BUFFER_SIZE)
-                 (set! strings (cons (substring buf 0 index)
-                                     (or strings '())))
-                 (set! index 0))
-                (else
-                 (let ((c (read-char port)))
-                   (cond
-                    ((or (eof-object? c)
-                         (char=? c #\x2028)  ; U+2028 LINE SEPARATOR
-                         (char=? c #\x2029)) ; U+2029 PARAGRAPH SEPARATOR
-                     (set! cr #f)
-                     (set! delim c))
-                    ((char=? c #\newline)
-                     (set! delim c))
-                    ((char=? c #\return)
-                     (set! cr #t)
-                     (string-set! buf index c)
-                     (set! index (1+ index)))
-                    (else
-                     (set! cr #f)
-                     (string-set! buf index c)
-                     (set! index (1+ index)))))))
-
-               (if (or (eof-object? delim)
-                       (char? delim))
-                   (set! go #f)))
-      (let ((line (if (not strings)
-                      ;; A short string.
-                      (if cr
-                          (substring buf 0 (1- index))
-                          (substring buf 0 index))
-                      ;; Else, aggregate the intermediary results.
-                      (begin
-                        (if cr
-                            (set! strings (cons (substring buf 0 (1- index)) 
strings))
-                            (set! strings (cons (substring buf 0 index) 
strings)))
-                        (apply string-append (reverse strings))))))
-
-        (if (and (eof-object? delim)
-                 (zero? (string-length line)))
-            (cons the-eof-object the-eof-object)
-            ;; Else
-            (if cr
-                (cons line "\r\n")
-                (cons line delim)))))))))
+  (read-line port 'split))
 
 (define* (put-string port str #:optional (start 0)
                      (count (- (string-length str) start)))
diff --git a/module/web/http.scm b/module/web/http.scm
index 32a3093..4276e17 100644
--- a/module/web/http.scm
+++ b/module/web/http.scm
@@ -157,12 +157,13 @@ The default writer will call ‘put-string’."
 Raise a 'bad-header' exception if the line does not end in CRLF or LF,
 or if EOF is reached."
   (match (%read-line port)
-    (((? string? line) . "\r\n")
-     line)
     (((? string? line) . #\newline)
-     ;; We are more tolerant than the RFC in that we tolerate LF-only
-     ;; endings.
-     line)
+     ;; '%read-line' does not consider #\return a delimiter; so if it's
+     ;; there, remove it.  We are more tolerant than the RFC in that we
+     ;; tolerate LF-only endings.
+     (if (string-suffix? "\r" line)
+         (string-drop-right line 1)
+         line))
     ((line . _)                                ;EOF or missing delimiter
      (bad-header 'read-header-line line))))
 
@@ -183,7 +184,8 @@ was known but the value was invalid.
 Returns the end-of-file object for both values if the end of the message
 body was reached (i.e., a blank line)."
   (let ((line (read-header-line port)))
-    (if (string-null? line)
+    (if (or (string-null? line)
+            (string=? line "\r"))
         (values *eof* *eof*)
         (let* ((delim (or (string-index line #\:)
                           (bad-header '%read line)))
diff --git a/test-suite/tests/rdelim.test b/test-suite/tests/rdelim.test
index 1060d5c..3aaa0b2 100644
--- a/test-suite/tests/rdelim.test
+++ b/test-suite/tests/rdelim.test
@@ -1,7 +1,7 @@
 ;;;; rdelim.test --- Delimited I/O.      -*- mode: scheme; coding: utf-8; -*-
 ;;;; Ludovic Courtès <ludo@gnu.org>
 ;;;;
-;;;;   Copyright (C) 2011, 2013, 2014, 2021 Free Software Foundation, Inc.
+;;;;   Copyright (C) 2011, 2013, 2014 Free Software Foundation, Inc.
 ;;;;
 ;;;; This library is free software; you can redistribute it and/or
 ;;;; modify it under the terms of the GNU Lesser General Public
@@ -62,46 +62,6 @@
                          (read-line p 'split)))
            (eof-object? (read-line p)))))
 
-  (pass-if "two lines, split, CRLF"
-    (let* ((s "foo\r\nbar\r\n")
-           (p (open-input-string s)))
-      (and (equal? '(("foo" . "\r\n")
-                     ("bar" . "\r\n"))
-                   (list (read-line p 'split)
-                         (read-line p 'split)))
-           (eof-object? (read-line p)))))
-
-  (pass-if "two long lines, split, CRLF"
-    ;; Must be longer than 256 codepoints
-    (let* ((text0 "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")
-           (text1 (string-append text0 text0 text0 text0 text0))
-           (text2 (string-append text1 "\r\n" text1 "\r\n")))
-      (let* ((s text2)
-             (p (open-input-string s)))
-        (and (equal? `((,text1 . "\r\n")
-                       (,text1 . "\r\n"))
-                     (list (read-line p 'split)
-                           (read-line p 'split)))
-             (eof-object? (read-line p))))))
-
-  (pass-if "two lines, split, LS"
-    (let* ((s "foo\u2028bar\u2028")
-           (p (open-input-string s)))
-      (and (equal? '(("foo" . #\x2028)
-                     ("bar" . #\x2028))
-                   (list (read-line p 'split)
-                         (read-line p 'split)))
-           (eof-object? (read-line p)))))
-
-  (pass-if "two lines, split, PS"
-    (let* ((s "foo\u2029bar\u2029")
-           (p (open-input-string s)))
-      (and (equal? '(("foo" . #\x2029)
-                     ("bar" . #\x2029))
-                   (list (read-line p 'split)
-                         (read-line p 'split)))
-           (eof-object? (read-line p)))))
-
   (pass-if "two Greek lines, trim"
     (let* ((s "λαμβδα\nμυ\n")
            (p (open-input-string s)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]