bug-guile
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#20302: peek-char messes up file position on binary string ports


From: Mark H Weaver
Subject: bug#20302: peek-char messes up file position on binary string ports
Date: Sun, 06 Sep 2015 07:55:02 -0400
User-agent: Gnus/5.13 (Gnus v5.13) Emacs/24.5 (gnu/linux)

Mark H Weaver <address@hidden> writes:

> David Kastrup <address@hidden> writes:
>
>> (use-modules (rnrs bytevectors) (rnrs io ports))
>> (let ((port (open-bytevector-input-port
>>           (string->utf8 "Blablabla\nBlablabla\n"))))
>>   (seek port 13 SEEK_SET)
>>   (format #t "~c ~d\n" (peek-char port)
>>        (ftell port)))
>> ;; Outputs b 3 but should output b 13
>>
>> This is using
>> guile (GNU Guile) 2.0.11
>> Packaged by Debian (2.0.11-deb+1-1)
>
> Ouch :-(
>
> The problem is that r6rs-ports.c:bip_seek assumes that
> c_port->read_{buf,pos,end} point to the original bytevector, and fail to
> handle the case where it points to a "putback" buffer.
>
> Note that (ftell port) is equivalent to (seek port 0 SEEK_CUR).

I've attached a preliminary patch set to fix this bug and some others.
I'm going to hold off on pushing them to stable-2.0 until I've added
tests and convinced myself that I haven't introduced any new problems.

     Mark


>From 1d398a5ee910f3edf17b8b86e29a7fbe967071ec Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sun, 6 Sep 2015 07:33:55 -0400
Subject: [PATCH 1/3] build: Add SCM_T_OFF_MAX and SCM_T_OFF_MIN to
 scmconfig.h.

* libguile/gen-scmconfig.c (main): Add SCM_T_OFF_MAX and SCM_T_OFF_MIN
  to the generated 'scmconfig.h' file.
---
 libguile/gen-scmconfig.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/libguile/gen-scmconfig.c b/libguile/gen-scmconfig.c
index 2f6fa6e..e433bd1 100644
--- a/libguile/gen-scmconfig.c
+++ b/libguile/gen-scmconfig.c
@@ -376,10 +376,16 @@ main (int argc, char *argv[])
 
 #if defined GUILE_USE_64_CALLS && defined HAVE_STAT64
   pf ("typedef scm_t_int64 scm_t_off;\n");
+  pf ("#define SCM_T_OFF_MAX SCM_T_INT64_MAX\n");
+  pf ("#define SCM_T_OFF_MIN SCM_T_INT64_MIN\n");
 #elif SIZEOF_OFF_T == SIZEOF_INT
   pf ("typedef int scm_t_off;\n");
+  pf ("#define SCM_T_OFF_MAX INT_MAX\n");
+  pf ("#define SCM_T_OFF_MIN INT_MIN\n");
 #else
   pf ("typedef long int scm_t_off;\n");
+  pf ("#define SCM_T_OFF_MAX LONG_MAX\n");
+  pf ("#define SCM_T_OFF_MIN LONG_MIN\n");
 #endif
 
   pf ("/* Define to 1 if the compiler supports the "
-- 
2.5.0

>From 53a6a5f7a66ac1f7b92a7347dc9486db14c73df5 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sun, 6 Sep 2015 07:35:58 -0400
Subject: [PATCH 2/3] PRELIMINARY: Fix seeking on binary input ports with
 putback buffers.

Fixes <http://bugs.gnu.org/20302>.
Reported by David Kastrup <address@hidden>.

* libguile/r6rs-ports.c (bip_end_input): New static function.
  (initialize_bytevector_input_ports): Register it.
  (bip_seek): Rewrite to handle putback buffers, based on st_seek from
  strports.c.
---
 libguile/r6rs-ports.c | 83 +++++++++++++++++++++++++++++++++------------------
 1 file changed, 54 insertions(+), 29 deletions(-)

diff --git a/libguile/r6rs-ports.c b/libguile/r6rs-ports.c
index a17b7b4..1bf766c 100644
--- a/libguile/r6rs-ports.c
+++ b/libguile/r6rs-ports.c
@@ -125,45 +125,69 @@ bip_fill_input (SCM port)
   return result;
 }
 
+static void
+bip_end_input (SCM port, int offset)
+{
+  scm_t_port *c_port = SCM_PTAB_ENTRY (port);
+  
+  if (c_port->read_pos - c_port->read_buf < offset)
+    scm_misc_error ("bip_end_input", "negative position", SCM_EOL);
+
+  c_port->read_pos -= offset;
+}
+
 static scm_t_off
 bip_seek (SCM port, scm_t_off offset, int whence)
 #define FUNC_NAME "bip_seek"
 {
-  scm_t_off c_result = 0;
   scm_t_port *c_port = SCM_PTAB_ENTRY (port);
+  scm_t_off target;
 
-  switch (whence)
+  if (offset == 0 && whence == SEEK_CUR)
+    /* special case to avoid disturbing the putback buffer.  */
     {
-    case SEEK_CUR:
-      offset += c_port->read_pos - c_port->read_buf;
-      /* Fall through.  */
-
-    case SEEK_SET:
-      if (c_port->read_buf + offset <= c_port->read_end)
-       {
-         c_port->read_pos = c_port->read_buf + offset;
-         c_result = offset;
-       }
+      if (c_port->read_buf == c_port->putback_buf)
+        target = c_port->saved_read_pos - c_port->saved_read_buf
+          - (c_port->read_end - c_port->read_pos);
       else
-       scm_out_of_range (FUNC_NAME, scm_from_int (offset));
-      break;
+        target = c_port->read_pos - c_port->read_buf;
+    }
+  else
+    {
+      scm_t_off base = 0;
 
-    case SEEK_END:
-      if (c_port->read_end - offset >= c_port->read_buf)
-       {
-         c_port->read_pos = c_port->read_end - offset;
-         c_result = c_port->read_pos - c_port->read_buf;
-       }
-      else
-       scm_out_of_range (FUNC_NAME, scm_from_int (offset));
-      break;
+      /* If the putback buffer is currently active, this will dump its
+         contents, switch back to the main read buffer, and move
+         read_pos backwards as needed to account for the bytes that were
+         put back. */
+      if (c_port->read_buf == c_port->putback_buf)
+        scm_end_input (port);
 
-    default:
-      scm_wrong_type_arg_msg (FUNC_NAME, 0, port,
-                             "invalid `seek' parameter");
-    }
+      switch (whence)
+        {
+        case SEEK_CUR:
+          base = c_port->read_pos - c_port->read_buf;
+          break;
+        case SEEK_END:
+          base = c_port->read_end - c_port->read_buf;
+          break;
+        case SEEK_SET:
+          base = 0;
+          break;
+        default:
+          scm_wrong_type_arg_msg (FUNC_NAME, 0, port,
+                                  "invalid `whence' argument");
+        }
 
-  return c_result;
+      if (offset > SCM_T_OFF_MAX - base)  /* Overflow check */
+        scm_out_of_range (FUNC_NAME, scm_from_int (offset));
+      target = base + offset;
+      if (target < 0 || target > c_port->read_end - c_port->read_buf)
+        scm_out_of_range (FUNC_NAME, scm_from_int (offset));
+
+      c_port->read_pos = c_port->read_buf + target;
+    }
+  return target;
 }
 #undef FUNC_NAME
 
@@ -176,7 +200,8 @@ initialize_bytevector_input_ports (void)
     scm_make_port_type ("r6rs-bytevector-input-port", bip_fill_input,
                        NULL);
 
-  scm_set_port_seek (bytevector_input_port_type, bip_seek);
+  scm_set_port_end_input (bytevector_input_port_type, bip_end_input);
+  scm_set_port_seek      (bytevector_input_port_type, bip_seek);
 }
 
 
-- 
2.5.0

>From dc09359733feb68590c905d77e2172ec6e3781d0 Mon Sep 17 00:00:00 2001
From: Mark H Weaver <address@hidden>
Date: Sun, 6 Sep 2015 07:42:07 -0400
Subject: [PATCH 3/3] PRELIMINARY: string ports: Add overflow checks and other
 fixes.

* libguile/strports.c (st_resize_port): Check that 'new_size' fits in a
  size_t.
  (st_end_input): Improve code clarity.
  (st_seek): Check for overflow during computation of target position.
  Check for invalid 'whence' argument.  Resize the port when seeking to
  a position beyond the end of the buffer.  Check for overflow during
  computation of new buffer size when resizing the port.
---
 libguile/strports.c | 64 +++++++++++++++++++++++++++++++++--------------------
 1 file changed, 40 insertions(+), 24 deletions(-)

diff --git a/libguile/strports.c b/libguile/strports.c
index f306019..18f1970 100644
--- a/libguile/strports.c
+++ b/libguile/strports.c
@@ -103,28 +103,33 @@ stfill_buffer (SCM port)
 static void
 st_resize_port (scm_t_port *pt, scm_t_off new_size)
 {
-  SCM old_stream = SCM_PACK (pt->stream);
-  const signed char *src = SCM_BYTEVECTOR_CONTENTS (old_stream);
-  SCM new_stream = scm_c_make_bytevector (new_size);
-  signed char *dst = SCM_BYTEVECTOR_CONTENTS (new_stream);
-  unsigned long int old_size = SCM_BYTEVECTOR_LENGTH (old_stream);
-  unsigned long int min_size = min (old_size, new_size);
+  if (new_size < 0 || new_size > SCM_I_SIZE_MAX)
+    scm_misc_error ("st_resize_port", "new_size overflow", SCM_EOL);
 
-  scm_t_off index = pt->write_pos - pt->write_buf;
+  {
+    SCM old_stream = SCM_PACK (pt->stream);
+    const signed char *src = SCM_BYTEVECTOR_CONTENTS (old_stream);
+    SCM new_stream = scm_c_make_bytevector (new_size);
+    signed char *dst = SCM_BYTEVECTOR_CONTENTS (new_stream);
+    unsigned long int old_size = SCM_BYTEVECTOR_LENGTH (old_stream);
+    unsigned long int min_size = min (old_size, new_size);
 
-  pt->write_buf_size = new_size;
+    scm_t_off index = pt->write_pos - pt->write_buf;
 
-  memcpy (dst, src, min_size);
+    pt->write_buf_size = new_size;
 
-  scm_remember_upto_here_1 (old_stream);
+    memcpy (dst, src, min_size);
 
-  /* reset buffer. */
-  {
-    pt->stream = SCM_UNPACK (new_stream);
-    pt->read_buf = pt->write_buf = (unsigned char *)dst;
-    pt->read_pos = pt->write_pos = pt->write_buf + index;
-    pt->write_end = pt->write_buf + pt->write_buf_size;
-    pt->read_end = pt->read_buf + pt->read_buf_size;
+    scm_remember_upto_here_1 (old_stream);
+
+    /* reset buffer. */
+    {
+      pt->stream = SCM_UNPACK (new_stream);
+      pt->read_buf = pt->write_buf = (unsigned char *)dst;
+      pt->read_pos = pt->write_pos = pt->write_buf + index;
+      pt->write_end = pt->write_buf + pt->write_buf_size;
+      pt->read_end = pt->read_buf + pt->read_buf_size;
+    }
   }
 }
 
@@ -176,7 +181,8 @@ st_end_input (SCM port, int offset)
   if (pt->read_pos - pt->read_buf < offset)
     scm_misc_error ("st_end_input", "negative position", SCM_EOL);
 
-  pt->write_pos = (unsigned char *) (pt->read_pos = pt->read_pos - offset);
+  pt->read_pos -= offset;
+  pt->write_pos = (unsigned char *) pt->read_pos;
   pt->rw_active = SCM_PORT_NEITHER;
 }
 
@@ -202,6 +208,8 @@ st_seek (SCM port, scm_t_off offset, int whence)
   else
     /* all other cases.  */
     {
+      scm_t_off base = 0;
+
       if (pt->rw_active == SCM_PORT_WRITE)
        st_flush (port);
   
@@ -211,18 +219,24 @@ st_seek (SCM port, scm_t_off offset, int whence)
       switch (whence)
        {
        case SEEK_CUR:
-         target = pt->read_pos - pt->read_buf + offset;
+         base = pt->read_pos - pt->read_buf;
          break;
        case SEEK_END:
-         target = pt->read_end - pt->read_buf + offset;
+         base = pt->read_end - pt->read_buf;
          break;
-       default: /* SEEK_SET */
-         target = offset;
+       case SEEK_SET:
+         base = 0;
          break;
+       default:
+         scm_wrong_type_arg_msg ("st_seek", 0, port,
+                                 "invalid `whence' argument");
        }
 
+      if (offset > SCM_T_OFF_MAX - base)
+       scm_misc_error ("st_seek", "target would overflow", SCM_EOL);
+      target = base + offset;
       if (target < 0)
-       scm_misc_error ("st_seek", "negative offset", SCM_EOL);
+       scm_misc_error ("st_seek", "negative target", SCM_EOL);
   
       if (target >= pt->write_buf_size)
        {
@@ -235,7 +249,9 @@ st_seek (SCM port, scm_t_off offset, int whence)
                                  SCM_EOL);
                }
            }
-         else if (target == pt->write_buf_size)
+         else if (target > SCM_T_OFF_MAX - target)
+           scm_misc_error ("st_seek", "target * 2 would overflow", SCM_EOL);
+         else
            st_resize_port (pt, target * 2);
        }
       pt->read_pos = pt->write_pos = pt->read_buf + target;
-- 
2.5.0


reply via email to

[Prev in Thread] Current Thread [Next in Thread]