bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#7357: csplit: memory exhausted when using stdout / pipe instead of a


From: Paul Eggert
Subject: bug#7357: csplit: memory exhausted when using stdout / pipe instead of a file
Date: Wed, 10 Nov 2010 20:37:38 -0800
User-agent: Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.2.12) Gecko/20101027 Thunderbird/3.1.6

On 11/10/10 10:49, Eric Blake wrote:
> Actually, the next version of POSIX will require EOVERFLOW if printf is
> directed to print more than INT_MAX bytes:

Thanks, I didn't know that.  But this issue is slightly different.
For example, glibc (RHEL 5 x86-64) treats printf ("%4294967297d", 0)
as if it were printf ("%1d", 0), and returns 1 without setting
EOVERFLOW.  I expect that this bug is common, and our code should be
robust when it happens.

There are other printf problems in this area too, I'm afraid.  max_out
does not check for size_t overflow and can mess up big-time when it
happens.  And get_format_flags does not reject flag combinations that
lead to undefined behavior.  There are other issues with size_t overflow.
It's no doubt possible to get a buffer overflow on some platforms.

I scouted through the code looking for undefined behavior with printf
formats in csplit and fixed everything I found.  I propose the
following further patch to address them.

>From 2b1f077d355b56b49c82f4f5223ee76a4f1b3396 Mon Sep 17 00:00:00 2001
From: Paul Eggert <address@hidden>
Date: Wed, 10 Nov 2010 20:34:52 -0800
Subject: [PATCH] csplit: do not rely on undefined behavior in printf formats

* doc/coreutils.texi (csplit invocation): Say that %d and %i are
aliases for %u.
* src/csplit.c (FLAG_THOUSANDS, FLAG_ALTERNATIVE): New constants.
(get_format_flags): Now take char const * and int * and return
size_t.  It now stores info about the flags instead of merely
scanning them.  Also, it handles '0' correctly.  Drop support for
the undocumented '+' and ' ' flags since the value is unsigned.
Add support for the (undocumented) "'" flag.  All uses changed.
(get_format_width, get_format_prec): Remove.
(check_format_conv_type): Renamed from get_format_conv_type, with
a different signature.  It now converts the format to one that is
compatible with unsigned int, and checks flags.  All uses changed.
(max_out): Have snprintf compute the number of bytes needed rather
than attempting to do it ourselves (which doesn't work portably
with outlandish formats such as %4294967296d).
(check_format_conv_type, main): Check for overflow in size
calculations.  Don't assume size_t fits in unsigned int.
* tests/misc/csplit: Check for proper handling of flags, with
%0#6.3x.  Coreutils 8.6 mishandles this somewhat-weird example.
---
 doc/coreutils.texi |    3 +-
 src/csplit.c       |  141 +++++++++++++++++++++++----------------------------
 tests/misc/csplit  |   15 ++++++
 3 files changed, 81 insertions(+), 78 deletions(-)

diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index 8dfb069..ce56b0e 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -3082,7 +3082,8 @@ specified, the suffix string must include exactly one
 @code{printf(3)}-style conversion specification, possibly including
 format specification flags, a field width, a precision specifications,
 or all of these kinds of modifiers.  The format letter must convert a
-binary integer argument to readable form; thus, only @samp{d}, @samp{i},
+binary unsigned integer argument to readable form.  The format letters
address@hidden and @samp{i} are aliases for @samp{u}, and the
 @samp{u}, @samp{o}, @samp{x}, and @samp{X} conversions are allowed.  The
 entire @var{suffix} is given (with the current output file number) to
 @code{sprintf(3)} to form the file name suffixes for each of the
diff --git a/src/csplit.c b/src/csplit.c
index 948a795..531e492 100644
--- a/src/csplit.c
+++ b/src/csplit.c
@@ -1181,81 +1181,64 @@ parse_patterns (int argc, int start, char **argv)
     }
 }
 
-static unsigned int
-get_format_flags (char **format_ptr)
+
+
+/* Names for the printf format flags ' and #.  These can be ORed together.  */
+enum { FLAG_THOUSANDS = 1, FLAG_ALTERNATIVE = 2 };
+
+/* Scan the printf format flags in FORMAT, storing info about the
+   flags into *FLAGS_PTR.  Return the number of flags found.  */
+static size_t
+get_format_flags (char const *format, int *flags_ptr)
 {
-  unsigned int count = 0;
+  int flags = 0;
 
-  for (; **format_ptr; (*format_ptr)++)
+  for (size_t count = 0; ; count++)
     {
-      switch (**format_ptr)
+      switch (format[count])
         {
         case '-':
+        case '0':
           break;
 
-        case '+':
-        case ' ':
-          count |= 1;
+        case '\'':
+          flags |= FLAG_THOUSANDS;
           break;
 
         case '#':
-          count |= 2;  /* Allow for 0x prefix preceding an `x' conversion.  */
+          flags |= FLAG_ALTERNATIVE;
           break;
 
         default:
+          *flags_ptr = flags;
           return count;
         }
     }
-  return count;
-}
-
-static size_t
-get_format_width (char **format_ptr)
-{
-  unsigned long int val = 0;
-
-  if (ISDIGIT (**format_ptr)
-      && (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
-          || SIZE_MAX < val))
-    error (EXIT_FAILURE, 0, _("invalid format width"));
-
-  /* Allow for enough octal digits to represent the value of UINT_MAX,
-     even if the field width is less than that.  */
-  return MAX (val, (sizeof (unsigned int) * CHAR_BIT + 2) / 3);
-}
-
-static size_t
-get_format_prec (char **format_ptr)
-{
-  if (**format_ptr != '.')
-    return 0;
-  (*format_ptr)++;
-
-  if (! ISDIGIT (**format_ptr))
-    return 0;
-  else
-    {
-      unsigned long int val;
-      if (xstrtoul (*format_ptr, format_ptr, 10, &val, NULL) != LONGINT_OK
-          || SIZE_MAX < val)
-        error (EXIT_FAILURE, 0, _("invalid format precision"));
-      return val;
-    }
 }
 
+/* Check that the printf format conversion specifier *FORMAT is valid
+   and compatible with FLAGS.  Change it to 'u' if it is 'd' or 'i',
+   since the format will be used with an unsigned value.  */
 static void
-get_format_conv_type (char **format_ptr)
+check_format_conv_type (char *format, int flags)
 {
-  unsigned char ch = *(*format_ptr)++;
+  unsigned char ch = *format;
+  int compatible_flags = FLAG_THOUSANDS;
 
   switch (ch)
     {
     case 'd':
     case 'i':
-    case 'o':
+      *format = 'u';
+      break;
+
     case 'u':
+      break;
+
+    case 'o':
     case 'x':
     case 'X':
+      compatible_flags = FLAG_ALTERNATIVE;
       break;
 
     case 0:
@@ -1270,45 +1253,46 @@ get_format_conv_type (char **format_ptr)
         error (EXIT_FAILURE, 0,
                _("invalid conversion specifier in suffix: \\%.3o"), ch);
     }
+
+  if (flags & ~ compatible_flags)
+    error (EXIT_FAILURE, 0,
+           _("invalid flags in conversion specification: %%%c%c"),
+           (flags & ~ compatible_flags & FLAG_ALTERNATIVE ? '#' : '\''), ch);
 }
 
+/* Return the maximum number of bytes that can be generated by
+   applying FORMAT to an unsigned int value.  If the format is
+   invalid, diagnose the problem and exit.  */
 static size_t
 max_out (char *format)
 {
-  size_t out_count = 0;
   bool percent = false;
 
-  while (*format)
-    {
-      if (*format++ != '%')
-        out_count++;
-      else if (*format == '%')
-        {
-          format++;
-          out_count++;
-        }
-      else
-        {
-          if (percent)
-            error (EXIT_FAILURE, 0,
-                   _("too many %% conversion specifications in suffix"));
-          percent = true;
-          out_count += get_format_flags (&format);
-          {
-            size_t width = get_format_width (&format);
-            size_t prec = get_format_prec (&format);
-
-            out_count += MAX (width, prec);
-          }
-          get_format_conv_type (&format);
-        }
-    }
+  for (char *f = format; *f; f++)
+    if (*f == '%' && *++f != '%')
+      {
+        if (percent)
+          error (EXIT_FAILURE, 0,
+                 _("too many %% conversion specifications in suffix"));
+        percent = true;
+        unsigned int flags;
+        f += get_format_flags (f, &flags);
+        while (ISDIGIT (*f))
+          f++;
+        if (*f == '.')
+          while (ISDIGIT (*++f))
+            continue;
+        check_format_conv_type (f, flags);
+      }
 
   if (! percent)
     error (EXIT_FAILURE, 0,
            _("missing %% conversion specification in suffix"));
 
-  return out_count;
+  int maxlen = snprintf (NULL, 0, format, UINT_MAX);
+  if (! (0 <= maxlen && maxlen <= SIZE_MAX))
+    xalloc_die ();
+  return maxlen;
 }
 
 int
@@ -1349,7 +1333,7 @@ main (int argc, char **argv)
 
       case 'n':
         if (xstrtoul (optarg, NULL, 10, &val, "") != LONGINT_OK
-            || val > INT_MAX)
+            || MIN (INT_MAX, SIZE_MAX) < val)
           error (EXIT_FAILURE, 0, _("%s: invalid number"), optarg);
         digits = val;
         break;
@@ -1380,11 +1364,14 @@ main (int argc, char **argv)
       usage (EXIT_FAILURE);
     }
 
-  unsigned int max_digit_string_len
+  size_t prefix_len = strlen (prefix);
+  size_t max_digit_string_len
     = (suffix
        ? max_out (suffix)
        : MAX (INT_STRLEN_BOUND (unsigned int), digits));
-  filename_space = xmalloc (strlen (prefix) + max_digit_string_len + 1);
+  if (SIZE_MAX - 1 - prefix_len < max_digit_string_len)
+    xalloc_die ();
+  filename_space = xmalloc (prefix_len + max_digit_string_len + 1);
 
   set_input_file (argv[optind++]);
 
diff --git a/tests/misc/csplit b/tests/misc/csplit
index bef7224..f365da3 100755
--- a/tests/misc/csplit
+++ b/tests/misc/csplit
@@ -67,6 +67,21 @@ EOF
 compare err experr || fail=1
 rm -f in out exp err experr
 
+# `echo | csplit -b '%0#6.3x' - 1' incorrectly warned about the format
+# up through coreutils 8.6.
+echo > in
+csplit -b '%0#6.3x' in 1 > out 2> err || fail=1
+cat <<EOF > exp
+0
+1
+EOF
+compare out exp || fail=1
+touch experr
+compare err experr || fail=1
+compare 'xx   000' experr || fail=1
+compare 'xx 0x001' in || fail=1
+rm -f in out exp err experr xx*
+
 # make sure `csplit FILE 0' fails.
 echo > in
 csplit in 0 > out 2> err && fail=1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]