bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

remove minor arbitrary numeric restrictions from join, sort, uniq


From: Paul Eggert
Subject: remove minor arbitrary numeric restrictions from join, sort, uniq
Date: Tue, 12 Dec 2006 16:53:03 -0800
User-agent: Gnus/5.1008 (Gnus v5.10.8) Emacs/21.4 (gnu/linux)

While looking into some POSIX conformance issues for 'sort', I noticed
an arbitrary limit in 'sort' that we can remove.  "sort -k
1000000000000000000" currently complains that the count is too large,
but we can implement that case correctly without too much trouble.
The same issue affects join and uniq.

It's kind of silly, I suppose, but since the fix is easy and (except
for uniq) simplifies the code we might as well do it.

Here is a proposed patch.

This patch verifies that SIZE_MAX <= ULONG_MAX in a couple of places,
but there are many, many more places in coreutils where that
assumption is made, so perhaps that test should be moved to system.h?
Your call.


2006-12-12  Paul Eggert  <address@hidden>

        Remove some arbitrary restrictions on size fields, so that
        commands like "sort -k 18446744073709551616" no longer fail merely
        because 18446744073709551616 doesn't fit in uintmax_t.  The trick
        is that these fields can all be treated as effectively infinity;
        their exact values don't matter, since no internal buffer can be
        that long.
        * src/join.c (string_to_join_field): Verify that SIZE_MAX <=
        ULONG_MAX if the code assumes this.  Silently truncate too-large
        values to SIZE_MAX, as the remaining code will do the right thing
        in this case.
        * src/sort.c (parse_field_count): Likewise.
        * src/uniq.c (size_opt, main): Likewise.
        * tests/join/Test.pm (bigfield): New test.
        * tests/sort/Test.pm (bigfield): New test.
        * tests/uniq/Test.pm (121): New test.

diff --git a/src/join.c b/src/join.c
index 77a3896..b113c54 100644
--- a/src/join.c
+++ b/src/join.c
@@ -599,7 +599,8 @@ add_field (int file, size_t field)

 /* Convert a string of decimal digits, STR (the 1-based join field number),
    to an integral value.  Upon successful conversion, return one less
-   (the zero-based field number).  If it cannot be converted, give a
+   (the zero-based field number).  Silently convert too-large values
+   to SIZE_MAX - 1.  Otherwise, if a value cannot be converted, give a
    diagnostic and exit.  */

 static size_t
@@ -607,16 +608,12 @@ string_to_join_field (char const *str)
 {
   size_t result;
   unsigned long int val;
+  verify (SIZE_MAX <= ULONG_MAX);

   strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
   if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
-    {
-      error (EXIT_FAILURE, 0,
-            _("value %s is so large that it is not representable"),
-            quote (str));
-    }
-
-  if (s_err != LONGINT_OK || val == 0)
+    val = SIZE_MAX;
+  else if (s_err != LONGINT_OK || val == 0)
     error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));

   result = val - 1;
diff --git a/src/sort.c b/src/sort.c
index feaf5a5..f03237c 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2170,7 +2170,8 @@ check_ordering_compatibility (void)

 /* Parse the leading integer in STRING and store the resulting value
    (which must fit into size_t) into *VAL.  Return the address of the
-   suffix after the integer.  If MSGID is NULL, return NULL after
+   suffix after the integer.  If the value is too large, silently
+   substitute SIZE_MAX.  If MSGID is NULL, return NULL after
    failure; otherwise, report MSGID and exit on failure.  */

 static char const *
@@ -2189,10 +2190,8 @@ parse_field_count (char const *string, s
       /* Fall through.  */
     case LONGINT_OVERFLOW:
     case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
-      if (msgid)
-       error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
-              _(msgid), (int) (suffix - string), string);
-      return NULL;
+      *val = SIZE_MAX;
+      break;

     case LONGINT_INVALID:
       if (msgid)
diff --git a/src/uniq.c b/src/uniq.c
index 0d60961..7e5a291 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -172,17 +172,26 @@ Fields are skipped before chars.\n\
   exit (status);
 }

-/* Convert OPT to size_t, reporting an error using MSGID if it does
-   not fit.  */
+/* Convert OPT to size_t, reporting an error using MSGID if OPT is
+   invalid.  Silently convert too-large values to SIZE_MAX.  */

 static size_t
 size_opt (char const *opt, char const *msgid)
 {
   unsigned long int size;
-  if (xstrtoul (opt, NULL, 10, &size, "") != LONGINT_OK
-      || SIZE_MAX < size)
-    error (EXIT_FAILURE, 0, "%s: %s", opt, _(msgid));
-  return size;
+  verify (SIZE_MAX <= ULONG_MAX);
+
+  switch (xstrtoul (opt, NULL, 10, &size, ""))
+    {
+    case LONGINT_OK:
+    case LONGINT_OVERFLOW:
+      break;
+
+    default:
+      error (EXIT_FAILURE, 0, "%s: %s", opt, _(msgid));
+    }
+
+  return MIN (size, SIZE_MAX);
 }

 /* Given a linebuffer LINE,
@@ -471,9 +480,13 @@ main (int argc, char **argv)
            if (skip_field_option_type == SFO_NEW)
              skip_fields = 0;

-           if (!DECIMAL_DIGIT_ACCUMULATE (skip_fields, optc - '0', size_t))
-             error (EXIT_FAILURE, 0, "%s",
-                    _("invalid number of fields to skip"));
+           skip_fields =
+             ((skip_fields < SIZE_MAX / 10
+               || (skip_fields == SIZE_MAX / 10
+                   && optc - '0' <= (int) (SIZE_MAX % 10)))
+              ? 10 * skip_fields + (optc - '0')
+              : SIZE_MAX);
+
            skip_field_option_type = SFO_OBSOLETE;
          }
          break;
diff --git a/tests/join/Test.pm b/tests/join/Test.pm
index 37b1449..3d60ce3 100644
--- a/tests/join/Test.pm
+++ b/tests/join/Test.pm
@@ -136,6 +136,9 @@ my @tv = (
  [t_subst "a:1\nb:1\n", t_subst "a:2:\nb:2:\n"],
  t_subst "a:1:2:\nb:1:2:\n", 0],

+['bigfield', '-1 340282366920938463463374607431768211456 -2 2',
+ ["a\n", "b\n"], " a b\n", 0],
+
 # FIXME: change this to ensure the diagnostic makes sense
 ['invalid-j', '-j x', {}, "", 1],

diff --git a/tests/sort/Test.pm b/tests/sort/Test.pm
index fbc6c49..6bed61b 100755
--- a/tests/sort/Test.pm
+++ b/tests/sort/Test.pm
@@ -275,6 +275,9 @@ my @tv = (

 # -t '\0' is accepted, as of coreutils-5.0.91
 ['nul-tab', "-k2,2 -t '\\0'", "a\0z\01\nb\0y\02\n", "b\0y\02\na\0z\01\n", 0],
+
+["bigfield", '-k 340282366920938463463374607431768211456',
+ "2\n1\n", "1\n2\n", 0],
 );

 sub test_vector
diff --git a/tests/uniq/Test.pm b/tests/uniq/Test.pm
index fe0fc68..1eda7e2 100644
--- a/tests/uniq/Test.pm
+++ b/tests/uniq/Test.pm
@@ -105,6 +105,8 @@ my @tv = (
 ['119', '--all-repeated=badoption', "a\n",           "",                 1],
 # Check that -d and -u suppress all output, as POSIX requires.
 ['120', '-d -u', "a\na\n\b",        "",                         0],
+['121', '-d -u -w340282366920938463463374607431768211456',
+                "a\na\n\b",        "",                         0],
 );

 sub test_vector
M ChangeLog
M src/join.c
M src/sort.c
M src/uniq.c
M tests/join/Test.pm
M tests/sort/Test.pm
M tests/uniq/Test.pm
Committed as f967b553f437d4a3bfaf07688383b75eca89f18b




reply via email to

[Prev in Thread] Current Thread [Next in Thread]