[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
remove minor arbitrary numeric restrictions from join, sort, uniq
From: |
Paul Eggert |
Subject: |
remove minor arbitrary numeric restrictions from join, sort, uniq |
Date: |
Tue, 12 Dec 2006 16:53:03 -0800 |
User-agent: |
Gnus/5.1008 (Gnus v5.10.8) Emacs/21.4 (gnu/linux) |
While looking into some POSIX conformance issues for 'sort', I noticed
an arbitrary limit in 'sort' that we can remove. "sort -k
1000000000000000000" currently complains that the count is too large,
but we can implement that case correctly without too much trouble.
The same issue affects join and uniq.
It's kind of silly, I suppose, but since the fix is easy and (except
for uniq) simplifies the code we might as well do it.
Here is a proposed patch.
This patch verifies that SIZE_MAX <= ULONG_MAX in a couple of places,
but there are many, many more places in coreutils where that
assumption is made, so perhaps that test should be moved to system.h?
Your call.
2006-12-12 Paul Eggert <address@hidden>
Remove some arbitrary restrictions on size fields, so that
commands like "sort -k 18446744073709551616" no longer fail merely
because 18446744073709551616 doesn't fit in uintmax_t. The trick
is that these fields can all be treated as effectively infinity;
their exact values don't matter, since no internal buffer can be
that long.
* src/join.c (string_to_join_field): Verify that SIZE_MAX <=
ULONG_MAX if the code assumes this. Silently truncate too-large
values to SIZE_MAX, as the remaining code will do the right thing
in this case.
* src/sort.c (parse_field_count): Likewise.
* src/uniq.c (size_opt, main): Likewise.
* tests/join/Test.pm (bigfield): New test.
* tests/sort/Test.pm (bigfield): New test.
* tests/uniq/Test.pm (121): New test.
diff --git a/src/join.c b/src/join.c
index 77a3896..b113c54 100644
--- a/src/join.c
+++ b/src/join.c
@@ -599,7 +599,8 @@ add_field (int file, size_t field)
/* Convert a string of decimal digits, STR (the 1-based join field number),
to an integral value. Upon successful conversion, return one less
- (the zero-based field number). If it cannot be converted, give a
+ (the zero-based field number). Silently convert too-large values
+ to SIZE_MAX - 1. Otherwise, if a value cannot be converted, give a
diagnostic and exit. */
static size_t
@@ -607,16 +608,12 @@ string_to_join_field (char const *str)
{
size_t result;
unsigned long int val;
+ verify (SIZE_MAX <= ULONG_MAX);
strtol_error s_err = xstrtoul (str, NULL, 10, &val, "");
if (s_err == LONGINT_OVERFLOW || (s_err == LONGINT_OK && SIZE_MAX < val))
- {
- error (EXIT_FAILURE, 0,
- _("value %s is so large that it is not representable"),
- quote (str));
- }
-
- if (s_err != LONGINT_OK || val == 0)
+ val = SIZE_MAX;
+ else if (s_err != LONGINT_OK || val == 0)
error (EXIT_FAILURE, 0, _("invalid field number: %s"), quote (str));
result = val - 1;
diff --git a/src/sort.c b/src/sort.c
index feaf5a5..f03237c 100644
--- a/src/sort.c
+++ b/src/sort.c
@@ -2170,7 +2170,8 @@ check_ordering_compatibility (void)
/* Parse the leading integer in STRING and store the resulting value
(which must fit into size_t) into *VAL. Return the address of the
- suffix after the integer. If MSGID is NULL, return NULL after
+ suffix after the integer. If the value is too large, silently
+ substitute SIZE_MAX. If MSGID is NULL, return NULL after
failure; otherwise, report MSGID and exit on failure. */
static char const *
@@ -2189,10 +2190,8 @@ parse_field_count (char const *string, s
/* Fall through. */
case LONGINT_OVERFLOW:
case LONGINT_OVERFLOW | LONGINT_INVALID_SUFFIX_CHAR:
- if (msgid)
- error (SORT_FAILURE, 0, _("%s: count `%.*s' too large"),
- _(msgid), (int) (suffix - string), string);
- return NULL;
+ *val = SIZE_MAX;
+ break;
case LONGINT_INVALID:
if (msgid)
diff --git a/src/uniq.c b/src/uniq.c
index 0d60961..7e5a291 100644
--- a/src/uniq.c
+++ b/src/uniq.c
@@ -172,17 +172,26 @@ Fields are skipped before chars.\n\
exit (status);
}
-/* Convert OPT to size_t, reporting an error using MSGID if it does
- not fit. */
+/* Convert OPT to size_t, reporting an error using MSGID if OPT is
+ invalid. Silently convert too-large values to SIZE_MAX. */
static size_t
size_opt (char const *opt, char const *msgid)
{
unsigned long int size;
- if (xstrtoul (opt, NULL, 10, &size, "") != LONGINT_OK
- || SIZE_MAX < size)
- error (EXIT_FAILURE, 0, "%s: %s", opt, _(msgid));
- return size;
+ verify (SIZE_MAX <= ULONG_MAX);
+
+ switch (xstrtoul (opt, NULL, 10, &size, ""))
+ {
+ case LONGINT_OK:
+ case LONGINT_OVERFLOW:
+ break;
+
+ default:
+ error (EXIT_FAILURE, 0, "%s: %s", opt, _(msgid));
+ }
+
+ return MIN (size, SIZE_MAX);
}
/* Given a linebuffer LINE,
@@ -471,9 +480,13 @@ main (int argc, char **argv)
if (skip_field_option_type == SFO_NEW)
skip_fields = 0;
- if (!DECIMAL_DIGIT_ACCUMULATE (skip_fields, optc - '0', size_t))
- error (EXIT_FAILURE, 0, "%s",
- _("invalid number of fields to skip"));
+ skip_fields =
+ ((skip_fields < SIZE_MAX / 10
+ || (skip_fields == SIZE_MAX / 10
+ && optc - '0' <= (int) (SIZE_MAX % 10)))
+ ? 10 * skip_fields + (optc - '0')
+ : SIZE_MAX);
+
skip_field_option_type = SFO_OBSOLETE;
}
break;
diff --git a/tests/join/Test.pm b/tests/join/Test.pm
index 37b1449..3d60ce3 100644
--- a/tests/join/Test.pm
+++ b/tests/join/Test.pm
@@ -136,6 +136,9 @@ my @tv = (
[t_subst "a:1\nb:1\n", t_subst "a:2:\nb:2:\n"],
t_subst "a:1:2:\nb:1:2:\n", 0],
+['bigfield', '-1 340282366920938463463374607431768211456 -2 2',
+ ["a\n", "b\n"], " a b\n", 0],
+
# FIXME: change this to ensure the diagnostic makes sense
['invalid-j', '-j x', {}, "", 1],
diff --git a/tests/sort/Test.pm b/tests/sort/Test.pm
index fbc6c49..6bed61b 100755
--- a/tests/sort/Test.pm
+++ b/tests/sort/Test.pm
@@ -275,6 +275,9 @@ my @tv = (
# -t '\0' is accepted, as of coreutils-5.0.91
['nul-tab', "-k2,2 -t '\\0'", "a\0z\01\nb\0y\02\n", "b\0y\02\na\0z\01\n", 0],
+
+["bigfield", '-k 340282366920938463463374607431768211456',
+ "2\n1\n", "1\n2\n", 0],
);
sub test_vector
diff --git a/tests/uniq/Test.pm b/tests/uniq/Test.pm
index fe0fc68..1eda7e2 100644
--- a/tests/uniq/Test.pm
+++ b/tests/uniq/Test.pm
@@ -105,6 +105,8 @@ my @tv = (
['119', '--all-repeated=badoption', "a\n", "", 1],
# Check that -d and -u suppress all output, as POSIX requires.
['120', '-d -u', "a\na\n\b", "", 0],
+['121', '-d -u -w340282366920938463463374607431768211456',
+ "a\na\n\b", "", 0],
);
sub test_vector
M ChangeLog
M src/join.c
M src/sort.c
M src/uniq.c
M tests/join/Test.pm
M tests/sort/Test.pm
M tests/uniq/Test.pm
Committed as f967b553f437d4a3bfaf07688383b75eca89f18b
- remove minor arbitrary numeric restrictions from join, sort, uniq,
Paul Eggert <=