[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] cut: Improved large file support on 32 bit archs
From: Tobias Stoeckmann
Subject: [PATCH] cut: Improved large file support on 32 bit archs
Date: Mon, 26 Mar 2018 13:37:12 +0200
Ranges in cut are limited to SIZE_MAX, which means that they
cannot exceed 4 GB on 32 bit systems. These systems generally have
large file support turned on, i.e. off_t is 64 bits wide. Even
without large file support, streams like standard input can
exceed 4 GB.
This patch replaces the limitation of SIZE_MAX with UINTMAX_MAX,
which is at least 64 bit on i386 and amd64.
The test case for cut, written in 2013, mentioned that SIZE_MAX is
a required limitation due to allocation per line, which is not the
case any longer (discussion happened in bug#13127).
The test cases mentioned in that bug report pass as of today
(I use 2^32-2 instead of 2^32-1 to avoid triggering the safety checks):
$ uname -m
i686
$ echo a | cut -b1-$(echo '2^32-2'|bc)
a
$ _
And this is the output of original and patched cut with a large file:
$ echo 1 | dd of=test.4gb bs=1 count=1 seek=$((2**32-1))
$ cut -c$((2**32)) test.4gb
cut: byte/character offset ‘4294967296’ is too large
Try 'cut --help' for more information.
$ new-cut -c$((2**32)) test.4gb
1
$ _
Please note that, because of the shared code base, numfmt now
supports larger field indices as well.
No functional change on amd64.
Signed-off-by: Tobias Stoeckmann <address@hidden>
---
src/cut.c | 10 +++++-----
src/numfmt.c | 8 ++++----
src/set-fields.c | 34 +++++++++++++++++-----------------
src/set-fields.h | 4 ++--
tests/misc/cut-huge-range.sh | 9 +++------
tests/misc/numfmt.pl | 4 ++--
6 files changed, 33 insertions(+), 36 deletions(-)
diff --git a/src/cut.c b/src/cut.c
index be2e67af4..5dc511af3 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -204,7 +204,7 @@ Each range is one of:\n\
and if required CURRENT_RP. */
static inline void
-next_item (size_t *item_idx)
+next_item (uintmax_t *item_idx)
{
(*item_idx)++;
if ((*item_idx) > current_rp->hi)
@@ -214,7 +214,7 @@ next_item (size_t *item_idx)
/* Return nonzero if the K'th field or byte is printable. */
static inline bool
-print_kth (size_t k)
+print_kth (uintmax_t k)
{
return current_rp->lo <= k;
}
@@ -222,7 +222,7 @@ print_kth (size_t k)
/* Return nonzero if K'th byte is the beginning of a range. */
static inline bool
-is_range_start_index (size_t k)
+is_range_start_index (uintmax_t k)
{
return k == current_rp->lo;
}
@@ -232,7 +232,7 @@ is_range_start_index (size_t k)
static void
cut_bytes (FILE *stream)
{
- size_t byte_idx; /* Number of bytes in the line so far. */
+ uintmax_t byte_idx; /* Number of bytes in the line so far. */
/* Whether to begin printing delimiters between ranges for the current line.
Set after we've begun printing data corresponding to the first range. */
bool print_delimiter;
@@ -286,7 +286,7 @@ static void
cut_fields (FILE *stream)
{
int c;
- size_t field_idx = 1;
+ uintmax_t field_idx = 1;
bool found_any_selected_field = false;
bool buffer_first_field;
diff --git a/src/numfmt.c b/src/numfmt.c
index 130e0388d..ce5c131e4 100644
--- a/src/numfmt.c
+++ b/src/numfmt.c
@@ -1351,13 +1351,13 @@ next_field (char **line)
}
static bool _GL_ATTRIBUTE_PURE
-include_field (size_t field)
+include_field (uintmax_t field)
{
struct field_range_pair *p = frp;
if (!p)
return field == 1;
- while (p->lo != SIZE_MAX)
+ while (p->lo != UINTMAX_MAX)
{
if (p->lo <= field && p->hi >= field)
return true;
@@ -1369,7 +1369,7 @@ include_field (size_t field)
/* Convert and output the given field. If it is not included in the set
of fields to process just output the original */
static bool
-process_field (char *text, size_t field)
+process_field (char *text, uintmax_t field)
{
long double val = 0;
size_t precision = 0;
@@ -1400,7 +1400,7 @@ static int
process_line (char *line, bool newline)
{
char *next;
- size_t field = 0;
+ uintmax_t field = 0;
bool valid_number = true;
while (true) {
diff --git a/src/set-fields.c b/src/set-fields.c
index 20687b293..0119e3f99 100644
--- a/src/set-fields.c
+++ b/src/set-fields.c
@@ -45,7 +45,7 @@ static size_t n_frp_allocated;
space if necessary. Update global variable N_FRP. When allocating,
update global variable N_FRP_ALLOCATED. */
static void
-add_range_pair (size_t lo, size_t hi)
+add_range_pair (uintmax_t lo, uintmax_t hi)
{
if (n_frp == n_frp_allocated)
frp = X2NREALLOC (frp, &n_frp_allocated);
@@ -89,8 +89,8 @@ complement_rp (void)
add_range_pair (c[i-1].hi + 1, c[i].lo - 1);
}
- if (c[n-1].hi < SIZE_MAX)
- add_range_pair (c[n-1].hi + 1, SIZE_MAX);
+ if (c[n-1].hi < UINTMAX_MAX)
+ add_range_pair (c[n-1].hi + 1, UINTMAX_MAX);
free (c);
}
@@ -100,7 +100,7 @@ complement_rp (void)
be composed of one or more numbers or ranges of numbers, separated
by blanks or commas. Incomplete ranges may be given: '-m' means '1-m';
'n-' means 'n' through end of line.
- n=0 and n>=SIZE_MAX values will trigger an error.
+ n=0 and n>=UINTMAX_MAX values will trigger an error.
if SETFLD_ALLOW_DASH option is used, a single '-' means all fields
(otherwise a single dash triggers an error).
@@ -121,24 +121,24 @@ complement_rp (void)
The first field is stored as 1 (zero is not used).
An open-ended range (i.e., until the last field of the input line)
- is indicated with hi = SIZE_MAX.
+ is indicated with hi = UINTMAX_MAX.
- A sentinel of SIZE_MAX/SIZE_MAX is always added as the last
+ A sentinel of UINTMAX_MAX/UINTMAX_MAX is always added as the last
field range pair.
Examples:
- given '1-2,4', frp = [ { .lo = 1, .hi = 2 },
- { .lo = 4, .hi = 4 },
- { .lo = SIZE_MAX, .hi = SIZE_MAX } ];
+ given '1-2,4', frp = [ { .lo = 1, .hi = 2 },
+ { .lo = 4, .hi = 4 },
+ { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ];
- given '3-', frp = [ { .lo = 3, .hi = SIZE_MAX },
- { .lo = SIZE_MAX, .hi = SIZE_MAX } ];
+ given '3-', frp = [ { .lo = 3, .hi = UINTMAX_MAX },
+ { .lo = UINTMAX_MAX, .hi = UINTMAX_MAX } ];
*/
void
set_fields (const char *fieldstr, unsigned int options)
{
- size_t initial = 1; /* Value of first number in a range. */
- size_t value = 0; /* If nonzero, a number being accumulated. */
+ uintmax_t initial = 1; /* Value of first number in a range. */
+ uintmax_t value = 0; /* If nonzero, a number being accumulated. */
bool lhs_specified = false;
bool rhs_specified = false;
bool dash_found = false; /* True if a '-' is found in this field. */
@@ -201,7 +201,7 @@ set_fields (const char *fieldstr, unsigned int options)
if (!rhs_specified)
{
/* 'n-'. From 'initial' to end of line. */
- add_range_pair (initial, SIZE_MAX);
+ add_range_pair (initial, UINTMAX_MAX);
}
else
{
@@ -247,8 +247,8 @@ set_fields (const char *fieldstr, unsigned int options)
lhs_specified = 1;
/* Detect overflow. */
- if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', size_t)
- || value == SIZE_MAX)
+ if (!DECIMAL_DIGIT_ACCUMULATE (value, *fieldstr - '0', uintmax_t)
+ || value == UINTMAX_MAX)
{
/* In case the user specified -c$(echo 2^64|bc),22,
complain only about the first number. */
@@ -307,7 +307,7 @@ set_fields (const char *fieldstr, unsigned int options)
and for performance reasons. */
++n_frp;
frp = xrealloc (frp, n_frp * sizeof (struct field_range_pair));
- frp[n_frp - 1].lo = frp[n_frp - 1].hi = SIZE_MAX;
+ frp[n_frp - 1].lo = frp[n_frp - 1].hi = UINTMAX_MAX;
}
void
diff --git a/src/set-fields.h b/src/set-fields.h
index b9bab1fd5..06f5ba8f3 100644
--- a/src/set-fields.h
+++ b/src/set-fields.h
@@ -19,8 +19,8 @@
struct field_range_pair
{
- size_t lo;
- size_t hi;
+ uintmax_t lo;
+ uintmax_t hi;
};
/* Array of `struct range_pair' holding all the finite ranges. */
diff --git a/tests/misc/cut-huge-range.sh b/tests/misc/cut-huge-range.sh
index e7c17c222..01936266f 100755
--- a/tests/misc/cut-huge-range.sh
+++ b/tests/misc/cut-huge-range.sh
@@ -44,11 +44,9 @@ subtract_one='
'
# Ensure we can cut up to our sentinel value.
-# This is currently SIZE_MAX, but could be raised to UINTMAX_MAX
-# if we didn't allocate memory for each line as a unit.
# Don't use expr to subtract one,
-# since SIZE_MAX may exceed its maximum value.
-CUT_MAX=$(echo $SIZE_MAX | sed "$subtract_one")
+# since UINTMAX_MAX may exceed its maximum value.
+CUT_MAX=$(echo $UINTMAX_MAX | sed "$subtract_one")
# From coreutils-8.10 through 8.20, this would make cut try to allocate
# a 256MiB bit vector.
@@ -59,8 +57,7 @@ CUT_MAX=$(echo $SIZE_MAX | sed "$subtract_one")
(ulimit -v $vm && cut -b1-$CUT_MAX /dev/null >> err 2>&1) || fail=1
# Explicitly disallow values above CUT_MAX
-(ulimit -v $vm && returns_ 1 cut -b$SIZE_MAX /dev/null 2>/dev/null) || fail=1
-(ulimit -v $vm && returns_ 1 cut -b$SIZE_OFLOW /dev/null 2>/dev/null) || fail=1
+(ulimit -v $vm && returns_ 1 cut -b$UINTMAX_MAX /dev/null 2>/dev/null) || fail=1
compare /dev/null err || fail=1
diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl
index 6b3623399..dee4a1d58 100755
--- a/tests/misc/numfmt.pl
+++ b/tests/misc/numfmt.pl
@@ -308,9 +308,9 @@ my @Tests =
{EXIT=>1}, {ERR=>"$prog: invalid field range\n$try"}],
['field-range-err-12','--field 0-1 --to=si 10',
{EXIT=>1}, {ERR=>"$prog: fields are numbered from 1\n$try"}],
- ['field-range-err-13','--field '.$limits->{SIZE_MAX}.',22 --to=si 10',
+ ['field-range-err-13','--field '.$limits->{UINTMAX_MAX}.',22 --to=si 10',
{EXIT=>1}, {ERR=>"$prog: field number " .
- "'".$limits->{SIZE_MAX}."' is too large\n$try"}],
+ "'".$limits->{UINTMAX_MAX}."' is too large\n$try"}],
# Auto-consume white-space, setup auto-padding
['whitespace-1', '--to=si --field 2 "A 500 B"', {OUT=>"A 500 B"}],
--
2.16.3
- [PATCH] cut: Improved large file support on 32 bit archs,
Tobias Stoeckmann <=
- Prev by Date:
Re: Multibyte support for sort, uniq, join, tr, cut, paste, expand, unexpand, fmt, fold, and pr
- Next by Date:
Re: Multibyte support for sort, uniq, join, tr, cut, paste, expand, unexpand, fmt, fold, and pr
- Previous by thread:
sleep command documentation clarification request.
- Next by thread:
Re: [PATCH] cut: Improved large file support on 32 bit archs
- Index(es):