Re: PATCH: larger output file sizes for 'split'

bug-coreutils
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: PATCH: larger output file sizes for 'split'

From:	Paul Eggert
Subject:	Re: PATCH: larger output file sizes for 'split'
Date:	Wed, 02 May 2007 13:57:35 -0700
User-agent:	Gnus/5.110006 (No Gnus v0.6) Emacs/21.4 (gnu/linux)
Evan Hunt <address@hidden> writes:

> Here's a one-line patch to split.c (plus a few more lines of doc changes
> in that file and coreutils.texi) to allow it to emit output files sized in
> gigabytes, terabytes, etc.

Sounds reasonable to me, but let's do that more consistently with the other
programs that take POSIX-specified suffixes.  Here's a proposed patch.

2007-05-02  Paul Eggert  <address@hidden>

        The following commands and options now support the standard size
        suffixes kB, M, MB, G, GB, and so on for T, P, E, Z, and Y:
        head -c, head -n, od -j, od -N, od -S, split -b, split -C,
        tail -c, tail -n.
        * doc/coreutils.texi (od invocation, head invocation, tail invocation):
        Document support for new size suffixes.
        (head invocation, tail invocation):
        Document that -n uses the same suffixes as -c.
        (tail invocation): More-clearly document what leading "+" does.
        * src/head.c (usage, string_to_integer): Support new suffixes.
        * src/od.c (usage, main): Likewise.
        * src/split.c (usage, main): Likewise.
        * src/tail.c (usage, parse_options): Likewise.

diff --git a/src/head.c b/src/head.c
index 9d83064..0e4fe6c 100644
--- a/src/head.c
+++ b/src/head.c
@@ -135,7 +135,9 @@ Mandatory arguments to long options are mandatory for short options too.\n\
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
       fputs (_("\
 \n\
-N may have a multiplier suffix: b 512, k 1024, m 1024*1024.\n\
+N may have a multiplier suffix:\n\
+b 512, kB 1000, K 1024, MB 1000*1000, M 1024*1024,\n\
+GB 1000*1000*1000, G 1024*1024*1024, and so on for T, P, E, Z, Y.\n\
 "), stdout);
       emit_bug_reporting_address ();
     }
@@ -860,8 +862,8 @@ head_file (const char *filename, uintmax_t n_units, bool count_lines,
   return ok;
 }

-/* Convert a string of decimal digits, N_STRING, with a single, optional suffix
-   character (b, k, or m) to an integral value.  Upon successful conversion,
+/* Convert a string of decimal digits, N_STRING, with an optional suffix
+   to an integral value.  Upon successful conversion,
    return that value.  If it cannot be converted, give a diagnostic and exit.
    COUNT_LINES indicates whether N_STRING is a number of bytes or a number
    of lines.  It is used solely to give a more specific diagnostic.  */
@@ -872,7 +874,7 @@ string_to_integer (bool count_lines, const char *n_string)
   strtol_error s_err;
   uintmax_t n;

-  s_err = xstrtoumax (n_string, NULL, 10, &n, "bkm");
+  s_err = xstrtoumax (n_string, NULL, 10, &n, "bkKmMGTPEZY0");

   if (s_err == LONGINT_OVERFLOW)
     {
diff --git a/src/od.c b/src/od.c
index 1ae7834..c5eefe1 100644
--- a/src/od.c
+++ b/src/od.c
@@ -383,10 +383,11 @@ for sizeof(double) or L for sizeof(long double).\n\
       fputs (_("\
 \n\
 RADIX is d for decimal, o for octal, x for hexadecimal or n for none.\n\
-BYTES is hexadecimal with 0x or 0X prefix, it is multiplied by 512\n\
-with b suffix, by 1024 with k and by 1048576 with m.  Adding a z suffix to\n\
-any type adds a display of printable characters to the end of each line\n\
-of output.  \
+BYTES is hexadecimal with 0x or 0X prefix, and may have a multiplier suffix:\n\
+b 512, kB 1000, K 1024, MB 1000*1000, M 1024*1024,\n\
+GB 1000*1000*1000, G 1024*1024*1024, and so on for T, P, E, Z, Y.\n\
+Adding a z suffix to any type displays printable characters at the end of each\n\
+output line.  \
 "), stdout);
       fputs (_("\
 --string without a number implies 3.  --width without a number\n\
@@ -1563,6 +1564,7 @@ main (int argc, char **argv)
   bool modern = false;
   bool width_specified = false;
   bool ok = true;
+  static char const multipliers[] = "bEGKkMmPTYZ0";

   /* The old-style `pseudo starting address' to be printed in parentheses
      after any true address.  */
@@ -1650,7 +1652,7 @@ it must be one character from [doxn]"),

        case 'j':
          modern = true;
-         s_err = xstrtoumax (optarg, NULL, 0, &n_bytes_to_skip, "bkm");
+         s_err = xstrtoumax (optarg, NULL, 0, &n_bytes_to_skip, multipliers);
          if (s_err != LONGINT_OK)
            STRTOL_FATAL_ERROR (optarg, _("skip argument"), s_err);
          break;
@@ -1659,7 +1661,8 @@ it must be one character from [doxn]"),
          modern = true;
          limit_bytes_to_format = true;

-         s_err = xstrtoumax (optarg, NULL, 0, &max_bytes_to_format, "bkm");
+         s_err = xstrtoumax (optarg, NULL, 0, &max_bytes_to_format,
+                             multipliers);
          if (s_err != LONGINT_OK)
            STRTOL_FATAL_ERROR (optarg, _("limit argument"), s_err);
          break;
@@ -1670,7 +1673,7 @@ it must be one character from [doxn]"),
            string_min = 3;
          else
            {
-             s_err = xstrtoumax (optarg, NULL, 0, &tmp, "bkm");
+             s_err = xstrtoumax (optarg, NULL, 0, &tmp, multipliers);
              if (s_err != LONGINT_OK)
                STRTOL_FATAL_ERROR (optarg, _("minimum string length"), s_err);

diff --git a/src/split.c b/src/split.c
index 207cc13..a5ab8fa 100644
--- a/src/split.c
+++ b/src/split.c
@@ -131,7 +131,9 @@ Mandatory arguments to long options are mandatory for short options too.\n\
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
       fputs (_("\
 \n\
-SIZE may have a multiplier suffix: b for 512, k for 1K, m for 1 Meg.\n\
+SIZE may have a multiplier suffix:\n\
+b 512, kB 1000, K 1024, MB 1000*1000, M 1024*1024,\n\
+GB 1000*1000*1000, G 1024*1024*1024, and so on for T, P, E, Z, Y.\n\
 "), stdout);
       emit_bug_reporting_address ();
     }
@@ -388,6 +390,7 @@ main (int argc, char **argv)
   char *buf;                   /* file i/o buffer */
   size_t page_size = getpagesize ();
   uintmax_t n_units;
+  static char const multipliers[] = "bEGKkMmPTYZ0";
   int c;
   int digits_optind = 0;

@@ -432,7 +435,7 @@ main (int argc, char **argv)
          if (split_type != type_undef)
            FAIL_ONLY_ONE_WAY ();
          split_type = type_bytes;
-         if (xstrtoumax (optarg, NULL, 10, &n_units, "bkm") != LONGINT_OK
+         if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
              || n_units == 0)
            {
              error (0, 0, _("%s: invalid number of bytes"), optarg);
@@ -456,7 +459,7 @@ main (int argc, char **argv)
          if (split_type != type_undef)
            FAIL_ONLY_ONE_WAY ();
          split_type = type_byteslines;
-         if (xstrtoumax (optarg, NULL, 10, &n_units, "bkm") != LONGINT_OK
+         if (xstrtoumax (optarg, NULL, 10, &n_units, multipliers) != LONGINT_OK
              || n_units == 0 || SIZE_MAX < n_units)
            {
              error (0, 0, _("%s: invalid number of bytes"), optarg);
diff --git a/src/tail.c b/src/tail.c
index 210a681..2e7db0b 100644
--- a/src/tail.c
+++ b/src/tail.c
@@ -267,7 +267,8 @@ Mandatory arguments to long options are mandatory for short options too.\n\
 If the first character of N (the number of bytes or lines) is a `+',\n\
 print beginning with the Nth item from the start of each file, otherwise,\n\
 print the last N items in the file.  N may have a multiplier suffix:\n\
-b 512, k 1024, m 1024*1024.\n\
+b 512, kB 1000, K 1024, MB 1000*1000, M 1024*1024,\n\
+GB 1000*1000*1000, G 1024*1024*1024, and so on for T, P, E, Z, Y.\n\
 \n\
 "), stdout);
      fputs (_("\
@@ -1475,7 +1476,7 @@ parse_options (int argc, char **argv,

          {
            strtol_error s_err;
-           s_err = xstrtoumax (optarg, NULL, 10, n_units, "bkm");
+           s_err = xstrtoumax (optarg, NULL, 10, n_units, "bkKmMGTPEZY0");
            if (s_err != LONGINT_OK)
              {
                error (EXIT_FAILURE, 0, "%s: %s", optarg,
diff --git a/doc/coreutils.texi b/doc/coreutils.texi
index c535dad..6fc72ec 100644
--- a/doc/coreutils.texi
+++ b/doc/coreutils.texi
@@ -1661,8 +1661,11 @@ The default is octal.
 Skip @var{bytes} input bytes before formatting and writing.  If
 @var{bytes} begins with @samp{0x} or @samp{0X}, it is interpreted in
 hexadecimal; otherwise, if it begins with @samp{0}, in octal; otherwise,
-in decimal.  Appending @samp{b} multiplies @var{bytes} by 512, @samp{k}
-by 1024, and @samp{m} by 1048576.
+in decimal.  Appending @samp{b} multiplies @var{bytes} by 512,
+@samp{kB} by 1000, @samp{K} by 1024,
+@samp{MB} by 1000*1000, @samp{M} by 1024*1024,
+@samp{GB} by 1000*1000*1000, @samp{G} by 1024*1024*1024,
+and so on for @samp{T}, @samp{P}, @samp{E}, @samp{Z}, and @samp{Y}.

 @item -N @var{bytes}
 @itemx address@hidden
@@ -1671,14 +1674,16 @@ by 1024, and @samp{m} by 1048576.
 Output at most @var{bytes} bytes of the input.  Prefixes and suffixes on
 @code{bytes} are interpreted as for the @option{-j} option.

-@item -S @var{n}
-@itemx --strings[=@var{n}]
+@item -S @var{bytes}
+@itemx --strings[=@var{bytes}]
 @opindex -S
 @opindex --strings
 @cindex string constants, outputting
 Instead of the normal output, output only @dfn{string constants}: at
-least @var{n} consecutive @acronym{ASCII} graphic characters,
+least @var{bytes} consecutive @acronym{ASCII} graphic characters,
 followed by a null (zero) byte.
+Prefixes and suffixes on @code{bytes} are interpreted as for the
+@option{-j} option.

 If @var{n} is omitted with @option{--strings}, the default is 3.

@@ -2490,11 +2495,14 @@ The program accepts the following options.  Also see @ref{Common options}.
 @itemx --bytes=@var{n}
 @opindex -c
 @opindex --bytes
-Print the first @var{n} bytes, instead of initial lines.  Appending
-@samp{b} multiplies @var{n} by 512, @samp{k} by 1024, and @samp{m}
-by 1048576.
+Print the first @var{n} bytes, instead of initial lines.
 However, if @var{n} starts with a @samp{-},
 print all but the last @var{n} bytes of each file.
+Appending @samp{b} multiplies @var{n} by 512,
+@samp{kB} by 1000, @samp{K} by 1024,
+@samp{MB} by 1000*1000, @samp{M} by 1024*1024,
+@samp{GB} by 1000*1000*1000, @samp{G} by 1024*1024*1024,
+and so on for @samp{T}, @samp{P}, @samp{E}, @samp{Z}, and @samp{Y}.

 @itemx -n @var{n}
 @itemx address@hidden
@@ -2503,6 +2511,7 @@ print all but the last @var{n} bytes of each file.
 Output the first @var{n} lines.
 However, if @var{n} starts with a @samp{-},
 print all but the last @var{n} lines of each file.
+Size multiplier suffixes are the same as with the @option{-c} option.

 @item -q
 @itemx --quiet
@@ -2567,10 +2576,6 @@ only reverse files that are at most as large as its buffer, which is
 typically 32 address@hidden  A more reliable and versatile way to reverse 
files is
 the @sc{gnu} @command{tac} command.

-If any option-argument is a number @var{n} starting with a @samp{+},
-@command{tail} begins printing with the @var{n}th item from the start of
-each file, instead of from the end.
-
 The program accepts the following options.  Also see @ref{Common options}.

 @table @samp
@@ -2579,9 +2584,14 @@ The program accepts the following options.  Also see @ref{Common options}.
 @itemx --bytes=@var{bytes}
 @opindex -c
 @opindex --bytes
-Output the last @var{bytes} bytes, instead of final lines.  Appending
-@samp{b} multiplies @var{bytes} by 512, @samp{k} by 1024, and @samp{m}
-by 1048576.
+Output the last @var{bytes} bytes, instead of final lines.
+However, if @var{n} starts with a @samp{+}, start printing with the
+@var{n}th byte from the start of each file, instead of from the end.
+Appending @samp{b} multiplies @var{bytes} by 512,
+@samp{kB} by 1000, @samp{K} by 1024,
+@samp{MB} by 1000*1000, @samp{M} by 1024*1024,
+@samp{GB} by 1000*1000*1000, @samp{G} by 1024*1024*1024,
+and so on for @samp{T}, @samp{P}, @samp{E}, @samp{Z}, and @samp{Y}.

 @item -f
 @itemx address@hidden
@@ -2691,6 +2701,9 @@ This option is meaningful only when following by name.
 @opindex -n
 @opindex --lines
 Output the last @var{n} lines.
+However, if @var{n} starts with a @samp{+}, start printing with the
+@var{n}th line from the start of each file, instead of from the end.
+Size multiplier suffixes are the same as with the @option{-c} option.

 @item -q
 @itemx --quiet
@@ -2797,8 +2810,11 @@ option syntax @address@hidden  New scripts should use 
@option{-l
 @opindex -b
 @opindex --bytes
 Put the first @var{bytes} bytes of @var{input} into each output file.
-Appending @samp{b} multiplies @var{bytes} by 512, @samp{k} by 1024, and
-@samp{m} by 1048576.
+Appending @samp{b} multiplies @var{bytes} by 512,
+@samp{kB} by 1000, @samp{K} by 1024,
+@samp{MB} by 1000*1000, @samp{M} by 1024*1024,
+@samp{GB} by 1000*1000*1000, @samp{G} by 1024*1024*1024,
+and so on for @samp{T}, @samp{P}, @samp{E}, @samp{Z}, and @samp{Y}.

 @item -C @var{bytes}
 @itemx address@hidden
[Prev in Thread]
Current Thread
[Next in Thread]
PATCH: larger output file sizes for 'split', Evan Hunt, 2007/05/01
- Re: PATCH: larger output file sizes for 'split', Paul Eggert <=
  - Re: PATCH: larger output file sizes for 'split', Jim Meyering, 2007/05/03
Prev by Date: Re: Failed: phase compiling: coreutils-6.9-3 failed
Next by Date: Re: coreutils-6.9 build/check-root: test/rm failed on no-give-up
Previous by thread: PATCH: larger output file sizes for 'split'
Next by thread: Re: PATCH: larger output file sizes for 'split'
Index(es):
- Date
- Thread