coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: RFE: head,tail: -z, --zero-terminated


From: Pádraig Brady
Subject: Re: RFE: head,tail: -z, --zero-terminated
Date: Sat, 26 Sep 2015 18:42:19 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:38.0) Gecko/20100101 Thunderbird/38.2.0

On 26/09/15 15:43, Richard Russon wrote:
> I'd like to add an option to both head and tail,
> to allow them to work with NUL-terminated lines of text
>      -z, --zero-terminated
> 
> Thus allowing:
> 
>      find dir -type f -print0 | head -z -n 10 | xargs -0 command
> 
> I've written the code (trivial), tested it and documented the changes.
> The code is in git (against today's master), with a diff below.
> 
>      https://github.com/flatcap/coreutils/tree/null
> 
> Cheers,
>      Rich Russon (flatcap)
> 
>   NEWS               |  5 +++++
>   doc/coreutils.texi | 12 ++++++++++++
>   src/head.c         | 29 ++++++++++++++++++++++-------
>   src/tail.c         | 24 +++++++++++++++++-------
>   4 files changed, 56 insertions(+), 14 deletions(-)
> ---
> diff --git c/NEWS w/NEWS
> index 9aec259..aad4758 100644
> --- c/NEWS
> +++ w/NEWS
> @@ -12,6 +12,11 @@ GNU coreutils NEWS                                    -*- outline -*-
>     base32 is added to complement the existing base64 command,
>     and encodes and decodes printable text as per RFC 4648.
> 
> +** New features
> +
> +  head, tail now have -z, --zero-terminated options to work with
> +  NUL-terminated lines of text.
> +
>   ** Changes in behavior
> 
>     base64 no longer supports hex or oct --wrap parameters,
> diff --git c/doc/coreutils.texi w/doc/coreutils.texi
> index a029ec6..99330d4 100644
> --- c/doc/coreutils.texi
> +++ w/doc/coreutils.texi
> @@ -2808,6 +2808,12 @@ Never print file name headers.
>   @opindex --verbose
>   Always print file name headers.
> 
> +@item -z
> +@itemx --zero-terminated
> +@opindex -z
> +@opindex --zero-terminated
> +Input lines are NUL-terminated.
> +
>   @end table
> 
>   For compatibility @command{head} also supports an obsolete option syntax
> @@ -3023,6 +3029,12 @@ Never print file name headers.
>   @opindex --verbose
>   Always print file name headers.
> 
> +@item -z
> +@itemx --zero-terminated
> +@opindex -z
> +@opindex --zero-terminated
> +Input lines are NUL-terminated.
> +
>   @end table
> 
>   For compatibility @command{tail} also supports an obsolete usage
> diff --git c/src/head.c w/src/head.c
> index 410cc4f..11b636b 100644
> --- c/src/head.c
> +++ w/src/head.c
> @@ -59,6 +59,9 @@ static bool presume_input_pipe;
>   /* If true, print filename headers. */
>   static bool print_headers;
> 
> +/* Character to split lines by. */
> +static char line_end;
> +
>   /* When to print the filename banners. */
>   enum header_mode
>   {
> @@ -91,6 +94,7 @@ static struct option const long_options[] =
>     {"quiet", no_argument, NULL, 'q'},
>     {"silent", no_argument, NULL, 'q'},
>     {"verbose", no_argument, NULL, 'v'},
> +  {"zero-terminated", no_argument, NULL, 'z'},
>     {GETOPT_HELP_OPTION_DECL},
>     {GETOPT_VERSION_OPTION_DECL},
>     {NULL, 0, NULL, 0}
> @@ -126,6 +130,7 @@ With more than one FILE, precede each with a header giving the file name.\n\
>         fputs (_("\
>     -q, --quiet, --silent    never print headers giving file names\n\
>     -v, --verbose            always print headers giving file names\n\
> +  -z, --zero-terminated    line delimiter is NUL, not newline\n\
>   "), stdout);
>         fputs (HELP_OPTION_DESCRIPTION, stdout);
>         fputs (VERSION_OPTION_DESCRIPTION, stdout);
> @@ -533,7 +538,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
>         {
>           char const *buffer_end = tmp->buffer + n_read;
>           char const *p = tmp->buffer;
> -        while ((p = memchr (p, '\n', buffer_end - p)))
> +        while ((p = memchr (p, line_end, buffer_end - p)))
>             {
>               ++p;
>               ++tmp->nlines;
> @@ -582,7 +587,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
> 
>     /* If we read any bytes at all, count the incomplete line
>        on files that don't end with a newline.  */
> -  if (last->nbytes && last->buffer[last->nbytes - 1] != '\n')
> +  if (last->nbytes && last->buffer[last->nbytes - 1] != line_end)
>       {
>         ++last->nlines;
>         ++total_lines;
> @@ -601,7 +606,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
>         size_t n = total_lines - n_elide;
>         char const *buffer_end = tmp->buffer + tmp->nbytes;
>         char const *p = tmp->buffer;
> -      while (n && (p = memchr (p, '\n', buffer_end - p)))
> +      while (n && (p = memchr (p, line_end, buffer_end - p)))
>           {
>             ++p;
>             ++tmp->nlines;
> @@ -665,7 +670,7 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,
>     const bool all_lines = !n_lines;
> 
>     /* Count the incomplete line on files that don't end with a newline.  */
> -  if (n_lines && bytes_read && buffer[bytes_read - 1] != '\n')
> +  if (n_lines && bytes_read && buffer[bytes_read - 1] != line_end)
>       --n_lines;
> 
>     while (1)
> @@ -680,7 +685,7 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,
>             else
>               {
>                 char const *nl;
> -              nl = memrchr (buffer, '\n', n);
> +              nl = memrchr (buffer, line_end, n);
>                 if (nl == NULL)
>                   break;
>                 n = nl - buffer;
> @@ -805,7 +810,7 @@ head_lines (const char *filename, int fd, uintmax_t lines_to_write)
>         if (bytes_read == 0)
>           break;
>         while (bytes_to_write < bytes_read)
> -        if (buffer[bytes_to_write++] == '\n' && --lines_to_write == 0)
> +        if (buffer[bytes_to_write++] == line_end && --lines_to_write == 0)
>             {
>               off_t n_bytes_past_EOL = bytes_read - bytes_to_write;
>               /* If we have read more data than that on the specified number
> @@ -943,6 +948,8 @@ main (int argc, char **argv)
> 
>     print_headers = false;
> 
> +  line_end = '\n';
> +
>     if (1 < argc && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
>       {
>         char *a = argv[1];
> @@ -987,6 +994,10 @@ main (int argc, char **argv)
>                 header_mode = always;
>                 break;
> 
> +            case 'z':
> +              line_end = '\0';
> +              break;
> +
>               default:
>                 error (0, 0, _("invalid trailing option -- %c"), *a);
>                 usage (EXIT_FAILURE);
> @@ -1007,7 +1018,7 @@ main (int argc, char **argv)
>         argc--;
>       }
> 
> -  while ((c = getopt_long (argc, argv, "c:n:qv0123456789", long_options, NULL))
> +  while ((c = getopt_long (argc, argv, "c:n:qvz0123456789", long_options, NULL))
>            != -1)
>       {
>         switch (c)
> @@ -1040,6 +1051,10 @@ main (int argc, char **argv)
>             header_mode = always;
>             break;
> 
> +        case 'z':
> +          line_end = '\0';
> +          break;
> +
>           case_GETOPT_HELP_CHAR;
> 
>           case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
> diff --git c/src/tail.c w/src/tail.c
> index f916d74..f4b6a91 100644
> --- c/src/tail.c
> +++ w/src/tail.c
> @@ -180,6 +180,9 @@ static bool from_start;
>   /* If true, print filename headers.  */
>   static bool print_headers;
> 
> +/* Character to split lines by. */
> +static char line_end;
> +
>   /* When to print the filename banners.  */
>   enum header_mode
>   {
> @@ -238,6 +241,7 @@ static struct option const long_options[] =
>     {"silent", no_argument, NULL, 'q'},
>     {"sleep-interval", required_argument, NULL, 's'},
>     {"verbose", no_argument, NULL, 'v'},
> +  {"zero-terminated", no_argument, NULL, 'z'},
>     {GETOPT_HELP_OPTION_DECL},
>     {GETOPT_VERSION_OPTION_DECL},
>     {NULL, 0, NULL, 0}
> @@ -296,6 +300,7 @@ With more than one FILE, precede each with a header giving the file name.\n\
>                                with inotify and --pid=P, check process P at\n\
>                                least once every N seconds\n\
>     -v, --verbose            always output headers giving file names\n\
> +  -z, --zero-terminated    line delimiter is NUL, not newline\n\
>   "), stdout);
>        fputs (HELP_OPTION_DESCRIPTION, stdout);
>        fputs (VERSION_OPTION_DESCRIPTION, stdout);
> @@ -501,7 +506,7 @@ file_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
>     *read_pos = pos + bytes_read;
> 
>     /* Count the incomplete line on files that don't end with a newline.  */
> -  if (bytes_read && buffer[bytes_read - 1] != '\n')
> +  if (bytes_read && buffer[bytes_read - 1] != line_end)
>       --n_lines;
> 
>     do
> @@ -512,7 +517,7 @@ file_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
>         while (n)
>           {
>             char const *nl;
> -          nl = memrchr (buffer, '\n', n);
> +          nl = memrchr (buffer, line_end, n);
>             if (nl == NULL)
>               break;
>             n = nl - buffer;
> @@ -597,7 +602,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
>         {
>           char const *buffer_end = tmp->buffer + n_read;
>           char const *p = tmp->buffer;
> -        while ((p = memchr (p, '\n', buffer_end - p)))
> +        while ((p = memchr (p, line_end, buffer_end - p)))
>             {
>               ++p;
>               ++tmp->nlines;
> @@ -651,7 +656,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
>       goto free_lbuffers;
> 
>     /* Count the incomplete line on files that don't end with a newline.  */
> -  if (last->buffer[last->nbytes - 1] != '\n')
> +  if (last->buffer[last->nbytes - 1] != line_end)
>       {
>         ++last->nlines;
>         ++total_lines;
> @@ -673,7 +678,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
>           size_t j;
>           for (j = total_lines - n_lines; j; --j)
>             {
> -            beg = memchr (beg, '\n', buffer_end - beg);
> +            beg = memchr (beg, line_end, buffer_end - beg);
>               assert (beg);
>               ++beg;
>             }
> @@ -859,7 +864,7 @@ start_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
>         *read_pos += bytes_read;
> 
>         char *p = buffer;
> -      while ((p = memchr (p, '\n', buffer_end - p)))
> +      while ((p = memchr (p, line_end, buffer_end - p)))
>           {
>             ++p;
>             if (--n_lines == 0)
> @@ -2036,7 +2041,7 @@ parse_options (int argc, char **argv,
>   {
>     int c;
> 
> -  while ((c = getopt_long (argc, argv, "c:n:fFqs:v0123456789",
> +  while ((c = getopt_long (argc, argv, "c:n:fFqs:vz0123456789",
>                              long_options, NULL))
>            != -1)
>       {
> @@ -2113,6 +2118,10 @@ parse_options (int argc, char **argv,
>             *header_mode = always;
>             break;
> 
> +        case 'z':
> +          line_end = '\0';
> +          break;
> +
>           case_GETOPT_HELP_CHAR;
> 
>           case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
> @@ -2210,6 +2219,7 @@ main (int argc, char **argv)
> 
>     count_lines = true;
>     forever = from_start = print_headers = false;
> +  line_end = '\n';
>     obsolete_option = parse_obsolete_option (argc, argv, &n_units);
>     argc -= obsolete_option;
>     argv += obsolete_option;

This makes sense, I think.
You can achieve this by filtering through tr before and after,
though that's awkward. Also, head(1) and tail(1) have
different internal logic for pipes and seekable files.
So I'll probably merge this after adding a test.

cheers,
Pádraig



reply via email to

[Prev in Thread] Current Thread [Next in Thread]