coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

RFE: head,tail: -z, --zero-terminated


From: Richard Russon
Subject: RFE: head,tail: -z, --zero-terminated
Date: Sat, 26 Sep 2015 15:43:40 +0100
User-agent: Roundcube Webmail/1.1.2

I'd like to add an option to both head and tail,
to allow them to work with NUL-terminated lines of text
    -z, --zero-terminated

Thus allowing:

    find dir -type f -print0 | head -z -n 10 | xargs -0 command

I've written the code (trivial), tested it and documented the changes.
The code is in git (against today's master), with a diff below.

    https://github.com/flatcap/coreutils/tree/null

Cheers,
    Rich Russon (flatcap)

 NEWS               |  5 +++++
 doc/coreutils.texi | 12 ++++++++++++
 src/head.c         | 29 ++++++++++++++++++++++-------
 src/tail.c         | 24 +++++++++++++++++-------
 4 files changed, 56 insertions(+), 14 deletions(-)
---
diff --git c/NEWS w/NEWS
index 9aec259..aad4758 100644
--- c/NEWS
+++ w/NEWS
@@ -12,6 +12,11 @@ GNU coreutils NEWS -*- outline -*-
   base32 is added to complement the existing base64 command,
   and encodes and decodes printable text as per RFC 4648.

+** New features
+
+  head, tail now have -z, --zero-terminated options to work with
+  NUL-terminated lines of text.
+
 ** Changes in behavior

   base64 no longer supports hex or oct --wrap parameters,
diff --git c/doc/coreutils.texi w/doc/coreutils.texi
index a029ec6..99330d4 100644
--- c/doc/coreutils.texi
+++ w/doc/coreutils.texi
@@ -2808,6 +2808,12 @@ Never print file name headers.
 @opindex --verbose
 Always print file name headers.

+@item -z
+@itemx --zero-terminated
+@opindex -z
+@opindex --zero-terminated
+Input lines are NUL-terminated.
+
 @end table

 For compatibility @command{head} also supports an obsolete option syntax
@@ -3023,6 +3029,12 @@ Never print file name headers.
 @opindex --verbose
 Always print file name headers.

+@item -z
+@itemx --zero-terminated
+@opindex -z
+@opindex --zero-terminated
+Input lines are NUL-terminated.
+
 @end table

 For compatibility @command{tail} also supports an obsolete usage
diff --git c/src/head.c w/src/head.c
index 410cc4f..11b636b 100644
--- c/src/head.c
+++ w/src/head.c
@@ -59,6 +59,9 @@ static bool presume_input_pipe;
 /* If true, print filename headers. */
 static bool print_headers;

+/* Character to split lines by. */
+static char line_end;
+
 /* When to print the filename banners. */
 enum header_mode
 {
@@ -91,6 +94,7 @@ static struct option const long_options[] =
   {"quiet", no_argument, NULL, 'q'},
   {"silent", no_argument, NULL, 'q'},
   {"verbose", no_argument, NULL, 'v'},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -126,6 +130,7 @@ With more than one FILE, precede each with a header giving the file name.\n\
       fputs (_("\
   -q, --quiet, --silent    never print headers giving file names\n\
   -v, --verbose            always print headers giving file names\n\
+  -z, --zero-terminated    line delimiter is NUL, not newline\n\
 "), stdout);
       fputs (HELP_OPTION_DESCRIPTION, stdout);
       fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -533,7 +538,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
       {
         char const *buffer_end = tmp->buffer + n_read;
         char const *p = tmp->buffer;
-        while ((p = memchr (p, '\n', buffer_end - p)))
+        while ((p = memchr (p, line_end, buffer_end - p)))
           {
             ++p;
             ++tmp->nlines;
@@ -582,7 +587,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,

   /* If we read any bytes at all, count the incomplete line
      on files that don't end with a newline.  */
-  if (last->nbytes && last->buffer[last->nbytes - 1] != '\n')
+  if (last->nbytes && last->buffer[last->nbytes - 1] != line_end)
     {
       ++last->nlines;
       ++total_lines;
@@ -601,7 +606,7 @@ elide_tail_lines_pipe (const char *filename, int fd, uintmax_t n_elide,
       size_t n = total_lines - n_elide;
       char const *buffer_end = tmp->buffer + tmp->nbytes;
       char const *p = tmp->buffer;
-      while (n && (p = memchr (p, '\n', buffer_end - p)))
+      while (n && (p = memchr (p, line_end, buffer_end - p)))
         {
           ++p;
           ++tmp->nlines;
@@ -665,7 +670,7 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,
   const bool all_lines = !n_lines;

   /* Count the incomplete line on files that don't end with a newline. */
-  if (n_lines && bytes_read && buffer[bytes_read - 1] != '\n')
+  if (n_lines && bytes_read && buffer[bytes_read - 1] != line_end)
     --n_lines;

   while (1)
@@ -680,7 +685,7 @@ elide_tail_lines_seekable (const char *pretty_filename, int fd,
           else
             {
               char const *nl;
-              nl = memrchr (buffer, '\n', n);
+              nl = memrchr (buffer, line_end, n);
               if (nl == NULL)
                 break;
               n = nl - buffer;
@@ -805,7 +810,7 @@ head_lines (const char *filename, int fd, uintmax_t lines_to_write)
       if (bytes_read == 0)
         break;
       while (bytes_to_write < bytes_read)
-        if (buffer[bytes_to_write++] == '\n' && --lines_to_write == 0)
+        if (buffer[bytes_to_write++] == line_end && --lines_to_write == 0)
           {
             off_t n_bytes_past_EOL = bytes_read - bytes_to_write;
             /* If we have read more data than that on the specified number
@@ -943,6 +948,8 @@ main (int argc, char **argv)

   print_headers = false;

+  line_end = '\n';
+
   if (1 < argc && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
     {
       char *a = argv[1];
@@ -987,6 +994,10 @@ main (int argc, char **argv)
               header_mode = always;
               break;

+            case 'z':
+              line_end = '\0';
+              break;
+
             default:
               error (0, 0, _("invalid trailing option -- %c"), *a);
               usage (EXIT_FAILURE);
@@ -1007,7 +1018,7 @@ main (int argc, char **argv)
       argc--;
     }

-  while ((c = getopt_long (argc, argv, "c:n:qv0123456789", long_options, NULL))
+  while ((c = getopt_long (argc, argv, "c:n:qvz0123456789", long_options, NULL))
          != -1)
     {
       switch (c)
@@ -1040,6 +1051,10 @@ main (int argc, char **argv)
           header_mode = always;
           break;

+        case 'z':
+          line_end = '\0';
+          break;
+
         case_GETOPT_HELP_CHAR;

         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
diff --git c/src/tail.c w/src/tail.c
index f916d74..f4b6a91 100644
--- c/src/tail.c
+++ w/src/tail.c
@@ -180,6 +180,9 @@ static bool from_start;
 /* If true, print filename headers.  */
 static bool print_headers;

+/* Character to split lines by. */
+static char line_end;
+
 /* When to print the filename banners.  */
 enum header_mode
 {
@@ -238,6 +241,7 @@ static struct option const long_options[] =
   {"silent", no_argument, NULL, 'q'},
   {"sleep-interval", required_argument, NULL, 's'},
   {"verbose", no_argument, NULL, 'v'},
+  {"zero-terminated", no_argument, NULL, 'z'},
   {GETOPT_HELP_OPTION_DECL},
   {GETOPT_VERSION_OPTION_DECL},
   {NULL, 0, NULL, 0}
@@ -296,6 +300,7 @@ With more than one FILE, precede each with a header giving the file name.\n\
                              with inotify and --pid=P, check process P at\n\
                              least once every N seconds\n\
   -v, --verbose            always output headers giving file names\n\
+  -z, --zero-terminated    line delimiter is NUL, not newline\n\
 "), stdout);
      fputs (HELP_OPTION_DESCRIPTION, stdout);
      fputs (VERSION_OPTION_DESCRIPTION, stdout);
@@ -501,7 +506,7 @@ file_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
   *read_pos = pos + bytes_read;

   /* Count the incomplete line on files that don't end with a newline. */
-  if (bytes_read && buffer[bytes_read - 1] != '\n')
+  if (bytes_read && buffer[bytes_read - 1] != line_end)
     --n_lines;

   do
@@ -512,7 +517,7 @@ file_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
       while (n)
         {
           char const *nl;
-          nl = memrchr (buffer, '\n', n);
+          nl = memrchr (buffer, line_end, n);
           if (nl == NULL)
             break;
           n = nl - buffer;
@@ -597,7 +602,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
       {
         char const *buffer_end = tmp->buffer + n_read;
         char const *p = tmp->buffer;
-        while ((p = memchr (p, '\n', buffer_end - p)))
+        while ((p = memchr (p, line_end, buffer_end - p)))
           {
             ++p;
             ++tmp->nlines;
@@ -651,7 +656,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
     goto free_lbuffers;

   /* Count the incomplete line on files that don't end with a newline. */
-  if (last->buffer[last->nbytes - 1] != '\n')
+  if (last->buffer[last->nbytes - 1] != line_end)
     {
       ++last->nlines;
       ++total_lines;
@@ -673,7 +678,7 @@ pipe_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
         size_t j;
         for (j = total_lines - n_lines; j; --j)
           {
-            beg = memchr (beg, '\n', buffer_end - beg);
+            beg = memchr (beg, line_end, buffer_end - beg);
             assert (beg);
             ++beg;
           }
@@ -859,7 +864,7 @@ start_lines (const char *pretty_filename, int fd, uintmax_t n_lines,
       *read_pos += bytes_read;

       char *p = buffer;
-      while ((p = memchr (p, '\n', buffer_end - p)))
+      while ((p = memchr (p, line_end, buffer_end - p)))
         {
           ++p;
           if (--n_lines == 0)
@@ -2036,7 +2041,7 @@ parse_options (int argc, char **argv,
 {
   int c;

-  while ((c = getopt_long (argc, argv, "c:n:fFqs:v0123456789",
+  while ((c = getopt_long (argc, argv, "c:n:fFqs:vz0123456789",
                            long_options, NULL))
          != -1)
     {
@@ -2113,6 +2118,10 @@ parse_options (int argc, char **argv,
           *header_mode = always;
           break;

+        case 'z':
+          line_end = '\0';
+          break;
+
         case_GETOPT_HELP_CHAR;

         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
@@ -2210,6 +2219,7 @@ main (int argc, char **argv)

   count_lines = true;
   forever = from_start = print_headers = false;
+  line_end = '\n';
   obsolete_option = parse_obsolete_option (argc, argv, &n_units);
   argc -= obsolete_option;
   argv += obsolete_option;



reply via email to

[Prev in Thread] Current Thread [Next in Thread]