diff --git a/NEWS b/NEWS index 5a25377..de4ebf4 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,11 @@ GNU coreutils NEWS -*- outline -*- permissions. [This bug was present in "the beginning".] +** New features + + uniq --group is a new option to print all items while separating + unique groups with empty lines. + * Noteworthy changes in release 8.21 (2013-02-14) [stable] diff --git a/doc/coreutils.texi b/doc/coreutils.texi index 2c16dc4..02e6f62 100644 --- a/doc/coreutils.texi +++ b/doc/coreutils.texi @@ -5067,7 +5067,7 @@ Do not discard the second and subsequent repeated input lines, but discard lines that are not repeated. This option is useful mainly in conjunction with other options e.g., to ignore case or to compare only selected fields. -The optional @var{delimit-method} tells how to delimit +The optional @var{delimit-method} specifies how to delimit groups of repeated lines, and must be one of the following: @table @samp @@ -5078,26 +5078,60 @@ This is equivalent to @option{--all-repeated} (@option{-D}). @item prepend Output a newline before each group of repeated lines. +@macro nulOutputNote With @option{--zero-terminated} (@option{-z}), use a zero -byte (ASCII NUL) instead of a newline. +byte (ASCII NUL) instead of a newline as the delimiter. +@end macro +@nulOutputNote @item separate Separate groups of repeated lines with a single newline. -With @option{--zero-terminated} (@option{-z}), use a zero -byte (ASCII NUL) instead of a newline. This is the same as using @samp{prepend}, except that no delimiter is inserted before the first group, and hence may be better suited for output direct to users. +@nulOutputNote @end table +@macro ambiguousGroupNote Note that when groups are delimited and the input stream contains two or more consecutive blank lines, then the output is ambiguous. To avoid that, filter the input through @samp{tr -s '\n'} to replace each sequence of consecutive newlines with a single newline. +@end macro +@ambiguousGroupNote This is a GNU extension. 
@c FIXME: give an example showing *how* it's useful +@item --group[=@var{delimit-method}] +@opindex --group +@cindex all lines, grouping +Output all lines, and delimit each unique group. +@nulOutputNote +The optional @var{delimit-method} specifies how to delimit +groups, and must be one of the following: + +@table @samp + +@item separate +Separate unique groups with a single delimiter. +This is the default delimiting method if none is specified, +and better suited for output direct to users. + +@item prepend +Output a delimiter before each group of unique items. + +@item append +Output a delimiter after each group of unique items. + +@item both +Output a delimiter around each group of unique items. +@end table + +@ambiguousGroupNote + +This is a GNU extension. + @item -u @itemx --unique @opindex -u diff --git a/src/uniq.c b/src/uniq.c index 598c62d..835b5b1 100644 --- a/src/uniq.c +++ b/src/uniq.c @@ -185,21 +185,24 @@ With no options, matching lines are merged to the first occurrence.\n\ -d, --repeated only print duplicate lines\n\ "), stdout); fputs (_("\ - -D, --all-repeated[=delimit-method] print all duplicate lines\n\ - delimit-method={none(default),prepend,separate}\n\ - Delimiting is done with blank lines\n\ + -D, --all-repeated[=METHOD] print all duplicate lines\n\ + groups can be delimited with an empty line\n\ + METHOD={none(default),prepend,separate}\n\ +"), stdout); + fputs (_("\ -f, --skip-fields=N avoid comparing the first N fields\n\ +"), stdout); + fputs (_("\ + --group[=METHOD] show all items, separating groups with an empty line\n\ + METHOD={separate(default),prepend,append,both}\n\ +"), stdout); + fputs (_("\ -i, --ignore-case ignore differences in case when comparing\n\ -s, --skip-chars=N avoid comparing the first N characters\n\ -u, --unique only print unique lines\n\ -z, --zero-terminated end lines with 0 byte, not newline\n\ "), stdout); fputs (_("\ - --group=[method] separate each unique group (whether duplicated or not)\n\ - with an empty 
line.\n\ - method={separate(default),prepend,append,both)\n\ -"), stdout); - fputs (_("\ -w, --check-chars=N compare no more than N characters in lines\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); @@ -334,21 +337,21 @@ check_file (const char *infile, const char *outfile, char delimiter) initbuffer (prevline); /* The duplication in the following 'if' and 'else' blocks is an - optimization to distinguish several cases: + optimization to distinguish between when we can print input + lines immediately (1. & 2.) or not. - 1. grouping (--group=X) - all input lines are printed. + 1. --group => all input lines are printed. checking for unique/duplicated lines is used only for printing group separators. - 2. The common case - - In which none of the following options has been specified: + 2. The default case in which none of these options has been specified: --count, --repeated, --all-repeated, --unique - In the common case, this optimization lets uniq output each different + In the default case, this optimization lets uniq output each different line right away, without waiting to see if the next one is different. 3. All other cases. 
*/ - if (grouping != GM_NONE) + if (output_unique && output_first_repeated && countmode == count_none) { char *prevfield IF_LINT ( = NULL); size_t prevlen IF_LINT ( = 0); @@ -359,46 +362,23 @@ check_file (const char *infile, const char *outfile, char delimiter) char *thisfield; size_t thislen; bool new_group; + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) break; + thisfield = find_field (thisline); thislen = thisline->length - 1 - (thisfield - thisline->buffer); new_group = (prevline->length == 0 || different (thisfield, prevfield, thislen, prevlen)); - if (new_group - && ( (grouping == GM_PREPEND) || (grouping == GM_BOTH) - || ( first_group_printed - && - ( grouping == GM_APPEND || grouping == GM_SEPARATE )))) + if (new_group && grouping != GM_NONE + && (grouping == GM_PREPEND || grouping == GM_BOTH + || (first_group_printed && (grouping == GM_APPEND + || grouping == GM_SEPARATE)))) putchar (delimiter); - fwrite (thisline->buffer, sizeof (char), thisline->length, stdout); - SWAP_LINES (prevline, thisline); - prevfield = thisfield; - prevlen = thislen; - first_group_printed = true; - } - if ( (grouping == GM_BOTH || grouping == GM_APPEND ) - && first_group_printed) - putchar (delimiter); - } - else if (output_unique && output_first_repeated && countmode == count_none) - { - char *prevfield IF_LINT ( = NULL); - size_t prevlen IF_LINT ( = 0); - - while (!feof (stdin)) - { - char *thisfield; - size_t thislen; - if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) - break; - thisfield = find_field (thisline); - thislen = thisline->length - 1 - (thisfield - thisline->buffer); - if (prevline->length == 0 - || different (thisfield, prevfield, thislen, prevlen)) + if (new_group || grouping != GM_NONE) { fwrite (thisline->buffer, sizeof (char), thisline->length, stdout); @@ -406,8 +386,11 @@ check_file (const char *infile, const char *outfile, char delimiter) SWAP_LINES (prevline, thisline); prevfield = thisfield; prevlen = thislen; + 
first_group_printed = true; } } + if ((grouping == GM_BOTH || grouping == GM_APPEND) && first_group_printed) + putchar (delimiter); } else { @@ -651,10 +634,13 @@ main (int argc, char **argv) } } + /* Note we could allow --group with -D at least, and that would + avoid the need to specify a grouping method to --all-repeated. + It was thought best to avoid deprecating those parameters though + and keep --group separate from other options. */ if (grouping != GM_NONE && output_option_used) { - /* TODO: improve wording? */ - error (0, 0, _("grouping can not be combined with -c/-d/-D/-u")); + error (0, 0, _("--group is mutually exclusive with -c/-d/-D/-u")); usage (EXIT_FAILURE); } diff --git a/tests/misc/uniq.pl b/tests/misc/uniq.pl index 8933a5c..b71717e 100755 --- a/tests/misc/uniq.pl +++ b/tests/misc/uniq.pl @@ -219,16 +219,16 @@ my @Tests = ['137', '--group=both', {IN=>""}, {OUT=>""}], # Grouping with other options - must fail ['138', '--group -c', {IN=>""}, {OUT=>""}, {EXIT=>1}, - {ERR=>"$prog: grouping can not be combined with -c/-d/-D/-u\n" . + {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" . "Try 'uniq --help' for more information.\n"}], ['139', '--group -d', {IN=>""}, {OUT=>""}, {EXIT=>1}, - {ERR=>"$prog: grouping can not be combined with -c/-d/-D/-u\n" . + {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" . "Try 'uniq --help' for more information.\n"}], ['140', '--group -u', {IN=>""}, {OUT=>""}, {EXIT=>1}, - {ERR=>"$prog: grouping can not be combined with -c/-d/-D/-u\n" . + {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" . "Try 'uniq --help' for more information.\n"}], ['141', '--group -D', {IN=>""}, {OUT=>""}, {EXIT=>1}, - {ERR=>"$prog: grouping can not be combined with -c/-d/-D/-u\n" . + {ERR=>"$prog: --group is mutually exclusive with -c/-d/-D/-u\n" . "Try 'uniq --help' for more information.\n"}], # Grouping with badoption ['142', '--group=badoption',{IN=>""}, {OUT=>""}, {EXIT=>1},