>From 072ffee0f45a67465607cde3d984e6fd7e37a1af Mon Sep 17 00:00:00 2001 From: Assaf Gordon Date: Wed, 20 Feb 2013 13:31:22 -0500 Subject: [PATCH] uniq: add "--group" option * src/uniq.c: implement "--group" options. * tests/misc/uniq.pl: add tests. --- src/uniq.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++++--- tests/misc/uniq.pl | 40 +++++++++++++++++ 2 files changed, 159 insertions(+), 6 deletions(-) diff --git a/src/uniq.c b/src/uniq.c index 5efdad7..598c62d 100644 --- a/src/uniq.c +++ b/src/uniq.c @@ -108,11 +108,47 @@ static enum delimit_method const delimit_method_map[] = /* Select whether/how to delimit groups of duplicate lines. */ static enum delimit_method delimit_groups; +enum grouping_method +{ + /* No grouping, when "--group" isn't used */ + GM_NONE, + + /* Delimiter precedes all groups. --group=prepend */ + GM_PREPEND, + + /* Delimiter follows all groups. --group=append */ + GM_APPEND, + + /* Delimiter between groups. --group[=separate] */ + GM_SEPARATE, + + /* Delimiter before and after each group. 
--group=both */ + GM_BOTH +}; + +static char const *const grouping_method_string[] = +{ + "prepend", "append", "separate", "both", NULL +}; + +static enum grouping_method const grouping_method_map[] = +{ + GM_PREPEND, GM_APPEND, GM_SEPARATE, GM_BOTH +}; + +static enum grouping_method grouping = GM_NONE; + +enum +{ + GROUP_OPTION = CHAR_MAX + 1 +}; + static struct option const longopts[] = { {"count", no_argument, NULL, 'c'}, {"repeated", no_argument, NULL, 'd'}, {"all-repeated", optional_argument, NULL, 'D'}, + {"group", optional_argument, NULL, GROUP_OPTION}, {"ignore-case", no_argument, NULL, 'i'}, {"unique", no_argument, NULL, 'u'}, {"skip-fields", required_argument, NULL, 'f'}, @@ -159,6 +195,11 @@ With no options, matching lines are merged to the first occurrence.\n\ -z, --zero-terminated end lines with 0 byte, not newline\n\ "), stdout); fputs (_("\ + --group=[method] separate each unique group (whether duplicated or not)\n\ + with an empty line.\n\ + method={separate(default),prepend,append,both)\n\ +"), stdout); + fputs (_("\ -w, --check-chars=N compare no more than N characters in lines\n\ "), stdout); fputs (HELP_OPTION_DESCRIPTION, stdout); @@ -293,13 +334,57 @@ check_file (const char *infile, const char *outfile, char delimiter) initbuffer (prevline); /* The duplication in the following 'if' and 'else' blocks is an - optimization to distinguish the common case (in which none of - the following options has been specified: --count, -repeated, - --all-repeated, --unique) from the others. In the common case, - this optimization lets uniq output each different line right away, - without waiting to see if the next one is different. */ + optimization to distinguish several cases: - if (output_unique && output_first_repeated && countmode == count_none) + 1. grouping (--group=X) - all input lines are printed. + checking for unique/duplicated lines is used only for printing + group separators. + + 2. 
The common case - + In which none of the following options has been specified: + --count, --repeated, --all-repeated, --unique + In the common case, this optimization lets uniq output each different + line right away, without waiting to see if the next one is different. + + 3. All other cases. + */ + if (grouping != GM_NONE) + { + char *prevfield IF_LINT ( = NULL); + size_t prevlen IF_LINT ( = 0); + bool first_group_printed = false; + + while (!feof (stdin)) + { + char *thisfield; + size_t thislen; + bool new_group; + if (readlinebuffer_delim (thisline, stdin, delimiter) == 0) + break; + thisfield = find_field (thisline); + thislen = thisline->length - 1 - (thisfield - thisline->buffer); + + new_group = (prevline->length == 0 + || different (thisfield, prevfield, thislen, prevlen)); + + if (new_group + && ( (grouping == GM_PREPEND) || (grouping == GM_BOTH) + || ( first_group_printed + && + ( grouping == GM_APPEND || grouping == GM_SEPARATE )))) + putchar (delimiter); + + fwrite (thisline->buffer, sizeof (char), thisline->length, stdout); + SWAP_LINES (prevline, thisline); + prevfield = thisfield; + prevlen = thislen; + first_group_printed = true; + } + if ( (grouping == GM_BOTH || grouping == GM_APPEND ) + && first_group_printed) + putchar (delimiter); + } + else if (output_unique && output_first_repeated && countmode == count_none) { char *prevfield IF_LINT ( = NULL); size_t prevlen IF_LINT ( = 0); @@ -415,6 +500,7 @@ main (int argc, char **argv) int nfiles = 0; char const *file[2]; char delimiter = '\n'; /* change with --zero-terminated, -z */ + bool output_option_used = false; /* if true, one of -u/-d/-D/-c was used */ file[0] = file[1] = "-"; initialize_main (&argc, &argv); @@ -498,10 +584,12 @@ main (int argc, char **argv) case 'c': countmode = count_occurrences; + output_option_used = true; break; case 'd': output_unique = false; + output_option_used = true; break; case 'D': @@ -513,6 +601,16 @@ main (int argc, char **argv) delimit_groups = XARGMATCH 
("--all-repeated", optarg, delimit_method_string, delimit_method_map); + output_option_used = true; + break; + + case GROUP_OPTION: + if (optarg == NULL) + grouping = GM_SEPARATE; + else + grouping = XARGMATCH ("--group", optarg, + grouping_method_string, + grouping_method_map); break; case 'f': @@ -532,6 +630,7 @@ main (int argc, char **argv) case 'u': output_first_repeated = false; + output_option_used = true; break; case 'w': @@ -552,6 +651,20 @@ main (int argc, char **argv) } } + if (grouping != GM_NONE && output_option_used) + { + /* TODO: improve wording? */ + error (0, 0, _("grouping can not be combined with -c/-d/-D/-u")); + usage (EXIT_FAILURE); + } + + if (grouping != GM_NONE && countmode != count_none) + { + error (0, 0, + _("grouping and printing repeat counts is meaningless")); + usage (EXIT_FAILURE); + } + if (countmode == count_occurrences && output_later_repeated) { error (0, 0, diff --git a/tests/misc/uniq.pl b/tests/misc/uniq.pl index 140a49b..8933a5c 100755 --- a/tests/misc/uniq.pl +++ b/tests/misc/uniq.pl @@ -199,6 +199,46 @@ my @Tests = # Check that --zero-terminated is synonymous with -z. 
['123', '--zero-terminated', {IN=>"a\na\nb"}, {OUT=>"a\na\nb\0"}], ['124', '--zero-terminated', {IN=>"a\0a\0b"}, {OUT=>"a\0b\0"}], + + # Check grouping + ['125', '--group=prepend', {IN=>"a\na\nb\n"}, {OUT=>"\na\na\n\nb\n"}], + ['126', '--group=append', {IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n\n"}], + ['127', '--group=separate',{IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n"}], + # no explicit grouping = separate + ['128', '--group', {IN=>"a\na\nb\n"}, {OUT=>"a\na\n\nb\n"}], + ['129', '--group=both', {IN=>"a\na\nb\n"}, {OUT=>"\na\na\n\nb\n\n"}], + # Grouping in the special case of a single group + ['130', '--group=prepend', {IN=>"a\na\n"}, {OUT=>"\na\na\n"}], + ['131', '--group=append', {IN=>"a\na\n"}, {OUT=>"a\na\n\n"}], + ['132', '--group=separate',{IN=>"a\na\n"}, {OUT=>"a\na\n"}], + ['133', '--group', {IN=>"a\na\n"}, {OUT=>"a\na\n"}], + # Grouping with empty input - should never print anything + ['134', '--group=prepend', {IN=>""}, {OUT=>""}], + ['135', '--group=append', {IN=>""}, {OUT=>""}], + ['136', '--group=separate', {IN=>""}, {OUT=>""}], + ['137', '--group=both', {IN=>""}, {OUT=>""}], + # Grouping with other options - must fail + ['138', '--group -c', {IN=>""}, {OUT=>""}, {EXIT=>1}, + {ERR=>"$prog: grouping can not be combined with -c/-d/-D/-u\n" . + "Try 'uniq --help' for more information.\n"}], + ['139', '--group -d', {IN=>""}, {OUT=>""}, {EXIT=>1}, + {ERR=>"$prog: grouping can not be combined with -c/-d/-D/-u\n" . + "Try 'uniq --help' for more information.\n"}], + ['140', '--group -u', {IN=>""}, {OUT=>""}, {EXIT=>1}, + {ERR=>"$prog: grouping can not be combined with -c/-d/-D/-u\n" . + "Try 'uniq --help' for more information.\n"}], + ['141', '--group -D', {IN=>""}, {OUT=>""}, {EXIT=>1}, + {ERR=>"$prog: grouping can not be combined with -c/-d/-D/-u\n" . + "Try 'uniq --help' for more information.\n"}], + # Grouping with badoption + ['142', '--group=badoption',{IN=>""}, {OUT=>""}, {EXIT=>1}, + {ERR=>"$prog: invalid argument 'badoption' for '--group'\n" . 
+ "Valid arguments are:\n" . + " - 'prepend'\n" . + " - 'append'\n" . + " - 'separate'\n" . + " - 'both'\n" . + "Try '$prog --help' for more information.\n"}], ); # Set _POSIX2_VERSION=199209 in the environment of each obs-plus* test. -- 1.7.7.4