>From d5d58eeb0bf5a399b2d65e174c72d0f8c11b2c01 Mon Sep 17 00:00:00 2001
From: Assaf Gordon
Date: Wed, 5 Jan 2022 13:04:08 -0700
Subject: [PATCH 2/9] cut: initial -D implementation, currently only with "-f"

---
Review notes (this section is ignored when the patch is applied):
 - --allow-duplicates is declared with no_argument, so that it agrees
   with the bare "D" in the getopt string and with "case 'D'", which
   never reads optarg.
 - cut_adv writes fields and non-delimited lines by length instead of
   with fputs, so NUL bytes in the input are no longer truncated.
 - Disabled debugging code was removed from cut_adv.
 - Typos fixed in the subject line and in the --help text.
 - Still open: cut_stream sends every -D invocation to cut_adv, which
   only splits fields, so -D together with -b or -c is not handled yet.

 src/cut.c | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 144 insertions(+), 5 deletions(-)

diff --git a/src/cut.c b/src/cut.c
index 5143c8bd9..84caad091 100644
--- a/src/cut.c
+++ b/src/cut.c
@@ -20,7 +20,9 @@
 /* POSIX changes, bug fixes, long-named options, and cleanup
    by David MacKenzie .
 
-   Rewrite cut_fields and cut_bytes -- Jim Meyering.  */
+   Rewrite cut_fields and cut_bytes -- Jim Meyering.
+
+   Match toybox's -D,-F,-O options -- Assaf Gordon.  */
 
 #include <config.h>
 
@@ -43,7 +45,8 @@
 #define AUTHORS \
   proper_name ("David M. Ihnat"), \
   proper_name ("David MacKenzie"), \
-  proper_name ("Jim Meyering")
+  proper_name ("Jim Meyering"), \
+  proper_name ("Assaf Gordon")
 
 #define FATAL_ERROR(Message) \
   do \
@@ -113,6 +116,15 @@ static char *output_delimiter_string;
 /* True if we have ever read standard input. */
 static bool have_read_stdin;
 
+/* If true, use different (but less optimized) code.
+   Used with -F and/or -D.  */
+static bool adv_mode;
+
+/* True if -D is used: allow duplicated output bytes/chars/fields
+   and do not sort the output list.  */
+static bool allow_duplicates;
+
+
 /* For long options that have no equivalent short option, use a
    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 enum
@@ -127,6 +139,7 @@ static struct option const longopts[] =
   {"characters", required_argument, NULL, 'c'},
   {"fields", required_argument, NULL, 'f'},
   {"delimiter", required_argument, NULL, 'd'},
+  {"allow-duplicates", no_argument, NULL, 'D'},
   {"only-delimited", no_argument, NULL, 's'},
   {"output-delimiter", required_argument, NULL, OUTPUT_DELIMITER_OPTION},
   {"complement", no_argument, NULL, COMPLEMENT_OPTION},
@@ -158,6 +171,10 @@ Print selected parts of lines from each FILE to standard output.\n\
   -b, --bytes=LIST        select only these bytes\n\
   -c, --characters=LIST   select only these characters\n\
   -d, --delimiter=DELIM   use DELIM instead of TAB for field delimiter\n\
+"), stdout);
+      fputs (_("\
+  -D, --allow-duplicates  keep duplicated bytes/characters/fields in LIST;\n\
+                            do not sort LIST; implies -s\n\
 "), stdout);
       fputs (_("\
   -f, --fields=LIST       select only these fields;  also print any line\n\
@@ -424,10 +441,115 @@ cut_fields (FILE *stream)
     }
 }
 
+/* Read lines from STREAM, split each one into fields on DELIM, and print
+   the fields selected by the field range pairs, visiting the pairs in
+   the order set_fields stored them.  The whole line is held in memory,
+   so fields can be written in any order and more than once.  */
+
+static void
+cut_adv (FILE *stream)
+{
+  char *linebuf = NULL;
+  size_t bufsize = 0;
+
+  /* fieldpos[I] points at the first byte of the (0-based) I-th field of
+     the current line.  The array is reused from one line to the next.  */
+  char **fieldpos = NULL;
+  idx_t alloc_flds = 0;
+
+  /* Minor optimization: save a pointer to the last field pair sentinel
+     (which is always added by set_fields()).  */
+  struct field_range_pair *last_frp = frp;
+  while (! (last_frp->hi == UINTMAX_MAX && last_frp->lo == UINTMAX_MAX))
+    ++last_frp;
+
+  while (true)
+    {
+      /* Read the entire line.  */
+      ssize_t len = getdelim (&linebuf, &bufsize, line_delim, stream);
+      if (len == -1)
+        {
+          /* Stop at end of input or on a read error; any other failure
+             is treated as memory exhaustion.  */
+          if (ferror (stream) || feof (stream))
+            break;
+          xalloc_die ();
+        }
+
+      /* Chomp.  */
+      if (len > 0 && linebuf[len - 1] == line_delim)
+        linebuf[--len] = '\0';
+
+      /* Split into fields.  Field boundaries are tracked by address, not
+         by NUL termination, so NUL bytes inside a field are preserved.  */
+      char *line_end = linebuf + len;
+      char *p = linebuf;
+      idx_t fld = 0;
+      while (true)
+        {
+          char *endp = memchr (p, delim, line_end - p);
+
+          /* Store this field.  */
+          if (fld >= alloc_flds)
+            fieldpos = xpalloc (fieldpos, &alloc_flds, 10, -1,
+                                sizeof *fieldpos);
+          fieldpos[fld++] = p;
+
+          if (!endp)
+            break;
+          p = endp + 1;
+        }
+
+      /* A line without any delimiter is printed verbatim, unless -s.  */
+      if (fld == 1)
+        {
+          if (!suppress_non_delimited)
+            {
+              fwrite (linebuf, sizeof (char), len, stdout);
+              putchar (line_delim);
+            }
+          continue;
+        }
+
+      /* Iterate the requested field LIST, and print accordingly.  */
+      uintmax_t nfields = fld;
+      bool first = true;
+      for (struct field_range_pair *r = frp; r != last_frp; ++r)
+        {
+          /* Never go past the fields this line has; this also bounds
+             open-ended ranges, whose upper end is UINTMAX_MAX.  */
+          uintmax_t hi = r->hi < nfields ? r->hi : nfields;
+
+          for (uintmax_t i = r->lo - 1; i < hi; ++i)
+            {
+              /* A field stops just before the delimiter that precedes
+                 the next field, or at end of line if it is the last.  */
+              char *field_end = (i + 1 < nfields
+                                 ? fieldpos[i + 1] - 1 : line_end);
+
+              if (!first)
+                fwrite (output_delimiter_string, sizeof (char),
+                        output_delimiter_length, stdout);
+
+              fwrite (fieldpos[i], sizeof (char),
+                      field_end - fieldpos[i], stdout);
+              first = false;
+            }
+        }
+
+      putchar (line_delim);
+    }
+
+  free (fieldpos);
+  free (linebuf);
+}
+
 static void
 cut_stream (FILE *stream)
 {
-  if (operating_mode == byte_mode)
+  if (adv_mode)
+    cut_adv (stream);
+  else if (operating_mode == byte_mode)
     cut_bytes (stream);
   else
     cut_fields (stream);
@@ -499,7 +621,7 @@ main (int argc, char **argv)
   delim = '\0';
   have_read_stdin = false;
 
-  while ((optc = getopt_long (argc, argv, "b:c:d:f:nsz", longopts, NULL)) != -1)
+  while ((optc = getopt_long (argc, argv, "b:c:d:Df:nsz", longopts, NULL)) != -1)
     {
       switch (optc)
         {
@@ -520,6 +642,11 @@ main (int argc, char **argv)
           spec_list_string = optarg;
           break;
 
+        case 'D':
+          adv_mode = true;
+          allow_duplicates = true;
+          break;
+
         case 'd':
           /* New delimiter.  */
           /* Interpret -d '' to mean 'use the NUL byte as the delimiter.'  */
@@ -569,17 +696,29 @@ main (int argc, char **argv)
     FATAL_ERROR (_("an input delimiter may be specified only\
  when operating on fields"));
 
+  if (adv_mode && complement)
+    FATAL_ERROR (_("--complement cannot be used with -D"));
+
+  /* -D implies -s with -f.  */
+  if (allow_duplicates && operating_mode == field_mode)
+    suppress_non_delimited = true;
+
   if (suppress_non_delimited && operating_mode != field_mode)
     FATAL_ERROR (_("suppressing non-delimited lines makes sense\n\
 \tonly when operating on fields"));
 
   set_fields (spec_list_string,
               ( (operating_mode == field_mode) ? 0 : SETFLD_ERRMSG_USE_POS)
-              | (complement ? SETFLD_COMPLEMENT : 0) );
+              | (complement ? SETFLD_COMPLEMENT : 0)
+              | (allow_duplicates ? SETFLD_NO_SORT : 0) );
 
   if (!delim_specified)
     delim = '\t';
 
+  if (adv_mode && line_delim == delim)
+    FATAL_ERROR (_("line-delimiter must differ from field delimiter\
+ with -D"));
+
   if (output_delimiter_string == NULL)
     {
       static char dummy[2];
-- 
2.20.1