bug-coreutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

coreutils fmt int-related fixes


From: Paul Eggert
Subject: coreutils fmt int-related fixes
Date: Mon, 02 Aug 2004 17:08:37 -0700
User-agent: Gnus/5.1006 (Gnus v5.10.6) Emacs/21.3 (gnu/linux)

I didn't have time to fix all the int-related problems with coreutils fmt.
So I patched it to report an error if you specify a column width
that exceeds its arbitrary limits, instead of silently messing up.

Is the original author Ross Paterson still available?  If so, I could
describe the problem a bit more to him.

Here's what I installed:

2004-08-02  Paul Eggert  <address@hidden>

        * src/fmt.c (COST, MAXWORDS): Add a comment describing some of
        fmt's arbitrary limits.
        (TRUE, FALSE): Remove; all uses changed to (true, false).
        (main): Use bool for booleans.
        Limit maximum width to MAXCHARS / 2.  Use xstrtoul, not xstrtol,
        to parse width.
        (copy_rest): Remove unnecessary cast.
        (get_prefix): Rewrite to avoid cast.
        (check_punctuation): Use char *, not unsigned char *; C89 requires
        this.  Avoid off-by-one buffer read overrun when line is empty.
        (flush_paragraph): Don't assume wptr-parabuf is <= INT_MAX.
        Remove unnecessary casts.
        * tests/fmt/basic (wide-1, wide-2, bad-suffix): Adjust to above changes.

Index: src/fmt.c
===================================================================
RCS file: /home/eggert/coreutils/cu/src/fmt.c,v
retrieving revision 1.87
retrieving revision 1.88
diff -p -u -r1.87 -r1.88
--- src/fmt.c   21 Jan 2004 23:07:15 -0000      1.87
+++ src/fmt.c   3 Aug 2004 00:00:49 -0000       1.88
@@ -56,7 +56,12 @@
    or too long.  The definition of SHORT_COST(n) should not be changed.
    However, EQUIV(n) may need tuning.  */
 
-typedef long COST;
+/* FIXME: "fmt" misbehaves given large inputs or options.  One
+   possible workaround for part of the problem is to change COST to be
+   a floating-point type.  There are other problems besides COST,
+   though; see MAXWORDS below.  */
+
+typedef long int COST;
 
 #define MAXCOST        TYPE_MAXIMUM (COST)
 
@@ -100,8 +105,12 @@ typedef long COST;
 #define LINE_CREDIT    EQUIV(3)
 
 /* Size of paragraph buffer, in words and characters.  Longer paragraphs
-   are handled neatly (cf. flush_paragraph()), so there's little to gain
-   by making these larger.  */
+   are handled neatly (cf. flush_paragraph()), so long as these values
+   are considerably greater than required by the width.  These values
+   cannot be extended indefinitely: doing so would run into size limits
+   and/or cause more overflows in cost calculations.  FIXME: Remove these
+   arbitrary limits.  */
+
 #define MAXWORDS       1000
 #define MAXCHARS       5000
 
@@ -115,13 +124,6 @@ typedef long COST;
    output.  */
 #define TABWIDTH       8
 
-/* Miscellaneous definitions.  */
-
-#undef TRUE
-#define TRUE true
-#undef FALSE
-#define FALSE false
-
 /* Word descriptor structure.  */
 
 typedef struct Word WORD;
@@ -171,16 +173,16 @@ const char *program_name;
 
 /* Option values.  */
 
-/* If TRUE, first 2 lines may have different indent (default FALSE).  */
+/* If true, first 2 lines may have different indent (default false).  */
 static bool crown;
 
-/* If TRUE, first 2 lines _must_ have different indent (default FALSE).  */
+/* If true, first 2 lines _must_ have different indent (default false).  */
 static bool tagged;
 
-/* If TRUE, each line is a paragraph on its own (default FALSE).  */
+/* If true, each line is a paragraph on its own (default false).  */
 static bool split;
 
-/* If TRUE, don't preserve inter-word spacing (default FALSE).  */
+/* If true, don't preserve inter-word spacing (default false).  */
 static bool uniform;
 
 /* Prefix minus leading and trailing spaces (default "").  */
@@ -228,7 +230,7 @@ static WORD word[MAXWORDS];
    word.  */
 static WORD *word_limit;
 
-/* If TRUE, current input file contains tab characters, and so tabs can be
+/* If true, current input file contains tab characters, and so tabs can be
    used for white space on output.  */
 static bool tabs;
 
@@ -318,7 +320,8 @@ int
 main (register int argc, register char **argv)
 {
   int optchar;
-  int fail;
+  bool ok = true;
+  char const *max_width_option = NULL;
 
   initialize_main (&argc, &argv);
   program_name = argv[0];
@@ -328,30 +331,17 @@ main (register int argc, register char *
 
   atexit (close_stdout);
 
-  crown = tagged = split = uniform = FALSE;
+  crown = tagged = split = uniform = false;
   max_width = WIDTH;
   prefix = "";
   prefix_length = prefix_lead_space = prefix_full_length = 0;
 
   if (argc > 1 && argv[1][0] == '-' && ISDIGIT (argv[1][1]))
     {
-      const char *s = argv[1] + 1;
-      max_width = 0;
-      /* Old option syntax; a dash followed by one or more digits.
-         Move past the number. */
-      for (; ISDIGIT (*s); ++s)
-       {
-         int old_max = max_width;
-         max_width = max_width * 10 + *s - '0';
-         if (INT_MAX / 10 < old_max || max_width < old_max)
-           break;
-       }
+      /* Old option syntax; a dash followed by one or more digits.  */
+      max_width_option = argv[1] + 1;
 
-      if (*s)
-       error (EXIT_FAILURE, 0, _("invalid width option: %s"),
-              quote (argv[1]));
-
-      /* Make the options we just parsed invisible to getopt. */
+      /* Make the option we just parsed invisible to getopt.  */
       argv[1] = argv[0];
       argv++;
       argc--;
@@ -370,29 +360,23 @@ main (register int argc, register char *
        usage (EXIT_FAILURE);
 
       case 'c':
-       crown = TRUE;
+       crown = true;
        break;
 
       case 's':
-       split = TRUE;
+       split = true;
        break;
 
       case 't':
-       tagged = TRUE;
+       tagged = true;
        break;
 
       case 'u':
-       uniform = TRUE;
+       uniform = true;
        break;
 
       case 'w':
-       {
-         long int tmp_long;
-         if (xstrtol (optarg, NULL, 10, &tmp_long, "") != LONGINT_OK
-             || tmp_long <= 0 || tmp_long > INT_MAX)
-           error (EXIT_FAILURE, 0, _("invalid width: %s"), quote (optarg));
-         max_width = (int) tmp_long;
-       }
+       max_width_option = optarg;
        break;
 
       case 'p':
@@ -405,9 +389,20 @@ main (register int argc, register char *
 
       }
 
+  if (max_width_option)
+    {
+      /* Limit max_width to MAXCHARS / 2; otherwise, the resulting
+        output can be quite ugly.  */
+      unsigned long int tmp;
+      if (! (xstrtoul (max_width_option, NULL, 10, &tmp, "") == LONGINT_OK
+            && tmp <= MAXCHARS / 2))
+       error (EXIT_FAILURE, 0, _("invalid width: %s"),
+              quote (max_width_option));
+      max_width = tmp;
+    }
+
   best_width = max_width * (2 * (100 - LEEWAY) + 1) / 200;
 
-  fail = 0;
   if (optind == argc)
     fmt (stdin);
   else
@@ -427,20 +422,20 @@ main (register int argc, register char *
                  if (fclose (in_stream) == EOF)
                    {
                      error (0, errno, "%s", file);
-                     fail = 1;
+                     ok = false;
                    }
                }
              else
                {
                  error (0, errno, _("cannot open %s for reading"),
                         quote (file));
-                 fail = 1;
+                 ok = false;
                }
            }
        }
     }
 
-  exit (fail ? EXIT_FAILURE : EXIT_SUCCESS);
+  exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
 }
 
 /* Trim space from the front and back of the string P, yielding the prefix,
@@ -471,7 +466,7 @@ set_prefix (register char *p)
 static void
 fmt (FILE *f)
 {
-  tabs = FALSE;
+  tabs = false;
   other_indent = 0;
   next_char = get_prefix (f);
   while (get_paragraph (f))
@@ -526,8 +521,8 @@ set_other_indent (bool same_paragraph)
    If a prefix is in effect, it must be present at the same indent for
    each line in the paragraph.
 
-   Return FALSE if end-of-file was encountered before the start of a
-   paragraph, else TRUE.  */
+   Return false if end-of-file was encountered before the start of a
+   paragraph, else true.  */
 
 static bool
 get_paragraph (FILE *f)
@@ -547,7 +542,7 @@ get_paragraph (FILE *f)
       if (c == EOF)
        {
          next_char = EOF;
-         return FALSE;
+         return false;
        }
       putchar ('\n');
       c = get_prefix (f);
@@ -595,9 +590,9 @@ get_paragraph (FILE *f)
       while (same_para (c) && in_column == other_indent)
        c = get_line (f, c);
     }
-  (word_limit - 1)->period = (word_limit - 1)->final = TRUE;
+  (word_limit - 1)->period = (word_limit - 1)->final = true;
   next_char = c;
-  return TRUE;
+  return true;
 }
 
 /* Copy to the output a line that failed to match the prefix, or that
@@ -615,7 +610,7 @@ copy_rest (FILE *f, register int c)
     {
       put_space (next_prefix_indent);
       for (s = prefix; out_column != in_column && *s; out_column++)
-       putchar (*(unsigned char *)s++);
+       putchar (*s++);
       put_space (in_column - out_column);
     }
   while (c != '\n' && c != EOF)
@@ -626,9 +621,9 @@ copy_rest (FILE *f, register int c)
   return c;
 }
 
-/* Return TRUE if a line whose first non-blank character after the
+/* Return true if a line whose first non-blank character after the
    prefix (if any) is C could belong to the current paragraph,
-   otherwise FALSE.  */
+   otherwise false.  */
 
 static bool
 same_para (register int c)
@@ -718,7 +713,8 @@ get_prefix (FILE *f)
       next_prefix_indent = in_column;
       for (p = prefix; *p != '\0'; p++)
        {
-         if (c != *(unsigned char *)p)
+         unsigned char pc = *p;
+         if (c != pc)
            return c;
          in_column++;
          c = getc (f);
@@ -740,7 +736,7 @@ get_space (FILE *f, register int c)
        in_column++;
       else if (c == '\t')
        {
-         tabs = TRUE;
+         tabs = true;
          in_column = (in_column / TABWIDTH + 1) * TABWIDTH;
        }
       else
@@ -754,13 +750,13 @@ get_space (FILE *f, register int c)
 static void
 check_punctuation (register WORD *w)
 {
-  const unsigned char *start, *finish;
+  char const *start = w->text;
+  char const *finish = start + (w->length - 1);
+  unsigned char fin = *finish;
 
-  start = (unsigned char *) w->text;
-  finish = start + (w->length - 1);
   w->paren = isopen (*start);
-  w->punct = ISPUNCT (*finish);
-  while (isclose (*finish) && finish > start)
+  w->punct = ISPUNCT (fin);
+  while (start < finish && isclose (*finish))
     finish--;
   w->period = isperiod (*finish);
 }
@@ -780,7 +776,7 @@ flush_paragraph (void)
 
   if (word_limit == word)
     {
-      printf ("%*s", (int) (wptr - parabuf), parabuf);
+      fwrite (parabuf, sizeof *parabuf, wptr - parabuf, stdout);
       wptr = parabuf;
       return;
     }
@@ -812,7 +808,7 @@ flush_paragraph (void)
   /* Copy text of words down to start of parabuf -- we use memmove because
      the source and target may overlap.  */
 
-  memmove (parabuf, split_point->text, (size_t) (wptr - split_point->text));
+  memmove (parabuf, split_point->text, wptr - split_point->text);
   shift = split_point->text - parabuf;
   wptr -= shift;
 
@@ -824,8 +820,7 @@ flush_paragraph (void)
   /* Copy words from split_point down to word -- we use memmove because
      the source and target may overlap.  */
 
-  memmove ((char *) word, (char *) split_point,
-        (word_limit - split_point + 1) * sizeof (WORD));
+  memmove (word, split_point, (word_limit - split_point + 1) * sizeof *word);
   word_limit -= split_point - word;
 }
 
Index: tests/fmt/basic
===================================================================
RCS file: /home/eggert/coreutils/cu/tests/fmt/basic,v
retrieving revision 1.9
retrieving revision 1.10
diff -p -u -r1.9 -r1.10
--- tests/fmt/basic     3 May 2003 20:14:20 -0000       1.9
+++ tests/fmt/basic     3 Aug 2004 00:03:57 -0000       1.10
@@ -27,10 +27,12 @@ my @Tests =
      ['8-bit-pfx', qw (-p 'ç'),
       {IN=> "ça\nçb\n"},
       {OUT=>"ça b\n"}],
-     ['wide-1', '-32768',      {IN=> "a\n"}, {OUT=>"a\n"}],
-     ['wide-2', '-2147483647', {IN=> "a\n"}, {OUT=>"a\n"}],
+     ['wide-1', '-w 32768',
+      {ERR => "fmt: invalid width: `32768'\n"}, {EXIT => 1}],
+     ['wide-2', '-w 2147483647',
+      {ERR => "fmt: invalid width: `2147483647'\n"}, {EXIT => 1}],
      ['bad-suffix', '-72x',    {IN=> ''},
-      {ERR => "fmt: invalid width option: `-72x'\n"}, {EXIT => 1}],
+      {ERR => "fmt: invalid width: `72x'\n"}, {EXIT => 1}],
      ['no-file', 'no-such-file',
       {ERR => "fmt: cannot open `no-such-file' for reading:"
        . " No such file or directory\n"}, {EXIT => 1}],




reply via email to

[Prev in Thread] Current Thread [Next in Thread]