bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] grep: check stdin like other files


From: Paul Eggert
Subject: [PATCH] grep: check stdin like other files
Date: Sun, 01 Jan 2012 03:44:07 -0800
User-agent: Mozilla/5.0 (X11; Linux i686; rv:8.0) Gecko/20111124 Thunderbird/8.0

I found this bug while thinking about improving the performance
of 'grep' on binary files.

=====

* src/main.c (grepfile): Revamp tests for input files so that
standard input is tested like other files.  For example, report
an error if standard input equals standard output.
Prefer open+fstat to stat+open if possible, as open+fstat is
usually a bit faster and avoids a race condition.
* tests/in-eq-out-infloop: Add tests for cases like
'grep pat <file >>file'.
---
 src/main.c              |  110 ++++++++++++++++++++++++++++-------------------
 tests/in-eq-out-infloop |   33 ++++++++------
 2 files changed, 84 insertions(+), 59 deletions(-)

diff --git a/src/main.c b/src/main.c
index 6fdc982..cc93920 100644
--- a/src/main.c
+++ b/src/main.c
@@ -376,7 +376,7 @@ int match_lines;
 unsigned char eolbyte;
 
 /* For error messages. */
-/* The name the program was run with, stripped of any leading path. */
+/* The input file name, or (if standard input) "-" or a --label argument.  */
 static char const *filename;
 static int errseen;
 
@@ -1211,68 +1211,88 @@ grepfile (char const *file, struct stats *stats)
   int desc;
   int count;
   int status;
+  int stat_result;
+
+  filename = (file ? file : label ? label : _("(standard input)"));
 
   if (! file)
+    desc = STDIN_FILENO;
+  else if (devices == SKIP_DEVICES)
     {
-      desc = 0;
-      filename = label ? label : _("(standard input)");
+      /* Don't open yet, since that might have side effects on a device.  */
+      desc = -1;
     }
   else
     {
-      if (stat (file, &stats->stat) != 0)
+      /* When skipping directories, don't worry about directories
+         that can't be opened.  */
+      desc = open (file, O_RDONLY);
+      if (desc < 0 && directories != SKIP_DIRECTORIES)
         {
           suppressible_error (file, errno);
           return 1;
         }
-      if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
-        return 1;
-      if (devices == SKIP_DEVICES && (S_ISCHR (stats->stat.st_mode)
+    }
+
+  stat_result = (desc < 0
+                 ? stat (file, &stats->stat)
+                 : fstat (desc, &stats->stat));
+  if (stat_result != 0)
+    {
+      suppressible_error (filename, errno);
+      if (file)
+        close (desc);
+      return 1;
+    }
+
+  if ((directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode))
+      || (devices == SKIP_DEVICES && (S_ISCHR (stats->stat.st_mode)
                                       || S_ISBLK (stats->stat.st_mode)
                                       || S_ISSOCK (stats->stat.st_mode)
-                                      || S_ISFIFO (stats->stat.st_mode)))
-        return 1;
-
-      /* If there is a regular file on stdout and the current file refers
-         to the same i-node, we have to report the problem and skip it.
-         Otherwise when matching lines from some other input reach the
-         disk before we open this file, we can end up reading and matching
-         those lines and appending them to the file from which we're reading.
-         Then we'd have what appears to be an infinite loop that'd terminate
-         only upon filling the output file system or reaching a quota.
-         However, there is no risk of an infinite loop if grep is generating
-         no output, i.e., with --silent, --quiet, -q.
-         Similarly, with any of these:
-           --max-count=N (-m) (for N >= 2)
-           --files-with-matches (-l)
-           --files-without-match (-L)
-         there is no risk of trouble.
-         For --max-count=1, grep stops after printing the first match,
-         so there is no risk of malfunction.  But even --max-count=2, with
-         input==output, while there is no risk of infloop, there is a race
-         condition that could result in "alternate" output.  */
-      if (!out_quiet && list_files == 0 && 1 < max_count
-          && S_ISREG (out_stat.st_mode) && out_stat.st_ino
-          && SAME_INODE (stats->stat, out_stat))
-        {
-          error (0, 0, _("input file %s is also the output"), quote (file));
-          errseen = 1;
-          return 1;
-        }
+                                      || S_ISFIFO (stats->stat.st_mode))))
+    {
+      if (file)
+        close (desc);
+      return 1;
+    }
 
-      while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR)
-        continue;
+  /* If there is a regular file on stdout and the current file refers
+     to the same i-node, we have to report the problem and skip it.
+     Otherwise when matching lines from some other input reach the
+     disk before we open this file, we can end up reading and matching
+     those lines and appending them to the file from which we're reading.
+     Then we'd have what appears to be an infinite loop that'd terminate
+     only upon filling the output file system or reaching a quota.
+     However, there is no risk of an infinite loop if grep is generating
+     no output, i.e., with --silent, --quiet, -q.
+     Similarly, with any of these:
+       --max-count=N (-m) (for N >= 2)
+       --files-with-matches (-l)
+       --files-without-match (-L)
+     there is no risk of trouble.
+     For --max-count=1, grep stops after printing the first match,
+     so there is no risk of malfunction.  But even --max-count=2, with
+     input==output, while there is no risk of infloop, there is a race
+     condition that could result in "alternate" output.  */
+  if (!out_quiet && list_files == 0 && 1 < max_count
+      && S_ISREG (out_stat.st_mode) && out_stat.st_ino
+      && SAME_INODE (stats->stat, out_stat))
+    {
+      error (0, 0, _("input file %s is also the output"), quote (filename));
+      errseen = 1;
+      if (file)
+        close (desc);
+      return 1;
+    }
 
+  if (desc < 0)
+    {
+      desc = open (file, O_RDONLY);
       if (desc < 0)
         {
-          int e = errno;
-          /* When skipping directories, don't worry about directories
-             that can't be opened.  */
-          if (! (directories == SKIP_DIRECTORIES && isdir (file)))
-            suppressible_error (file, e);
+          suppressible_error (file, errno);
           return 1;
         }
-
-      filename = file;
     }
 
 #if defined SET_BINARY
diff --git a/tests/in-eq-out-infloop b/tests/in-eq-out-infloop
index dcb7ac0..21abadd 100755
--- a/tests/in-eq-out-infloop
+++ b/tests/in-eq-out-infloop
@@ -13,23 +13,28 @@ for i in 1 2 3 4 5 6 7 8 9 10 11 12; do
 done
 
 echo "$v" > out || framework_failure_
-echo 'grep: input file `out'\'' is also the output' \
-    > err.exp || framework_failure_
 
-# Require an exit status of 2.
-# grep-2.8 and earlier would infloop.
-timeout 10 grep 0 out >> out 2> err; st=$?
-test $st = 2 || fail=1
+for arg in out - ''; do
+  case $arg in
+    out) echo 'grep: input file `out'\'' is also the output';;
+    *)   echo 'grep: input file `(standard input)'\'' is also the output';;
+  esac > err.exp || framework_failure_
 
-compare err.exp err || fail=1
+  # Require an exit status of 2.
+  # grep-2.8 and earlier would infloop with $arg = out.
+  # grep-2.9 and earlier would infloop with $arg = - or $arg = ''.
+  timeout 10 grep 0 $arg < out >> out 2> err; st=$?
+  test $st = 2 || fail=1
+  compare err.exp err || fail=1
 
-# But with each of the following options it must not exit-2.
-for i in -q -m1 -l -L; do
-  timeout 10 grep $i 0 out >> out 2> err; st=$?
-  test $st = 2 && fail=1
-done
+  # But with each of the following options it must not exit-2.
+  for i in -q -m1 -l -L; do
+    timeout 10 grep $i 0 $arg < out >> out 2> err; st=$?
+    test $st = 2 && fail=1
+  done
 
-timeout 10 grep -2 0 out >> out 2> err; st=$?
-test $st = 2 || fail=1
+  timeout 10 grep -2 0 $arg < out >> out 2> err; st=$?
+  test $st = 2 || fail=1
+done
 
 Exit $fail
-- 
1.7.6.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]