bug-findutils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Patch] Locate: Read each database only once.


From: Bas van Gompel
Subject: [Patch] Locate: Read each database only once.
Date: Sun, 29 May 2005 23:11:52 +0200 (MET DST)
User-agent: slrn/0.9.8.1 (Win32) Hamster/2.0.6.0 KorrNews/4.2

Hi,

Here is a patch which makes locate read each database only once.
This causes a functional change: when using multiple databases /and/
multiple patterns, the order in which results are output changes from
{d1p1, d2p1, d1p2, d2p2} to {d1p1, d1p2, d2p1, d2p2}, where dNpM
denotes the matches for pattern M in database N. Also, --statistics
can now be combined with patterns to get statistics on the matches,
and the new --print (-p) option can be used to output names when they
otherwise would not be printed (because of -S/-c).

Included inline below is a version of the patch that ignores whitespace
changes, for easy review. The attached version includes the whitespace
changes and is the one intended to be applied.


A possible ChangeLog-entry:

2005-05-29  Bas van Gompel  <address@hidden>

        * doc/find.texi: Add documentation for locate --print (-p), and the
        use of non-options with --statistics.
        * locate/locate.1: ditto.
        * locate/locate.c: Read each database only once.
        Add a ``--print'' (``-p'') option (to cancel side-effects of -c/-S).
        Allow ``--statistics'' in combination with non-options.


diff --exclude='*~' -Ndrupb curr/findutils/doc/find.texi mine2/findutils/doc/find.texi
--- findutils/doc/find.texi     2005-05-26 04:00:50.000000000 +0200
+++ findutils/doc/find.texi     2005-05-29 19:21:58.000000000 +0200
@@ -2652,7 +2652,8 @@ it must match part of the base name.
 @item --count
 @itemx -c
 Instead of printing the matched filenames, just print the total 
-number of matches we found.
+number of matches we found, unless @samp{--print} (@samp{-p}) is also
+present.
 
 
 @item --database=@var{path}
@@ -2717,6 +2718,11 @@ Results are separated with the ASCII NUL
 newline character.  To get the full benefit of the use of this option,
 use the new locate database format (that is the default anyway).
 
+@item --print
+@itemx -p
+Print search results when they normally would not, because of the presence
+of @samp{--statistics} (@samp{-S}) or @samp{--count} (@samp{-c}).
+
 @item --wholename
 @itemx -w
 The specified pattern is matched against the whole name of the file in
@@ -2744,7 +2750,7 @@ compatibility with BSD's @code{locate}.
 @item --statistics
 @itemx -S
 Print some summary information for each locate database.  No search is
-performed. 
+performed unless non-option arguments are given.
 
 @item --help
 Print a summary of the options to @code{locate} and exit.
diff --exclude='*~' -Ndrupb curr/findutils/locate/locate.1 mine2/findutils/locate/locate.1
--- findutils/locate/locate.1   2005-04-18 21:54:42.000000000 +0200
+++ findutils/locate/locate.1   2005-05-29 19:31:46.000000000 +0200
@@ -7,7 +7,7 @@ locate \- list files in databases that m
 | \-\-ignore-case] [\-0 | \-\-null] [\-c | \-\-count] [\-w | \-\-wholename]
 |\-b | \-\-basename] [\-l N | \-\-limit=N] [\-S | \-\-statistics] [\-r
 | \-\-regex ] [\-P | \-H | \-\-nofollow] [\-L | \-\-follow] [\-\-version]
-[\-\-help] pattern...
+[\-p | \-\-print] [\-\-help] pattern...
 .SH DESCRIPTION
 This manual page
 documents the GNU version of
@@ -43,7 +43,7 @@ entries; see \fBupdatedb\fP(1).
 .TP
 .I "\-c, \-\-count"
 Instead of printing the matched filenames, just print the total 
-number of matches we found.
+number of matches we found, unless \-\-\fIprint\fP (\-p) is also present.
 .TP
 .I "\-d \fIpath\fP, \-\-database=\fIpath\fP"
 Instead of searching the default file name database, search the file
@@ -113,6 +113,10 @@ Accepted but does nothing, for compatibi
 .I "\-0, \-\-null"
 Use ASCII NUL as a separator, instead of newline.   
 .TP
+.I "\-p, \-\-print"
+Print search results when they normally would not, because of the presence
+of \-\-statistics (\-S) or \-\-count (\-c).
+.TP
 .I "\-w, \-\-wholename"
 Match against the whole name of the file as listed in the database.
 This is the default.
@@ -137,8 +141,8 @@ Accepted but does nothing, for compatibi
 .TP
 .I "\-S, \-\-statistics"
 Print various statistics about each locate database and then exit
-without performing a search.  Any patterns given on the command line
-are ignored.  For compatibility with BSD, \-S is accepted as a synonym
+without performing a search, unless non-option arguments are given.
+For compatibility with BSD, \-S is accepted as a synonym
 for \-\-statistics.
 .TP
 .I "\-\-help"
diff --exclude='*~' -Ndrupb curr/findutils/locate/locate.c mine2/findutils/locate/locate.c
--- findutils/locate/locate.c   2005-04-22 05:08:42.000000000 +0200
+++ findutils/locate/locate.c   2005-05-18 22:23:40.000000000 +0200
@@ -246,6 +246,14 @@ lc_strcpy(char *dest, const char *src)
   *dest = 0;
 }
 
+struct locate_limits
+{
+  uintmax_t limit;
+  uintmax_t items_accepted;
+};
+static struct locate_limits limits;
+
+
 struct locate_stats
 {
   uintmax_t compressed_bytes;
@@ -286,13 +294,45 @@ struct visitor
 
 static struct visitor *inspectors = NULL;
 static struct visitor *lastinspector = NULL;
+static struct visitor *past_pat_inspector = NULL;
 
+/* 0 or 1 pattern(s) */
 static int
-process_filename(const char *munged_filename, const char *original_filename)
+process_simple(const char *munged_filename, const char *original_filename)
+{
+  int result = VISIT_CONTINUE;
+  const struct visitor *p = inspectors;
+  
+  while ( ((VISIT_CONTINUE | VISIT_ACCEPTED) & result) && (NULL != p) )
+    {
+      result = (p->inspector)(munged_filename, original_filename, p->context);
+      p = p->next;
+    }
+
+    return result;
+}
+
+/* Accept if any pattern matches. */
+static int
+process_or (const char *munged_filename, const char *original_filename)
 {
   int result = VISIT_CONTINUE;
   const struct visitor *p = inspectors;
   
+  while ( ((VISIT_CONTINUE | VISIT_REJECTED) & result) && (past_pat_inspector 
!= p) )
+    {
+      result = (p->inspector)(munged_filename, original_filename, p->context);
+      p = p->next;
+    }
+
+  if (result == VISIT_CONTINUE)
+    result = VISIT_REJECTED;
+  if (result & (VISIT_ABORT | VISIT_REJECTED))
+    return result;
+
+  p = past_pat_inspector;
+  result = VISIT_CONTINUE;
+
   while ( (VISIT_CONTINUE == result) && (NULL != p) )
     {
       result = (p->inspector)(munged_filename, original_filename, p->context);
@@ -305,6 +345,11 @@ process_filename(const char *munged_file
     return result;
 }
 
+typedef int (*processfunc)(const char *munged_filename,
+                           const char *original_filename);
+
+static processfunc mainprocessor = NULL;
+
 static void
 add_visitor(visitfunc fn, void *context)
 {
@@ -445,7 +490,7 @@ visit_substring_match_nocasefold(const c
   (void) original_filename;
 
   if (NULL != strstr(munged_filename, pattern))
-    return VISIT_CONTINUE;
+    return VISIT_ACCEPTED;
   else
     return VISIT_REJECTED;
 }
@@ -468,7 +513,7 @@ visit_substring_match_casefold(const cha
   
   
   if (NULL != strstr(p->buffer, p->pattern))
-    return VISIT_CONTINUE;
+    return VISIT_ACCEPTED;
   else
     return VISIT_REJECTED;
 }
@@ -484,7 +529,7 @@ visit_globmatch_nofold(const char *munge
   if (fnmatch(glob, munged_filename, 0) != 0)
     return VISIT_REJECTED;
   else
-    return VISIT_CONTINUE;
+    return VISIT_ACCEPTED;
 }
 
 
@@ -498,7 +543,7 @@ visit_globmatch_casefold(const char *mun
   if (fnmatch(glob, munged_filename, FNM_CASEFOLD) != 0)
     return VISIT_REJECTED;
   else
-    return VISIT_CONTINUE;
+    return VISIT_ACCEPTED;
 }
 
 
@@ -511,7 +556,7 @@ visit_regex(const char *munged_filename,
   (void) original_filename;
   
   if (0 == regexec(&p->re, munged_filename, 0u, NULL, 0))
-    return VISIT_CONTINUE;     /* match */
+    return VISIT_ACCEPTED;     /* match */
   else
     return VISIT_REJECTED;     /* no match */
 }
@@ -560,7 +605,7 @@ visit_stats(const char *munged_filename,
 /* Emit the statistics.
  */
 static void
-print_stats(size_t database_file_size)
+print_stats(int argc, size_t database_file_size)
 {
   char hbuf[LONGEST_HUMAN_READABLE + 1];
   
@@ -585,6 +630,7 @@ print_stats(size_t database_file_size)
         human_readable (statistics.highbit_filename_count,
                         hbuf, human_ceiling, 1, 1));
   
+  if (!argc)
   printf(_("Compression ratio %4.2f%%\n"),
         100.0 * ((double)statistics.total_filename_length
                  - (double) database_file_size)
@@ -593,20 +639,23 @@ print_stats(size_t database_file_size)
 }
 
 
-/* Print the entries in DBFILE that match shell globbing pattern PATHPART.
+/* Print the entries in DBFILE that match shell globbing patterns in ARGV.
    Return the number of entries printed.  */
 
 static unsigned long
-new_locate (char *pathpart,
+new_locate (int argc,
+           char **argv,
            char *dbfile,
            int ignore_case,
            int enable_print,
            int basename_only,
            int use_limit,
-           uintmax_t limit,
+           struct locate_limits *plimit,
            int stats,
            int regex)
 {
+  char *pathpart;              /* A pattern to consider. */
+  int argn;                    /* Index to current pattern in argv. */
   FILE *fp;                    /* The pathname database.  */
   int c;                       /* An input byte.  */
   int nread;                /* number of bytes read from an entry. */
@@ -616,14 +665,12 @@ new_locate (char *pathpart,
   int count = 0; /* The length of the prefix shared with the previous database 
entry.  */
   
   int old_format = 0; /* true if reading a bigram-encoded database.  */
+  struct visitor* pvis; /* temp for determining past_pat_inspector. */
   
   /* for the old database format,
      the first and second characters of the most common bigrams.  */
   char bigram1[128], bigram2[128];
 
-  /* number of items accepted (i.e. printed) */
-  unsigned long int items_accepted = 0uL;
-
   /* To check the age of the database.  */
   struct stat st;
   time_t now;
@@ -631,14 +678,11 @@ new_locate (char *pathpart,
   /* Set up the inspection regime */
   inspectors = NULL;
   lastinspector = NULL;
+  past_pat_inspector = NULL;
 
-  if (stats)
-    {
-      assert(!use_limit);
-      add_visitor(visit_stats, &statistics);
-    }
-  else
+  for ( argn = 0; argn < argc; argn++ )
     {
+      pathpart = argv[argn];
       if (regex)
        {
          struct regular_expression *p = xmalloc(sizeof(*p));
@@ -676,12 +720,19 @@ new_locate (char *pathpart,
              cf->buffer = NULL;
              cf->buffersize = 0;
              add_visitor(visit_substring_match_casefold, cf);
+             /* If we ignore case, convert it to lower now so we don't have to
+              * do it every time
+              */
+             lc_strcpy(pathpart, pathpart);
            }
          else
            {
              add_visitor(visit_substring_match_nocasefold, pathpart);
            }
        }
+    }
+
+  pvis = lastinspector;
 
       /* We add visit_existing_*() as late as possible to reduce the
        * number of stat() calls.
@@ -707,11 +758,19 @@ new_locate (char *pathpart,
          break;
        }
       
+  if (stats)
+    add_visitor(visit_stats, &statistics);
       
       if (enable_print)
        add_visitor(visit_justprint, NULL);
-    }
   
+  if (argc > 1)
+    {
+      past_pat_inspector = pvis->next;
+      mainprocessor = process_or;
+    }
+  else
+    mainprocessor = process_simple;
 
   if (stat (dbfile, &st) || (fp = fopen (dbfile, "r")) == NULL)
     {
@@ -752,18 +811,8 @@ new_locate (char *pathpart,
               old_format ? _("old") : "LOCATE02");
     }
   
-  /* If we ignore case, convert it to lower first so we don't have to
-   * do it every time
-   */
-  if (!stats && ignore_case)
-    {
-      lc_strcpy(pathpart, pathpart);
-    }
-  
-  items_accepted = 0;
-
   c = getc (fp);
-  while ( (c != EOF) && (!use_limit || (limit > 0)) )
+  while ( (c != EOF) && (!use_limit || (plimit->limit > 0)) )
     {
       register char *s;                /* Scan the path we read in.  */
 
@@ -818,9 +867,9 @@ new_locate (char *pathpart,
        }
 
       testpath = basename_only ? base_name(path) : path;
-      if (VISIT_ACCEPTED == process_filename(testpath, path))
+      if ((VISIT_ACCEPTED | VISIT_CONTINUE) & (mainprocessor)(testpath, path))
        {
-         if ((++items_accepted >= limit) && use_limit)
+         if ((++plimit->items_accepted >= plimit->limit) && use_limit)
            {
              break;
            }
@@ -830,7 +879,7 @@ new_locate (char *pathpart,
       
   if (stats)
     {
-      print_stats(st.st_size);
+      print_stats(argc, st.st_size);
     }
   
   if (ferror (fp))
@@ -844,7 +893,7 @@ new_locate (char *pathpart,
       return 0;
     }
 
-  return items_accepted;
+  return plimit->items_accepted;
 }
 
 
@@ -864,7 +913,7 @@ Usage: %s [-d path | --database=path] [-
       [-i | --ignore-case] [-w | --wholename] [-b | --basename] \n\
       [--limit=N | -l N] [-S | --statistics] [-0 | --null] [-c | --count]\n\
       [-P | -H | --nofollow] [-L | --follow] [-m | --mmap ] [ -s | --stdio ]\n\
-      [-r | --regex ] [--version] [--help] pattern...\n"),
+      [-p | --print] [-r | --regex ] [--version] [--help] pattern...\n"),
           program_name);
   fputs (_("\nReport bugs to <address@hidden>.\n"), stream);
 }
@@ -882,6 +931,7 @@ static struct option const longopts[] =
   {"wholename", no_argument, NULL, 'w'},
   {"wholepath", no_argument, NULL, 'w'}, /* Synonym. */
   {"basename", no_argument, NULL, 'b'},
+  {"print", no_argument, NULL, 'p'},
   {"stdio", no_argument, NULL, 's'},
   {"mmap",  no_argument, NULL, 'm'},
   {"limit",  required_argument, NULL, 'l'},
@@ -901,13 +951,13 @@ main (argc, argv)
   unsigned long int found = 0uL;
   int optc;
   int ignore_case = 0;
-  int print = 1;
+  int print = 0;
   int just_count = 0;
   int basename_only = 0;
-  uintmax_t limit = 0;
   int use_limit = 0;
   int regex = 0;
   int stats = 0;
+  char *e;
   
   program_name = argv[0];
 
@@ -918,13 +968,16 @@ main (argc, argv)
   textdomain (PACKAGE);
   atexit (close_stdout);
 
+  limits.limit = 0;
+  limits.items_accepted = 0;
+
   dbpath = getenv ("LOCATE_PATH");
   if (dbpath == NULL)
     dbpath = LOCATE_DB;
 
   check_existence = ACCEPT_EITHER;
 
-  while ((optc = getopt_long (argc, argv, "bcd:eEil:rsm0SwHPL", longopts, (int 
*) 0)) != -1)
+  while ((optc = getopt_long (argc, argv, "bcd:eEil:prsm0SwHPL", longopts, 
(int *) 0)) != -1)
     switch (optc)
       {
       case '0':
@@ -937,7 +990,6 @@ main (argc, argv)
 
       case 'c':
        just_count = 1;
-       print = 0;
        break;
 
       case 'd':
@@ -960,6 +1012,10 @@ main (argc, argv)
        usage (stdout);
        return 0;
 
+      case 'p':
+       print = 1;
+       break;
+
       case 'v':
        printf (_("GNU locate version %s\n"), version_string);
        return 0;
@@ -993,7 +1049,7 @@ main (argc, argv)
       case 'l':
        {
          char *end = optarg;
-         strtol_error err = xstrtoumax(optarg, &end, 10, &limit, NULL);
+         strtol_error err = xstrtoumax(optarg, &end, 10, &limits.limit, NULL);
          if (LONGINT_OK != err)
            {
              STRTOL_FATAL_ERROR(optarg, _("argument to --limit"), err);
@@ -1014,26 +1070,24 @@ main (argc, argv)
        return 1;
       }
 
+  if (!just_count && !stats)
+    print = 1;
+
   if (stats)
     {
+      if (optind == argc)
       use_limit = 0;
-      print = 0;
     }
   else
     {
-      if (optind == argc)
+      if (!just_count && optind == argc)
        {
          usage (stderr);
          return 1;
        }
     }
   
-  for (; stats || optind < argc; optind++)
-    {
-      char *e;
-      const char *needle;
       next_element (dbpath, 0);        /* Initialize.  */
-      needle = stats ? NULL : argv[optind];
       while ((e = next_element ((char *) NULL, 0)) != NULL)
        {
          statistics.compressed_bytes = 
@@ -1052,10 +1106,7 @@ main (argc, argv)
              e = LOCATE_DB;
            }
          
-         found += new_locate (needle, e, ignore_case, print, basename_only, 
use_limit, limit, stats, regex);
-       }
-      if (stats)
-       break;
+      found = new_locate (argc - optind, &argv[optind], e, ignore_case, print, 
basename_only, use_limit, &limits, stats, regex);
     }
 
   if (just_count)
@@ -1063,7 +1114,7 @@ main (argc, argv)
       printf("%ld\n", found);
     }
   
-  if (found || (use_limit && (limit==0)) || stats )
+  if (found || (use_limit && (limits.limit==0)) || stats )
     return 0;
   else
     return 1;


L8r,

Buzz.
-- 
  ) |  | ---/ ---/  Yes, this | This message consists of true | I do not
--  |  |   /    /   really is |   and false bits entirely.    | mail for
  ) |  |  /    /    a 72 by 4 +-------------------------------+ any1 but
--  \--| /--- /---  .sigfile. |   |perl -pe "s.u(z)\1.as."    | me. 4^re

Attachment: locate-readonce.diff
Description: Text document


reply via email to

[Prev in Thread] Current Thread [Next in Thread]