bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Patch to gperf 2.7.2 for case-independent keyword matching


From: Bruce Lilly
Subject: Re: Patch to gperf 2.7.2 for case-independent keyword matching
Date: Wed, 20 Feb 2002 09:28:05 -0500

Bruce Lilly wrote:
> >
> > In several important situations, keywords must be matched
> > independent of case (email header field names, domain names,
> > etc.).
> >
> > The attached patch to gperf 2.7.2 provides the option to
> > generate output which does case-independent matching and
> > documents that option.
> >
> > Best regards,
> >   Bruce Lilly
> 

This is a revised patch to the original gperf 2.7.2 files.
The first set of patches sometimes generated a collision
for large keyword sets (could generally be worked around
with -r) and had a typo in the usage message.  In addition,
this runs a bit faster.

As with the earlier patches, the performance of the case-
independent lookups is an improvement over alternative
methods (try it several ways to convince yourself).
*** doc/gperf.1.orig    Fri Feb 15 00:33:37 2002
--- doc/gperf.1 Fri Feb 15 00:37:30 2002
***************
*** 54,62 ****
  \fB\-7\fR, \fB\-\-seven\-bit\fR
  Assume 7-bit characters.
  .TP
  \fB\-c\fR, \fB\-\-compare\-strncmp\fR
  Generate comparison code using strncmp rather than
! strcmp.
  .TP
  \fB\-C\fR, \fB\-\-readonly\-tables\fR
  Make the contents of generated lookup tables
--- 54,65 ----
  \fB\-7\fR, \fB\-\-seven\-bit\fR
  Assume 7-bit characters.
  .TP
+ \fB\-A\fR, \fB\-\-ignore\-case\fR
+ Match keys independent of case.
+ .TP
  \fB\-c\fR, \fB\-\-compare\-strncmp\fR
  Generate comparison code using strncmp rather than
! strcmp (strncasecmp if -A is specified).
  .TP
  \fB\-C\fR, \fB\-\-readonly\-tables\fR
  Make the contents of generated lookup tables
*** doc/gperf.texi.orig Fri Feb 15 00:33:59 2002
--- doc/gperf.texi      Fri Feb 15 00:40:18 2002
***************
*** 671,680 ****
  default in versions of @code{gperf} earlier than 2.7; now the default is
  to assume 8-bit characters.
  
  @item -c
  @itemx --compare-strncmp
  Generates C code that uses the @code{strncmp} function to perform
! string comparisons.  The default action is to use @code{strcmp}.
  
  @item -C
  @itemx --readonly-tables
--- 671,685 ----
  default in versions of @code{gperf} earlier than 2.7; now the default is
  to assume 8-bit characters.
  
+ @item -A
+ @itemx --ignore-case
+ Matches keywords in a case-independent manner.
+ 
  @item -c
  @itemx --compare-strncmp
  Generates C code that uses the @code{strncmp} function to perform
! string comparisons (@code{strncasecmp} if -A is specified).  The default
! action is to use @code{strcmp} (@code{strcasecmp} if -A is specified).
  
  @item -C
  @itemx --readonly-tables
*** src/gen-perf.cc.orig        Thu Feb 14 22:19:30 2002
--- src/gen-perf.cc     Wed Feb 20 09:00:10 2002
***************
*** 19,24 ****
--- 19,25 ----
  along with GNU GPERF; see the file COPYING.  If not, write to the Free
  Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA.  */
  
+ #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h> /* declares rand(), srand() */
  #include <time.h> /* declares time() */
***************
*** 61,67 ****
        srand ((long) time (0));
  
        for (int i = 0; i < ALPHA_SIZE; i++)
!         asso_values[i] = (rand () & asso_value_max - 1);
      }
    else
      {
--- 62,71 ----
        srand ((long) time (0));
  
        for (int i = 0; i < ALPHA_SIZE; i++)
!         if ((option[NOCASE] != 0) && (islower(i) != 0))
!           asso_values[i] = asso_values[toupper(i)];
!         else
!           asso_values[i] = (rand () & asso_value_max - 1);
      }
    else
      {
***************
*** 187,192 ****
--- 191,201 ----
        asso_values[(unsigned char)c] =
          (asso_values[(unsigned char)c] + (option.get_jump () ? 
option.get_jump () : rand ()))
          & (option.get_asso_max () - 1);
+       if (option[NOCASE])
+         if (isupper((unsigned char)c))
+           asso_values[tolower(c)] = asso_values[(unsigned char)c];
+         else if (islower((unsigned char)c))
+           asso_values[toupper(c)] = asso_values[(unsigned char)c];
  
        /* Iteration Number array is a win, O(1) intialization time! */
        reset ();
***************
*** 208,213 ****
--- 217,227 ----
  
    /* Restore original values, no more tries. */
    asso_values[(unsigned char)c] = original_char;
+   if (option[NOCASE])
+     if (isupper((unsigned char)c))
+       asso_values[tolower(c)] = original_char;
+     else if (islower((unsigned char)c))
+       asso_values[toupper(c)] = original_char;
    /* If we're this far it's time to try the next character.... */
    return 1;
  }
*** src/key-list.cc.orig        Thu Feb 14 22:10:58 2002
--- src/key-list.cc     Fri Feb 15 01:32:32 2002
***************
*** 972,977 ****
--- 972,1023 ----
    printf ("[len] == '\\0'");
  }
  
+ /* This class outputs a comparison using strcasecmp. */
+ 
+ struct Output_Compare_Strcasecmp : public Output_Compare
+ {
+   virtual void output_comparison (const Output_Expr& expr1,
+                                   const Output_Expr& expr2) const;
+   Output_Compare_Strcasecmp () {}
+   virtual ~Output_Compare_Strcasecmp () {}
+ };
+ 
+ void Output_Compare_Strcasecmp::output_comparison (const Output_Expr& expr1,
+                                                const Output_Expr& expr2) const
+ {
+   T (Trace t ("Output_Compare_Strcasecmp::output_comparison");)
+   printf ("!strcasecmp (");
+   expr1.output_expr ();
+   printf (", ");
+   expr2.output_expr ();
+   printf (")");
+ }
+ 
+ /* This class outputs a comparison using strncasecmp.
+    Note that the length of expr1 will be available through the local variable
+    `len'. */
+ 
+ struct Output_Compare_Strncasecmp : public Output_Compare
+ {
+   virtual void output_comparison (const Output_Expr& expr1,
+                                   const Output_Expr& expr2) const;
+   Output_Compare_Strncasecmp () {}
+   virtual ~Output_Compare_Strncasecmp () {}
+ };
+ 
+ void Output_Compare_Strncasecmp::output_comparison (const Output_Expr& expr1,
+                                                 const Output_Expr& expr2) 
const
+ {
+   T (Trace t ("Output_Compare_Strncasecmp::output_comparison");)
+   printf ("!strncasecmp (");
+   expr1.output_expr ();
+   printf (", ");
+   expr2.output_expr ();
+   printf (", len) && ");
+   expr2.output_expr ();
+   printf ("[len] == '\\0'");
+ }
+ 
  /* This class outputs a comparison using memcmp.
     Note that the length of expr1 (available through the local variable `len')
     must be verified to be equal to the length of expr2 prior to this
***************
*** 1009,1015 ****
  Key_List::output_hash_function (void)
  {
    T (Trace t ("Key_List::output_hash_function");)
!   const int max_column  = 10;
    int field_width;
  
    /* Calculate maximum number of digits required for MAX_HASH_VALUE. */
--- 1055,1061 ----
  Key_List::output_hash_function (void)
  {
    T (Trace t ("Key_List::output_hash_function");)
!   const int max_column  = 8;
    int field_width;
  
    /* Calculate maximum number of digits required for MAX_HASH_VALUE. */
***************
*** 1063,1070 ****
      {
        if (count > 0)
          printf (",");
!       if (!(count % max_column))
          printf ("\n     ");
        printf ("%*d", field_width,
                occurrences[count] ? asso_values[count] : max_hash_value + 1);
      }
--- 1109,1119 ----
      {
        if (count > 0)
          printf (",");
!       if (!(count % max_column)) {
!         if (count && isalnum(count-1) && isalnum(count-max_column))
!           printf ("\t/* %c - %c */", count-max_column, count-1);
          printf ("\n     ");
+       }
        printf ("%*d", field_width,
                occurrences[count] ? asso_values[count] : max_hash_value + 1);
      }
***************
*** 2014,2021 ****
      output_lookup_function_body (Output_Compare_Memcmp ());
    else
      {
!       if (option[COMP])
!         output_lookup_function_body (Output_Compare_Strncmp ());
        else
          output_lookup_function_body (Output_Compare_Strcmp ());
      }
--- 2063,2075 ----
      output_lookup_function_body (Output_Compare_Memcmp ());
    else
      {
!       if (option[COMP]) {
!         if (option[NOCASE])
!           output_lookup_function_body (Output_Compare_Strncasecmp ());
!         else
!           output_lookup_function_body (Output_Compare_Strncmp ());
!       } else if (option[NOCASE])
!         output_lookup_function_body (Output_Compare_Strcasecmp ());
        else
          output_lookup_function_body (Output_Compare_Strcmp ());
      }
*** src/list-node.cc.orig       Thu Feb 14 23:09:40 2002
--- src/list-node.cc    Fri Feb 15 00:27:06 2002
***************
*** 20,25 ****
--- 20,26 ----
  
  #include "list-node.h"
  
+ #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h> /* declares exit() */
  #include "options.h"
***************
*** 66,73 ****
    int i;
  
    if (option[ALLCHARS])         /* Use all the character positions in the 
KEY. */
!     for (i = len; i > 0; k++, ptr++, i--)
        ++occurrences[(unsigned char)(*ptr = *k)];
    else                          /* Only use those character positions 
specified by the user. */
      {
        /* Iterate through the list of key_positions, initializing occurrences 
table
--- 67,80 ----
    int i;
  
    if (option[ALLCHARS])         /* Use all the character positions in the 
KEY. */
!     for (i = len; i > 0; k++, ptr++, i--) {
        ++occurrences[(unsigned char)(*ptr = *k)];
+       if (option[NOCASE])
+         if (isupper(*k))
+           ++occurrences[tolower(*k)];
+         else if (islower(*k))
+           ++occurrences[toupper(*k)];
+     }
    else                          /* Only use those character positions 
specified by the user. */
      {
        /* Iterate through the list of key_positions, initializing occurrences 
table
***************
*** 81,87 ****
              *ptr = key[i - 1];
            else                  /* Out of range of KEY length, so we'll just 
skip it. */
              continue;
!           ++occurrences[(unsigned char)(*ptr++)];
          }
  
        /* Didn't get any hits and user doesn't want to consider the
--- 88,100 ----
              *ptr = key[i - 1];
            else                  /* Out of range of KEY length, so we'll just 
skip it. */
              continue;
!           ++occurrences[(unsigned char)(*ptr)];
!           if (option[NOCASE])
!             if (isupper(*ptr))
!               ++occurrences[tolower(*ptr)];
!             else if (islower(*ptr))
!               ++occurrences[toupper(*ptr)];
!           ++ptr;
          }
  
        /* Didn't get any hits and user doesn't want to consider the
*** src/options.cc.orig Thu Feb 14 21:02:42 2002
--- src/options.cc      Thu Feb 14 21:19:56 2002
***************
*** 83,89 ****
  Options::short_usage (FILE * strm)
  {
    T (Trace t ("Options::short_usage");)
!   fprintf (strm, "Usage: %s 
[-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function
 name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
                   "Try `%s --help' for more information.\n",
                   program_name, program_name);
  }
--- 83,89 ----
  Options::short_usage (FILE * strm)
  {
    T (Trace t ("Options::short_usage");)
!   fprintf (strm, "Usage: %s 
[-aAcCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function
 name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
                   "Try `%s --help' for more information.\n",
                   program_name, program_name);
  }
***************
*** 133,139 ****
             "                         `Perfect_Hash'.\n"
             "  -7, --seven-bit        Assume 7-bit characters.\n"
             "  -c, --compare-strncmp  Generate comparison code using strncmp 
rather than\n"
!            "                         strcmp.\n"
             "  -C, --readonly-tables  Make the contents of generated lookup 
tables\n"
             "                         constant, i.e., readonly.\n"
             "  -E, --enum             Define constant values using an enum 
local to the\n"
--- 133,140 ----
             "                         `Perfect_Hash'.\n"
             "  -7, --seven-bit        Assume 7-bit characters.\n"
             "  -c, --compare-strncmp  Generate comparison code using strncmp 
rather than\n"
!            "                         strcmp, or strncasecmp rather than 
strcasecmp if -A\n"
!            "                         is specified.\n"
             "  -C, --readonly-tables  Make the contents of generated lookup 
tables\n"
             "                         constant, i.e., readonly.\n"
             "  -E, --enum             Define constant values using an enum 
local to the\n"
***************
*** 160,165 ****
--- 161,170 ----
             "                         Prevents the transfer of the type 
declaration to the\n"
             "                         output file. Use this option if the type 
is already\n"
             "                         defined elsewhere.\n"
+            "  -A, --ignore-case      Match keywords in a case-independent 
manner. String\n"
+            "                         comparisons are made with strcasecmp 
(strncasecmp if\n"
+            "                         -c is specified) and the associate 
values are symmetric\n"
+            "                         with respect to lower-case and 
upper-case ASCII indices.\n"
             "\n"
             "Algorithm employed by gperf:\n"
             "  -k, --key-positions=KEYS\n"
***************
*** 355,360 ****
--- 360,366 ----
                 "\nENUM is........: %s"
                 "\nINCLUDE is.....: %s"
                 "\nSEVENBIT is....: %s"
+                "\nNOCASE is......: %s"
                 "\niterations = %d"
                 "\nlookup function name = %s"
                 "\nhash function name = %s"
***************
*** 387,392 ****
--- 393,399 ----
                 option_word & ENUM ? "enabled" : "disabled",
                 option_word & INCLUDE ? "enabled" : "disabled",
                 option_word & SEVENBIT ? "enabled" : "disabled",
+                option_word & NOCASE ? "enabled" : "disabled",
                 iterations,
                 function_name, hash_name, wordlist_name, key_name,
                 initializer_suffix, jump, size - 1, initial_asso_value,
***************
*** 421,426 ****
--- 428,434 ----
    { "lookup-fn-name", required_argument, 0, 'N' },
    { "class-name", required_argument, 0, 'Z' },
    { "seven-bit", no_argument, 0, '7' },
+   { "ignore-case", no_argument, 0, 'A' },
    { "compare-strncmp", no_argument, 0, 'c' },
    { "readonly-tables", no_argument, 0, 'C' },
    { "enum", no_argument, 0, 'E' },
***************
*** 457,463 ****
  
    while ((option_char =
              getopt_long (argument_count, argument_vector,
!                          "adcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7",
                           long_options, (int *)0))
           != -1)
      {
--- 465,471 ----
  
    while ((option_char =
              getopt_long (argument_count, argument_vector,
!                          "aAdcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7",
                           long_options, (int *)0))
           != -1)
      {
***************
*** 465,470 ****
--- 473,483 ----
          {
          case 'a':               /* Generated code uses the ANSI prototype 
format. */
            break;                /* This is now the default. */
+         case 'A':               /* Generate strcasecmp rather than strcmp, 
make associate values case-symmetric. */
+           {
+             option_word |= NOCASE;
+             break;
+           }
          case 'c':               /* Generate strncmp rather than strcmp. */
            {
              option_word |= COMP;
*** src/options.h.orig  Thu Feb 14 21:09:03 2002
--- src/options.h       Thu Feb 14 21:11:08 2002
***************
*** 59,65 ****
    CPLUSPLUS    = 01000000,      /* Generate C++ code: prototypes, const, 
class, inline, enum. */
    ENUM         = 02000000,      /* Use enum for constants. */
    INCLUDE      = 04000000,      /* Generate #include statements. */
!   SEVENBIT     = 010000000      /* Assume 7-bit, not 8-bit, characters. */
  };
  
  /* Define some useful constants (these don't really belong here, but I'm
--- 59,66 ----
    CPLUSPLUS    = 01000000,      /* Generate C++ code: prototypes, const, 
class, inline, enum. */
    ENUM         = 02000000,      /* Use enum for constants. */
    INCLUDE      = 04000000,      /* Generate #include statements. */
!   SEVENBIT     = 010000000,     /* Assume 7-bit, not 8-bit, characters. */
!   NOCASE       = 020000000    /* Case-independent comparisons */
  };
  
  /* Define some useful constants (these don't really belong here, but I'm

reply via email to

[Prev in Thread] Current Thread [Next in Thread]