bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Patch to gperf 2.7.2 for case-independent keyword matching


From: Bruce Lilly
Subject: Patch to gperf 2.7.2 for case-independent keyword matching
Date: Fri, 15 Feb 2002 01:04:40 -0500

In several important situations, keywords must be matched
independent of case (email header field names, domain names,
etc.).

The attached patch to gperf 2.7.2 provides the option to
generate output which does case-independent matching and
documents that option.

Best regards,
  Bruce Lilly
*** doc/gperf.1.orig    Fri Feb 15 00:33:37 2002
--- doc/gperf.1 Fri Feb 15 00:37:29 2002
***************
*** 54,62 ****
  \fB\-7\fR, \fB\-\-seven\-bit\fR
  Assume 7-bit characters.
  .TP
  \fB\-c\fR, \fB\-\-compare\-strncmp\fR
  Generate comparison code using strncmp rather than
! strcmp.
  .TP
  \fB\-C\fR, \fB\-\-readonly\-tables\fR
  Make the contents of generated lookup tables
--- 54,65 ----
  \fB\-7\fR, \fB\-\-seven\-bit\fR
  Assume 7-bit characters.
  .TP
+ \fB\-A\fR, \fB\-\-ignore\-case\fR
+ Match keys independent of case.
+ .TP
  \fB\-c\fR, \fB\-\-compare\-strncmp\fR
  Generate comparison code using strncmp rather than
! strcmp (strncasecmp if -A is specified).
  .TP
  \fB\-C\fR, \fB\-\-readonly\-tables\fR
  Make the contents of generated lookup tables
*** doc/gperf.texi.orig Fri Feb 15 00:33:59 2002
--- doc/gperf.texi      Fri Feb 15 00:40:19 2002
***************
*** 671,680 ****
  default in versions of @code{gperf} earlier than 2.7; now the default is
  to assume 8-bit characters.
  
  @item -c
  @itemx --compare-strncmp
  Generates C code that uses the @code{strncmp} function to perform
! string comparisons.  The default action is to use @code{strcmp}.
  
  @item -C
  @itemx --readonly-tables
--- 671,685 ----
  default in versions of @code{gperf} earlier than 2.7; now the default is
  to assume 8-bit characters.
  
+ @item -A
+ @itemx --ignore-case
+ Matches keywords in a case-independent manner.
+ 
  @item -c
  @itemx --compare-strncmp
  Generates C code that uses the @code{strncmp} function to perform
! string comparisons (@code{strncasecmp} if -A is specified).  The default
! action is to use @code{strcmp} (@code{strcasecmp} if -A is specified).
  
  @item -C
  @itemx --readonly-tables
*** src/gen-perf.cc.orig        Thu Feb 14 22:19:30 2002
--- src/gen-perf.cc     Thu Feb 14 23:06:54 2002
***************
*** 19,24 ****
--- 19,25 ----
  along with GNU GPERF; see the file COPYING.  If not, write to the Free
  Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111, USA.  */
  
+ #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h> /* declares rand(), srand() */
  #include <time.h> /* declares time() */
***************
*** 61,67 ****
        srand ((long) time (0));
  
        for (int i = 0; i < ALPHA_SIZE; i++)
!         asso_values[i] = (rand () & asso_value_max - 1);
      }
    else
      {
--- 62,71 ----
        srand ((long) time (0));
  
        for (int i = 0; i < ALPHA_SIZE; i++)
!         if ((option[NOCASE] != 0) && (islower(i) != 0))
!           asso_values[i] = asso_values[toupper(i)];
!         else
!           asso_values[i] = (rand () & asso_value_max - 1);
      }
    else
      {
***************
*** 202,207 ****
--- 206,216 ----
              fewest_collisions = collisions;
              if (option[DEBUG])
                fprintf (stderr, "- resolved after %d iterations", 
total_iterations - i);
+             if (option[NOCASE])
+               if (isupper((unsigned char)c))
+                 asso_values[tolower(c)] = asso_values[(unsigned char)c];
+               else if (islower((unsigned char)c))
+                 asso_values[toupper(c)] = asso_values[(unsigned char)c];
              return 0;
            }
      }
*** src/key-list.cc.orig        Thu Feb 14 22:10:58 2002
--- src/key-list.cc     Fri Feb 15 00:00:09 2002
***************
*** 972,977 ****
--- 972,1023 ----
    printf ("[len] == '\\0'");
  }
  
+ /* This class outputs a comparison using strcasecmp. */
+ 
+ struct Output_Compare_Strcasecmp : public Output_Compare
+ {
+   virtual void output_comparison (const Output_Expr& expr1,
+                                   const Output_Expr& expr2) const;
+   Output_Compare_Strcasecmp () {}
+   virtual ~Output_Compare_Strcasecmp () {}
+ };
+ 
+ void Output_Compare_Strcasecmp::output_comparison (const Output_Expr& expr1,
+                                                const Output_Expr& expr2) const
+ {
+   T (Trace t ("Output_Compare_Strcasecmp::output_comparison");)
+   printf ("!strcasecmp (");
+   expr1.output_expr ();
+   printf (", ");
+   expr2.output_expr ();
+   printf (")");
+ }
+ 
+ /* This class outputs a comparison using strncasecmp.
+    Note that the length of expr1 will be available through the local variable
+    `len'. */
+ 
+ struct Output_Compare_Strncasecmp : public Output_Compare
+ {
+   virtual void output_comparison (const Output_Expr& expr1,
+                                   const Output_Expr& expr2) const;
+   Output_Compare_Strncasecmp () {}
+   virtual ~Output_Compare_Strncasecmp () {}
+ };
+ 
+ void Output_Compare_Strncasecmp::output_comparison (const Output_Expr& expr1,
+                                                 const Output_Expr& expr2) 
const
+ {
+   T (Trace t ("Output_Compare_Strncasecmp::output_comparison");)
+   printf ("!strncasecmp (");
+   expr1.output_expr ();
+   printf (", ");
+   expr2.output_expr ();
+   printf (", len) && ");
+   expr2.output_expr ();
+   printf ("[len] == '\\0'");
+ }
+ 
  /* This class outputs a comparison using memcmp.
     Note that the length of expr1 (available through the local variable `len')
     must be verified to be equal to the length of expr2 prior to this
***************
*** 1009,1015 ****
  Key_List::output_hash_function (void)
  {
    T (Trace t ("Key_List::output_hash_function");)
!   const int max_column  = 10;
    int field_width;
  
    /* Calculate maximum number of digits required for MAX_HASH_VALUE. */
--- 1055,1061 ----
  Key_List::output_hash_function (void)
  {
    T (Trace t ("Key_List::output_hash_function");)
!   const int max_column  = 8;
    int field_width;
  
    /* Calculate maximum number of digits required for MAX_HASH_VALUE. */
***************
*** 1063,1070 ****
      {
        if (count > 0)
          printf (",");
!       if (!(count % max_column))
          printf ("\n     ");
        printf ("%*d", field_width,
                occurrences[count] ? asso_values[count] : max_hash_value + 1);
      }
--- 1109,1119 ----
      {
        if (count > 0)
          printf (",");
!       if (!(count % max_column)) {
!         if (isalnum(count) && isalnum(count+1-max_column))
!           printf ("\t/* %c - %c */", count+1-max_column, count);
          printf ("\n     ");
+       }
        printf ("%*d", field_width,
                occurrences[count] ? asso_values[count] : max_hash_value + 1);
      }
***************
*** 2014,2021 ****
      output_lookup_function_body (Output_Compare_Memcmp ());
    else
      {
!       if (option[COMP])
!         output_lookup_function_body (Output_Compare_Strncmp ());
        else
          output_lookup_function_body (Output_Compare_Strcmp ());
      }
--- 2063,2075 ----
      output_lookup_function_body (Output_Compare_Memcmp ());
    else
      {
!       if (option[COMP]) {
!         if (option[NOCASE])
!           output_lookup_function_body (Output_Compare_Strncasecmp ());
!         else
!           output_lookup_function_body (Output_Compare_Strncmp ());
!       } else if (option[NOCASE])
!         output_lookup_function_body (Output_Compare_Strcasecmp ());
        else
          output_lookup_function_body (Output_Compare_Strcmp ());
      }
*** src/list-node.cc.orig       Thu Feb 14 23:09:40 2002
--- src/list-node.cc    Fri Feb 15 00:27:05 2002
***************
*** 20,25 ****
--- 20,26 ----
  
  #include "list-node.h"
  
+ #include <ctype.h>
  #include <stdio.h>
  #include <stdlib.h> /* declares exit() */
  #include "options.h"
***************
*** 66,73 ****
    int i;
  
    if (option[ALLCHARS])         /* Use all the character positions in the 
KEY. */
!     for (i = len; i > 0; k++, ptr++, i--)
        ++occurrences[(unsigned char)(*ptr = *k)];
    else                          /* Only use those character positions 
specified by the user. */
      {
        /* Iterate through the list of key_positions, initializing occurrences 
table
--- 67,80 ----
    int i;
  
    if (option[ALLCHARS])         /* Use all the character positions in the 
KEY. */
!     for (i = len; i > 0; k++, ptr++, i--) {
        ++occurrences[(unsigned char)(*ptr = *k)];
+       if (option[NOCASE])
+         if (isupper(*k))
+           ++occurrences[tolower(*k)];
+         else if (islower(*k))
+           ++occurrences[toupper(*k)];
+     }
    else                          /* Only use those character positions 
specified by the user. */
      {
        /* Iterate through the list of key_positions, initializing occurrences 
table
***************
*** 81,87 ****
              *ptr = key[i - 1];
            else                  /* Out of range of KEY length, so we'll just 
skip it. */
              continue;
!           ++occurrences[(unsigned char)(*ptr++)];
          }
  
        /* Didn't get any hits and user doesn't want to consider the
--- 88,100 ----
              *ptr = key[i - 1];
            else                  /* Out of range of KEY length, so we'll just 
skip it. */
              continue;
!           ++occurrences[(unsigned char)(*ptr)];
!           if (option[NOCASE])
!             if (isupper(*ptr))
!               ++occurrences[tolower(*ptr)];
!             else if (islower(*ptr))
!               ++occurrences[toupper(*ptr)];
!           ++ptr;
          }
  
        /* Didn't get any hits and user doesn't want to consider the
*** src/options.cc.orig Thu Feb 14 21:02:42 2002
--- src/options.cc      Thu Feb 14 21:19:56 2002
***************
*** 83,89 ****
  Options::short_usage (FILE * strm)
  {
    T (Trace t ("Options::short_usage");)
!   fprintf (strm, "Usage: %s 
[-cCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function
 name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
                   "Try `%s --help' for more information.\n",
                   program_name, program_name);
  }
--- 83,89 ----
  Options::short_usage (FILE * strm)
  {
    T (Trace t ("Options::short_usage");)
!   fprintf (strm, "Usage: %s 
[-aAcCdDef[num]F<initializers>GhH<hashname>i<init>Ijk<keys>K<keyname>lL<language>nN<function
 name>ors<size>S<switches>tTvW<wordlistname>Z<class name>7] [input-file]\n"
                   "Try `%s --help' for more information.\n",
                   program_name, program_name);
  }
***************
*** 133,139 ****
             "                         `Perfect_Hash'.\n"
             "  -7, --seven-bit        Assume 7-bit characters.\n"
             "  -c, --compare-strncmp  Generate comparison code using strncmp 
rather than\n"
!            "                         strcmp.\n"
             "  -C, --readonly-tables  Make the contents of generated lookup 
tables\n"
             "                         constant, i.e., readonly.\n"
             "  -E, --enum             Define constant values using an enum 
local to the\n"
--- 133,140 ----
             "                         `Perfect_Hash'.\n"
             "  -7, --seven-bit        Assume 7-bit characters.\n"
             "  -c, --compare-strncmp  Generate comparison code using strncmp 
rather than\n"
!            "                         strcmp, or strncasecmp rather than 
strcasecmp if -A\n"
!            "                         is specified.\n"
             "  -C, --readonly-tables  Make the contents of generated lookup 
tables\n"
             "                         constant, i.e., readonly.\n"
             "  -E, --enum             Define constant values using an enum 
local to the\n"
***************
*** 160,165 ****
--- 161,170 ----
             "                         Prevents the transfer of the type 
declaration to the\n"
             "                         output file. Use this option if the type 
is already\n"
             "                         defined elsewhere.\n"
+            "  -A, --ignore-case      Match keywords in a case-independent 
manner. String\n"
+            "                         comparisons are made with strcasecmp 
(strncasecmp if\n"
+            "                         -c is specified) and the associate 
values are symmetric\n"
+            "                         with respect to lower-ase and upper-case 
ASCII indices.\n"
             "\n"
             "Algorithm employed by gperf:\n"
             "  -k, --key-positions=KEYS\n"
***************
*** 355,360 ****
--- 360,366 ----
                 "\nENUM is........: %s"
                 "\nINCLUDE is.....: %s"
                 "\nSEVENBIT is....: %s"
+                "\nNOCASE is......: %s"
                 "\niterations = %d"
                 "\nlookup function name = %s"
                 "\nhash function name = %s"
***************
*** 387,392 ****
--- 393,399 ----
                 option_word & ENUM ? "enabled" : "disabled",
                 option_word & INCLUDE ? "enabled" : "disabled",
                 option_word & SEVENBIT ? "enabled" : "disabled",
+                option_word & NOCASE ? "enabled" : "disabled",
                 iterations,
                 function_name, hash_name, wordlist_name, key_name,
                 initializer_suffix, jump, size - 1, initial_asso_value,
***************
*** 421,426 ****
--- 428,434 ----
    { "lookup-fn-name", required_argument, 0, 'N' },
    { "class-name", required_argument, 0, 'Z' },
    { "seven-bit", no_argument, 0, '7' },
+   { "ignore-case", no_argument, 0, 'A' },
    { "compare-strncmp", no_argument, 0, 'c' },
    { "readonly-tables", no_argument, 0, 'C' },
    { "enum", no_argument, 0, 'E' },
***************
*** 457,463 ****
  
    while ((option_char =
              getopt_long (argument_count, argument_vector,
!                          "adcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7",
                           long_options, (int *)0))
           != -1)
      {
--- 465,471 ----
  
    while ((option_char =
              getopt_long (argument_count, argument_vector,
!                          "aAdcCDe:Ef:F:gGhH:i:Ij:k:K:lL:nN:oprs:S:tTvW:Z:7",
                           long_options, (int *)0))
           != -1)
      {
***************
*** 465,470 ****
--- 473,483 ----
          {
          case 'a':               /* Generated code uses the ANSI prototype 
format. */
            break;                /* This is now the default. */
+         case 'A':               /* Generate strcasecmp rather than strcmp, 
make associate values case-symmetric. */
+           {
+             option_word |= NOCASE;
+             break;
+           }
          case 'c':               /* Generate strncmp rather than strcmp. */
            {
              option_word |= COMP;
*** src/options.h.orig  Thu Feb 14 21:09:03 2002
--- src/options.h       Thu Feb 14 21:11:09 2002
***************
*** 59,65 ****
    CPLUSPLUS    = 01000000,      /* Generate C++ code: prototypes, const, 
class, inline, enum. */
    ENUM         = 02000000,      /* Use enum for constants. */
    INCLUDE      = 04000000,      /* Generate #include statements. */
!   SEVENBIT     = 010000000      /* Assume 7-bit, not 8-bit, characters. */
  };
  
  /* Define some useful constants (these don't really belong here, but I'm
--- 59,66 ----
    CPLUSPLUS    = 01000000,      /* Generate C++ code: prototypes, const, 
class, inline, enum. */
    ENUM         = 02000000,      /* Use enum for constants. */
    INCLUDE      = 04000000,      /* Generate #include statements. */
!   SEVENBIT     = 010000000,     /* Assume 7-bit, not 8-bit, characters. */
!   NOCASE       = 020000000    /* Case-independent comparisons */
  };
  
  /* Define some useful constants (these don't really belong here, but I'm

reply via email to

[Prev in Thread] Current Thread [Next in Thread]