m4-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: poor m4 hash performance


From: Eric Blake
Subject: Re: poor m4 hash performance
Date: Sun, 04 Jun 2006 17:33:35 -0600
User-agent: Thunderbird 1.5.0.4 (Windows/20060516)

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

According to Eric Blake on 6/4/2006 4:09 PM:
> 
> For CVS coreutils, with 83303 definitions and 2526066 lookups, using
> -H99991 reduced comparisons from 9364329 (36022688 bytes, 3.5 comparisons
> per lookup) to 2415937 (22883978 bytes, less than 1 compare per lookup),
> but never bought me much more than .5 sec out of 30 on my machine (1.5 %
> speedup, but that was with naive profiling turned on rather than an
> optimized strcmp).  So having autom4te play with -H may not buy us much.
> I'll report separately about potential m4 1.4.5 improvements; this mail is
> just limited to the framework for testing the various ideas.

This patch stores string hashes, to avoid strcmp.  With the default 509
buckets on CVS coreutils, it causes only 2362141 strcmp, with only 25
failed comparisons.  There are still 22829893 bytes compared (ie. the 2.3M
successful lookups results in 22M of length, or about 9 characters per
average token); this shaved only 60k of comparisons from the earlier run
with -H99991 buckets without this patch.  Comparing the time for m4 1.4.4
and my version of m4 with this patch, both compiled at -O2, still shows no
noticeable difference in timing of autoconf.  But keep in mind that
autoconf is also invoking several other processes, perhaps the actual m4
invocations are noticeably faster.  So I don't know whether it is worth
applying this patch.

2006-06-04  Eric Blake  <address@hidden>

        * src/m4.h (struct symbol, SYMBOL_HASH): Store symbol's hash.
        * src/symtab.c (hash): Don't perform modulo here.
        (lookup_symbol): Store hash, use it to reduce strcmp calls.
        (symtab_print_list) [DEBUG_SYM]: Display hash.

- --
Life is short - so eat dessert first!

Eric Blake             address@hidden
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.2.1 (Cygwin)
Comment: Public key at home.comcast.net/~ericblake/eblake.gpg
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org

iD8DBQFEg23O84KuGfSFAYARAoj/AJ9L5ujZrluhCIIp/lJ8s03XsGQyewCfal77
rAnmz/AeBwETqyymOzApHZ8=
=+S6M
-----END PGP SIGNATURE-----
Index: src/m4.h
===================================================================
RCS file: /sources/m4/m4/src/m4.h,v
retrieving revision 1.1.1.1.2.5
diff -u -p -r1.1.1.1.2.5 m4.h
--- src/m4.h    27 May 2006 18:11:23 -0000      1.1.1.1.2.5
+++ src/m4.h    4 Jun 2006 23:31:33 -0000
@@ -382,6 +382,7 @@ struct symbol
   boolean blind_no_args;
 
   char *name;
+  unsigned hash;
   token_data data;
 };
 
@@ -391,6 +392,7 @@ struct symbol
 #define SYMBOL_MACRO_ARGS(S)   ((S)->macro_args)
 #define SYMBOL_BLIND_NO_ARGS(S)        ((S)->blind_no_args)
 #define SYMBOL_NAME(S)         ((S)->name)
+#define SYMBOL_HASH(S)         ((S)->hash)
 #define SYMBOL_TYPE(S)         (TOKEN_DATA_TYPE (&(S)->data))
 #define SYMBOL_TEXT(S)         (TOKEN_DATA_TEXT (&(S)->data))
 #define SYMBOL_FUNC(S)         (TOKEN_DATA_FUNC (&(S)->data))
Index: src/symtab.c
===================================================================
RCS file: /sources/m4/m4/src/Attic/symtab.c,v
retrieving revision 1.1.1.1.2.3
diff -u -p -r1.1.1.1.2.3 symtab.c
--- src/symtab.c        4 Jun 2006 22:09:30 -0000       1.1.1.1.2.3
+++ src/symtab.c        4 Jun 2006 23:31:33 -0000
@@ -117,10 +117,10 @@ symtab_init (void)
 | Return a hashvalue for a string, from GNU-emacs.  |
 `--------------------------------------------------*/
 
-static int
+static unsigned
 hash (const char *s)
 {
-  register int val = 0;
+  register unsigned val = 0;
 
   register const char *ptr = s;
   register char ch;
@@ -131,8 +131,7 @@ hash (const char *s)
        ch -= 40;
       val = ((val << 3) + (val >> 28) + ch);
     };
-  val = (val < 0) ? -val : val;
-  return val % hash_table_size;
+  return val;
 }
 
 /*--------------------------------------------.
@@ -165,7 +164,8 @@ free_symbol (symbol *sym)
 symbol *
 lookup_symbol (const char *name, symbol_lookup mode)
 {
-  int h, cmp = 1;
+  unsigned h;
+  int cmp = 1;
   symbol *sym, *prev;
   symbol **spp;
 
@@ -175,11 +175,13 @@ lookup_symbol (const char *name, symbol_
 #endif /* DEBUG_SYM */
 
   h = hash (name);
-  sym = symtab[h];
+  sym = symtab[h % hash_table_size];
 
   for (prev = NULL; sym != NULL; prev = sym, sym = sym->next)
     {
-      cmp = strcmp (SYMBOL_NAME (sym), name);
+      cmp = (h < SYMBOL_HASH (sym) ? -1
+            : h > SYMBOL_HASH (sym) ? 1
+            : strcmp (SYMBOL_NAME (sym), name));
       if (cmp >= 0)
        break;
     }
@@ -191,7 +193,7 @@ lookup_symbol (const char *name, symbol_
 
   /* Symbol not found.  */
 
-  spp = (prev != NULL) ?  &prev->next : &symtab[h];
+  spp = (prev != NULL) ?  &prev->next : &symtab[h % hash_table_size];
 
   switch (mode)
     {
@@ -215,6 +217,7 @@ lookup_symbol (const char *name, symbol_
       SYMBOL_TYPE (sym) = TOKEN_VOID;
       SYMBOL_TRACED (sym) = SYMBOL_SHADOWED (sym) = FALSE;
       SYMBOL_NAME (sym) = xstrdup (name);
+      SYMBOL_HASH (sym) = h;
       SYMBOL_SHADOWED (sym) = FALSE;
       SYMBOL_MACRO_ARGS (sym) = FALSE;
       SYMBOL_BLIND_NO_ARGS (sym) = FALSE;
@@ -330,9 +333,9 @@ symtab_print_list (int i)
   printf ("Symbol dump #%d:\n", i);
   for (h = 0; h < hash_table_size; h++)
     for (sym = symtab[h]; sym != NULL; sym = sym->next)
-      printf ("\tname %s, bucket %d, addr %p, next %p, "
+      printf ("\tname %s, hash %u, bucket %d, addr %p, next %p, "
               "flags%s%s\n",
-              SYMBOL_NAME (sym),
+              SYMBOL_NAME (sym), SYMBOL_HASH (sym),
               h, sym, SYMBOL_NEXT (sym),
               SYMBOL_TRACED (sym) ? " traced" : "",
               SYMBOL_SHADOWED (sym) ? " shadowed" : "");

reply via email to

[Prev in Thread] Current Thread [Next in Thread]