m4-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 1/3] symtab: sort by hash before name


From: Eric Blake
Subject: [PATCH 1/3] symtab: sort by hash before name
Date: Sat, 17 Apr 2021 14:58:27 -0500

It is faster to do an integer compare than a string compare when
managing hash table collisions (reserving a string compare for ties).
Testing with CFLAGS=-DDEBUG_SYM=1 and 'time M4=src/m4 autoconf -f',
the results are noticeable; execution speeds up from 2.3s to 2.2s, and
the debug trace that used to report:

m4: lookup mode 0 called 1243301 times, 7859589 compares, 6734330 misses, 
23941043 bytes

now reports

m4: lookup mode 0 called 1243301 times, 1125259 compares, 0 misses, 12433237 
bytes

* src/m4.h (struct symbol): Add hash member.
* src/symtab.c (lookup_symbol): Sort by hash first, then name.
(symtab_print_list): Add hash debug.
---
 src/m4.h     |  1 +
 src/symtab.c | 15 ++++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/m4.h b/src/m4.h
index a156376e..efb21c04 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -367,6 +367,7 @@ struct symbol
   bool_bitfield deleted : 1;
   int pending_expansions;

+  size_t hash;
   char *name;
   token_data data;
 };
diff --git a/src/symtab.c b/src/symtab.c
index 4266d54f..45f9da31 100644
--- a/src/symtab.c
+++ b/src/symtab.c
@@ -23,7 +23,7 @@
    symbol table is a simple chained hash table.  Each symbol is described
    by a struct symbol, which is placed in the hash table based upon the
    symbol name.  Symbols that hash to the same entry in the table are
-   kept on a list, sorted by name.  As a special case, to facilitate the
+   kept on a list, sorted by hash.  As a special case, to facilitate the
    "pushdef" and "popdef" builtins, a symbol can be several times in the
    symbol table, one for each definition.  Since the name is the same,
    all the entries for the symbol will be on the same list, and will
@@ -182,7 +182,9 @@ lookup_symbol (const char *name, symbol_lookup mode)

   for (prev = NULL; sym != NULL; prev = sym, sym = sym->next)
     {
-      cmp = strcmp (SYMBOL_NAME (sym), name);
+      cmp = (h > sym->hash) - (h < sym->hash);
+      if (cmp == 0)
+        cmp = strcmp (SYMBOL_NAME (sym), name);
       if (cmp >= 0)
         break;
     }
@@ -216,6 +218,7 @@ lookup_symbol (const char *name, symbol_lookup mode)
               sym = (symbol *) xmalloc (sizeof (symbol));
               SYMBOL_TYPE (sym) = TOKEN_VOID;
               SYMBOL_TRACED (sym) = SYMBOL_TRACED (old);
+              sym->hash = h;
               SYMBOL_NAME (sym) = xstrdup (name);
               SYMBOL_MACRO_ARGS (sym) = false;
               SYMBOL_BLIND_NO_ARGS (sym) = false;
@@ -240,6 +243,7 @@ lookup_symbol (const char *name, symbol_lookup mode)
       sym = (symbol *) xmalloc (sizeof (symbol));
       SYMBOL_TYPE (sym) = TOKEN_VOID;
       SYMBOL_TRACED (sym) = false;
+      sym->hash = h;
       SYMBOL_NAME (sym) = xstrdup (name);
       SYMBOL_MACRO_ARGS (sym) = false;
       SYMBOL_BLIND_NO_ARGS (sym) = false;
@@ -298,6 +302,7 @@ lookup_symbol (const char *name, symbol_lookup mode)
             sym = (symbol *) xmalloc (sizeof (symbol));
             SYMBOL_TYPE (sym) = TOKEN_VOID;
             SYMBOL_TRACED (sym) = true;
+            sym->hash = h;
             SYMBOL_NAME (sym) = xstrdup (name);
             SYMBOL_MACRO_ARGS (sym) = false;
             SYMBOL_BLIND_NO_ARGS (sym) = false;
@@ -395,9 +400,9 @@ symtab_print_list (int i)
   for (h = 0; h < hash_table_size; h++)
     for (bucket = symtab[h]; bucket != NULL; bucket = bucket->next)
       for (sym = bucket; sym; sym = sym->stack)
-        xprintf ("\tname %s, bucket %lu, addr %p, stack %p, next %p, "
-                 "flags%s%s, pending %d\n",
-                 SYMBOL_NAME (sym),
+        xprintf ("\tname %s, hash %lu, bucket %lu, addr %p, stack %p, "
+                 "next %p, flags%s%s, pending %d\n",
+                 SYMBOL_NAME (sym), (unsigned long int) sym->hash,
                  (unsigned long int) h, sym, SYMBOL_STACK (sym),
                  SYMBOL_NEXT (sym),
                  SYMBOL_TRACED (sym) ? " traced" : "",
-- 
2.31.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]