m4-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] symtab: Better handling of macro stacks


From: Eric Blake
Subject: [PATCH] symtab: Better handling of macro stacks
Date: Sat, 17 Apr 2021 13:19:04 -0500

I ran into a scenario where running a program took 22s with the
default -H509, but less than a second with -H517 [1].  The culprit?  A
collision between 'stack' and 'substr' in the default hash table size
caused lookups for substr to get progressively slower as pushdef stack
got deeper.  This is easy enough to fix, and may also make it easier
to dynamically grow the hashtable.

[1] https://lists.gnu.org/archive/html/bug-m4/2021-04/msg00000.html

* src/m4.h (struct symbol): Add stack member.
* src/symtab.c (lookup_symbol): Separate stack from bucket list.
(symtab_print_list): Update traversal to match.
* src/freeze.c (produce_frozen_state): Likewise.
(reverse_symbol_list): Reverse stack, not bucket.
---

If you must know, I wrote the program for
https://adventofcode.com/2018/day/5

 src/freeze.c | 92 +++++++++++++++++++++++++++-------------------------
 src/m4.h     |  6 ++--
 src/symtab.c | 56 ++++++++++++++++++++------------
 3 files changed, 87 insertions(+), 67 deletions(-)

diff --git a/src/freeze.c b/src/freeze.c
index 18b6def0..2a59a3aa 100644
--- a/src/freeze.c
+++ b/src/freeze.c
@@ -1,6 +1,6 @@
 /* GNU m4 -- A simple macro processor

-   Copyright (C) 1989-1994, 2006-2014, 2016-2017, 2020 Free Software
+   Copyright (C) 1989-1994, 2006-2014, 2016-2017, 2020-2021 Free Software
    Foundation, Inc.

    This file is part of GNU M4.
@@ -36,8 +36,8 @@ reverse_symbol_list (symbol *sym)
   result = NULL;
   while (sym)
     {
-      next = SYMBOL_NEXT (sym);
-      SYMBOL_NEXT (sym) = result;
+      next = SYMBOL_STACK (sym);
+      SYMBOL_STACK (sym) = result;
       result = sym;
       sym = next;
     }
@@ -54,6 +54,7 @@ produce_frozen_state (const char *name)
   FILE *file;
   size_t h;
   symbol *sym;
+  symbol *bucket;
   const builtin *bp;

   file = fopen (name, O_BINARY ? "wb" : "w");
@@ -90,56 +91,57 @@ produce_frozen_state (const char *name)

   for (h = 0; h < hash_table_size; h++)
     {
-
-      /* Process all entries in one bucket, from the last to the first.
-         This order ensures that, at reload time, pushdef's will be
-         executed with the oldest definitions first.  */
-
-      symtab[h] = reverse_symbol_list (symtab[h]);
-      for (sym = symtab[h]; sym; sym = SYMBOL_NEXT (sym))
+      for (bucket = symtab[h]; bucket; bucket = SYMBOL_NEXT (bucket))
         {
-          switch (SYMBOL_TYPE (sym))
+          /* Process all entries in one stack, from the last to the first.
+             This order ensures that, at reload time, pushdef's will be
+             executed with the oldest definitions first.  */
+
+          bucket = reverse_symbol_list (bucket);
+          for (sym = bucket; sym; sym = SYMBOL_STACK (sym))
             {
-            case TOKEN_TEXT:
-              xfprintf (file, "T%d,%d\n",
-                        (int) strlen (SYMBOL_NAME (sym)),
-                        (int) strlen (SYMBOL_TEXT (sym)));
-              fputs (SYMBOL_NAME (sym), file);
-              fputs (SYMBOL_TEXT (sym), file);
-              fputc ('\n', file);
-              break;
-
-            case TOKEN_FUNC:
-              bp = find_builtin_by_addr (SYMBOL_FUNC (sym));
-              if (bp == NULL)
+              switch (SYMBOL_TYPE (sym))
                 {
-                  M4ERROR ((warning_status, 0, "\
+                case TOKEN_TEXT:
+                  xfprintf (file, "T%d,%d\n",
+                            (int) strlen (SYMBOL_NAME (sym)),
+                            (int) strlen (SYMBOL_TEXT (sym)));
+                  fputs (SYMBOL_NAME (sym), file);
+                  fputs (SYMBOL_TEXT (sym), file);
+                  fputc ('\n', file);
+                  break;
+
+                case TOKEN_FUNC:
+                  bp = find_builtin_by_addr (SYMBOL_FUNC (sym));
+                  if (bp == NULL)
+                    {
+                      M4ERROR ((warning_status, 0, "\
 INTERNAL ERROR: builtin not found in builtin table!"));
+                      abort ();
+                    }
+                  xfprintf (file, "F%d,%d\n",
+                            (int) strlen (SYMBOL_NAME (sym)),
+                            (int) strlen (bp->name));
+                  fputs (SYMBOL_NAME (sym), file);
+                  fputs (bp->name, file);
+                  fputc ('\n', file);
+                  break;
+
+                case TOKEN_VOID:
+                  /* Ignore placeholder tokens that exist due to traceon.  */
+                  break;
+
+                default:
+                  M4ERROR ((warning_status, 0, "\
+INTERNAL ERROR: bad token data type in freeze_one_symbol ()"));
                   abort ();
+                  break;
                 }
-              xfprintf (file, "F%d,%d\n",
-                        (int) strlen (SYMBOL_NAME (sym)),
-                        (int) strlen (bp->name));
-              fputs (SYMBOL_NAME (sym), file);
-              fputs (bp->name, file);
-              fputc ('\n', file);
-              break;
-
-            case TOKEN_VOID:
-              /* Ignore placeholder tokens that exist due to traceon.  */
-              break;
-
-            default:
-              M4ERROR ((warning_status, 0, "\
-INTERNAL ERROR: bad token data type in freeze_one_symbol ()"));
-              abort ();
-              break;
             }
+
+          /* Reverse the bucket once more, putting it back as it was.  */
+          bucket = reverse_symbol_list (bucket);
         }
-
-      /* Reverse the bucket once more, putting it back as it was.  */
-
-      symtab[h] = reverse_symbol_list (symtab[h]);
     }

   /* Let diversions be issued from output.c module, its cleaner to have this
diff --git a/src/m4.h b/src/m4.h
index 55fc2928..ef5bf9a5 100644
--- a/src/m4.h
+++ b/src/m4.h
@@ -1,6 +1,6 @@
 /* GNU m4 -- A simple macro processor

-   Copyright (C) 1989-1994, 2004-2014, 2016-2017, 2020 Free Software
+   Copyright (C) 1989-1994, 2004-2014, 2016-2017, 2020-2021 Free Software
    Foundation, Inc.

    This file is part of GNU M4.
@@ -359,7 +359,8 @@ enum symbol_lookup
 /* Symbol table entry.  */
 struct symbol
 {
-  struct symbol *next;
+  struct symbol *stack; /* pushdef stack */
+  struct symbol *next; /* hash bucket chain */
   bool_bitfield traced : 1;
   bool_bitfield shadowed : 1;
   bool_bitfield macro_args : 1;
@@ -371,6 +372,7 @@ struct symbol
   token_data data;
 };

+#define SYMBOL_STACK(S)         ((S)->stack)
 #define SYMBOL_NEXT(S)          ((S)->next)
 #define SYMBOL_TRACED(S)        ((S)->traced)
 #define SYMBOL_SHADOWED(S)      ((S)->shadowed)
diff --git a/src/symtab.c b/src/symtab.c
index a8f9461d..ac88a4cc 100644
--- a/src/symtab.c
+++ b/src/symtab.c
@@ -1,6 +1,6 @@
 /* GNU m4 -- A simple macro processor

-   Copyright (C) 1989-1994, 2003, 2006-2014, 2016-2017, 2020 Free
+   Copyright (C) 1989-1994, 2003, 2006-2014, 2016-2017, 2020-2021 Free
    Software Foundation, Inc.

    This file is part of GNU M4.
@@ -223,6 +223,8 @@ lookup_symbol (const char *name, symbol_lookup mode)
               SYMBOL_DELETED (sym) = false;
               SYMBOL_PENDING_EXPANSIONS (sym) = 0;

+              SYMBOL_STACK (sym) = SYMBOL_STACK (old);
+              SYMBOL_STACK (old) = NULL;
               SYMBOL_NEXT (sym) = SYMBOL_NEXT (old);
               SYMBOL_NEXT (old) = NULL;
               (*spp) = sym;
@@ -247,13 +249,17 @@ lookup_symbol (const char *name, symbol_lookup mode)
       SYMBOL_DELETED (sym) = false;
       SYMBOL_PENDING_EXPANSIONS (sym) = 0;

+      SYMBOL_STACK (sym) = NULL;
       SYMBOL_NEXT (sym) = *spp;
       (*spp) = sym;

       if (mode == SYMBOL_PUSHDEF && cmp == 0)
         {
-          SYMBOL_SHADOWED (SYMBOL_NEXT (sym)) = true;
-          SYMBOL_TRACED (sym) = SYMBOL_TRACED (SYMBOL_NEXT (sym));
+          SYMBOL_STACK (sym) = SYMBOL_NEXT (sym);
+          SYMBOL_NEXT (sym) = SYMBOL_NEXT (SYMBOL_STACK (sym));
+          SYMBOL_NEXT (SYMBOL_STACK (sym)) = NULL;
+          SYMBOL_SHADOWED (SYMBOL_STACK (sym)) = true;
+          SYMBOL_TRACED (sym) = SYMBOL_TRACED (SYMBOL_STACK (sym));
         }
       return sym;

@@ -271,22 +277,28 @@ lookup_symbol (const char *name, symbol_lookup mode)
         return NULL;
       {
         bool traced = false;
-        if (SYMBOL_NEXT (sym) != NULL
-            && SYMBOL_SHADOWED (SYMBOL_NEXT (sym))
+        symbol *next = SYMBOL_NEXT (sym);
+        if (SYMBOL_STACK (sym) != NULL
+            && SYMBOL_SHADOWED (SYMBOL_STACK (sym))
             && mode == SYMBOL_POPDEF)
           {
-            SYMBOL_SHADOWED (SYMBOL_NEXT (sym)) = false;
-            SYMBOL_TRACED (SYMBOL_NEXT (sym)) = SYMBOL_TRACED (sym);
+            SYMBOL_SHADOWED (SYMBOL_STACK (sym)) = false;
+            SYMBOL_TRACED (SYMBOL_STACK (sym)) = SYMBOL_TRACED (sym);
+            SYMBOL_NEXT (SYMBOL_STACK (sym)) = next;
+            *spp = SYMBOL_STACK (sym);
           }
         else
-          traced = SYMBOL_TRACED (sym);
+          {
+            traced = SYMBOL_TRACED (sym);
+            *spp = next;
+          }
         do
           {
-            *spp = SYMBOL_NEXT (sym);
+            next = SYMBOL_STACK (sym);
             free_symbol (sym);
-            sym = *spp;
+            sym = next;
           }
-        while (*spp != NULL && SYMBOL_SHADOWED (*spp)
+        while (next != NULL && SYMBOL_SHADOWED (next)
                && mode == SYMBOL_DELETE);
         if (traced)
           {
@@ -300,6 +312,7 @@ lookup_symbol (const char *name, symbol_lookup mode)
             SYMBOL_DELETED (sym) = false;
             SYMBOL_PENDING_EXPANSIONS (sym) = 0;

+            SYMBOL_STACK (sym) = NULL;
             SYMBOL_NEXT (sym) = *spp;
             (*spp) = sym;
           }
@@ -383,19 +396,22 @@ static void
 symtab_print_list (int i)
 {
   symbol *sym;
+  symbol *bucket;
   size_t h;

   xprintf ("Symbol dump #%d:\n", i);
   for (h = 0; h < hash_table_size; h++)
-    for (sym = symtab[h]; sym != NULL; sym = sym->next)
-      xprintf ("\tname %s, bucket %lu, addr %p, next %p, "
-               "flags%s%s%s, pending %d\n",
-               SYMBOL_NAME (sym),
-               (unsigned long int) h, sym, SYMBOL_NEXT (sym),
-               SYMBOL_TRACED (sym) ? " traced" : "",
-               SYMBOL_SHADOWED (sym) ? " shadowed" : "",
-               SYMBOL_DELETED (sym) ? " deleted" : "",
-               SYMBOL_PENDING_EXPANSIONS (sym));
+    for (bucket = symtab[h]; bucket != NULL; bucket = bucket->next)
+      for (sym = bucket; sym; sym = sym->stack)
+        xprintf ("\tname %s, bucket %lu, addr %p, stack %p, next %p, "
+                 "flags%s%s%s, pending %d\n",
+                 SYMBOL_NAME (sym),
+                 (unsigned long int) h, sym, SYMBOL_STACK (sym),
+                 SYMBOL_NEXT (sym),
+                 SYMBOL_TRACED (sym) ? " traced" : "",
+                 SYMBOL_SHADOWED (sym) ? " shadowed" : "",
+                 SYMBOL_DELETED (sym) ? " deleted" : "",
+                 SYMBOL_PENDING_EXPANSIONS (sym));
 }

 #endif /* DEBUG_SYM */
-- 
2.31.1




reply via email to

[Prev in Thread] Current Thread [Next in Thread]