bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

20-fyi-hash-symtab.patch


From: Akim Demaille
Subject: 20-fyi-hash-symtab.patch
Date: Sun, 07 Apr 2002 17:24:26 +0200

Index: ChangeLog
from  Akim Demaille  <address@hidden>

        Use lib/hash for the symbol table.

        * src/gram.c (ntokens): Initialize to 1, to reserve a slot for
        EOF.
        * src/lex.c (lex): Set the `number' member of new terminals.
        * src/reader.c (bucket_check_defined, bucket_make_alias)
        (bucket_check_alias_consistence, bucket_translation): New.
        (reader, grammar_free, readgram, token_translations_init)
        (packsymbols): Adjust.
        (reader): Number the predefined tokens.
        * src/reduce.c (inaccessable_symbols): Just use hard coded numbers
        for predefined tokens.
        * src/symtab.h (bucket): Remove all the hash table related
        members.
        * src/symtab.c (symtab): Replace by...
        (bucket_table): this.
        (bucket_new, bucket_free, hash_compare_bucket, hash_bucket)
        (buckets_new, buckets_do): New.

Index: src/gram.c
--- src/gram.c Sat, 06 Apr 2002 21:48:00 +0200 akim
+++ src/gram.c Sat, 06 Apr 2002 21:25:15 +0200 akim
@@ -30,7 +30,7 @@
 int nitems = 0;
 int nrules = 0;
 int nsyms = 0;
-int ntokens = 0;
+int ntokens = 1;
 int nvars = 0;

 short *ritem = NULL;
Index: src/lex.c
--- src/lex.c Fri, 01 Mar 2002 16:28:34 +0100 akim
+++ src/lex.c Sat, 06 Apr 2002 21:18:54 +0200 akim
@@ -364,9 +364,13 @@
        obstack_1grow (&token_obstack, '\0');
        token_buffer = obstack_finish (&token_obstack);
        symval = getsym (token_buffer);
-       symval->class = token_sym;
-       if (symval->user_token_number == SUNDEF)
-         symval->user_token_number = code;
+       if (symval->number == -1)
+         {
+           symval->number = ntokens++;
+           symval->class = token_sym;
+           if (symval->user_token_number == SUNDEF)
+             symval->user_token_number = code;
+         }
        return tok_identifier;
       }

@@ -388,7 +392,11 @@
        token_buffer = obstack_finish (&token_obstack);

        symval = getsym (token_buffer);
-       symval->class = token_sym;
+       if (symval->number == -1)
+         {
+           symval->number = ntokens++;
+           symval->class = token_sym;
+         }

        return tok_identifier;
       }
Index: src/reader.c
--- src/reader.c Sat, 06 Apr 2002 21:53:45 +0200 akim
+++ src/reader.c Sat, 06 Apr 2002 21:56:44 +0200 akim
@@ -86,6 +86,181 @@
   return res;
 }

+/*------------------------.
+| Operations on buckets.  |
+`------------------------*/
+
+
+/*-----------------------------------------------------------.
+| If THIS is not defined, report an error, and consider it a |
+| nonterminal.                                               |
+`-----------------------------------------------------------*/
+
+static bool
+bucket_check_defined (bucket *this)
+{
+  if (this->class == unknown_sym)
+    {
+      complain
+       (_("symbol %s is used, but is not defined as a token and has no rules"),
+        this->tag);
+      this->class = nterm_sym;
+      this->number = nvars++;
+    }
+
+  return TRUE;
+}
+
+
+/*-------------------------------------------------------------------.
+| Assign a symbol number, and write the definition of the token name |
+| into FDEFINES.  Put in SYMBOLS.                                    |
+`-------------------------------------------------------------------*/
+
+static bool
+bucket_make_alias (bucket *symbol, char *typename)
+{
+  if (symval->alias)
+    warn (_("symbol `%s' used more than once as a literal string"),
+         symval->tag);
+  else if (symbol->alias)
+    warn (_("symbol `%s' given more than one literal string"),
+         symbol->tag);
+  else
+    {
+      symval->class = token_sym;
+      symval->type_name = typename;
+      symval->user_token_number = symbol->user_token_number;
+      symbol->user_token_number = SALIAS;
+      symval->alias = symbol;
+      symbol->alias = symval;
+      /* symbol and symval combined are only one symbol */
+      nsyms--;
+      ntokens--;
+      assert (ntokens == symbol->number || ntokens == symval->number);
+      symbol->number = symval->number =
+       (symval->number < symbol->number) ? symval->number : symbol->number;
+    }
+
+  return TRUE;
+}
+
+/*---------------------------------------------------------.
+| Check that THIS, and its alias, have same precedence and |
+| associativity.                                           |
+`---------------------------------------------------------*/
+
+static bool
+bucket_check_alias_consistence (bucket *this)
+{
+  /* Check only those who _are_ the aliases. */
+  if (this->alias && this->user_token_number == SALIAS)
+    {
+      if (this->prec != this->alias->prec)
+       {
+         if (this->prec != 0 && this->alias->prec != 0)
+           complain (_("conflicting precedences for %s and %s"),
+                     this->tag, this->alias->tag);
+         if (this->prec != 0)
+           this->alias->prec = this->prec;
+         else
+           this->prec = this->alias->prec;
+       }
+
+      if (this->assoc != this->alias->assoc)
+       {
+         if (this->assoc != 0 && this->alias->assoc != 0)
+           complain (_("conflicting assoc values for %s and %s"),
+                     this->tag, this->alias->tag);
+         if (this->assoc != 0)
+           this->alias->assoc = this->assoc;
+         else
+           this->assoc = this->alias->assoc;
+       }
+    }
+  return TRUE;
+}
+
+
+/*-------------------------------------------------------------------.
+| Assign a symbol number, and write the definition of the token name |
+| into FDEFINES.  Put in SYMBOLS.                                    |
+`-------------------------------------------------------------------*/
+
+static bool
+bucket_pack (bucket *this)
+{
+  if (getenv ("DEBUG"))
+    fprintf (stderr, "Packing %s, %s, number = %d\n",
+            this->tag,
+            this->class == nterm_sym ? "nterm" : "TERM",
+            this->number);
+  if (this->class == nterm_sym)
+    {
+      this->number += ntokens;
+    }
+  else if (this->alias)
+    {
+      /* This symbol and its alias are a single token defn.
+        Allocate a tokno, and assign to both check agreement of
+        prec and assoc fields and make both the same */
+      if (this->number == -1)
+       {
+         if (this == eoftoken || this->alias == eoftoken)
+           this->number = this->alias->number = 0;
+         else
+           {
+             assert (this->alias->number != -1);
+             this->number = this->alias->number;
+           }
+       }
+      /* Do not do processing below for SALIASs.  */
+      if (this->user_token_number == SALIAS)
+       return TRUE;
+    }
+  else /* this->class == token_sym */
+    {
+      assert (this->number != -1);
+    }
+
+  if (getenv ("DEBUG"))
+    fprintf (stderr, "Setting %d = %s\n", this->number, this->tag);
+  symbols[this->number] = this;
+  return TRUE;
+}
+
+
+
+
+/*--------------------------------------------------.
+| Put THIS in TOKEN_TRANSLATIONS if it is a token.  |
+`--------------------------------------------------*/
+
+static bool
+bucket_translation (bucket *this)
+{
+  if (getenv ("DEBUG"))
+    fprintf (stderr, "Considering Setting UserVal %s = %d (val = %d)\n",
+            this->tag, this->user_token_number, this->number);
+
+  /* Non-terminal? */
+  if (this->class == token_sym
+      && this->user_token_number != SALIAS)
+    {
+      /* A token which translation has already been set? */
+      if (token_translations[this->user_token_number] != 2)
+       complain (_("tokens %s and %s both assigned number %d"),
+                 symbols[token_translations[this->user_token_number]]->tag,
+                 this->tag, this->user_token_number);
+
+      if (getenv ("DEBUG"))
+       fprintf (stderr, "Setting UserVal %s = %d (val = %d)\n",
+                this->tag, this->user_token_number, this->number);
+      token_translations[this->user_token_number] = this->number;
+    }
+
+  return TRUE;
+}
 

 /*===================\
@@ -531,23 +706,7 @@
        }
       else if (token == tok_identifier && *symval->tag == '\"' && symbol)
        {
-         if (symval->alias)
-           warn (_("symbol `%s' used more than once as a literal string"),
-                 symval->tag);
-         else if (symbol->alias)
-           warn (_("symbol `%s' given more than one literal string"),
-                 symbol->tag);
-         else
-           {
-             symval->class = token_sym;
-             symval->type_name = typename;
-             symval->user_token_number = symbol->user_token_number;
-             symbol->user_token_number = SALIAS;
-             symval->alias = symbol;
-             symbol->alias = symval;
-             /* symbol and symval combined are only one symbol */
-             nsyms--;
-           }
+         bucket_make_alias (symbol, typename);
          symbol = NULL;
        }
       else if (token == tok_identifier)
@@ -560,6 +719,13 @@
          symbol->class = what_is;
          if (what_is == nterm_sym && oldclass != nterm_sym)
            symbol->number = nvars++;
+         if (what_is == token_sym && symbol->number == -1)
+           {
+             symbol->number = ntokens++;
+             if (getenv ("DEBUG"))
+               fprintf (stderr, "Set %s to %d\n",
+                        symbol->tag, symbol->number);
+           }

          if (typename)
            {
@@ -574,7 +740,13 @@
          symbol->user_token_number = numval;
          /* User defined EOF token? */
          if (numval == 0)
-           eoftoken = symbol;
+           {
+             eoftoken = symbol;
+             eoftoken->number = 0;
+             /* It is always mapped to 0, so it was already counted in
+                NTOKENS.  */
+             --ntokens;
+           }
        }
       else
        {
@@ -703,7 +875,11 @@
          symval->assoc = assoc;
          if (symval->class == nterm_sym)
            complain (_("symbol %s redefined"), symval->tag);
-         symval->class = token_sym;
+         if (symval->number == -1)
+           {
+             symval->number = ntokens++;
+             symval->class = token_sym;
+           }
          if (name)
            {                   /* record the type, if one is specified */
              if (symval->type_name == NULL)
@@ -720,9 +896,9 @@
            }
          else
            {
-             complain (_
-                       ("invalid text (%s) - number should be after 
identifier"),
-token_buffer);
+             complain
+               (_("invalid text (%s) - number should be after identifier"),
+                token_buffer);
              skip_to_char ('%');
            }
          break;
@@ -1073,7 +1249,7 @@
            case tok_stropt:
            case tok_intopt:
            case tok_obsolete:
-             abort ();
+             assert (0);
              break;

            case tok_illegal:
@@ -1231,7 +1407,6 @@
   bucket *lhs = NULL;
   symbol_list *p = NULL;
   symbol_list *p1 = NULL;
-  bucket *bp;

   /* Points to first symbol_list of current rule. its symbol is the
      lhs of the rule.  */
@@ -1463,16 +1638,7 @@
     fatal (_("no rules in the input grammar"));

   /* Report any undefined symbols and consider them nonterminals.  */
-
-  for (bp = firstsymbol; bp; bp = bp->next)
-    if (bp->class == unknown_sym)
-      {
-       complain (_
-                 ("symbol %s is used, but is not defined as a token and has no 
rules"),
-                 bp->tag);
-       bp->class = nterm_sym;
-       bp->number = nvars++;
-      }
+  buckets_do (bucket_check_defined, NULL);

   /* Insert the initial rule, which line is that of the first rule
      (not that of the start symbol):
@@ -1493,7 +1659,10 @@
     fatal (_("too many symbols (tokens plus nonterminals); maximum %d"),
           MAXSHORT);

-  ntokens = nsyms - nvars;
+  if (getenv ("DEBUG"))
+    fprintf (stderr, "nsyms == ntokens + nvars: %d == %d + %d\n",
+            nsyms, ntokens, nvars);
+  assert (nsyms == ntokens + nvars);
 }

 /* At the end of the grammar file, some C source code must
@@ -1530,9 +1699,25 @@
 static void
 token_translations_init (void)
 {
-  bucket *bp = NULL;
+  int last_user_token_number = 256;
   int i;

+  /* Set the user numbers. */
+  for (i = 0; i < ntokens; ++i)
+    {
+      bucket *this = symbols[i];
+      if (getenv ("DEBUG"))
+       fprintf (stderr, "UserVal %s = %d (val = %d)\n",
+                this->tag, this->user_token_number, this->number);
+      if (this->user_token_number == SUNDEF)
+       this->user_token_number = ++last_user_token_number;
+      if (this->user_token_number > max_user_token_number)
+       max_user_token_number = this->user_token_number;
+      if (getenv ("DEBUG"))
+       fprintf (stderr, "Now: UserVal %s = %d (val = %d)\n",
+                this->tag, this->user_token_number, this->number);
+    }
+
   token_translations = XCALLOC (short, max_user_token_number + 1);

   /* Initialize all entries for literal tokens to 2, the internal
@@ -1541,24 +1726,7 @@
   for (i = 0; i < max_user_token_number + 1; i++)
     token_translations[i] = 2;

-  for (bp = firstsymbol; bp; bp = bp->next)
-    {
-      /* Non-terminal? */
-      if (bp->number >= ntokens)
-       continue;
-      /* A token string alias? */
-      if (bp->user_token_number == SALIAS)
-       continue;
-
-      assert (bp->user_token_number != SUNDEF);
-
-      /* A token which translation has already been set? */
-      if (token_translations[bp->user_token_number] != 2)
-       complain (_("tokens %s and %s both assigned number %d"),
-                 symbols[token_translations[bp->user_token_number]]->tag,
-                 bp->tag, bp->user_token_number);
-      token_translations[bp->user_token_number] = bp->number;
-    }
+  buckets_do (bucket_translation, NULL);
 }


@@ -1570,83 +1738,10 @@
 static void
 packsymbols (void)
 {
-  bucket *bp = NULL;
-  int tokno = 1;
-  int last_user_token_number;
-
   symbols = XCALLOC (bucket *, nsyms);

-  max_user_token_number = 256;
-  last_user_token_number = 256;
-
-  for (bp = firstsymbol; bp; bp = bp->next)
-    {
-      if (bp->class == nterm_sym)
-       {
-         bp->number += ntokens;
-       }
-      else if (bp->alias)
-       {
-         /* This symbol and its alias are a single token defn.
-            Allocate a tokno, and assign to both check agreement of
-            prec and assoc fields and make both the same */
-         if (bp->number == -1)
-           {
-             if (bp == eoftoken || bp->alias == eoftoken)
-               bp->number = bp->alias->number = 0;
-             else
-               {
-                 bp->number = bp->alias->number = tokno++;
-               }
-           }
-
-         if (bp->prec != bp->alias->prec)
-           {
-             if (bp->prec != 0 && bp->alias->prec != 0
-                 && bp->user_token_number == SALIAS)
-               complain (_("conflicting precedences for %s and %s"),
-                         bp->tag, bp->alias->tag);
-             if (bp->prec != 0)
-               bp->alias->prec = bp->prec;
-             else
-               bp->prec = bp->alias->prec;
-           }
-
-         if (bp->assoc != bp->alias->assoc)
-           {
-             if (bp->assoc != 0 && bp->alias->assoc != 0
-                 && bp->user_token_number == SALIAS)
-               complain (_("conflicting assoc values for %s and %s"),
-                         bp->tag, bp->alias->tag);
-             if (bp->assoc != 0)
-               bp->alias->assoc = bp->assoc;
-             else
-               bp->assoc = bp->alias->assoc;
-           }
-
-         /* Do not do processing below for SALIASs.  */
-         if (bp->user_token_number == SALIAS)
-           continue;
-
-       }
-      else /* bp->class == token_sym */
-       {
-         if (bp == eoftoken)
-           bp->number = 0;
-         else
-           bp->number = tokno++;
-       }
-
-      if (bp->class == token_sym)
-       {
-         if (bp->user_token_number == SUNDEF)
-           bp->user_token_number = ++last_user_token_number;
-         if (bp->user_token_number > max_user_token_number)
-           max_user_token_number = bp->user_token_number;
-       }
-
-      symbols[bp->number] = bp;
-    }
+  buckets_do (bucket_check_alias_consistence, NULL);
+  buckets_do (bucket_pack, NULL);

   token_translations_init ();

@@ -1750,7 +1845,7 @@
   obstack_init (&muscle_obstack);

   /* Initialize the symbol table.  */
-  tabinit ();
+  buckets_new ();

   /* Construct the axiom symbol. */
   axiom = getsym ("$axiom");
@@ -1760,12 +1855,14 @@
   /* Construct the error token */
   errtoken = getsym ("error");
   errtoken->class = token_sym;
+  errtoken->number = ntokens++;
   errtoken->user_token_number = 256;   /* Value specified by POSIX.  */

   /* Construct a token that represents all undefined literal tokens.
      It is always token number 2.  */
   undeftoken = getsym ("$undefined.");
   undeftoken->class = token_sym;
+  undeftoken->number = ntokens++;
   undeftoken->user_token_number = 2;

   /* Initialize the obstacks. */
@@ -1785,6 +1882,7 @@
     {
       eoftoken = getsym ("$");
       eoftoken->class = token_sym;
+      eoftoken->number = 0;
       /* Value specified by POSIX.  */
       eoftoken->user_token_number = 0;
     }
@@ -1815,5 +1913,5 @@
   XFREE (ritem);
   free (rules + 1);
   /* Free the symbol table data structure.  */
-  free_symtab ();
+  buckets_free ();
 }
Index: src/reduce.c
--- src/reduce.c Sat, 06 Apr 2002 21:53:45 +0200 akim
+++ src/reduce.c Sat, 06 Apr 2002 20:00:04 +0200 akim
@@ -199,9 +199,9 @@
   V = Vp;

   /* Tokens 0, 1, and 2 are internal to Bison.  Consider them useful. */
-  bitset_set (V, 0);           /* end-of-input token */
-  bitset_set (V, 1);           /* error token */
-  bitset_set (V, 2);           /* some undefined token */
+  bitset_set (V, eoftoken->number);            /* end-of-input token */
+  bitset_set (V, errtoken->number);            /* error token */
+  bitset_set (V, undeftoken->number);          /* some undefined token */

   bitset_free (P);
   P = Pp;
Index: src/symtab.c
--- src/symtab.c Sat, 23 Mar 2002 12:58:09 +0100 akim
+++ src/symtab.c Sat, 06 Apr 2002 20:07:54 +0200 akim
@@ -1,5 +1,5 @@
 /* Symbol table manager for Bison,
-   Copyright 1984, 1989, 2000, 2001, 2002 Free Software Foundation, Inc.
+   Copyright (C) 1984, 1989, 2000, 2001, 2002 Free Software Foundation, Inc.

    This file is part of Bison, the GNU Compiler Compiler.

@@ -20,39 +20,19 @@


 #include "system.h"
+#include "hash.h"
 #include "symtab.h"
 #include "gram.h"

-
-bucket *firstsymbol;
-static bucket *lastsymbol;
-static bucket **symtab;
-
-static int
-hash (const char *key)
-{
-  const char *cp;
-  int k;
-
-  cp = key;
-  k = 0;
-  while (*cp)
-    k = ((k << 1) ^ (*cp++)) & 0x3fff;
-
-  return k % TABSIZE;
-}
-
-/*--------------------------------------------------------------.
-| Create a new symbol, named TAG, which hash value is HASHVAL.  |
-`--------------------------------------------------------------*/
+/*---------------------------------.
+| Create a new symbol, named TAG.  |
+`---------------------------------*/

 static bucket *
-bucket_new (const char *tag, int hashval)
+bucket_new (const char *tag)
 {
   bucket *res = XMALLOC (bucket, 1);

-  res->link = symtab[hashval];
-  res->next = NULL;
   res->tag = xstrdup (tag);
   res->type_name = NULL;
   res->number = -1;
@@ -62,19 +42,67 @@
   res->alias = NULL;
   res->class = unknown_sym;

+  if (getenv ("DEBUG"))
+    fprintf (stderr, "Creating: nsyms = %d, ntokens = %d: %s\n",
+            nsyms, ntokens, tag);
   nsyms++;

   return res;
 }


-void
-tabinit (void)
+/*------------.
+| Free THIS.  |
+`------------*/
+
+static void
+bucket_free (bucket *this)
 {
-  symtab = XCALLOC (bucket *, TABSIZE);
+#if 0
+  /* This causes crashes because one string can appear more
+     than once.  */
+  XFREE (this->type_name);
+#endif
+  XFREE (this->tag);
+  XFREE (this);
+}
+
+
+
+/*----------------------.
+| A bucket hash table.  |
+`----------------------*/

-  firstsymbol = NULL;
-  lastsymbol = NULL;
+/* Initial capacity of buckets hash table.  */
+#define HT_INITIAL_CAPACITY 257
+
+static struct hash_table *bucket_table = NULL;
+
+static bool
+hash_compare_bucket (const bucket *m1, const bucket *m2)
+{
+  return strcmp (m1->tag, m2->tag) ? FALSE : TRUE;
+}
+
+static unsigned int
+hash_bucket (const bucket *m, unsigned int tablesize)
+{
+  return hash_string (m->tag, tablesize);
+}
+
+
+/*-------------------------------.
+| Create the bucket hash table.  |
+`-------------------------------*/
+
+void
+buckets_new (void)
+{
+  bucket_table = hash_initialize (HT_INITIAL_CAPACITY,
+                                 NULL,
+                                 (Hash_hasher) hash_bucket,
+                                 (Hash_comparator) hash_compare_bucket,
+                                 (Hash_data_freer) bucket_free);
 }


@@ -86,66 +114,42 @@
 bucket *
 getsym (const char *key)
 {
-  int hashval;
-  bucket *bp;
-  int found;
+  bucket probe;
+  bucket *entry;

-  hashval = hash (key);
-  bp = symtab[hashval];
+  (const char *) probe.tag = key;
+  entry = hash_lookup (bucket_table, &probe);

-  found = 0;
-  while (bp != NULL && found == 0)
+  if (!entry)
     {
-      if (strcmp (key, bp->tag) == 0)
-       found = 1;
-      else
-       bp = bp->link;
+      /* First insertion in the hash. */
+      entry = bucket_new (key);
+      hash_insert (bucket_table, entry);
     }
+  return entry;
+}

-  if (found == 0)
-    {
-      bp = bucket_new (key, hashval);

-      if (firstsymbol == NULL)
-       {
-         firstsymbol = bp;
-         lastsymbol = bp;
-       }
-      else
-       {
-         lastsymbol->next = bp;
-         lastsymbol = bp;
-       }
+/*-------------------.
+| Free the buckets.  |
+`-------------------*/

-      symtab[hashval] = bp;
-    }
-
-  return bp;
+void
+buckets_free (void)
+{
+  hash_free (bucket_table);
 }


+/*---------------------------------------------------------------.
+| Look for undefined buckets, report an error, and consider them |
+| terminals.                                                     |
+`---------------------------------------------------------------*/
+
 void
-free_symtab (void)
+buckets_do (bucket_processor processor, void *processor_data)
 {
-  int i;
-  bucket *bp, *bptmp;          /* JF don't use ptr after free */
-
-  for (i = 0; i < TABSIZE; i++)
-    {
-      bp = symtab[i];
-      while (bp)
-       {
-         bptmp = bp->link;
-#if 0
-         /* This causes crashes because one string can appear more
-            than once.  */
-         if (bp->type_name)
-           XFREE (bp->type_name);
-#endif
-         XFREE (bp->tag);
-         XFREE (bp);
-         bp = bptmp;
-       }
-    }
-  XFREE (symtab);
+  hash_do_for_each (bucket_table,
+                   (Hash_processor) processor,
+                   processor_data);
 }
Index: src/symtab.h
--- src/symtab.h Sat, 06 Apr 2002 21:53:45 +0200 akim
+++ src/symtab.h Sat, 06 Apr 2002 20:54:38 +0200 akim
@@ -46,10 +46,6 @@

 typedef struct bucket
 {
-  /* Needed for the hash table. */
-  struct bucket *link;
-  struct bucket *next;
-
   /* The key, name of the symbol. */
   char *tag;
   /* Its type. */
@@ -66,12 +62,13 @@
   symbol_class class;
 } bucket;

-
-extern bucket *firstsymbol;
+/* A function to apply to each symbol. */
+typedef bool (*bucket_processor) PARAMS ((bucket *));

 bucket *getsym PARAMS ((const char *));

-void tabinit PARAMS ((void));
-void free_symtab PARAMS ((void));
+void buckets_new PARAMS ((void));
+void buckets_do PARAMS ((bucket_processor processor, void *processor_data));
+void buckets_free PARAMS ((void));

 #endif /* !SYMTAB_H_ */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]