bison-patches
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

07-fyi-remove-useless-rules.patch


From: Akim Demaille
Subject: 07-fyi-remove-useless-rules.patch
Date: Sun, 07 Apr 2002 17:23:20 +0200

Index: ChangeLog
-2002-03-23  Akim Demaille  <address@hidden>
+2002-03-25  Akim Demaille  <address@hidden>
 
-       * src/derives.c, src/gram.h, src/nullable.c, src/options.c,
-       * src/print.c, src/print_graph.c, src/reduce.c, src/symtab.h:
-       `lhs' of `rule_t' is now a `bucket'.
+       * src/gram.h, src/gram.c (rules_swap): New.
+       (ritem_longest_rhs): Use it.
+       * src/gram.h (rule_t): `number' is a new member.
+       * src/reader.c (packgram): Set it.
+       * src/reduce.c (reduce_grammar_tables): Move the useless rules at
+       the end of `rules', and count them out of `nrules'.
+       (reduce_output, dump_grammar): Adjust.
+       (reduce_grammar): First renumber the nonterminals.
+       * src/print.c (print_grammar): It is no longer needed to check for
+       the usefulness of a rule, as useless rules are beyond `nrules + 1'.
+       * tests/reduce.at (Reduced Automaton): New test.
 
 2002-03-23  Akim Demaille  <address@hidden>
 
Index: NEWS
--- NEWS Sat, 23 Mar 2002 16:33:45 +0100 akim
+++ NEWS Mon, 25 Mar 2002 21:01:18 +0100 akim
@@ -3,6 +3,10 @@
 
 Changes in version 1.49a:
 
+* Useless rules are actually removed.
+  Before, Bison reported the useless rules, but, although not used,
+  included them in the parsers.
+
 * False `Token not used' report fixed.
   On a grammar such as
 
Index: src/derives.c
--- src/derives.c Sat, 23 Mar 2002 18:04:07 +0100 akim
+++ src/derives.c Mon, 25 Mar 2002 21:17:00 +0100 akim
@@ -70,7 +70,7 @@
   for (i = nrules; i > 0; i--)
     if (rules[i].useful)
       {
-       int lhs = rules[i].lhs->number;
+       int lhs = rules[i].lhs;
        p->next = dset[lhs];
        p->value = i;
        dset[lhs] = p;
Index: src/gram.c
--- src/gram.c Fri, 28 Dec 2001 16:41:48 +0100 akim
+++ src/gram.c Mon, 25 Mar 2002 21:01:18 +0100 akim
@@ -1,5 +1,5 @@
 /* Allocate input grammar variables for bison,
-   Copyright 1984, 1986, 1989, 2001 Free Software Foundation, Inc.
+   Copyright 1984, 1986, 1989, 2001, 2002 Free Software Foundation, Inc.
 
    This file is part of Bison, the GNU Compiler Compiler.
 
@@ -51,6 +51,21 @@
 int error_token_number;
 
 
+/*--------------------------------------.
+| Return the number of symbols in RHS.  |
+`--------------------------------------*/
+
+int
+rule_rhs_length (rule_t *rule)
+{
+  int res = 0;
+  short *rhsp;
+  for (rhsp = rule->rhs; *rhsp >= 0; ++rhsp)
+    ++res;
+  return res;
+}
+
+
 /*------------------------.
 | Dump RITEM for traces.  |
 `------------------------*/
@@ -76,23 +91,15 @@
 size_t
 ritem_longest_rhs (void)
 {
-  int length;
-  int max;
+  int max = 0;
   int i;
 
-  length = 0;
-  max = 0;
-  for (i = 0; i < nritems; ++i)
-    if (ritem[i] >= 0)
-      {
-       length++;
-      }
-    else
-      {
-       if (length > max)
-         max = length;
-       length = 0;
-      }
+  for (i = 1; i < nrules + 1; ++i)
+    {
+      int length = rule_rhs_length (&rules[i]);
+      if (length > max)
+       max = length;
+    }
 
   return max;
 }
Index: src/gram.h
--- src/gram.h Sat, 23 Mar 2002 18:04:07 +0100 akim
+++ src/gram.h Mon, 25 Mar 2002 21:01:18 +0100 akim
@@ -54,8 +54,9 @@
 
    RULES is an array of struct rule_s, which members are:
 
-   RULES[R].lhs -- the symbol of the left hand side of rule R.  If NULL,
-   the rule has been thrown out by reduce.c and should be ignored.
+   RULES[R].lhs -- the symbol number of the left hand side of rule R.
+   If -1, the rule has been thrown out by reduce.c and should be
+   ignored.
 
    RULES[R].rhs -- the index in RITEM of the beginning of the portion
    for rule R.
@@ -97,7 +98,6 @@
 
    Associativities are recorded similarly in SYMBOLS[I]->assoc.  */
 
-#include "symtab.h"
 
 #define        ISTOKEN(s)      ((s) < ntokens)
 #define        ISVAR(s)        ((s) >= ntokens)
@@ -113,10 +113,22 @@
 
 extern int start_symbol;
 
+/* Associativity values for tokens and rules.  */
+typedef enum
+{
+  right_assoc,
+  left_assoc,
+  non_assoc
+} associativity;
+
 
 typedef struct rule_s
 {
-  bucket *lhs;
+  /* The number of the rule in the source.  It is usually the index in
+     RULES too, except if there are useless rules.  */
+  short number;
+
+  short lhs;
   short *rhs;
   short prec;
   short precsym;
@@ -158,6 +170,8 @@
 
 extern int error_token_number;
 
+/* Report the length of the RHS. */
+int rule_rhs_length PARAMS ((rule_t *rule));
 
 /* Dump RITEM for traces. */
 void ritem_print PARAMS ((FILE *out));
Index: src/nullable.c
--- src/nullable.c Sat, 23 Mar 2002 18:04:07 +0100 akim
+++ src/nullable.c Sat, 23 Mar 2002 12:55:43 +0100 akim
@@ -96,10 +96,10 @@
          {
            /* This rule has an empty RHS. */
            assert (rules[ruleno].rhs[0] == -ruleno);
-           if (rules[ruleno].useful && !nullable[rules[ruleno].lhs->number])
+           if (rules[ruleno].useful && !nullable[rules[ruleno].lhs])
              {
-               nullable[rules[ruleno].lhs->number] = 1;
-               *s2++ = rules[ruleno].lhs->number;
+               nullable[rules[ruleno].lhs] = 1;
+               *s2++ = rules[ruleno].lhs;
              }
          }
       }
@@ -109,10 +109,10 @@
       {
        ruleno = p->value;
        if (--rcount[ruleno] == 0)
-         if (rules[ruleno].useful && !nullable[rules[ruleno].lhs->number])
+         if (rules[ruleno].useful && !nullable[rules[ruleno].lhs])
            {
-             nullable[rules[ruleno].lhs->number] = 1;
-             *s2++ = rules[ruleno].lhs->number;
+             nullable[rules[ruleno].lhs] = 1;
+             *s2++ = rules[ruleno].lhs;
            }
       }
 
Index: src/options.c
--- src/options.c Sat, 23 Mar 2002 18:04:07 +0100 akim
+++ src/options.c Sat, 23 Mar 2002 12:53:38 +0100 akim
@@ -23,7 +23,6 @@
 #include "files.h"
 #include "getargs.h"
 #include "symtab.h"
-#include "gram.h"
 #include "lex.h"
 #include "output.h"
 #include "options.h"
Index: src/print.c
--- src/print.c Sat, 23 Mar 2002 18:04:07 +0100 akim
+++ src/print.c Mon, 25 Mar 2002 21:01:18 +0100 akim
@@ -94,7 +94,7 @@
            sp++;
 
          rule = -(*sp);
-         fprintf (out, "    %s  ->  ", escape (rules[rule].lhs->tag));
+         fprintf (out, "    %s  ->  ", escape (symbols[rules[rule].lhs]->tag));
 
          for (sp = rules[rule].rhs; sp < sp1; sp++)
            fprintf (out, "%s ", escape (symbols[*sp]->tag));
@@ -189,7 +189,7 @@
   if (state->consistent)
     {
       int rule = redp->rules[0];
-      int symbol = rules[rule].lhs->number;
+      int symbol = rules[rule].lhs;
       fprintf (out, _("    $default\treduce using rule %d (%s)\n\n"),
               rule - 1, escape (symbols[symbol]->tag));
       return;
@@ -221,10 +221,10 @@
        if (bitset_test (lookaheadset, i))
          fprintf (out, _("    %-4s\t[reduce using rule %d (%s)]\n"),
                   escape (symbols[i]->tag), default_rule - 1,
-                  escape2 (rules[default_rule].lhs->tag));
+                  escape2 (symbols[rules[default_rule].lhs]->tag));
 
       fprintf (out, _("    $default\treduce using rule %d (%s)\n\n"),
-              default_rule - 1, escape (rules[default_rule].lhs->tag));
+              default_rule - 1, escape 
(symbols[rules[default_rule].lhs]->tag));
     }
   else if (state->nlookaheads >= 1)
     {
@@ -276,7 +276,7 @@
                               _("    %-4s\treduce using rule %d (%s)\n"),
                               escape (symbols[i]->tag),
                               LAruleno[state->lookaheadsp + j] - 1,
-                              escape2 (rules[LAruleno[state->lookaheadsp + 
j]].lhs->tag));
+                              escape2 
(symbols[rules[LAruleno[state->lookaheadsp + j]].lhs]->tag));
                    else
                      defaulted = 1;
 
@@ -289,13 +289,13 @@
                               _("    %-4s\treduce using rule %d (%s)\n"),
                               escape (symbols[i]->tag),
                               LAruleno[default_LA] - 1,
-                              escape2 (rules[LAruleno[default_LA]].lhs->tag));
+                              escape2 
(symbols[rules[LAruleno[default_LA]].lhs]->tag));
                    defaulted = 0;
                    fprintf (out,
                             _("    %-4s\t[reduce using rule %d (%s)]\n"),
                             escape (symbols[i]->tag),
                             LAruleno[state->lookaheadsp + j] - 1,
-                            escape2 (rules[LAruleno[state->lookaheadsp + 
j]].lhs->tag));
+                            escape2 (symbols[rules[LAruleno[state->lookaheadsp 
+ j]].lhs]->tag));
                  }
              }
        }
@@ -303,7 +303,7 @@
       if (default_LA >= 0)
        fprintf (out, _("    $default\treduce using rule %d (%s)\n"),
                 default_rule - 1,
-                escape (rules[default_rule].lhs->tag));
+                escape (symbols[rules[default_rule].lhs]->tag));
     }
 }
 
@@ -366,19 +366,17 @@
   fprintf (out, "%s\n\n", _("Grammar"));
   fprintf (out, "  %s\n", _("Number, Line, Rule"));
   for (i = 1; i < nrules + 1; i++)
-    /* Don't print rules disabled in reduce_grammar_tables.  */
-    if (rules[i].useful)
-      {
-       fprintf (out, _("  %3d %3d %s ->"),
-                i - 1, rules[i].line, escape (rules[i].lhs->tag));
-       rule = rules[i].rhs;
-       if (*rule >= 0)
-         while (*rule >= 0)
-           fprintf (out, " %s", escape (symbols[*rule++]->tag));
-       else
-         fprintf (out, " /* %s */", _("empty"));
-       fputc ('\n', out);
-      }
+    {
+      fprintf (out, _("  %3d %3d %s ->"),
+              i - 1, rules[i].line, escape (symbols[rules[i].lhs]->tag));
+      rule = rules[i].rhs;
+      if (*rule >= 0)
+       while (*rule >= 0)
+         fprintf (out, " %s", escape (symbols[*rule++]->tag));
+      else
+       fprintf (out, " /* %s */", _("empty"));
+      fputc ('\n', out);
+    }
   fputs ("\n\n", out);
 
 
@@ -413,7 +411,7 @@
 
       for (j = 1; j < nrules + 1; j++)
        {
-         if (rules[j].lhs->number == i)
+         if (rules[j].lhs == i)
            left_count++;
          for (rule = rules[j].rhs; *rule >= 0; rule++)
            if (*rule == i)
@@ -437,7 +435,7 @@
          for (j = 1; j < nrules + 1; j++)
            {
              END_TEST (65);
-             if (rules[j].lhs->number == i)
+             if (rules[j].lhs == i)
                sprintf (buffer + strlen (buffer), " %d", j - 1);
            }
        }
Index: src/print_graph.c
--- src/print_graph.c Sat, 23 Mar 2002 18:04:07 +0100 akim
+++ src/print_graph.c Sat, 23 Mar 2002 12:53:38 +0100 akim
@@ -78,7 +78,7 @@
       if (i)
        obstack_1grow (node_obstack, '\n');
       obstack_fgrow1 (node_obstack, " %s -> ",
-                     escape (rules[rule].lhs->tag));
+                     escape (symbols[rules[rule].lhs]->tag));
 
       for (sp = rules[rule].rhs; sp < sp1; sp++)
        obstack_fgrow1 (node_obstack, "%s ", escape (symbols[*sp]->tag));
Index: src/reader.c
--- src/reader.c Sat, 23 Mar 2002 13:51:12 +0100 akim
+++ src/reader.c Mon, 25 Mar 2002 21:01:18 +0100 akim
@@ -1687,6 +1687,7 @@
   while (p)
     {
       bucket *ruleprec = p->ruleprec;
+      rules[ruleno].number = ruleno;
       rules[ruleno].lhs = p->sym->number;
       rules[ruleno].rhs = ritem + itemno;
       rules[ruleno].line = p->line;
Index: src/reduce.c
--- src/reduce.c Sat, 23 Mar 2002 18:04:07 +0100 akim
+++ src/reduce.c Mon, 25 Mar 2002 21:18:06 +0100 akim
@@ -118,7 +118,7 @@
        if (!bitset_test (P, i)
            && useful_production (i, N))
          {
-           bitset_set (Np, rules[i].lhs->number - ntokens);
+           bitset_set (Np, rules[i].lhs - ntokens);
            bitset_set (P, i);
          }
       if (bitset_equal_p (N, Np))
@@ -178,7 +178,7 @@
            {
              if (!bitset_test (Pp, i)
                  && bitset_test (P, i)
-                 && bitset_test (V, rules[i].lhs->number))
+                 && bitset_test (V, rules[i].lhs))
                {
                  for (r = rules[i].rhs; *r >= 0; r++)
                    if (ISTOKEN (t = *r) || bitset_test (N, t - ntokens))
@@ -220,70 +220,64 @@
       bitset_set (V1, rules[i].precsym);
 }
 
+
+/*-------------------------------------------------------------------.
+| Put the useless productions at the end of RULES, and adjust NRULES |
+| accordingly.                                                       |
+`-------------------------------------------------------------------*/
+
 static void
 reduce_grammar_tables (void)
 {
-  /* This is turned off because we would need to change the numbers in
-     the case statements in the actions file.
-
-     We don't disable it via CPP so that it is still checked with the
-     rest of the code, to avoid its becoming completely obsolete.
-
-     FIXME: I think the comment above demonstrates this code must be
-     turned off for *semantic* parser, not in the general case.  Try
-     to understand this better --akim.  */
-
-  if (0)
-    /* remove useless productions */
-    if (nuseless_productions > 0)
-      {
-       short np, pn, ni, pi;
-
-       np = 0;
-       ni = 0;
-       for (pn = 1; pn < nrules + 1; pn++)
-         if (bitset_test (P, pn))
-           {
-             np++;
-             if (pn != np)
-               {
-                 rules[np].lhs   = rules[pn].lhs;
-                 rules[np].line  = rules[pn].line;
-                 rules[np].prec  = rules[pn].prec;
-                 rules[np].assoc = rules[pn].assoc;
-                 rules[np].rhs   = rules[pn].rhs;
-                 if (rules[np].rhs - ritem != ni)
-                   {
-                     pi = rules[np].rhs - ritem;
-                     rules[np].rhs = ritem + ni;
-                     while (ritem[pi] >= 0)
-                       ritem[ni++] = ritem[pi++];
-                     ritem[ni++] = -np;
-                   }
-               }
-             else
-               {
-                 while (ritem[ni++] >= 0)
-                   /* Nothing. */;
-               }
-           }
-
-       ritem[ni] = 0;
-       nrules -= nuseless_productions;
-       nitems = ni;
-       nritems = ni;
+  /* Flag useless productions.  */
+  {
+    int pn;
+    for (pn = 1; pn < nrules + 1; pn++)
+      rules[pn].useful = bitset_test (P, pn);
+  }
 
-       /* Is it worth it to reduce the amount of memory for the
-          grammar? Probably not.  */
-      }
+  /* Map the nonterminals to their new index: useful first, useless
+     afterwards.  Kept for later report.  */
+  {
+    int useful = 1;
+    int useless = nrules + 1 - nuseless_productions;
+    rule_t *rules_sorted = XMALLOC (rule_t, nrules + 1) - 1;
+    int i;
+    for (i = 1; i < nrules + 1; ++i)
+      rules_sorted[rules[i].useful ? useful++ : useless++] = rules[i];
+    free (rules + 1);
+    rules = rules_sorted;
 
-  /* Disable useless productions. */
-  if (nuseless_productions > 0)
+    /* Also reorder ritems. */
     {
-      int pn;
-      for (pn = 1; pn < nrules + 1; pn++)
-       rules[pn].useful = bitset_test (P, pn);
+      short *ritems_sorted = XCALLOC (short, nitems + 1);
+      short *ritemsp = ritems_sorted;
+      for (i = 1; i < nrules + 1; ++i)
+       {
+         short *rhsp = rules[i].rhs;
+         rules[i].rhs = ritemsp;
+         for (/* Nothing. */; *rhsp >= 0; ++rhsp)
+           *ritemsp++ = *rhsp;
+         *ritemsp++ = -i;
+       }
+      *ritemsp++ = 0;
+      free (ritem);
+      ritem = ritems_sorted;
     }
+    nrules -= nuseless_productions;
+  }
+
+  /* Adjust NRITEMS and NITEMS.  */
+  {
+    int r;
+    int length;
+    for (r = nrules + 1; r < nrules + 1 + nuseless_productions; ++r)
+      {
+       length = rule_rhs_length (&rules[r]);
+       nritems -= length + 1;
+       nitems -= length + 1;
+      }
+  }
 }
 
 
@@ -324,6 +318,7 @@
 
   for (i = 1; i < nrules + 1; i++)
     {
+      rules[i].lhs = nontermmap[rules[i].lhs];
       if (ISVAR (rules[i].precsym))
        /* Can this happen?  */
        rules[i].precsym = nontermmap[rules[i].precsym];
@@ -377,16 +372,15 @@
     {
       int i;
       fprintf (out, "%s\n\n", _("Useless rules:"));
-      for (i = 1; i < nrules + 1; i++)
-       if (!rules[i].useful)
-         {
-           rule r;
-           fprintf (out, "#%-4d  ", i - 1);
-           fprintf (out, "%s:", rules[i].lhs->tag);
-           for (r = rules[i].rhs; *r >= 0; r++)
-             fprintf (out, " %s", symbols[*r]->tag);
-           fputs (";\n", out);
-         }
+      for (i = nrules + 1; i < nuseless_productions + nrules + 1; i++)
+       {
+         rule r;
+         fprintf (out, "#%-4d  ", rules[i].number - 1);
+         fprintf (out, "%s:", symbols[rules[i].lhs]->tag);
+         for (r = rules[i].rhs; *r >= 0; r++)
+           fprintf (out, " %s", symbols[*r]->tag);
+         fputs (";\n", out);
+       }
       fputs ("\n\n", out);
     }
 }
@@ -410,7 +404,7 @@
   fprintf (out, "\n\n");
   fprintf (out, "Rules\n-----\n\n");
   fprintf (out, "Num (Prec, Assoc, Useful, Ritem Range) Lhs -> Rhs (Ritem 
range) [Num]\n");
-  for (i = 1; i < nrules + 1; i++)
+  for (i = 1; i < nrules + nuseless_productions + 1; i++)
     {
       int rhs_count = 0;
       /* Find the last RHS index in ritems. */
@@ -428,9 +422,9 @@
     }
   fprintf (out, "\n\n");
   fprintf (out, "Rules interpreted\n-----------------\n\n");
-  for (i = 1; i < nrules + 1; i++)
+  for (i = 1; i < nrules + nuseless_productions + 1; i++)
     {
-      fprintf (out, "%-5d  %s :", i, rules[i].lhs->tag);
+      fprintf (out, "%-5d  %s :", i, symbols[rules[i].lhs]->tag);
       for (r = rules[i].rhs; *r >= 0; r++)
        fprintf (out, " %s", symbols[*r]->tag);
       fputc ('\n', out);
@@ -498,9 +492,13 @@
     fatal (_("Start symbol %s does not derive any sentence"),
           symbols[start_symbol]->tag);
 
-  reduce_grammar_tables ();
+  /* First reduce the nonterminals, as they renumber themselves in the
+     whole grammar.  If you change the order, nonterms would be
+     renumbered only in the reduced grammar.  */
   if (nuseless_nonterminals > 0)
     nonterminals_reduce ();
+  if (nuseless_productions > 0)
+    reduce_grammar_tables ();
 
   if (trace_flag)
     {
Index: src/symtab.h
--- src/symtab.h Sat, 23 Mar 2002 18:04:07 +0100 akim
+++ src/symtab.h Sat, 23 Mar 2002 12:54:02 +0100 akim
@@ -20,14 +20,7 @@
 
 #ifndef SYMTAB_H_
 # define SYMTAB_H_
-
-/* Associativity values for tokens and rules.  */
-typedef enum
-{
-  right_assoc,
-  left_assoc,
-  non_assoc
-} associativity;
+# include "gram.h"
 
 #define        TABSIZE 1009
 
Index: tests/reduce.at
--- tests/reduce.at Fri, 30 Nov 2001 00:04:35 +0100 akim
+++ tests/reduce.at Mon, 25 Mar 2002 21:01:18 +0100 akim
@@ -174,6 +174,89 @@ useless9: '9';
 
 
 ## ------------------- ##
+## Reduced Automaton.  ##
+## ------------------- ##
+
+# Check that the automaton is that as the for the grammar reduced by
+# hand.
+
+AT_SETUP([Reduced Automaton])
+
+# The non reduced grammar.
+# ------------------------
+AT_DATA([[not-reduced.y]],
+[[/* A useless token. */
+%token useless_token
+/* A useful one. */
+%token useful
+%verbose
+%output="not-reduced.c"
+
+%%
+
+exp: useful            { /* A useful action. */ }
+   | non_productive    { /* A non productive action. */ }
+   ;
+
+not_reachable: useful  { /* A not reachable action. */ }
+             ;
+
+non_productive: non_productive useless_token
+                       { /* Another non productive action. */ }
+              ;
+]])
+
+AT_CHECK([[bison not-reduced.y]], 0, [],
+[[not-reduced.y contains 2 useless nonterminals and 3 useless rules
+]])
+
+AT_CHECK([[sed -n '/^Grammar/q;/^$/!p' not-reduced.output]], 0,
+[[Useless nonterminals:
+   not_reachable
+   non_productive
+Terminals which are not used:
+   useless_token
+Useless rules:
+#2     exp: non_productive;
+#3     not_reachable: useful;
+#4     non_productive: non_productive useless_token;
+]])
+
+# The reduced grammar.
+# --------------------
+AT_DATA([[reduced.y]],
+[[/* A useless token. */
+%token useless_token
+/* A useful one. */
+%token useful
+%verbose
+%output="reduced.c"
+
+%%
+
+exp: useful            { /* A useful action. */ }
+//   | non_productive    { /* A non productive action. */ } */
+   ;
+
+//not_reachable: useful  { /* A not reachable action. */ }
+//             ;
+
+//non_productive: non_productive useless_token
+//                       { /* Another non productive action. */ }
+//              ;
+]])
+
+AT_CHECK([[bison reduced.y]])
+
+# Comparing the parsers.
+cp reduced.c expout
+AT_CHECK([sed 's/not-reduced/reduced/g' not-reduced.c], 0, [expout])
+
+AT_CLEANUP
+
+
+
+## ------------------- ##
 ## Underivable Rules.  ##
 ## ------------------- ##
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]