bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 05/15] Avoid using llink and rlink during prep


From: Nick Cleaton
Subject: [PATCH 05/15] Avoid using llink and rlink during prep
Date: Sat, 2 Oct 2010 07:07:44 +0100

Laying groundwork for struct trie size reductions based on sharing
storage between fields used only at prep time and fields used only
at match time:

Don't make any use of the trie->llink and trie->rlink pointers during
the trie preparation process.
---
 src/kwset.c |  144 ++++++++++++++++++++++++-----------------------------------
 1 files changed, 59 insertions(+), 85 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index 079c28a..f36f036 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -60,6 +60,7 @@ struct trie
   struct trie *llink;          /* Left tree link (to smaller labels). */
   struct trie *rlink;          /* Right tree link (to larger labels). */
   unsigned char label;         /* Label on the incoming edge. */
+  unsigned char is_lastkid;     /* True for the last of a group of siblings. */
   unsigned int accepting;      /* Word index of accepted word, or zero. */
   struct trie *links;          /* Tree of edges leaving this node. */
   struct trie *parent;         /* Parent of this node. */
@@ -254,6 +255,7 @@ alloc_and_plumb_siblings (struct kwset *kwset, int count, 
int depth)
 
   t->rlink = alloc_and_plumb_siblings (kwset, rcount, depth);
   t->depth = depth;
+  t->is_lastkid = 0;
   t->links = kwset->next_prealloced;
   kwset->next_prealloced = t;
   t->llink = alloc_and_plumb_siblings (kwset, lcount, depth);
@@ -263,8 +265,8 @@ alloc_and_plumb_siblings (struct kwset *kwset, int count, 
int depth)
 
 /* Used to allocate each trie node from the node vector when building the
    trie from the sorted reversed keyword list, and to plumb the new node
-   into the trie.  Responsible for setting 'llink' and 'rlink', and for
-   updating 'links' in the parent node.  */
+   into the trie.  Responsible for setting 'llink', 'rlink' and 'is_lastkid',
+   and for updating 'links' in the parent node.  */
 static struct trie *
 allocate_and_plumb_trie_node (struct kwset *kwset, int index, int depth,
                               struct trie *parent)
@@ -284,6 +286,7 @@ allocate_and_plumb_trie_node (struct kwset *kwset, int 
index, int depth,
       parent->links = alloc_and_plumb_siblings (kwset,
                                                 kwset->bn_cursor->count,
                                                 depth);
+      kwset->next_free_node[-1].is_lastkid = 1;
       kwset->bn_cursor++;
     }
 
@@ -299,6 +302,7 @@ allocate_and_plumb_trie_node (struct kwset *kwset, int 
index, int depth,
   /* This node is an only child. */
   parent->links = kwset->next_free_node++;
   parent->links->llink = parent->links->rlink = NULL;
+  parent->links->is_lastkid = 1;
   return parent->links;
 }
 
@@ -365,6 +369,7 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
   kwset->trie->fail = NULL;
   kwset->trie->depth = 0;
   kwset->trie->shift = 0;
+  kwset->trie->is_lastkid = 1;
 
   /* Set up a vector by depth of the last trie node visited. */
   MALLOC (kwset->prev_kw_tries, kwset->maxd + 1);
@@ -393,97 +398,54 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
   return NULL;
 }
 
-/* Enqueue the trie nodes referenced from the given tree in the
-   given queue. */
+/* Compute the Aho-Corasick failure function for the given trie node,
+   given the failure function for its parent as well as a last resort
+   failure node. */
 static void
-enqueue (struct trie *tree, struct trie **last)
-{
-  if (!tree)
-    return;
-  enqueue(tree->llink, last);
-  enqueue(tree->rlink, last);
-  (*last) = (*last)->next = tree;
-}
-
-/* Compute the Aho-Corasick failure function for the trie nodes referenced
-   from the given tree, given the failure function for their parent as
-   well as a last resort failure node. */
-static void
-treefails (struct trie *tree, struct trie const *fail,
-           struct trie *recourse)
+triefail (struct trie *trie, struct trie const *fail, struct trie *recourse)
 {
   struct trie *link;
 
-  if (!tree)
-    return;
-
-  treefails(tree->llink, fail, recourse);
-  treefails(tree->rlink, fail, recourse);
-
   /* Find, in the chain of fails going back to the root, the first
      node that has a descendent on the current label. */
   while (fail)
     {
-      link = fail->links;
-      while (link && tree->label != link->label)
-        if (tree->label < link->label)
-          link = link->llink;
-        else
-          link = link->rlink;
-      if (link)
+      if ((link = fail->links))
         {
-          tree->fail = link;
-          return;
+          do
+            {
+              if (trie->label == link->label)
+                {
+                  trie->fail = link;
+                  return;
+                }
+            }
+          while (!link++->is_lastkid);
         }
       fail = fail->fail;
     }
 
-  tree->fail = recourse;
-}
-
-/* Set delta entries for the links of the given tree such that
-   the preexisting delta value is larger than the current depth. */
-static void
-treedelta (struct trie const *tree,
-           unsigned int depth,
-           unsigned char delta[])
-{
-  if (!tree)
-    return;
-  treedelta(tree->llink, depth, delta);
-  treedelta(tree->rlink, depth, delta);
-  if (depth < delta[tree->label])
-    delta[tree->label] = depth;
+  trie->fail = recourse;
 }
 
 /* Return true if A has every label in B. */
 static int
 hasevery (struct trie const *a, struct trie const *b)
 {
-  if (!b)
-    return 1;
-  if (!hasevery(a, b->llink))
-    return 0;
-  if (!hasevery(a, b->rlink))
-    return 0;
-  while (a && b->label != a->label)
-    if (b->label < a->label)
-      a = a->llink;
-    else
-      a = a->rlink;
-  return !!a;
-}
+  unsigned char a_has[NCHAR];
+  int i;
 
-/* Compute a vector, indexed by character code, of the trie nodes
-   referenced from the given tree. */
-static void
-treenext (struct trie *tree, struct trie *next[])
-{
-  if (!tree)
-    return;
-  treenext(tree->llink, next);
-  treenext(tree->rlink, next);
-  next[tree->label] = tree;
+  memset (a_has, 0, NCHAR);
+  do
+    a_has[a->label] = 1;
+  while (!a++->is_lastkid);
+
+  do
+    if (!a_has[b->label])
+      return 0;
+  while (!b++->is_lastkid);
+
+  return 1;
 }
 
 /* Compute the shift for each trie node, as well as the delta
@@ -530,7 +492,7 @@ kwsprep (kwset_t kws)
   else
     {
       struct trie *fail;
-      struct trie *last, *next[NCHAR];
+      struct trie *last, *next[NCHAR], *child;
 
       if ((err = build_trie_from_sorted_keywords (kwset)))
         return err;
@@ -542,17 +504,26 @@ kwsprep (kwset_t kws)
          computing the delta table, failure function, and shift function. */
       for (curr = last = kwset->trie; curr; curr = curr->next)
         {
-          /* Enqueue the immediate descendents in the level order queue. */
-          enqueue(curr->links, &last);
-
           curr->shift = kwset->mind;
           curr->maxshift = kwset->mind;
 
-          /* Update the delta table for the descendents of this node. */
-          treedelta(curr->links, curr->depth, delta);
+          /* Loop over the immediate descendents of this node. */
+          if ((child = curr->links))
+            {
+              do
+                {
+                  /* Enqueue the descendent in the level order queue. */
+                  last = last->next = child;
 
-          /* Compute the failure function for the decendents of this node. */
-          treefails(curr->links, curr->fail, kwset->trie);
+                  /* Update the delta table. */
+                  if (curr->depth < delta[child->label])
+                    delta[child->label] = curr->depth;
+
+                  /* Compute the failure function for this node. */
+                  triefail (child, curr->fail, kwset->trie);
+                }
+              while (!child++->is_lastkid);
+            }
 
           /* Update the shifts at each node in the current node's chain
              of fails back to the root. */
@@ -561,8 +532,8 @@ kwsprep (kwset_t kws)
               /* If the current node has some outgoing edge that the fail
                  doesn't, then the shift at the fail should be no larger
                  than the difference of their depths. */
-              if (!hasevery(fail->links, curr->links))
-                if (curr->depth - fail->depth < fail->shift)
+              if (curr->links && curr->depth - fail->depth < fail->shift)
+                if (!fail->links || !hasevery(fail->links, curr->links))
                   fail->shift = curr->depth - fail->depth;
 
               /* If the current node is accepting then the shift at the
@@ -587,7 +558,10 @@ kwsprep (kwset_t kws)
          from the root node. */
       for (i = 0; i < NCHAR; ++i)
         next[i] = NULL;
-      treenext(kwset->trie->links, next);
+      if ((curr = kwset->trie->links))
+        do
+          next[curr->label] = curr;
+        while (!curr++->is_lastkid);
 
       if ((trans = kwset->trans) != NULL)
         for (i = 0; i < NCHAR; ++i)
-- 
1.5.6.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]