bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 11/15] Eliminate the trie->next field


From: Nick Cleaton
Subject: [PATCH 11/15] Eliminate the trie->next field
Date: Sat, 2 Oct 2010 07:07:44 +0100

Don't use trie->next, instead iterate over the nodes in node vector
order.  This requires a change to the fail and shift computations,
since the traversal is no-longer breadth first.
---
 src/kwset.c |   57 ++++++++++++++++++++++++++++++++++-----------------------
 1 files changed, 34 insertions(+), 23 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index c30e951..27a47c3 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -73,7 +73,6 @@ struct trie
   unsigned char is_lastkid;     /* True for the last of a group of siblings. */
   unsigned int accepting;      /* Word index of accepted word, or zero. */
   struct trie *links;          /* Tree of edges leaving this node. */
-  struct trie *next;           /* List of all trie nodes in level order. */
   int shift;                   /* Shift function for search failures. */
   int maxshift;                        /* Max shift of self and descendents. */
 };
@@ -103,6 +102,7 @@ struct kwset
   int kwbuf_capacity;           /* Capacity of the buffer. */
   struct trie **prev_kw_tries;  /* The trie nodes of the last keyword added. */
   struct trie *node_storage;    /* Vector of trie nodes. */
+  size_t node_count;            /* The total number of trie nodes. */
   struct trie *next_free_node;  /* Next unallocated trie in node_storage. */
   struct broodnote *bn_cursor;  /* A cursor for walking the broodnotes. */
   struct trie *next_prealloced; /* Linked list of pre-allocated trie nodes. */
@@ -130,6 +130,7 @@ kwsalloc (char const *trans)
   kwset->kwbuf_capacity = 0;
   kwset->prev_kw_tries = NULL;
   kwset->node_storage = NULL;
+  kwset->node_count = 0;
   kwset->next_free_node = NULL;
   kwset->bn_cursor = NULL;
   kwset->next_prealloced = NULL;
@@ -349,7 +350,6 @@ install_suffix (kwset_t kws, struct cpsort_md *md, char 
const *text,
     {
       link = allocate_and_plumb_trie_node (kwset, index, ++trie_depth, trie);
       link->accepting = 0;
-      link->next = NULL;
       link->lf.fail = NULL;
       link->rd.depth = trie_depth;
       link->shift = 0;
@@ -377,7 +377,8 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
     return err;
 
   /* Allocate a single vector to hold all of the trie nodes. */
-  MALLOC (kwset->node_storage, 1 + cpsdump.tot_suf_len);
+  kwset->node_count = 1 + cpsdump.tot_suf_len;
+  MALLOC (kwset->node_storage, kwset->node_count);
   if (!kwset->node_storage)
     return _("memory exhausted");
   kwset->next_free_node = kwset->node_storage;
@@ -386,7 +387,6 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
   kwset->trie = kwset->next_free_node++;
   kwset->trie->accepting = 0;
   kwset->trie->links = NULL;
-  kwset->trie->next = NULL;
   kwset->trie->lf.fail = NULL;
   kwset->trie->rd.depth = 0;
   kwset->trie->shift = 0;
@@ -421,8 +421,9 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
 
 /* Compute the Aho-Corasick failure function for the given trie node,
    given the failure function for its parent as well as a last resort
-   failure node. */
-static void
+   failure node.
+   Return the parent of the installed fail node. */
+static struct trie const *
 triefail (struct trie *trie, struct trie const *fail, struct trie *recourse)
 {
   struct trie *link;
@@ -438,7 +439,7 @@ triefail (struct trie *trie, struct trie const *fail, 
struct trie *recourse)
               if (trie->label == link->label)
                 {
                   trie->lf.fail = link;
-                  return;
+                  return fail->lf.fail;
                 }
             }
           while (!link++->is_lastkid);
@@ -447,6 +448,7 @@ triefail (struct trie *trie, struct trie const *fail, 
struct trie *recourse)
     }
 
   trie->lf.fail = recourse;
+  return NULL;
 }
 
 /* Return true if A has every label in B. */
@@ -513,7 +515,7 @@ kwsprep (kwset_t kws)
   else
     {
       struct trie *fail;
-      struct trie *last, *next[NCHAR], *child;
+      struct trie *next[NCHAR], *child, *nodes_end;
 
       if ((err = build_trie_from_sorted_keywords (kwset)))
         return err;
@@ -521,27 +523,36 @@ kwsprep (kwset_t kws)
       cpsort_free (kwset->cps);
       kwset->cps = NULL;
 
-      /* Traverse the nodes of the trie in level order, simultaneously
-         computing the delta table, failure function, and shift function. */
-      for (curr = last = kwset->trie; curr; curr = curr->next)
+      /* Traverse the nodes of the trie, simultaneously computing the delta
+         table, failure function, and shift function. */
+      nodes_end = kwset->trie + kwset->node_count;
+      kwset->trie->shift = kwset->trie->maxshift = kwset->mind;
+      for (curr = kwset->trie; curr < nodes_end; ++curr)
         {
-          curr->shift = kwset->mind;
-          curr->maxshift = kwset->mind;
-
           /* Loop over the immediate descendents of this node. */
           if ((child = curr->links))
             {
               do
                 {
-                  /* Enqueue the descendent in the level order queue. */
-                  last = last->next = child;
-
                   /* Update the delta table. */
                   if (curr->rd.depth < delta[child->label])
                     delta[child->label] = curr->rd.depth;
 
-                  /* Compute the failure function for this node. */
-                  triefail (child, curr->lf.fail, kwset->trie);
+                  /* Initialize shift and maxshift, and compute the failure
+                     function for this node.  Since we are not traversing
+                     the trie in level order, we may need to recurse ahead
+                     to compute the fail of the fail and so on. */
+                  {
+                    struct trie *target = child;
+                    struct trie const *pfail = curr->lf.fail;
+
+                    while (target->lf.fail == NULL && target != kwset->trie)
+                      {
+                        target->shift = target->maxshift = kwset->mind;
+                        pfail = triefail (target, pfail, kwset->trie);
+                        target = target->lf.fail;
+                      }
+                  }
                 }
               while (!child++->is_lastkid);
             }
@@ -565,10 +576,10 @@ kwsprep (kwset_t kws)
             }
         }
 
-      /* Traverse the trie in level order again, fixing up all nodes whose
-         shift exceeds their inherited maxshift and plumbing in the rlink
-         and llink fields. */
-      for (curr = kwset->trie; curr; curr = curr->next)
+      /* Traverse the trie again, fixing up all nodes whose shift exceeds
+         their inherited maxshift and plumbing in the rlink and llink
+         fields. */
+      for (curr = kwset->trie; curr < nodes_end; ++curr)
         {
           if (!(child = curr->links))
             continue;
-- 
1.5.6.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]