bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 04/15] Lay out the trees more systematically


From: Nick Cleaton
Subject: [PATCH 04/15] Lay out the trees more systematically
Date: Sat, 2 Oct 2010 07:07:44 +0100

Lay out sibling nodes according to location within the sibling trees,
rather than adding sibling nodes as they are encountered and
rebalancing the trees with rotations.
---
 src/kwset.c |  187 ++++++++++++++++-------------------------------------------
 1 files changed, 51 insertions(+), 136 deletions(-)

diff --git a/src/kwset.c b/src/kwset.c
index e4712fb..079c28a 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -57,10 +57,9 @@
    arranged as a balanced tree.  */
 struct trie
 {
-  struct trie *llink;          /* Left tree link; MUST be first field. */
+  struct trie *llink;          /* Left tree link (to smaller labels). */
   struct trie *rlink;          /* Right tree link (to larger labels). */
   unsigned char label;         /* Label on the incoming edge. */
-  char balance;                        /* Difference in depths of subtrees. */
   unsigned int accepting;      /* Word index of accepted word, or zero. */
   struct trie *links;          /* Tree of edges leaving this node. */
   struct trie *parent;         /* Parent of this node. */
@@ -241,10 +240,34 @@ compute_brood_notes (struct kwset *kwset, struct 
cpsort_dump *keywords,
   return brood_dest + 1;
 }
 
-/* Used to allocate trie nodes from the node vector when building the trie
-   from the sorted reversed keyword list.  */
+/* Pre-allocate and plumb in a tree of 'count' trie nodes.  Push the nodes
+   onto the prealloced node list in right to left order. */
 static struct trie *
-allocate_trie_node (struct kwset *kwset, int index, int depth)
+alloc_and_plumb_siblings (struct kwset *kwset, int count, int depth)
+{
+  if (count == 0)
+    return NULL;
+
+  int lcount = --count / 2;
+  int rcount = count - lcount;
+  struct trie *t = kwset->next_free_node++;
+
+  t->rlink = alloc_and_plumb_siblings (kwset, rcount, depth);
+  t->depth = depth;
+  t->links = kwset->next_prealloced;
+  kwset->next_prealloced = t;
+  t->llink = alloc_and_plumb_siblings (kwset, lcount, depth);
+
+  return t;
+}
+
+/* Used to allocate each trie node from the node vector when building the
+   trie from the sorted reversed keyword list, and to plumb the new node
+   into the trie.  Responsible for setting 'llink' and 'rlink', and for
+   updating 'links' in the parent node.  */
+static struct trie *
+allocate_and_plumb_trie_node (struct kwset *kwset, int index, int depth,
+                              struct trie *parent)
 {
   if (kwset->bn_cursor->index == index && kwset->bn_cursor->depth == depth)
     {
@@ -252,29 +275,31 @@ allocate_trie_node (struct kwset *kwset, int index, int 
depth)
          2 or more siblings.  Pre-allocate a block of nodes for the siblings,
          so that they are kept adjacent.  Pre-allocated nodes are stored in
          a linked list (using 'links' as the list pointer) and tagged with
-         the depth at which they are to be used. */
-      int siblings = kwset->bn_cursor->count - 1;
-
-      while (siblings--)
-        {
-          kwset->next_free_node->depth = depth;
-          kwset->next_free_node->links = kwset->next_prealloced;
-          kwset->next_prealloced = kwset->next_free_node++;
-        }
+         the depth at which they are to be used.
+
+         Plumb in llink and rlink for the pre-allocated node block now.
+         This requires that they be pushed onto the pre-alloced list in right
+         to left order, so that they will come off the list in left to right
+         order, matching the order in which keywords are being added. */
+      parent->links = alloc_and_plumb_siblings (kwset,
+                                                kwset->bn_cursor->count,
+                                                depth);
       kwset->bn_cursor++;
-      return kwset->next_free_node++;
     }
 
   if (kwset->next_prealloced && kwset->next_prealloced->depth == depth)
     {
       /* This node is part of a group of siblings for which a block of
-         adjacent nodes has been pre-allocated.  */
+         adjacent nodes has been allocated and plumbed in.  */
       struct trie *t = kwset->next_prealloced;
       kwset->next_prealloced = kwset->next_prealloced->links;
       return t;
     }
 
-  return kwset->next_free_node++;
+  /* This node is an only child. */
+  parent->links = kwset->next_free_node++;
+  parent->links->llink = parent->links->rlink = NULL;
+  return parent->links;
 }
 
 /* Add a keyword to the under-construction trie. */
@@ -285,143 +310,33 @@ install_suffix (kwset_t kws, struct cpsort_md *md, char 
const *text,
   struct kwset *kwset;
   struct trie *trie;
   struct trie *link;
-  int depth, trie_depth = md->cpl, len = md->suf_len;
-  struct trie *links[DEPTH_SIZE];
-  enum { L, R } dirs[DEPTH_SIZE];
-  struct trie *t, *r, *l, *rl, *lr;
+  int trie_depth = md->cpl, len = md->suf_len;
 
   kwset = (struct kwset *) kws;
 
   /* Start at the trie node at which this suffix slots in. */
   trie = kwset->prev_kw_tries[trie_depth];
-  if (len == 0)
-    {
-      trie->accepting = 1 + 2 * md->index;
-      return;
-    }
 
-  /* Descend the tree of outgoing links, looking for the place that the first
-     character of the suffix goes and keeping track of the path followed. */
-  link = trie->links;
-  links[0] = (struct trie *) &trie->links;
-  dirs[0] = L;
-  depth = 1;
-
-  while (link)
-    {
-      links[depth] = link;
-      if ((unsigned char) text[0] < link->label)
-        dirs[depth++] = L, link = link->llink;
-      else
-        dirs[depth++] = R, link = link->rlink;
-    }
-
-  /* Install a new trie node for the first character of this suffix. */
-  link = allocate_trie_node (kwset, index, trie_depth + 1);
-  link->llink = NULL;
-  link->rlink = NULL;
-  link->balance = 0;
-  if (dirs[--depth] == L)
-    links[depth]->llink = link;
-  else
-    links[depth]->rlink = link;
-
-  /* Back up the tree fixing the balance flags. */
-  while (depth && !links[depth]->balance)
-    {
-      if (dirs[depth] == L)
-        --links[depth]->balance;
-      else
-        ++links[depth]->balance;
-      --depth;
-    }
-
-  /* Rebalance the tree by pointer rotations if necessary. */
-  if (depth && ((dirs[depth] == L && --links[depth]->balance)
-                || (dirs[depth] == R && ++links[depth]->balance)))
-    {
-      switch (links[depth]->balance)
-        {
-        case (char) -2:
-          switch (dirs[depth + 1])
-            {
-            case L:
-              r = links[depth], t = r->llink, rl = t->rlink;
-              t->rlink = r, r->llink = rl;
-              t->balance = r->balance = 0;
-              break;
-            case R:
-              r = links[depth], l = r->llink, t = l->rlink;
-              rl = t->rlink, lr = t->llink;
-              t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
-              l->balance = t->balance != 1 ? 0 : -1;
-              r->balance = t->balance != (char) -1 ? 0 : 1;
-              t->balance = 0;
-              break;
-            default:
-              abort ();
-            }
-          break;
-        case 2:
-          switch (dirs[depth + 1])
-            {
-            case R:
-              l = links[depth], t = l->rlink, lr = t->llink;
-              t->llink = l, l->rlink = lr;
-              t->balance = l->balance = 0;
-              break;
-            case L:
-              l = links[depth], r = l->rlink, t = r->llink;
-              lr = t->llink, rl = t->rlink;
-              t->llink = l, l->rlink = lr, t->rlink = r, r->llink = rl;
-              l->balance = t->balance != 1 ? 0 : -1;
-              r->balance = t->balance != (char) -1 ? 0 : 1;
-              t->balance = 0;
-              break;
-            default:
-              abort ();
-            }
-          break;
-        default:
-          abort ();
-        }
-
-      if (dirs[depth - 1] == L)
-        links[depth - 1]->llink = t;
-      else
-        links[depth - 1]->rlink = t;
-    }
-
-
-  /* Populate trie nodes for the characters of this suffix.  The first node
-     has already been allocated and installed, the others are simple because
-     they have no siblings yet. */
-  while (1)
+  /* Populate trie nodes for the characters of this suffix. */
+  while (len--)
     {
+      link = allocate_and_plumb_trie_node (kwset, index, ++trie_depth, trie);
       link->accepting = 0;
-      link->links = NULL;
       link->parent = trie;
       link->next = NULL;
       link->fail = NULL;
-      link->depth = trie->depth + 1;
+      link->depth = trie_depth;
       link->shift = 0;
       link->label = *text++;
-      kwset->prev_kw_tries[++trie_depth] = link;
-
-      if (--len == 0)
-        break;
-
+      kwset->prev_kw_tries[trie_depth] = link;
       trie = link;
-      link = allocate_trie_node (kwset, index, trie_depth + 1);
-      link->llink = NULL;
-      link->rlink = NULL;
-      link->balance = 0;
-      trie->links = link;
     }
 
   /* Mark the node we finally reached as accepting, encoding the
      index number of this word in the keyword set. */
-  link->accepting = 1 + 2 * md->index;
+  trie->accepting = 1 + 2 * md->index;
+
+  trie->links = NULL;
 }
 
 static char const *
-- 
1.5.6.3




reply via email to

[Prev in Thread] Current Thread [Next in Thread]