[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 14/15] Merge flags into a trie->flags field
From: |
Nick Cleaton |
Subject: |
[PATCH 14/15] Merge flags into a trie->flags field |
Date: |
Sat, 2 Oct 2010 07:07:44 +0100 |
Merge trie->accepting and trie->is_lastkid into a single flags field,
saving one char field.
---
src/kwset.c | 81 ++++++++++++++++++++++++++++++++--------------------------
1 files changed, 45 insertions(+), 36 deletions(-)
diff --git a/src/kwset.c b/src/kwset.c
index aa75fbd..2e4ab6d 100644
--- a/src/kwset.c
+++ b/src/kwset.c
@@ -70,12 +70,16 @@ struct trie
}
rd;
unsigned char label; /* Label on the incoming edge. */
- unsigned char is_lastkid; /* True for the last of a group of siblings. */
- unsigned char accepting; /* Flag value of accepted word, or zero. */
+ unsigned char flags; /* Flags for this node. */
struct trie *links; /* Tree of edges leaving this node. */
int shift; /* Shift function for search failures. */
};
+/* Bit masks for trie->flags */
+#define FLAG_IS_ACCEPTING 1 /* Set on accepting nodes. */
+#define FLAG_KW_FLAGED 2 /* Set on accepting nodes of flagged words. */
+#define FLAG_IS_LASTKID 4 /* Set on the last of a sibling group. */
+
/* A record of a trie node that has one or more siblings. */
struct broodnote
{
@@ -264,7 +268,7 @@ alloc_siblings (struct kwset *kwset, int count, int depth)
alloc_siblings (kwset, rcount, depth);
t->rd.depth = depth;
- t->is_lastkid = 0;
+ t->flags = 0;
t->links = kwset->next_prealloced;
kwset->next_prealloced = t;
alloc_siblings (kwset, lcount, depth);
@@ -292,7 +296,7 @@ plumb_siblings (struct trie **nodes, int count)
/* Used to allocate each trie node from the node vector when building the
trie from the sorted reversed keyword list, and to plumb the new node
- into the trie. Responsible for setting 'is_lastkid' and for updating
+ into the trie. Responsible for setting up 'flags' and for updating
'links' in the parent node. */
static struct trie *
allocate_and_plumb_trie_node (struct kwset *kwset, int index, int depth,
@@ -312,7 +316,7 @@ allocate_and_plumb_trie_node (struct kwset *kwset, int
index, int depth,
keywords are being added. */
parent->links = kwset->next_free_node;
alloc_siblings (kwset, kwset->bn_cursor->count, depth);
- kwset->next_free_node[-1].is_lastkid = 1;
+ kwset->next_free_node[-1].flags = FLAG_IS_LASTKID;
kwset->bn_cursor++;
}
@@ -327,7 +331,7 @@ allocate_and_plumb_trie_node (struct kwset *kwset, int
index, int depth,
/* This node is an only child. */
parent->links = kwset->next_free_node++;
- parent->links->is_lastkid = 1;
+ parent->links->flags = FLAG_IS_LASTKID;
return parent->links;
}
@@ -350,7 +354,6 @@ install_suffix (kwset_t kws, struct cpsort_md *md, char
const *text,
while (len--)
{
link = allocate_and_plumb_trie_node (kwset, index, ++trie_depth, trie);
- link->accepting = 0;
link->lf.fail = NULL;
link->rd.depth = trie_depth;
link->shift = 0;
@@ -361,7 +364,9 @@ install_suffix (kwset_t kws, struct cpsort_md *md, char
const *text,
/* Mark the node we finally reached as accepting, encoding the
flag value for this keyword. */
- trie->accepting = 1 + 2 * md->flag;
+ trie->flags |= FLAG_IS_ACCEPTING;
+ if (md->flag)
+ trie->flags |= FLAG_KW_FLAGED;
trie->links = NULL;
}
@@ -386,12 +391,11 @@ build_trie_from_sorted_keywords (struct kwset *kwset)
/* Install the top level trie node. */
kwset->trie = kwset->next_free_node++;
- kwset->trie->accepting = 0;
+ kwset->trie->flags = FLAG_IS_LASTKID;
kwset->trie->links = NULL;
kwset->trie->lf.fail = NULL;
kwset->trie->rd.depth = 0;
kwset->trie->shift = 0;
- kwset->trie->is_lastkid = 1;
/* Set up a vector by depth of the last trie node visited. */
MALLOC (kwset->prev_kw_tries, kwset->maxd + 1);
@@ -443,7 +447,7 @@ triefail (struct trie *trie, struct trie const *fail,
struct trie *recourse)
return fail->lf.fail;
}
}
- while (!link++->is_lastkid);
+ while (!(link++->flags & FLAG_IS_LASTKID));
}
fail = fail->lf.fail;
}
@@ -462,12 +466,12 @@ hasevery (struct trie const *a, struct trie const *b)
memset (a_has, 0, NCHAR);
do
a_has[a->label] = 1;
- while (!a++->is_lastkid);
+ while (!(a++->flags & FLAG_IS_LASTKID));
do
if (!a_has[b->label])
return 0;
- while (!b++->is_lastkid);
+ while (!(b++->flags & FLAG_IS_LASTKID));
return 1;
}
@@ -522,6 +526,21 @@ kwsprep (kwset_t kws)
if ((err = build_trie_from_sorted_keywords (kwset)))
return err;
+ /* Create a vector, indexed by character code, of the outgoing links
+ from the root node. */
+ for (i = 0; i < NCHAR; ++i)
+ next[i] = NULL;
+ if ((child = kwset->trie->links))
+ do
+ next[child->label] = child;
+ while (!(child++->flags & FLAG_IS_LASTKID));
+
+ if ((trans = kwset->trans) != NULL)
+ for (i = 0; i < NCHAR; ++i)
+ kwset->next[i] = next[U(trans[i])];
+ else
+ memcpy(kwset->next, next, NCHAR * sizeof(struct trie *));
+
MALLOC (maxshift, kwset->node_count);
if (!maxshift)
return _("memory exhausted");
@@ -562,7 +581,7 @@ kwsprep (kwset_t kws)
}
}
}
- while (!child++->is_lastkid);
+ while (!(child++->flags & FLAG_IS_LASTKID));
}
/* Update the shifts at each node in the current node's chain
@@ -579,7 +598,7 @@ kwsprep (kwset_t kws)
/* If the current node is accepting then the shift at the
fail and its descendents should be no larger than the
difference of their depths. */
- if (curr->accepting
+ if (curr->flags & FLAG_IS_ACCEPTING
&& maxshift[fail - kwset->trie] >
curr->rd.depth - fail->rd.depth)
maxshift[fail - kwset->trie] =
@@ -607,29 +626,19 @@ kwsprep (kwset_t kws)
child->shift = *child_maxshift_ptr;
child_maxshift_ptr++;
}
- while (!child++->is_lastkid);
+ while (!(child++->flags & FLAG_IS_LASTKID));
{
struct trie *links = curr->links;
plumb_siblings (&links, child - links);
}
+
+ /* Clear the lastkid flag, so that from now no only accepting
+ nodes have non-zero flags values. */
+ child[-1].flags &= (~FLAG_IS_LASTKID);
}
+ kwset->trie->flags &= (~FLAG_IS_LASTKID);
free (maxshift);
-
- /* Create a vector, indexed by character code, of the outgoing links
- from the root node. */
- for (i = 0; i < NCHAR; ++i)
- next[i] = NULL;
- if ((child = kwset->trie->links))
- do
- next[child->label] = child;
- while (!child++->is_lastkid);
-
- if ((trans = kwset->trans) != NULL)
- for (i = 0; i < NCHAR; ++i)
- kwset->next[i] = next[U(trans[i])];
- else
- memcpy(kwset->next, next, NCHAR * sizeof(struct trie *));
}
/* Fix things up for any translation table. */
@@ -788,7 +797,7 @@ cwexec (kwset_t kws, char const *text, size_t len, struct
kwsmatch *kwsmatch)
continue;
beg = end - 1;
trie = next[c];
- if (trie->accepting)
+ if (trie->flags)
{
mch = beg;
accept = trie;
@@ -805,7 +814,7 @@ cwexec (kwset_t kws, char const *text, size_t len, struct
kwsmatch *kwsmatch)
trie = trie->rd.rlink;
if (trie)
{
- if (trie->accepting)
+ if (trie->flags)
{
mch = beg;
accept = trie;
@@ -839,7 +848,7 @@ cwexec (kwset_t kws, char const *text, size_t len, struct
kwsmatch *kwsmatch)
d = 1;
continue;
}
- if (trie->accepting && beg <= mch)
+ if (trie->flags && beg <= mch)
{
lmch = beg;
accept = trie;
@@ -856,7 +865,7 @@ cwexec (kwset_t kws, char const *text, size_t len, struct
kwsmatch *kwsmatch)
trie = trie->rd.rlink;
if (trie)
{
- if (trie->accepting && beg <= mch)
+ if (trie->flags && beg <= mch)
{
lmch = beg;
accept = trie;
@@ -876,7 +885,7 @@ cwexec (kwset_t kws, char const *text, size_t len, struct
kwsmatch *kwsmatch)
d = 1;
}
- kwsmatch->flag = accept->accepting / 2;
+ kwsmatch->flag = (accept->flags & FLAG_KW_FLAGED) ? 1 : 0;
kwsmatch->offset[0] = mch - text;
kwsmatch->size[0] = mch_len;
--
1.5.6.3
- [PATCH 04/15] Lay out the trees more systematically, (continued)
- [PATCH 04/15] Lay out the trees more systematically, Nick Cleaton, 2010/10/02
- [PATCH 05/15] Avoid using llink and rlink during prep, Nick Cleaton, 2010/10/02
- [PATCH 06/15] Defer llink/rlink plumbing to the end of prep, Nick Cleaton, 2010/10/02
- [PATCH 07/15] Share storage between llink and fail, Nick Cleaton, 2010/10/02
- [PATCH 08/15] Avoid using trie->depth during matching, Nick Cleaton, 2010/10/02
- [PATCH 09/15] Share storage between rlink and depth, Nick Cleaton, 2010/10/02
- [PATCH 10/15] Eliminate the trie->parent field, Nick Cleaton, 2010/10/02
- [PATCH 11/15] Eliminate the trie->next field, Nick Cleaton, 2010/10/02
- [PATCH 12/15] Eliminate the trie->maxshift field, Nick Cleaton, 2010/10/02
- [PATCH 13/15] Reduce trie->accepting to a flag, Nick Cleaton, 2010/10/02
- [PATCH 14/15] Merge flags into a trie->flags field,
Nick Cleaton <=
- [PATCH 15/15] Make trie->shift an unsigned short, Nick Cleaton, 2010/10/02
- Re: Proof of concept: kwset running 3 times smaller, Nick Cleaton, 2010/10/30