[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 4/8] dfa: refactor common context computations
From: |
Paolo Bonzini |
Subject: |
[PATCH 4/8] dfa: refactor common context computations |
Date: |
Fri, 20 Jan 2012 16:35:17 +0100 |
* src/dfa.c (CTX_ANY, charclass_context, state_wanted_contexts): New.
(dfaanalyze): Use state_wanted_contexts.
(dfastate): Use charclass_context and state_wanted_contexts.
---
src/dfa.c | 75 ++++++++++++++++++++++++++++++++++++++----------------------
1 files changed, 47 insertions(+), 28 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 26d2bfb..f33a4c3 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -102,6 +102,7 @@ static inline unsigned char to_uchar (char ch) { return ch;
}
#define CTX_NONE 1
#define CTX_LETTER 2
#define CTX_NEWLINE 4
+#define CTX_ANY 7
/* Sometimes characters can only be matched depending on the surrounding
context. Such context decisions depend on what the previous character
@@ -2097,6 +2098,45 @@ epsclosure (position_set *s, struct dfa const *d)
free(visited);
}
+int
+charclass_context(charclass c)
+{
+ int context = 0;
+ int j;
+
+ if (tstbit(eolbyte, c))
+ context |= CTX_NEWLINE;
+
+ for (j = 0; j < CHARCLASS_INTS; ++j)
+ {
+ if (c[j] & letters[j])
+ context |= CTX_LETTER;
+ if (c[j] & ~(letters[j] | newline[j]))
+ context |= CTX_NONE;
+ }
+
+ return context;
+}
+
+int state_wanted_contexts(position_set *s, int possible_contexts)
+{
+ int wanted_context = 0;
+ int j;
+
+ for (j = 0; j < s->nelem; ++j)
+ {
+ if ((possible_contexts & CTX_NEWLINE)
+ && PREV_NEWLINE_DEPENDENT(s->elems[j].constraint))
+ wanted_context |= CTX_NEWLINE;
+ if ((possible_contexts & CTX_LETTER)
+ && PREV_LETTER_DEPENDENT(s->elems[j].constraint))
+ wanted_context |= CTX_LETTER;
+ }
+
+ return wanted_context;
+}
+
+
/* Perform bottom-up analysis on the parse tree, computing various functions.
Note that at this point, we're pretending constructs like \< are real
characters rather than constraints on what can follow them.
@@ -2349,16 +2389,12 @@ dfaanalyze (struct dfa *d, int searchflag)
insert(firstpos[i], &merged);
epsclosure(&merged, d);
- /* Check if any of the positions of state 0 will want newline context. */
- wanted_context = 0;
- for (i = 0; i < merged.nelem; ++i)
- if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
- wanted_context |= CTX_NEWLINE;
-
/* Build the initial state. */
d->salloc = 1;
d->sindex = 0;
MALLOC(d->states, d->salloc);
+
+ wanted_context = state_wanted_contexts(&merged, CTX_NEWLINE);
state_index(d, &merged, wanted_context);
free(o_nullable);
@@ -2369,6 +2405,7 @@ dfaanalyze (struct dfa *d, int searchflag)
free(merged.elems);
}
+
/* Find, for each character, the transition out of state s of d, and store
it in the appropriate slot of trans.
@@ -2414,6 +2451,7 @@ dfastate (int s, struct dfa *d, int trans[])
int leftoversf; /* True if leftovers is nonempty. */
position_set follows; /* Union of the follows of some group.
*/
position_set tmp; /* Temporary space for merging sets. */
+ int possible_contexts; /* Contexts that this group can match. */
int wanted_context; /* Context that new state wants to know. */
int state; /* New state. */
int state_newline; /* New state on a newline transition. */
@@ -2549,17 +2587,9 @@ dfastate (int s, struct dfa *d, int trans[])
is to fail miserably. */
if (d->searchflag)
{
- wanted_context = 0;
- for (i = 0; i < d->states[0].elems.nelem; ++i)
- {
- if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint))
- wanted_context |= CTX_NEWLINE;
- if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint))
- wanted_context |= CTX_LETTER;
- }
-
/* Find the state(s) corresponding to the positions of state 0. */
copy(&d->states[0].elems, &follows);
+ wanted_context = state_wanted_contexts(&follows, CTX_ANY);
state = state_index(d, &follows, 0);
if (wanted_context & CTX_NEWLINE)
state_newline = state_index(d, &follows, CTX_NEWLINE);
@@ -2628,19 +2658,8 @@ dfastate (int s, struct dfa *d, int trans[])
insert(d->states[0].elems.elems[j], &follows);
/* Find out if the new state will want any context information. */
- wanted_context = 0;
- if (tstbit(eolbyte, labels[i]))
- for (j = 0; j < follows.nelem; ++j)
- if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
- wanted_context |= CTX_NEWLINE;
-
- for (j = 0; j < CHARCLASS_INTS; ++j)
- if (labels[i][j] & letters[j])
- break;
- if (j < CHARCLASS_INTS)
- for (j = 0; j < follows.nelem; ++j)
- if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
- wanted_context |= CTX_LETTER;
+ possible_contexts = charclass_context(labels[i]);
+ wanted_context = state_wanted_contexts(&follows, possible_contexts);
/* Find the state(s) corresponding to the union of the follows. */
state = state_index(d, &follows, 0);
--
1.7.7.1
- [PATCH 7/8] dfa: fix constraint encoding, (continued)
- [PATCH 7/8] dfa: fix constraint encoding, Paolo Bonzini, 2012/01/20
- [PATCH 8/8] dfa: merge calls to SUCCEEDS_IN_CONTEXT, Paolo Bonzini, 2012/01/20
- [PATCH 1/8] dfa: remove useless check, Paolo Bonzini, 2012/01/20
- [PATCH 2/8] dfa: introduce contexts for the values in d->success, Paolo Bonzini, 2012/01/20
- [PATCH 5/8] dfa: change meaning of a state context, Paolo Bonzini, 2012/01/20
- [PATCH 3/8] dfa: change newline/letter to a single context value, Paolo Bonzini, 2012/01/20
- [PATCH 6/8] dfa: do not use MATCHES_*_CONTEXT directly, Paolo Bonzini, 2012/01/20
- [PATCH 4/8] dfa: refactor common context computations,
Paolo Bonzini <=
- Re: [PATCH 0/8] fix problems with ^ and $ together with \< and \>, Paul Eggert, 2012/01/20