[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH 04/11] dfa: refactor common context computations
From: |
Paolo Bonzini |
Subject: |
[PATCH 04/11] dfa: refactor common context computations |
Date: |
Wed, 4 Jan 2012 11:59:45 +0100 |
* src/dfa.c (CTX_ANY, charclass_context, state_wanted_contexts): New.
(dfaanalyze): Use state_wanted_contexts.
(dfastate): Use charclass_context and state_wanted_contexts.
---
src/dfa.c | 74 +++++++++++++++++++++++++++++++++++++-----------------------
1 files changed, 46 insertions(+), 28 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index c37cae2..a271883 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -102,6 +102,7 @@ static inline unsigned char to_uchar (char ch) { return ch;
}
#define CTX_NONE 1
#define CTX_LETTER 2
#define CTX_NEWLINE 4
+#define CTX_ANY 7
/* Sometimes characters can only be matched depending on the surrounding
context. Such context decisions depend on what the previous character
@@ -2099,6 +2100,44 @@ epsclosure (position_set *s, struct dfa const *d)
free(visited);
}
+int
+charclass_context(charclass c)
+{
+ int context = 0;
+ int j;
+
+ if (tstbit(eolbyte, c))
+ context |= CTX_NEWLINE;
+
+ for (j = 0; j < CHARCLASS_INTS; ++j)
+ if (c[j] & letters[j])
+ {
+ context |= CTX_LETTER;
+ break;
+ }
+
+ return context;
+}
+
+int state_wanted_contexts(position_set *s, int possible_contexts)
+{
+ int wanted_context = 0;
+ int j;
+
+ for (j = 0; j < s->nelem; ++j)
+ {
+ if ((possible_contexts & CTX_NEWLINE)
+ && PREV_NEWLINE_DEPENDENT(s->elems[j].constraint))
+ wanted_context |= CTX_NEWLINE;
+ if ((possible_contexts & CTX_LETTER)
+ && PREV_LETTER_DEPENDENT(s->elems[j].constraint))
+ wanted_context |= CTX_LETTER;
+ }
+
+ return wanted_context;
+}
+
+
/* Perform bottom-up analysis on the parse tree, computing various functions.
Note that at this point, we're pretending constructs like \< are real
characters rather than constraints on what can follow them.
@@ -2351,16 +2390,12 @@ dfaanalyze (struct dfa *d, int searchflag)
insert(firstpos[i], &merged);
epsclosure(&merged, d);
- /* Check if any of the positions of state 0 will want newline context. */
- wanted_context = 0;
- for (i = 0; i < merged.nelem; ++i)
- if (PREV_NEWLINE_DEPENDENT(merged.elems[i].constraint))
- wanted_context |= CTX_NEWLINE;
-
/* Build the initial state. */
d->salloc = 1;
d->sindex = 0;
MALLOC(d->states, d->salloc);
+
+ wanted_context = state_wanted_contexts(&merged, CTX_NEWLINE);
state_index(d, &merged, wanted_context);
free(o_nullable);
@@ -2371,6 +2406,7 @@ dfaanalyze (struct dfa *d, int searchflag)
free(merged.elems);
}
+
/* Find, for each character, the transition out of state s of d, and store
it in the appropriate slot of trans.
@@ -2416,6 +2452,7 @@ dfastate (int s, struct dfa *d, int trans[])
int leftoversf; /* True if leftovers is nonempty. */
position_set follows; /* Union of the follows of some group.
*/
position_set tmp; /* Temporary space for merging sets. */
+ int possible_contexts; /* Contexts that this group can match. */
int wanted_context; /* Context that new state wants to know. */
int state; /* New state. */
int state_newline; /* New state on a newline transition. */
@@ -2551,17 +2588,9 @@ dfastate (int s, struct dfa *d, int trans[])
is to fail miserably. */
if (d->searchflag)
{
- wanted_context = 0;
- for (i = 0; i < d->states[0].elems.nelem; ++i)
- {
- if (PREV_NEWLINE_DEPENDENT(d->states[0].elems.elems[i].constraint))
- wanted_context |= CTX_NEWLINE;
- if (PREV_LETTER_DEPENDENT(d->states[0].elems.elems[i].constraint))
- wanted_context |= CTX_LETTER;
- }
-
/* Find the state(s) corresponding to the positions of state 0. */
copy(&d->states[0].elems, &follows);
+ wanted_context = state_wanted_contexts(&follows, CTX_ANY);
state = state_index(d, &follows, 0);
if (wanted_context & CTX_NEWLINE)
state_newline = state_index(d, &follows, CTX_NEWLINE);
@@ -2630,19 +2659,8 @@ dfastate (int s, struct dfa *d, int trans[])
insert(d->states[0].elems.elems[j], &follows);
/* Find out if the new state will want any context information. */
- wanted_context = 0;
- if (tstbit(eolbyte, labels[i]))
- for (j = 0; j < follows.nelem; ++j)
- if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint))
- wanted_context |= CTX_NEWLINE;
-
- for (j = 0; j < CHARCLASS_INTS; ++j)
- if (labels[i][j] & letters[j])
- break;
- if (j < CHARCLASS_INTS)
- for (j = 0; j < follows.nelem; ++j)
- if (PREV_LETTER_DEPENDENT(follows.elems[j].constraint))
- wanted_context |= CTX_LETTER;
+ possible_contexts = charclass_context(labels[i]);
+ wanted_context = state_wanted_contexts(&follows, possible_contexts);
/* Find the state(s) corresponding to the union of the follows. */
state = state_index(d, &follows, 0);
--
1.7.7.1
- [RFC/RFT PATCH 00/11] Differentiate ^/$ from \` and \' in grep -z mode, Paolo Bonzini, 2012/01/04
- [PATCH 01/11] dfa: fix corner case with anchors, Paolo Bonzini, 2012/01/04
- [PATCH 02/11] dfa: introduce contexts for the values in d->success, Paolo Bonzini, 2012/01/04
- [PATCH 04/11] dfa: refactor common context computations,
Paolo Bonzini <=
- [PATCH 03/11] dfa: change newline/letter to a single context value, Paolo Bonzini, 2012/01/04
- [PATCH 05/11] dfa: change meaning of a state context, Paolo Bonzini, 2012/01/04
- [PATCH 06/11] dfa: remove useless check, Paolo Bonzini, 2012/01/04
- [PATCH 07/11] dfa: make repetitive code *really* repetitive, Paolo Bonzini, 2012/01/04
- [PATCH 08/11] dfa: remove redundant line constraints, Paolo Bonzini, 2012/01/04
- [PATCH 11/11] dfa: introduce BEGBUF/ENDBUF, Paolo Bonzini, 2012/01/04
- [PATCH 09/11] dfa: rename "newline" to "buffer delimiter", Paolo Bonzini, 2012/01/04
- [PATCH 10/11] dfa: introduce bufdelim context, Paolo Bonzini, 2012/01/04
- Re: [RFC/RFT PATCH 00/11] Differentiate ^/$ from \` and \' in grep -z mode, Jim Meyering, 2012/01/04