[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH 2/8] dfa: introduce contexts for the values in d->success
From: Jim Meyering
Subject: Re: [PATCH 2/8] dfa: introduce contexts for the values in d->success
Date: Sun, 22 Jan 2012 10:15:17 +0100
Paolo Bonzini wrote:
> Also initialize all tables in a single place in dfasyntax.
>
> * src/dfa.c (CTX_NONE, CTX_LETTER, CTX_NEWLINE, char_context): New.
> (sbit, letters, newline): New.
> (dfasyntax): Fill them.
> (dfastate): Remove letters, newline, initialized.
> (build_state): Use CTX_* constants.
> (dfaexec): Remove sbit and sbit_init.
ACK. Definite readability improvement.
> src/dfa.c | 101
> +++++++++++++++++++++++++++++++++++--------------------------
> 1 files changed, 58 insertions(+), 43 deletions(-)
>
> diff --git a/src/dfa.c b/src/dfa.c
...
> +/* Return non-zero if C is a `word-constituent' byte; zero otherwise. */
> +#define IS_WORD_CONSTITUENT(C) \
> + (is_valid_unibyte_character(C) && (isalnum(C) || (C) == '_'))
Please insert a space before each "(" above
(true, this is not new code, but if you move it,
in a way you do take partial responsibility):
(is_valid_unibyte_character (C) && (isalnum (C) || (C) == '_'))
> +static int
> +char_context(unsigned char c)
Here, too, but this *is* new.
It's hard to be too exigent about this, given that there are so many
preexisting inconsistencies, but it'd be nice not to add many more.
> +{
> + if (c == eolbyte || c == 0)
> + return CTX_NEWLINE;
> + if (IS_WORD_CONSTITUENT (c))
> + return CTX_LETTER;
> + return CTX_NONE;
> +}
> +
> /* Entry point to set syntax options. */
> void
> dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
> {
> + unsigned int i;
> +
> syntax_bits_set = 1;
> syntax_bits = bits;
> case_fold = fold;
> eolbyte = eol;
> +
> + for (i = 0; i < NOTCHAR; ++i)
> + {
> + sbit[i] = char_context (i);
> + switch (sbit[i])
> + {
> + case CTX_LETTER:
> + setbit(i, letters);
And here.
> + break;
> + case CTX_NEWLINE:
> + setbit(i, newline);
And here.
> + break;
> + }
> + }
> }
>
> /* Set a bit in the charclass for the given wchar_t. Do nothing if WC
> @@ -1073,19 +1126,6 @@ parse_bracket_exp (void)
> return CSET + charclass_index(ccl);
> }
>
> -/* Add this to the test for whether a byte is word-constituent, since on
> - BSD-based systems, many values in the 128..255 range are classified as
> - alphabetic, while on glibc-based systems, they are not. */
> -#ifdef __GLIBC__
> -# define is_valid_unibyte_character(c) 1
> -#else
> -# define is_valid_unibyte_character(c) (MBS_SUPPORT && btowc (c) != WEOF)
> -#endif
> -
> -/* Return non-zero if C is a `word-constituent' byte; zero otherwise. */
> -#define IS_WORD_CONSTITUENT(C) \
> - (is_valid_unibyte_character(C) && (isalnum(C) || (C) == '_'))
> -
> static token
> lex (void)
> {
> @@ -2362,8 +2402,6 @@ dfastate (int s, struct dfa *d, int trans[])
> int intersectf; /* True if intersect is nonempty. */
> charclass leftovers; /* Stuff in the label that didn't
> match. */
> int leftoversf; /* True if leftovers is nonempty. */
> - static charclass letters; /* Set of characters considered letters. */
> - static charclass newline; /* Set of characters that are newline. */
> position_set follows; /* Union of the follows of some group.
> */
> position_set tmp; /* Temporary space for merging sets. */
> int state; /* New state. */
> @@ -2371,23 +2409,12 @@ dfastate (int s, struct dfa *d, int trans[])
> int state_newline; /* New state on a newline transition. */
> int wants_letter; /* New state wants to know letter context. */
> int state_letter; /* New state on a letter transition. */
> - static int initialized; /* Flag for static initialization. */
> int next_isnt_1st_byte = 0; /* Flag if we can't add state0. */
> int i, j, k;
>
> MALLOC (grps, NOTCHAR);
> MALLOC (labels, NOTCHAR);
>
> - /* Initialize the set of letters, if necessary. */
> - if (! initialized)
> - {
> - initialized = 1;
> - for (i = 0; i < NOTCHAR; ++i)
> - if (IS_WORD_CONSTITUENT(i))
> - setbit(i, letters);
> - setbit(eolbyte, newline);
> - }
> -
> zeroset(matches);
>
> for (i = 0; i < d->states[s].elems.nelem; ++i)
> @@ -2672,13 +2699,13 @@ build_state (int s, struct dfa *d)
> d->success[s] = 0;
> if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 1, d->states[s].letter, 0,
> s, *d))
> - d->success[s] |= 4;
> + d->success[s] |= CTX_NEWLINE;
> if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 1,
> s, *d))
> - d->success[s] |= 2;
> + d->success[s] |= CTX_LETTER;
> if (ACCEPTS_IN_CONTEXT(d->states[s].newline, 0, d->states[s].letter, 0,
> s, *d))
> - d->success[s] |= 1;
> + d->success[s] |= CTX_NONE;
>
> MALLOC(trans, NOTCHAR);
> dfastate(s, d, trans);
> @@ -3226,18 +3253,6 @@ dfaexec (struct dfa *d, char const *begin, char *end,
> into a register. */
> unsigned char eol = eolbyte; /* Likewise for eolbyte. */
> unsigned char saved_end;
> - static int sbit[NOTCHAR]; /* Table for anding with d->success. */
> - static int sbit_init;
> -
> - if (! sbit_init)
> - {
> - unsigned int i;
> -
> - sbit_init = 1;
> - for (i = 0; i < NOTCHAR; ++i)
> - sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1;
> - sbit[eol] = 4;
> - }
>
> if (! d->tralloc)
> build_state_zero(d);
- [PATCH 0/8] fix problems with ^ and $ together with \< and \>, Paolo Bonzini, 2012/01/20
- [PATCH 7/8] dfa: fix constraint encoding, Paolo Bonzini, 2012/01/20
- [PATCH 8/8] dfa: merge calls to SUCCEEDS_IN_CONTEXT, Paolo Bonzini, 2012/01/20
- [PATCH 1/8] dfa: remove useless check, Paolo Bonzini, 2012/01/20
- [PATCH 2/8] dfa: introduce contexts for the values in d->success, Paolo Bonzini, 2012/01/20
- Re: [PATCH 2/8] dfa: introduce contexts for the values in d->success,
Jim Meyering <=
- [PATCH 5/8] dfa: change meaning of a state context, Paolo Bonzini, 2012/01/20
- [PATCH 3/8] dfa: change newline/letter to a single context value, Paolo Bonzini, 2012/01/20
- [PATCH 6/8] dfa: do not use MATCHES_*_CONTEXT directly, Paolo Bonzini, 2012/01/20
- [PATCH 4/8] dfa: refactor common context computations, Paolo Bonzini, 2012/01/20
- Re: [PATCH 0/8] fix problems with ^ and $ together with \< and \>, Paul Eggert, 2012/01/20