[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
grep branch, master, updated. v2.25-52-g0c85b9f
From: Jim Meyering
Subject: grep branch, master, updated. v2.25-52-g0c85b9f
Date: Fri, 19 Aug 2016 22:51:08 +0000 (UTC)
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".
The branch, master has been updated
via 0c85b9f1cc1acf386ae921452225a77e68834243 (commit)
from 954a3eb3d15fee91b5d78632c6f6a5ed5f93505f (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=0c85b9f1cc1acf386ae921452225a77e68834243
commit 0c85b9f1cc1acf386ae921452225a77e68834243
Author: Zev Weiss <address@hidden>
Date: Thu Aug 18 05:50:14 2016 -0500
dfa: thread-safety: remove dfa.c's "dfa" global
Remove the global dfa struct. Instead, add a struct dfa pointer
parameter to each function that had been using the global.
* src/dfa.c (dfa): Remove file-scoped global.
(charclass_index): Remove now-unnecessary function.
(using_simple_locale): Add a dfa parameter and update all callers.
(FETCH_WC, parse_bracket_exp, lex, addtok_mb, addtok): Likewise.
(addtok_wc, add_utf8_anychar, atom, nsubtoks, copytoks): Likewise.
(closure, branch, regexp): Likewise.
(dfaparse): No longer set the global.
http://bugs.gnu.org/24260
diff --git a/src/dfa.c b/src/dfa.c
index d337bb6..f519c05 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -451,7 +451,7 @@ struct dfa
#define ACCEPTS_IN_CONTEXT(prev, curr, state, dfa) \
SUCCEEDS_IN_CONTEXT ((dfa).states[state].constraint, prev, curr)
-static void regexp (void);
+static void regexp (struct dfa *dfa);
/* A table indexed by byte values that contains the corresponding wide
character (if any) for that byte. WEOF means the byte is not a
@@ -670,16 +670,6 @@ dfa_charclass_index (struct dfa *d, charclass const s)
return i;
}
-/* A pointer to the current dfa is kept here during parsing. */
-static struct dfa *dfa;
-
-/* Find the index of charclass S in the current DFA, or allocate a new one. */
-static size_t
-charclass_index (charclass const s)
-{
- return dfa_charclass_index (dfa, s);
-}
-
/* Syntax bits controlling the behavior of the lexical analyzer. */
static reg_syntax_t syntax_bits;
static bool syntax_bits_set;
@@ -807,7 +797,7 @@ using_utf8 (void)
processed more efficiently. */
static bool
-using_simple_locale (void)
+using_simple_locale (struct dfa const *dfa)
{
/* The native character set is known to be compatible with
the C locale. The following test isn't perfect, but it's good
@@ -870,7 +860,7 @@ static wint_t wctok; /* Wide character representation of the current
of length 1); otherwise set WC to WEOF. If there is no more input,
report EOFERR if EOFERR is not null, and return lasttok = END
otherwise. */
-# define FETCH_WC(c, wc, eoferr) \
+# define FETCH_WC(dfa, c, wc, eoferr) \
do { \
if (! lexleft) \
{ \
@@ -984,7 +974,7 @@ find_pred (const char *str)
/* Multibyte character handling sub-routine for lex.
Parse a bracket expression and build a struct mb_char_classes. */
static token
-parse_bracket_exp (void)
+parse_bracket_exp (struct dfa *dfa)
{
bool invert;
int c, c1, c2;
@@ -1028,12 +1018,12 @@ parse_bracket_exp (void)
work_mbc = NULL;
memset (ccl, 0, sizeof ccl);
- FETCH_WC (c, wc, _("unbalanced ["));
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
if (c == '^')
{
- FETCH_WC (c, wc, _("unbalanced ["));
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
invert = true;
- known_bracket_exp = using_simple_locale ();
+ known_bracket_exp = using_simple_locale (dfa);
}
else
invert = false;
@@ -1050,7 +1040,7 @@ parse_bracket_exp (void)
dfa is ever called. */
if (c == '[')
{
- FETCH_WC (c1, wc1, _("unbalanced ["));
+ FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
if ((c1 == ':' && (syntax_bits & RE_CHAR_CLASSES))
|| c1 == '.' || c1 == '=')
@@ -1060,7 +1050,7 @@ parse_bracket_exp (void)
size_t len = 0;
for (;;)
{
- FETCH_WC (c, wc, _("unbalanced ["));
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
if ((c == c1 && *lexptr == ']') || lexleft == 0)
break;
if (len < MAX_BRACKET_STRING_LEN)
@@ -1072,7 +1062,7 @@ parse_bracket_exp (void)
str[len] = '\0';
/* Fetch bracket. */
- FETCH_WC (c, wc, _("unbalanced ["));
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
if (c1 == ':')
/* Build character class. POSIX allows character
classes to match multicharacter collating elements,
@@ -1099,7 +1089,7 @@ parse_bracket_exp (void)
colon_warning_state |= 8;
/* Fetch new lookahead character. */
- FETCH_WC (c1, wc1, _("unbalanced ["));
+ FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
continue;
}
@@ -1108,15 +1098,15 @@ parse_bracket_exp (void)
}
if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
- FETCH_WC (c, wc, _("unbalanced ["));
+ FETCH_WC (dfa, c, wc, _("unbalanced ["));
if (c1 == NOTCHAR)
- FETCH_WC (c1, wc1, _("unbalanced ["));
+ FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
if (c1 == '-')
/* build range characters. */
{
- FETCH_WC (c2, wc2, _("unbalanced ["));
+ FETCH_WC (dfa, c2, wc2, _("unbalanced ["));
/* A bracket expression like [a-[.aa.]] matches an unknown set.
Treat it like [-a[.aa.]] while parsing it, and
@@ -1137,17 +1127,17 @@ parse_bracket_exp (void)
else
{
if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS))
- FETCH_WC (c2, wc2, _("unbalanced ["));
+ FETCH_WC (dfa, c2, wc2, _("unbalanced ["));
colon_warning_state |= 8;
- FETCH_WC (c1, wc1, _("unbalanced ["));
+ FETCH_WC (dfa, c1, wc1, _("unbalanced ["));
/* Treat [x-y] as a range if x != y. */
if (wc != wc2 || wc == WEOF)
{
if (dfa->multibyte)
known_bracket_exp = false;
- else if (using_simple_locale ())
+ else if (using_simple_locale (dfa))
{
int ci;
for (ci = c; ci <= c2; ci++)
@@ -1214,7 +1204,7 @@ parse_bracket_exp (void)
if (dfa->multibyte)
{
work_mbc->invert = invert;
- work_mbc->cset = emptyset (ccl) ? -1 : charclass_index (ccl);
+ work_mbc->cset = emptyset (ccl) ? -1 : dfa_charclass_index (dfa, ccl);
return MBCSET;
}
@@ -1226,7 +1216,7 @@ parse_bracket_exp (void)
clrbit ('\n', ccl);
}
- return CSET + charclass_index (ccl);
+ return CSET + dfa_charclass_index (dfa, ccl);
}
#define PUSH_LEX_STATE(s) \
@@ -1244,7 +1234,7 @@ parse_bracket_exp (void)
while (false)
static token
-lex (void)
+lex (struct dfa *dfa)
{
int c, c2;
bool backslash = false;
@@ -1259,7 +1249,7 @@ lex (void)
"if (backslash) ...". */
for (i = 0; i < 2; ++i)
{
- FETCH_WC (c, wctok, NULL);
+ FETCH_WC (dfa, c, wctok, NULL);
switch (c)
{
@@ -1472,7 +1462,7 @@ lex (void)
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', ccl);
laststart = false;
- return lasttok = CSET + charclass_index (ccl);
+ return lasttok = CSET + dfa_charclass_index (dfa, ccl);
case 's':
case 'S':
@@ -1487,7 +1477,7 @@ lex (void)
if (c == 'S')
notset (ccl);
laststart = false;
- return lasttok = CSET + charclass_index (ccl);
+ return lasttok = CSET + dfa_charclass_index (dfa, ccl);
}
/* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1498,7 +1488,7 @@ lex (void)
strings, each minus its "already processed" '['. */
PUSH_LEX_STATE (c == 's' ? "[:space:]]" : "^[:space:]]");
- lasttok = parse_bracket_exp ();
+ lasttok = parse_bracket_exp (dfa);
POP_LEX_STATE ();
@@ -1519,7 +1509,7 @@ lex (void)
if (c == 'W')
notset (ccl);
laststart = false;
- return lasttok = CSET + charclass_index (ccl);
+ return lasttok = CSET + dfa_charclass_index (dfa, ccl);
}
/* FIXME: see if optimizing this, as is done with ANYCHAR and
@@ -1530,7 +1520,7 @@ lex (void)
strings, each minus its "already processed" '['. */
PUSH_LEX_STATE (c == 'w' ? "_[:alnum:]]" : "^_[:alnum:]]");
- lasttok = parse_bracket_exp ();
+ lasttok = parse_bracket_exp (dfa);
POP_LEX_STATE ();
@@ -1541,7 +1531,7 @@ lex (void)
if (backslash)
goto normal_char;
laststart = false;
- return lasttok = parse_bracket_exp ();
+ return lasttok = parse_bracket_exp (dfa);
default:
normal_char:
@@ -1555,7 +1545,7 @@ lex (void)
{
zeroset (ccl);
setbit_case_fold_c (c, ccl);
- return lasttok = CSET + charclass_index (ccl);
+ return lasttok = CSET + dfa_charclass_index (dfa, ccl);
}
return lasttok = c;
@@ -1578,7 +1568,7 @@ static size_t depth; /* Current depth of a hypothetical stack
dfaanalyze. */
static void
-addtok_mb (token t, int mbprop)
+addtok_mb (struct dfa *dfa, token t, int mbprop)
{
if (dfa->talloc == dfa->tindex)
{
@@ -1618,12 +1608,12 @@ addtok_mb (token t, int mbprop)
dfa->depth = depth;
}
-static void addtok_wc (wint_t wc);
+static void addtok_wc (struct dfa *dfa, wint_t wc);
/* Add the given token to the parse tree, maintaining the depth count and
updating the maximum depth if necessary. */
static void
-addtok (token t)
+addtok (struct dfa *dfa, token t)
{
if (dfa->multibyte && t == MBCSET)
{
@@ -1635,9 +1625,9 @@ addtok (token t)
This does not require UTF-8. */
for (i = 0; i < work_mbc->nchars; i++)
{
- addtok_wc (work_mbc->chars[i]);
+ addtok_wc (dfa, work_mbc->chars[i]);
if (need_or)
- addtok (OR);
+ addtok (dfa, OR);
need_or = true;
}
work_mbc->nchars = 0;
@@ -1646,14 +1636,14 @@ addtok (token t)
that the mbcset is empty now. Do nothing in that case. */
if (work_mbc->cset != -1)
{
- addtok (CSET + work_mbc->cset);
+ addtok (dfa, CSET + work_mbc->cset);
if (need_or)
- addtok (OR);
+ addtok (dfa, OR);
}
}
else
{
- addtok_mb (t, 3);
+ addtok_mb (dfa, t, 3);
}
}
@@ -1664,7 +1654,7 @@ addtok (token t)
<mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
<mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT> */
static void
-addtok_wc (wint_t wc)
+addtok_wc (struct dfa *dfa, wint_t wc)
{
unsigned char buf[MB_LEN_MAX];
mbstate_t s = { 0 };
@@ -1681,16 +1671,16 @@ addtok_wc (wint_t wc)
buf[0] = 0;
}
- addtok_mb (buf[0], cur_mb_len == 1 ? 3 : 1);
+ addtok_mb (dfa, buf[0], cur_mb_len == 1 ? 3 : 1);
for (i = 1; i < cur_mb_len; i++)
{
- addtok_mb (buf[i], i == cur_mb_len - 1 ? 2 : 0);
- addtok (CAT);
+ addtok_mb (dfa, buf[i], i == cur_mb_len - 1 ? 2 : 0);
+ addtok (dfa, CAT);
}
}
static void
-add_utf8_anychar (void)
+add_utf8_anychar (struct dfa *dfa)
{
static charclass const utf8_classes[5] = {
/* 80-bf: non-leading bytes. */
@@ -1724,7 +1714,7 @@ add_utf8_anychar (void)
if (syntax_bits & RE_DOT_NOT_NULL)
clrbit ('\0', c);
}
- dfa->utf8_anychar_classes[i] = CSET + charclass_index (c);
+ dfa->utf8_anychar_classes[i] = CSET + dfa_charclass_index (dfa, c);
}
/* A valid UTF-8 character is
@@ -1738,12 +1728,12 @@ add_utf8_anychar (void)
and you get "B|(C|(D|EA)A)A". And since the token buffer is in reverse
Polish notation, you get "B C D E A CAT OR A CAT OR A CAT OR". */
for (i = 1; i < n; i++)
- addtok (dfa->utf8_anychar_classes[i]);
+ addtok (dfa, dfa->utf8_anychar_classes[i]);
while (--i > 1)
{
- addtok (dfa->utf8_anychar_classes[0]);
- addtok (CAT);
- addtok (OR);
+ addtok (dfa, dfa->utf8_anychar_classes[0]);
+ addtok (dfa, CAT);
+ addtok (dfa, OR);
}
}
@@ -1783,15 +1773,15 @@ add_utf8_anychar (void)
The parser builds a parse tree in postfix form in an array of tokens. */
static void
-atom (void)
+atom (struct dfa *dfa)
{
if (tok == WCHAR)
{
if (wctok == WEOF)
- addtok (BACKREF);
+ addtok (dfa, BACKREF);
else
{
- addtok_wc (wctok);
+ addtok_wc (dfa, wctok);
if (case_fold)
{
@@ -1799,13 +1789,13 @@ atom (void)
unsigned int i, n = case_folded_counterparts (wctok, folded);
for (i = 0; i < n; i++)
{
- addtok_wc (folded[i]);
- addtok (OR);
+ addtok_wc (dfa, folded[i]);
+ addtok (dfa, OR);
}
}
}
- tok = lex ();
+ tok = lex (dfa);
}
else if (tok == ANYCHAR && using_utf8 ())
{
@@ -1816,32 +1806,32 @@ atom (void)
it is done above in add_utf8_anychar. So, let's start with
UTF-8: it is the most used, and the structure of the encoding
makes the correctness more obvious. */
- add_utf8_anychar ();
- tok = lex ();
+ add_utf8_anychar (dfa);
+ tok = lex (dfa);
}
else if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
|| tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
|| tok == ANYCHAR || tok == MBCSET
|| tok == ENDWORD || tok == LIMWORD || tok == NOTLIMWORD)
{
- addtok (tok);
- tok = lex ();
+ addtok (dfa, tok);
+ tok = lex (dfa);
}
else if (tok == LPAREN)
{
- tok = lex ();
- regexp ();
+ tok = lex (dfa);
+ regexp (dfa);
if (tok != RPAREN)
dfaerror (_("unbalanced ("));
- tok = lex ();
+ tok = lex (dfa);
}
else
- addtok (EMPTY);
+ addtok (dfa, EMPTY);
}
/* Return the number of tokens in the given subexpression. */
static size_t _GL_ATTRIBUTE_PURE
-nsubtoks (size_t tindex)
+nsubtoks (struct dfa const *dfa, size_t tindex)
{
size_t ntoks1;
@@ -1852,90 +1842,90 @@ nsubtoks (size_t tindex)
case QMARK:
case STAR:
case PLUS:
- return 1 + nsubtoks (tindex - 1);
+ return 1 + nsubtoks (dfa, tindex - 1);
case CAT:
case OR:
- ntoks1 = nsubtoks (tindex - 1);
- return 1 + ntoks1 + nsubtoks (tindex - 1 - ntoks1);
+ ntoks1 = nsubtoks (dfa, tindex - 1);
+ return 1 + ntoks1 + nsubtoks (dfa, tindex - 1 - ntoks1);
}
}
/* Copy the given subexpression to the top of the tree. */
static void
-copytoks (size_t tindex, size_t ntokens)
+copytoks (struct dfa *dfa, size_t tindex, size_t ntokens)
{
size_t i;
if (dfa->multibyte)
for (i = 0; i < ntokens; ++i)
- addtok_mb (dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]);
+ addtok_mb (dfa, dfa->tokens[tindex + i], dfa->multibyte_prop[tindex + i]);
else
for (i = 0; i < ntokens; ++i)
- addtok_mb (dfa->tokens[tindex + i], 3);
+ addtok_mb (dfa, dfa->tokens[tindex + i], 3);
}
static void
-closure (void)
+closure (struct dfa *dfa)
{
int i;
size_t tindex, ntokens;
- atom ();
+ atom (dfa);
while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
if (tok == REPMN && (minrep || maxrep))
{
- ntokens = nsubtoks (dfa->tindex);
+ ntokens = nsubtoks (dfa, dfa->tindex);
tindex = dfa->tindex - ntokens;
if (maxrep < 0)
- addtok (PLUS);
+ addtok (dfa, PLUS);
if (minrep == 0)
- addtok (QMARK);
+ addtok (dfa, QMARK);
for (i = 1; i < minrep; ++i)
{
- copytoks (tindex, ntokens);
- addtok (CAT);
+ copytoks (dfa, tindex, ntokens);
+ addtok (dfa, CAT);
}
for (; i < maxrep; ++i)
{
- copytoks (tindex, ntokens);
- addtok (QMARK);
- addtok (CAT);
+ copytoks (dfa, tindex, ntokens);
+ addtok (dfa, QMARK);
+ addtok (dfa, CAT);
}
- tok = lex ();
+ tok = lex (dfa);
}
else if (tok == REPMN)
{
- dfa->tindex -= nsubtoks (dfa->tindex);
- tok = lex ();
- closure ();
+ dfa->tindex -= nsubtoks (dfa, dfa->tindex);
+ tok = lex (dfa);
+ closure (dfa);
}
else
{
- addtok (tok);
- tok = lex ();
+ addtok (dfa, tok);
+ tok = lex (dfa);
}
}
static void
-branch (void)
+branch (struct dfa* dfa)
{
- closure ();
+ closure (dfa);
while (tok != RPAREN && tok != OR && tok >= 0)
{
- closure ();
- addtok (CAT);
+ closure (dfa);
+ addtok (dfa, CAT);
}
}
static void
-regexp (void)
+regexp (struct dfa *dfa)
{
- branch ();
+ branch (dfa);
while (tok == OR)
{
- tok = lex ();
- branch ();
- addtok (OR);
+ tok = lex (dfa);
+ branch (dfa);
+ addtok (dfa, OR);
}
}
@@ -1945,13 +1935,12 @@ regexp (void)
static void
dfaparse (char const *s, size_t len, struct dfa *d)
{
- dfa = d;
lexptr = s;
lexleft = len;
lasttok = END;
laststart = true;
parens = 0;
- if (dfa->multibyte)
+ if (d->multibyte)
{
cur_mb_len = 0;
memset (&d->mbs, 0, sizeof d->mbs);
@@ -1960,19 +1949,19 @@ dfaparse (char const *s, size_t len, struct dfa *d)
if (!syntax_bits_set)
dfaerror (_("no syntax specified"));
- tok = lex ();
+ tok = lex (d);
depth = d->depth;
- regexp ();
+ regexp (d);
if (tok != END)
dfaerror (_("unbalanced )"));
- addtok (END - d->nregexps);
- addtok (CAT);
+ addtok (d, END - d->nregexps);
+ addtok (d, CAT);
if (d->nregexps)
- addtok (OR);
+ addtok (d, OR);
++d->nregexps;
}
-----------------------------------------------------------------------
Summary of changes:
src/dfa.c | 207 +++++++++++++++++++++++++++++--------------------------------
1 file changed, 98 insertions(+), 109 deletions(-)
hooks/post-receive
--
grep
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- grep branch, master, updated. v2.25-52-g0c85b9f,
Jim Meyering <=