From a0c2b2e521e3523cc45a8a7e91fb5a42f4dd60d9 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Wed, 1 Aug 2018 19:13:08 -0700 Subject: [PATCH 3/4] Simplify regex-emacs by assuming Emacs syntax * src/regex-emacs.c (reg_syntax_t) (RE_BACKSLASH_ESCAPE_IN_LISTS, RE_BK_PLUS_QM) (RE_CHAR_CLASSES, RE_CONTEXT_INDEP_ANCHORS) (RE_CONTEXT_INDEP_OPS, RE_CONTEXT_INVALID_OPS) (RE_DOT_NEWLINE, RE_DOT_NOT_NULL, RE_HAT_LISTS_NOT_NEWLINE) (RE_INTERVALS, RE_LIMITED_OPS, RE_NEWLINE_ALT) (RE_NO_BK_BRACES, RE_NO_BK_PARENS, RE_NO_BK_REFS) (RE_NO_BK_VBAR, RE_NO_EMPTY_RANGES) (RE_UNMATCHED_RIGHT_PAREN_ORD, RE_NO_POSIX_BACKTRACKING) (RE_NO_GNU_OPS, RE_FRUGAL, RE_SHY_GROUPS) (RE_NO_NEWLINE_ANCHOR, RE_SYNTAX_EMACS, RE_TRANSLATE_P): Remove. All uses removed and resulting code simplified. (TRANSLATE): Treat nil as an absent translation table, not zero. All uses changed. --- src/regex-emacs.c | 493 +++++++--------------------------------------- src/search.c | 4 +- 2 files changed, 70 insertions(+), 427 deletions(-) diff --git a/src/regex-emacs.c b/src/regex-emacs.c index fd335de094..c2582281da 100644 --- a/src/regex-emacs.c +++ b/src/regex-emacs.c @@ -50,133 +50,6 @@ ints. But Emacs only runs on 32 bit platforms anyway. */ #define RE_DUP_MAX (0xffff) -/* The following bits are used to determine the regexp syntax we - recognize. The set/not-set meanings where historically chosen so - that Emacs syntax had the value 0. - The bits are given in alphabetical order, and - the definitions shifted by one from the previous bit; thus, when we - add or remove a bit, only one other definition need change. */ -typedef unsigned long reg_syntax_t; - -/* If this bit is not set, then \ inside a bracket expression is literal. - If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) - -/* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. - If set, then \+ and \? are operators and + and ? are literals. 
*/ -#define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) - -/* If this bit is set, then character classes are supported. They are: - [:alpha:], [:upper:], [:lower:], [:digit:], [:alnum:], [:xdigit:], - [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:]. - If not set, then character classes are not supported. */ -#define RE_CHAR_CLASSES (RE_BK_PLUS_QM << 1) - -/* If this bit is set, then ^ and $ are always anchors (outside bracket - expressions, of course). - If this bit is not set, then it depends: - ^ is an anchor if it is at the beginning of a regular - expression or after an open-group or an alternation operator; - $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. - - This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because - POSIX draft 11.2 says that * etc. in leading positions is undefined. - We already implemented a previous draft which made those constructs - invalid, though, so we haven't changed the code back. */ -#define RE_CONTEXT_INDEP_ANCHORS (RE_CHAR_CLASSES << 1) - -/* If this bit is set, then special characters are always special - regardless of where they are in the pattern. - If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, - * + ? and intervals are only special when not after the beginning, - open-group, or alternation operator. */ -#define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) - -/* If this bit is set, then *, +, ?, and { cannot be first in an re or - immediately after an alternation or begin-group operator. */ -#define RE_CONTEXT_INVALID_OPS (RE_CONTEXT_INDEP_OPS << 1) - -/* If this bit is set, then . matches newline. - If not set, then it doesn't. */ -#define RE_DOT_NEWLINE (RE_CONTEXT_INVALID_OPS << 1) - -/* If this bit is set, then . doesn't match NUL. - If not set, then it does. 
*/ -#define RE_DOT_NOT_NULL (RE_DOT_NEWLINE << 1) - -/* If this bit is set, nonmatching lists [^...] do not match newline. - If not set, they do. */ -#define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) - -/* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. - If not set, \{, \}, {, and } are literals. */ -#define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) - -/* If this bit is set, +, ? and | aren't recognized as operators. - If not set, they are. */ -#define RE_LIMITED_OPS (RE_INTERVALS << 1) - -/* If this bit is set, newline is an alternation operator. - If not set, newline is literal. */ -#define RE_NEWLINE_ALT (RE_LIMITED_OPS << 1) - -/* If this bit is set, then `{...}' defines an interval, and \{ and \} - are literals. - If not set, then `\{...\}' defines an interval. */ -#define RE_NO_BK_BRACES (RE_NEWLINE_ALT << 1) - -/* If this bit is set, (...) defines a group, and \( and \) are literals. - If not set, \(...\) defines a group, and ( and ) are literals. */ -#define RE_NO_BK_PARENS (RE_NO_BK_BRACES << 1) - -/* If this bit is set, then \ matches . - If not set, then \ is a back-reference. */ -#define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) - -/* If this bit is set, then | is an alternation operator, and \| is literal. - If not set, then \| is an alternation operator, and | is literal. */ -#define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) - -/* If this bit is set, then an ending range point collating higher - than the starting range point, as in [z-a], is invalid. - If not set, then when ending range point collates higher than the - starting range point, the range is ignored. */ -#define RE_NO_EMPTY_RANGES (RE_NO_BK_VBAR << 1) - -/* If this bit is set, then an unmatched ) is ordinary. - If not set, then an unmatched ) is invalid. */ -#define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) - -/* If this bit is set, succeed as soon as we match the whole pattern, - without further backtracking. 
*/ -#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) - -/* If this bit is set, do not process the GNU regex operators. - If not set, then the GNU regex operators are recognized. */ -#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) - -/* If this bit is set, then *?, +? and ?? match non greedily. */ -#define RE_FRUGAL (RE_NO_GNU_OPS << 1) - -/* If this bit is set, then (?:...) is treated as a shy group. */ -#define RE_SHY_GROUPS (RE_FRUGAL << 1) - -/* If this bit is set, ^ and $ only match at beg/end of buffer. */ -#define RE_NO_NEWLINE_ANCHOR (RE_SHY_GROUPS << 1) - -/* This global variable defines the particular regexp syntax to use (for - some interfaces). When a regexp is compiled, the syntax used is - stored in the pattern buffer, so changing this does not affect - already-compiled regexps. */ -/* extern reg_syntax_t re_syntax_options; */ -/* Define combinations of the above bits for the standard possibilities. */ -#define RE_SYNTAX_EMACS \ - (RE_CHAR_CLASSES | RE_INTERVALS | RE_SHY_GROUPS | RE_FRUGAL) - /* Make syntax table lookup grant data in gl_state. 
*/ #define SYNTAX(c) syntax_property (c, 1) @@ -1298,10 +1171,8 @@ static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end); static void insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end); -static bool at_begline_loc_p (re_char *pattern, re_char *p, - reg_syntax_t syntax); -static bool at_endline_loc_p (re_char *p, re_char *pend, - reg_syntax_t syntax); +static bool at_begline_loc_p (re_char *pattern, re_char *p); +static bool at_endline_loc_p (re_char *p, re_char *pend); static re_char *skip_one_char (re_char *p); static int analyze_first (re_char *p, re_char *pend, char *fastmap, const int multibyte); @@ -1318,15 +1189,7 @@ static int analyze_first (re_char *p, re_char *pend, #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C) -#define RE_TRANSLATE_P(TBL) (!EQ (TBL, make_number (0))) - -/* If `translate' is non-zero, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. But - when we use a character as a subscript we must make it unsigned. */ -#define TRANSLATE(d) \ - (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d)) - +#define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d)) /* Macros for outputting the compiled pattern into `buffer'. */ @@ -1846,8 +1709,6 @@ regex_compile (re_char *pattern, size_t size, const char *whitespace_regexp, struct re_pattern_buffer *bufp) { - reg_syntax_t syntax = RE_SYNTAX_EMACS; - /* We fetch characters from PATTERN here. */ int c, c1; @@ -2010,51 +1871,24 @@ regex_compile (re_char *pattern, size_t size, } case '^': - { - if ( /* If at start of pattern, it's an operator. */ - p == pattern + 1 - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's come before. 
*/ - || at_begline_loc_p (pattern, p, syntax)) - BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline); - else - goto normal_char; - } + if (! (p == pattern + 1 || at_begline_loc_p (pattern, p))) + goto normal_char; + BUF_PUSH (begline); break; - case '$': - { - if ( /* If at end of pattern, it's an operator. */ - p == pend - /* If context independent, it's an operator. */ - || syntax & RE_CONTEXT_INDEP_ANCHORS - /* Otherwise, depends on what's next. */ - || at_endline_loc_p (p, pend, syntax)) - BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline); - else - goto normal_char; - } - break; + if (! (p == pend || at_endline_loc_p (p, pend))) + goto normal_char; + BUF_PUSH (endline); + break; case '+': case '?': - if ((syntax & RE_BK_PLUS_QM) - || (syntax & RE_LIMITED_OPS)) - goto normal_char; - FALLTHROUGH; case '*': - handle_plus: /* If there is no previous pattern... */ if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - FREE_STACK_RETURN (REG_BADRPT); - else if (!(syntax & RE_CONTEXT_INDEP_OPS)) - goto normal_char; - } + goto normal_char; { /* 1 means zero (many) matches is allowed. */ @@ -2068,8 +1902,7 @@ regex_compile (re_char *pattern, size_t size, for (;;) { - if ((syntax & RE_FRUGAL) - && c == '?' && (zero_times_ok || many_times_ok)) + if (c == '?' && (zero_times_ok || many_times_ok)) greedy = false; else { @@ -2077,25 +1910,10 @@ regex_compile (re_char *pattern, size_t size, many_times_ok |= c != '?'; } - if (p == pend) - break; - else if (*p == '*' - || (!(syntax & RE_BK_PLUS_QM) - && (*p == '+' || *p == '?'))) - ; - else if (syntax & RE_BK_PLUS_QM && *p == '\\') - { - if (p+1 == pend) - FREE_STACK_RETURN (REG_EESCAPE); - if (p[1] == '+' || p[1] == '?') - PATFETCH (c); /* Gobble up the backslash. */ - else - break; - } - else + if (! (p < pend && (*p == '*' || *p == '+' || *p == '?'))) break; /* If we get here, we found another repeat character. */ - PATFETCH (c); + c = *p++; } /* Star, etc. 
applied to an empty pattern is equivalent @@ -2227,24 +2045,18 @@ regex_compile (re_char *pattern, size_t size, /* Clear the whole map. */ memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH); - /* charset_not matches newline according to a syntax bit. */ - if ((re_opcode_t) b[-2] == charset_not - && (syntax & RE_HAT_LISTS_NOT_NEWLINE)) - SET_LIST_BIT ('\n'); - /* Read in characters and ranges, setting map bits. */ for (;;) { const unsigned char *p2 = p; - re_wctype_t cc; int ch; if (p == pend) FREE_STACK_RETURN (REG_EBRACK); /* See if we're at the beginning of a possible character class. */ - if (syntax & RE_CHAR_CLASSES && - (cc = re_wctype_parse(&p, pend - p)) != -1) + re_wctype_t cc = re_wctype_parse (&p, pend - p); + if (cc != -1) { if (cc == 0) FREE_STACK_RETURN (REG_ECTYPE); @@ -2296,21 +2108,11 @@ regex_compile (re_char *pattern, size_t size, (let ((case-fold-search t)) (string-match "[A-_]" "A")) */ PATFETCH (c); - /* \ might escape characters inside [...] and [^...]. */ - if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') - { - if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); - - PATFETCH (c); - } - else - { - /* Could be the end of the bracket expression. If it's - not (i.e., when the bracket expression is `[]' so - far), the ']' character bit gets set way below. */ - if (c == ']' && p2 != p1) - break; - } + /* Could be the end of the bracket expression. If it's + not (i.e., when the bracket expression is `[]' so + far), the ']' character bit gets set way below. */ + if (c == ']' && p2 != p1) + break; if (p < pend && p[0] == '-' && p[1] != ']') { @@ -2331,13 +2133,7 @@ regex_compile (re_char *pattern, size_t size, /* Range from C to C. */ c1 = c; - if (c > c1) - { - if (syntax & RE_NO_EMPTY_RANGES) - FREE_STACK_RETURN (REG_ERANGEX); - /* Else, repeat the loop. 
*/ - } - else + if (c <= c1) { if (c < 128) { @@ -2347,24 +2143,17 @@ regex_compile (re_char *pattern, size_t size, if (CHAR_BYTE8_P (c1)) c = BYTE8_TO_CHAR (128); } - if (c <= c1) + if (CHAR_BYTE8_P (c)) { - if (CHAR_BYTE8_P (c)) - { - c = CHAR_TO_BYTE8 (c); - c1 = CHAR_TO_BYTE8 (c1); - for (; c <= c1; c++) - SET_LIST_BIT (c); - } - else if (multibyte) - { - SETUP_MULTIBYTE_RANGE (range_table_work, c, c1); - } - else - { - SETUP_UNIBYTE_RANGE (range_table_work, c, c1); - } + c = CHAR_TO_BYTE8 (c); + c1 = CHAR_TO_BYTE8 (c1); + for (; c <= c1; c++) + SET_LIST_BIT (c); } + else if (multibyte) + SETUP_MULTIBYTE_RANGE (range_table_work, c, c1); + else + SETUP_UNIBYTE_RANGE (range_table_work, c, c1); } } @@ -2402,41 +2191,6 @@ regex_compile (re_char *pattern, size_t size, break; - case '(': - if (syntax & RE_NO_BK_PARENS) - goto handle_open; - else - goto normal_char; - - - case ')': - if (syntax & RE_NO_BK_PARENS) - goto handle_close; - else - goto normal_char; - - - case '\n': - if (syntax & RE_NEWLINE_ALT) - goto handle_alt; - else - goto normal_char; - - - case '|': - if (syntax & RE_NO_BK_VBAR) - goto handle_alt; - else - goto normal_char; - - - case '{': - if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES) - goto handle_interval; - else - goto normal_char; - - case '\\': if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); @@ -2448,17 +2202,13 @@ regex_compile (re_char *pattern, size_t size, switch (c) { case '(': - if (syntax & RE_NO_BK_PARENS) - goto normal_backslash; - - handle_open: { int shy = 0; regnum_t regnum = 0; if (p+1 < pend) { /* Look for a special (?...) construct */ - if ((syntax & RE_SHY_GROUPS) && *p == '?') + if (*p == '?') { PATFETCH (c); /* Gobble up the '?'. 
*/ while (!shy) @@ -2539,27 +2289,14 @@ regex_compile (re_char *pattern, size_t size, } case ')': - if (syntax & RE_NO_BK_PARENS) goto normal_backslash; - if (COMPILE_STACK_EMPTY) - { - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_backslash; - else - FREE_STACK_RETURN (REG_ERPAREN); - } + FREE_STACK_RETURN (REG_ERPAREN); - handle_close: FIXUP_ALT_JUMP (); /* See similar code for backslashed left paren above. */ if (COMPILE_STACK_EMPTY) - { - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - else - FREE_STACK_RETURN (REG_ERPAREN); - } + FREE_STACK_RETURN (REG_ERPAREN); /* Since we just checked for an empty stack above, this ``can't happen''. */ @@ -2592,12 +2329,6 @@ regex_compile (re_char *pattern, size_t size, case '|': /* `\|'. */ - if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR) - goto normal_backslash; - handle_alt: - if (syntax & RE_LIMITED_OPS) - goto normal_char; - /* Insert before the previous alternative a jump which jumps to this alternative if the former fails. */ GET_BUFFER_SPACE (3); @@ -2636,17 +2367,7 @@ regex_compile (re_char *pattern, size_t size, case '{': - /* If \{ is a literal. */ - if (!(syntax & RE_INTERVALS) - /* If we're at `\{' and it's not the open-interval - operator. */ - || (syntax & RE_NO_BK_BRACES)) - goto normal_backslash; - - handle_interval: { - /* If got here, then the syntax allows intervals. */ - /* At least (most) this many matches must be made. 
*/ int lower_bound = 0, upper_bound = -1; @@ -2661,33 +2382,19 @@ regex_compile (re_char *pattern, size_t size, upper_bound = lower_bound; if (lower_bound < 0 - || (0 <= upper_bound && upper_bound < lower_bound)) + || (0 <= upper_bound && upper_bound < lower_bound) + || c != '\\') FREE_STACK_RETURN (REG_BADBR); - - if (!(syntax & RE_NO_BK_BRACES)) - { - if (c != '\\') - FREE_STACK_RETURN (REG_BADBR); - if (p == pend) - FREE_STACK_RETURN (REG_EESCAPE); - PATFETCH (c); - } - - if (c != '}') + if (p == pend) + FREE_STACK_RETURN (REG_EESCAPE); + if (*p++ != '}') FREE_STACK_RETURN (REG_BADBR); /* We just parsed a valid interval. */ /* If it's invalid to have no preceding re. */ if (!laststart) - { - if (syntax & RE_CONTEXT_INVALID_OPS) - FREE_STACK_RETURN (REG_BADRPT); - else if (syntax & RE_CONTEXT_INDEP_OPS) - laststart = b; - else - goto unfetch_interval; - } + goto unfetch_interval; if (upper_bound == 0) /* If the upper bound is zero, just drop the sub pattern @@ -2792,17 +2499,9 @@ regex_compile (re_char *pattern, size_t size, eassert (beg_interval); p = beg_interval; beg_interval = NULL; - - /* normal_char and normal_backslash need `c'. 
*/ + eassert (p > pattern && p[-1] == '\\'); c = '{'; - - if (!(syntax & RE_NO_BK_BRACES)) - { - eassert (p > pattern && p[-1] == '\\'); - goto normal_backslash; - } - else - goto normal_char; + goto normal_char; case '=': laststart = b; @@ -2834,38 +2533,28 @@ regex_compile (re_char *pattern, size_t size, break; case 'w': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; laststart = b; BUF_PUSH_2 (syntaxspec, Sword); break; case 'W': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; laststart = b; BUF_PUSH_2 (notsyntaxspec, Sword); break; case '<': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; laststart = b; BUF_PUSH (wordbeg); break; case '>': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; laststart = b; BUF_PUSH (wordend); break; case '_': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; laststart = b; PATFETCH (c); if (c == '<') @@ -2877,38 +2566,25 @@ regex_compile (re_char *pattern, size_t size, break; case 'b': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; BUF_PUSH (wordbound); break; case 'B': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; BUF_PUSH (notwordbound); break; case '`': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; BUF_PUSH (begbuf); break; case '\'': - if (syntax & RE_NO_GNU_OPS) - goto normal_char; BUF_PUSH (endbuf); break; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': { - regnum_t reg; - - if (syntax & RE_NO_BK_REFS) - goto normal_backslash; - - reg = c - '0'; + regnum_t reg = c - '0'; if (reg > bufp->re_nsub || reg < 1 /* Can't back reference to a subexp before its end. */ @@ -2920,16 +2596,7 @@ regex_compile (re_char *pattern, size_t size, } break; - - case '+': - case '?': - if (syntax & RE_BK_PLUS_QM) - goto handle_plus; - else - goto normal_backslash; - default: - normal_backslash: /* You might think it would be useful for \ to mean not to translate; but if we don't translate it it will never match anything. 
*/ @@ -2951,14 +2618,9 @@ regex_compile (re_char *pattern, size_t size, || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH /* If followed by a repetition operator. */ - || (p != pend && (*p == '*' || *p == '^')) - || ((syntax & RE_BK_PLUS_QM) - ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?') - : p != pend && (*p == '+' || *p == '?')) - || ((syntax & RE_INTERVALS) - && ((syntax & RE_NO_BK_BRACES) - ? p != pend && *p == '{' - : p + 1 < pend && p[0] == '\\' && p[1] == '{'))) + || (p != pend + && (*p == '*' || *p == '+' || *p == '?' || *p == '^')) + || (p + 1 < pend && p[0] == '\\' && p[1] == '{')) { /* Start building a new exactn. */ @@ -3087,40 +2749,35 @@ insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned cha least one character before the ^. */ static bool -at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) +at_begline_loc_p (re_char *pattern, re_char *p) { re_char *prev = p - 2; - bool odd_backslashes; - - /* After a subexpression? */ - if (*prev == '(') - odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0; - /* After an alternative? */ - else if (*prev == '|') - odd_backslashes = (syntax & RE_NO_BK_VBAR) == 0; - - /* After a shy subexpression? */ - else if (*prev == ':' && (syntax & RE_SHY_GROUPS)) + switch (*prev) { + case '(': /* After a subexpression. */ + case '|': /* After an alternative. */ + break; + + case ':': /* After a shy subexpression. */ /* Skip over optional regnum. */ - while (prev - 1 >= pattern && prev[-1] >= '0' && prev[-1] <= '9') + while (prev > pattern && '0' <= prev[-1] && prev[-1] <= '9') --prev; - if (!(prev - 2 >= pattern - && prev[-1] == '?' && prev[-2] == '(')) + if (! (prev > pattern + 1 && prev[-1] == '?' && prev[-2] == '(')) return false; prev -= 2; - odd_backslashes = (syntax & RE_NO_BK_PARENS) == 0; + break; + + default: + return false; } - else - return false; /* Count the number of preceding backslashes. 
*/ p = prev; - while (prev - 1 >= pattern && prev[-1] == '\\') + while (prev > pattern && prev[-1] == '\\') --prev; - return (p - prev) & odd_backslashes; + return (p - prev) & 1; } @@ -3128,19 +2785,10 @@ at_begline_loc_p (re_char *pattern, re_char *p, reg_syntax_t syntax) at least one character after the $, i.e., `P < PEND'. */ static bool -at_endline_loc_p (re_char *p, re_char *pend, reg_syntax_t syntax) +at_endline_loc_p (re_char *p, re_char *pend) { - re_char *next = p; - bool next_backslash = *next == '\\'; - re_char *next_next = p + 1 < pend ? p + 1 : 0; - - return - /* Before a subexpression? */ - (syntax & RE_NO_BK_PARENS ? *next == ')' - : next_backslash && next_next && *next_next == ')') - /* Before an alternative? */ - || (syntax & RE_NO_BK_VBAR ? *next == '|' - : next_backslash && next_next && *next_next == '|'); + /* Before a subexpression or an alternative? */ + return *p == '\\' && p + 1 < pend && (p[1] == ')' || p[1] == '|'); } @@ -3654,7 +3302,7 @@ re_search_2 (struct re_pattern_buffer *bufp, const char *str1, size_t size1, /* Written out as an if-else to avoid testing `translate' inside the loop. */ - if (RE_TRANSLATE_P (translate)) + if (!NILP (translate)) { if (multibyte) while (range > lim) @@ -4642,12 +4290,11 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, break; - /* Match any character except possibly a newline or a null. */ + /* Match any character except newline. 
*/ case anychar: { int buf_charlen; int buf_ch; - reg_syntax_t syntax; DEBUG_PRINT ("EXECUTING anychar.\n"); @@ -4655,11 +4302,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen, target_multibyte); buf_ch = TRANSLATE (buf_ch); - - syntax = RE_SYNTAX_EMACS; - - if ((!(syntax & RE_DOT_NEWLINE) && buf_ch == '\n') - || ((syntax & RE_DOT_NOT_NULL) && buf_ch == '\000')) + if (buf_ch == '\n') goto fail; DEBUG_PRINT (" Matched \"%d\".\n", *d); @@ -4825,7 +4468,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, re_char *string1, /* Compare that many; failure if mismatch, else move past them. */ - if (RE_TRANSLATE_P (translate) + if (!NILP (translate) ? bcmp_translate (d, d2, dcnt, translate, target_multibyte) : memcmp (d, d2, dcnt)) { diff --git a/src/search.c b/src/search.c index 2031bdc6d2..95cbbd8c81 100644 --- a/src/search.c +++ b/src/search.c @@ -132,7 +132,7 @@ compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, eassert (!cp->busy); cp->regexp = Qnil; - cp->buf.translate = (! NILP (translate) ? translate : make_number (0)); + cp->buf.translate = translate; cp->posix = posix; cp->buf.multibyte = STRING_MULTIBYTE (pattern); cp->buf.charset_unibyte = charset_unibyte; @@ -238,7 +238,7 @@ compile_pattern (Lisp_Object pattern, struct re_registers *regp, && !cp->busy && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern) && !NILP (Fstring_equal (cp->regexp, pattern)) - && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0))) + && EQ (cp->buf.translate, translate) && cp->posix == posix && (EQ (cp->syntax_table, Qt) || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table))) -- 2.17.1