>From 180ad10aa80c22b3ca67ff7201cf578a594f6de9 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Sun, 16 Mar 2014 09:33:25 +0900 Subject: [PATCH] grep: removal of trivial_case_ignore Once kwsmusts is changed to use kwset even when MB_CUR_MAX > 1 and matching is case-insensitive, DFA gives the same result whether or not the trivial_case_ignore function is used. So remove it. * src/main.c (trivial_case_ignore): Remove it. (main): Remove its use; this optimization is no longer needed. * src/dfasearch.c (kwsmusts): Use kwset even when MB_CUR_MAX > 1 and matching is case-insensitive. --- src/dfasearch.c | 6 --- src/main.c | 118 -------------------------------------------------------- 2 files changed, 124 deletions(-) diff --git a/src/dfasearch.c b/src/dfasearch.c index 0b56960..8f17312 100644 --- a/src/dfasearch.c +++ b/src/dfasearch.c @@ -81,12 +81,6 @@ dfawarn (char const *mesg) static void kwsmusts (void) { - /* With case-insensitive matching in a multi-byte locale, do not - use kwsearch, because in that case, it would be too expensive, - requiring that we case-convert all searched input. */ - if (MB_CUR_MAX > 1 && match_icase) - return; - struct dfamust const *dm = dfamusts (dfa); if (dm) { diff --git a/src/main.c b/src/main.c index c536a5a..77b3ade 100644 --- a/src/main.c +++ b/src/main.c @@ -1866,95 +1866,6 @@ parse_grep_colors (void) return; } -#define MBRTOWC(pwc, s, n, ps) \ - (MB_CUR_MAX == 1 \ - ? (*(pwc) = btowc (*(unsigned char *) (s)), 1) \ - : mbrtowc (pwc, s, n, ps)) -#define WCRTOMB(s, wc, ps) \ - (MB_CUR_MAX == 1 \ - ? (*(s) = wctob ((wint_t) (wc)), 1) \ - : wcrtomb (s, wc, ps)) - -/* If the newline-separated regular expressions, KEYS (with length, LEN - and no trailing NUL byte), are amenable to transformation into - otherwise equivalent case-ignoring ones, perform the transformation, - put the result into malloc'd memory, *NEW_KEYS with length *NEW_LEN, - and return true. Otherwise, return false. 
*/ - -static bool -trivial_case_ignore (size_t len, char const *keys, - size_t *new_len, char **new_keys) -{ - /* FIXME: consider removing the following restriction: - Reject if KEYS contain ASCII '\\' or '['. */ - if (memchr (keys, '\\', len) || memchr (keys, '[', len)) - return false; - - /* Worst case is that each byte B of KEYS is ASCII alphabetic and - CASE_FOLDED_BUFSIZE other_case(B) characters, C through Z, each - occupying MB_CUR_MAX bytes, so each B maps to [BC...Z], which - requires CASE_FOLDED_BUFSIZE * MB_CUR_MAX + 3 bytes; this is - bounded above by the constant expression CASE_FOLDED_BUFSIZE * - MB_LEN_MAX + 3. */ - *new_keys = xnmalloc (len + 1, CASE_FOLDED_BUFSIZE * MB_LEN_MAX + 3); - char *p = *new_keys; - - mbstate_t mb_state = { 0 }; - while (len) - { - bool initial_state = mbsinit (&mb_state) != 0; - wchar_t wc; - size_t n = MBRTOWC (&wc, keys, len, &mb_state); - - /* For an invalid, incomplete or L'\0', skip this optimization. */ - if ((size_t) -2 <= n) - { - skip_case_ignore_optimization: - free (*new_keys); - return false; - } - - char const *orig = keys; - keys += n; - len -= n; - - wchar_t folded[CASE_FOLDED_BUFSIZE]; - int nfolded = case_folded_counterparts (wc, folded); - if (nfolded <= 0) - { - memcpy (p, orig, n); - p += n; - } - else if (! initial_state) - goto skip_case_ignore_optimization; - else - { - *p++ = '['; - memcpy (p, orig, n); - p += n; - - int i = 0; - do - { - size_t nbytes = WCRTOMB (p, folded[i], &mb_state); - if (nbytes == (size_t) -1) - goto skip_case_ignore_optimization; - p += nbytes; - } - while (++i < nfolded); - - if (! mbsinit (&mb_state)) - goto skip_case_ignore_optimization; - - *p++ = ']'; - } - } - - *new_len = p - *new_keys; - - return true; -} - int main (int argc, char **argv) { @@ -2349,35 +2260,6 @@ main (int argc, char **argv) else usage (EXIT_TROUBLE); - /* Case-insensitive matching is expensive in multibyte locales - because a few characters may change size when converted to upper - or lower case. 
To accommodate those, search the input one line - at a time, rather than using the much more efficient buffer search. - - Try to convert a regular expression 'foo' (ignoring case) to an - equivalent regular expression '[fF][oO][oO]' (where case matters). - Not only does this avoid the expensive requirement to read and - process a line at a time, it also allows use of the kwset engine, - a win in non-UTF-8 multibyte locales. */ - if (match_icase) - { - size_t new_keycc; - char *new_keys; - /* It is not possible with -F, not useful with -P (pcre) and there is no - point when there is no regexp. It also depends on which constructs - appear in the regexp. See trivial_case_ignore for those details. */ - if (keycc - && ! (matcher - && (STREQ (matcher, "fgrep") || STREQ (matcher, "pcre"))) - && trivial_case_ignore (keycc, keys, &new_keycc, &new_keys)) - { - match_icase = 0; - free (keys); - keys = new_keys; - keycc = new_keycc; - } - } - #if MBS_SUPPORT if (MB_CUR_MAX > 1) build_mbclen_cache (); -- 1.9.0