From b4f07fa0288ad68932fc606ed760fd61db9df6d0 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Sun, 13 Jan 2019 07:53:32 +0900 Subject: [PATCH] grep: fix slow for multiple word matching grep uses KWset matcher for multiple word matching. It is very slow when most of the parts matched to a pattern are not words. So, if a part firstly matched to pattern is not a word, use the grep matcher to match for its line. By the way, if START_PTR is set, grep matcher uses regex matcher which is very slow to match words. Therefore, we use grep matcher when only START_PTR is not set. * src/kwsearch.c (Fexecute): If a part matched firstly to pattern is not word, we use the grep matcher to match for its line. --- src/kwsearch.c | 17 +++++++++++++++++ 1 files changed, 17 insertions(+), 0 deletions(-) diff --git a/src/kwsearch.c b/src/kwsearch.c index 42567e9..9240ec9 100644 --- a/src/kwsearch.c +++ b/src/kwsearch.c @@ -239,6 +239,22 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, else goto success; } + if (!start_ptr) + { + if (!kwsearch->re) + { + fgrep_to_grep_pattern (&kwsearch->pattern, &kwsearch->size); + kwsearch->re = GEAcompile (kwsearch->pattern, kwsearch->size, + RE_SYNTAX_GREP); + } + end = memchr (beg + len, eol, (buf + size) - (beg + len)); + end = end ? end + 1 : buf + size; + if (EGexecute (kwsearch->re, beg, end - beg, match_size, NULL) + != (size_t) -1) + goto success_match_words; + beg = end - 1; + break; + } if (!len) break; offset = kwsexec (kwset, beg, --len, &kwsmatch, true); @@ -259,6 +275,7 @@ Fexecute (void *vcp, char const *buf, size_t size, size_t *match_size, success: end = memchr (beg + len, eol, (buf + size) - (beg + len)); end = end ? end + 1 : buf + size; + success_match_words: beg = memrchr (buf, eol, beg - buf); beg = beg ? beg + 1 : buf; len = end - beg; -- 1.7.1