From 800bfaab412265b649c8f21b7187b01b539c4514 Mon Sep 17 00:00:00 2001 From: Norihiro Tanaka Date: Sun, 6 Nov 2016 10:25:41 +0900 Subject: [PATCH] sed: use dfa matcher for regex with anchor * sed/regexp.c (compile_regex_1, match_regex): Use dfa matcher for regex with anchor. * testsuite/local.mk: Add new test. * testsuite/Makefile.tests: * testsuite/anchor.good: New test-related files. * testsuite/anchor.inp: * testsuite/anchor.sed: --- sed/regexp.c | 6 +++--- testsuite/Makefile.tests | 2 +- testsuite/anchor.good | 3 +++ testsuite/anchor.inp | 3 +++ testsuite/anchor.sed | 8 ++++++++ testsuite/local.mk | 5 ++++- 6 files changed, 22 insertions(+), 5 deletions(-) create mode 100644 testsuite/anchor.good create mode 100644 testsuite/anchor.inp create mode 100644 testsuite/anchor.sed diff --git a/sed/regexp.c b/sed/regexp.c index c21a3a9..9078c07 100644 --- a/sed/regexp.c +++ b/sed/regexp.c @@ -146,6 +146,7 @@ compile_regex_1 (struct regex *new_regex, int needed_sub) int dfaopts = (new_regex->flags & REG_ICASE) ? DFA_CASE_FOLD : 0; dfaopts |= (buffer_delimiter == '\n') ? 0 : DFA_EOL_NUL; + dfaopts |= (new_regex->flags & REG_NEWLINE) ? 
0 : DFA_ANCHOR; new_regex->dfa = dfaalloc (); dfasyntax (new_regex->dfa, &localeinfo, syntax, dfaopts); dfacomp (new_regex->re, new_regex->sz, new_regex->dfa, 1); @@ -342,15 +343,14 @@ match_regex(struct regex *regex, char *buf, size_t buflen, if (superset && !dfaexec (superset, buf, buf + buflen, true, NULL, NULL)) return 0; - if ((!regsize && (regex->flags & REG_NEWLINE)) - || (!superset && dfaisfast (regex->dfa))) + if (!regsize || (!superset && dfaisfast (regex->dfa))) { bool backref = false; if (!dfaexec (regex->dfa, buf, buf + buflen, true, NULL, &backref)) return 0; - if (!regsize && (regex->flags & REG_NEWLINE) && !backref) + if (!regsize && !backref) return 1; } } diff --git a/testsuite/Makefile.tests b/testsuite/Makefile.tests index 2c2704a..fb84a76 100644 --- a/testsuite/Makefile.tests +++ b/testsuite/Makefile.tests @@ -21,7 +21,7 @@ SKIP = :>$@.skip; exit 77 enable sep inclib 8bit 8to7 newjis xabcx dollar noeol bkslashes \ numsub head madding mac-mf empty xbxcx xbxcx3 recall recall2 xemacs \ appquit fasts uniq manis linecnt khadafy allsub flipcase space modulo \ -y-bracket y-newline y-zero insert brackets amp-escape newline-anchor:: +y-bracket y-newline y-zero insert brackets amp-escape anchor newline-anchor:: $(SEDENV) $(SED) -f $(srcdir)/$@.sed \ < $(srcdir)/$@.inp | $(TR) -d \\r > $@.out $(CMP) $(srcdir)/$@.good $@.out diff --git a/testsuite/anchor.good b/testsuite/anchor.good new file mode 100644 index 0000000..91e35e3 --- /dev/null +++ b/testsuite/anchor.good @@ -0,0 +1,3 @@ +X line1 B +C line2 D +E line3 X diff --git a/testsuite/anchor.inp b/testsuite/anchor.inp new file mode 100644 index 0000000..a97e985 --- /dev/null +++ b/testsuite/anchor.inp @@ -0,0 +1,3 @@ +A line1 B +C line2 D +E line3 F diff --git a/testsuite/anchor.sed b/testsuite/anchor.sed new file mode 100644 index 0000000..46f9504 --- /dev/null +++ b/testsuite/anchor.sed @@ -0,0 +1,8 @@ +N +N +/^A/s/A/X/ +/B$/s/B/X/ +/^C/s/C/X/ +/D$/s/D/X/ 
+/^E/s/E/X/ +/F$/s/F/X/ diff --git a/testsuite/local.mk b/testsuite/local.mk index ae1f06c..a27985e 100644 --- a/testsuite/local.mk +++ b/testsuite/local.mk @@ -109,7 +109,7 @@ SEDTESTS += testsuite/appquit testsuite/enable testsuite/sep \ testsuite/amp-escape testsuite/help testsuite/file \ testsuite/quiet testsuite/factor testsuite/binary3 \ testsuite/binary2 testsuite/binary testsuite/dc \ - testsuite/newline-anchor testsuite/zero-anchor + testsuite/anchor testsuite/newline-anchor testsuite/zero-anchor # Note that the first lines are statements. They ensure that environment # variables that can perturb tests are unset or set to expected values. @@ -186,6 +186,9 @@ EXTRA_DIST += \ testsuite/amp-escape.good \ testsuite/amp-escape.inp \ testsuite/amp-escape.sed \ + testsuite/anchor.good \ + testsuite/anchor.inp \ + testsuite/anchor.sed \ testsuite/appquit.good \ testsuite/appquit.inp \ testsuite/appquit.sed \ -- 1.7.1