bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH 2/3] dfa: fix {0,0}


From: Paolo Bonzini
Subject: [PATCH 2/3] dfa: fix {0,0}
Date: Sat, 17 Apr 2010 03:34:53 +0200

* NEWS: Document change.
* src/dfa.c (struct dfa): Remove "broken" field.
(lex): Do not set it.
(closure): On {0,0}, back up and lex another closure without
adding a CAT.
(dfabroken): Remove.
* src/dfa.h (dfabroken): Remove.
* tests/spencer1.tests: Add testcases for {m,n}.
---
 NEWS                 |    2 ++
 src/dfa.c            |   22 +++++++---------------
 src/dfa.h            |    6 ------
 tests/spencer1.tests |    9 +++++++++
 4 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/NEWS b/NEWS
index a9ee5c0..bcca373 100644
--- a/NEWS
+++ b/NEWS
@@ -7,6 +7,8 @@ GNU grep NEWS                                    -*- outline -*-
   Searching with grep -Fw for an empty string would not match an
   empty line. [bug present since "the beginning"]
 
+  X{0,0} is implemented correctly.  It used to be a synonym of X{0,1}.
+  [bug present since "the beginning"]
 
 * Noteworthy changes in release 2.6.3 (2010-04-02) [stable]
 
diff --git a/src/dfa.c b/src/dfa.c
index fefd650..9b67240 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -387,11 +387,6 @@ struct dfa
   struct dfamust *musts;       /* List of strings, at least one of which
                                    is known to appear in any r.e. matching
                                    the dfa. */
-
-#ifdef GAWK
-  int broken;                  /* True if using a feature where there
-                                   are bugs and gawk should use regex. */
-#endif
 };
 
 /* Some macros for user access to dfa internals. */
@@ -1271,9 +1266,6 @@ lex (void)
           if (c != '}')
             dfaerror(_("malformed repeat count"));
           laststart = 0;
-#ifdef GAWK
-          dfa->broken = (minrep == maxrep && minrep == 0);
-#endif
           return lasttok = REPMN;
 
         case '|':
@@ -1606,7 +1598,7 @@ closure (void)
 
   atom();
   while (tok == QMARK || tok == STAR || tok == PLUS || tok == REPMN)
-    if (tok == REPMN)
+    if (tok == REPMN && (minrep || maxrep))
       {
         ntokens = nsubtoks(dfa->tindex);
         tindex = dfa->tindex - ntokens;
@@ -1627,6 +1619,12 @@ closure (void)
           }
         tok = lex();
       }
+    else if (tok == REPMN)
+      {
+        dfa->tindex -= nsubtoks(dfa->tindex);
+        tok = lex();
+        closure();
+      }
     else
       {
         addtok(tok);
@@ -3912,10 +3910,4 @@ dfamusts (struct dfa const *d)
   return d->musts;
 }
 
-#ifdef GAWK
-int dfabroken (struct dfa const *d)
-{
-  return d->broken;
-}
-#endif
 /* vim:set shiftwidth=2: */
diff --git a/src/dfa.h b/src/dfa.h
index 6879f8a..0a8ad42 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -94,9 +94,3 @@ extern void dfastate (int, struct dfa *, int []);
    takes a single argument, a NUL-terminated string describing the error.
    The user must supply a dfaerror.  */
 extern void dfaerror (const char *) __attribute__ ((noreturn));
-
-#ifdef GAWK
-/* Returns true if the regex is one where the dfa matcher
-   is broken and thus should not be used. */
-extern int dfabroken (struct dfa const *);
-#endif
diff --git a/tests/spencer1.tests b/tests/spencer1.tests
index b1aa78b..ecbed0f 100644
--- a/tests/spencer1.tests
+++ b/tests/spencer1.tests
@@ -111,6 +111,15 @@
 0@(bc+d$|ef*g.|h?i(j|k))@reffgz
 1@((((((((((a))))))))))@-
 0@(((((((((a)))))))))@a
address@hidden,address@hidden
address@hidden,address@hidden
address@hidden,address@hidden
address@hidden,address@hidden
address@hidden,address@hidden
address@hidden,address@hidden
address@hidden,address@hidden
address@hidden,address@hidden
address@hidden,address@hidden
 address@hidden words of address@hidden
 address@hidden address@hidden words, yeah
 0@(.*)c(.*)@abcde
-- 
1.6.6.1





reply via email to

[Prev in Thread] Current Thread [Next in Thread]