grep branch, master, updated. v2.11-9-ga48ed5b

grep-commit
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
grep branch, master, updated. v2.11-9-ga48ed5b

From:	Paul Eggert
Subject:	grep branch, master, updated. v2.11-9-ga48ed5b
Date:	Sun, 18 Mar 2012 23:04:07 +0000
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "grep".

The branch, master has been updated
       via  a48ed5b6953d6d40f6797f7e151e324924039b78 (commit)
      from  c6e3ea61d9f08aa0128a0eb13d31a2fbad376f99 (commit)

Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.

- Log -----------------------------------------------------------------
http://git.savannah.gnu.org/cgit/grep.git/commit/?id=a48ed5b6953d6d40f6797f7e151e324924039b78


commit a48ed5b6953d6d40f6797f7e151e324924039b78
Author: Paul Eggert <address@hidden>
Date:   Sat Mar 17 18:04:50 2012 -0700

    grep: report overflow for ERE a{1000000000}
    
    * NEWS: Document this.
    * src/dfa.c (MIN): New macro.
    (lex): Lexically analyze the repeat-count operator once, not
    twice; the double-scan complicated the code and made it harder to
    understand and fix.  Adjust the repeat-count parsing so that it
    better matches the behavior of the regex code, in three ways:
    1. Diagnose too-large repeat counts rather than treating them as
    literal characters.  2. Use RE_INVALID_INTERVAL_ORD, not
    RE_NO_BK_BRACES, to decide whether to treat invalid-syntax {...}s
    as literals.  3. Use the same wording for {...}-related
    diagnostics that the regex code uses.
    * tests/bre.tests, tests/ere.tests, tests/repetition-overflow:
    Adjust to match new behavior, and add a few tests.
    * cfg.mk (exclude_file_name_regexp--sc_error_message_uppercase):
    New macro, since the diagnostics start with uppercase letters.

diff --git a/NEWS b/NEWS
index 6dad608..b219b65 100644
--- a/NEWS
+++ b/NEWS
@@ -12,6 +12,9 @@ GNU grep NEWS                                    -*- outline -*-
   name too long", and it can run much faster when dealing with large
   directory hierarchies.
 
+  grep -E 'a{1000000000}' now reports an overflow error rather than
+  silently acting like grep -E 'a\{1000000000}'.
+
 ** New features
 
   The -R option now has a long-option alias --dereference-recursive.
diff --git a/cfg.mk b/cfg.mk
index 84115c2..329af43 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -88,3 +88,4 @@ exclude_file_name_regexp--sc_prohibit_xalloc_without_use = ^src/kwset\.c$$
 exclude_file_name_regexp--sc_prohibit_tab_based_indentation = \
   (Makefile|\.(am|mk)$$|^gl/lib/.*\.c\.diff$$)
 exclude_file_name_regexp--sc_space_tab = ^gl/lib/.*\.c\.diff$$
+exclude_file_name_regexp--sc_error_message_uppercase = ^src/dfa\.c$$
diff --git a/src/dfa.c b/src/dfa.c
index 6eb4a11..613f548 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -861,6 +861,10 @@ static unsigned char const *buf_end;    /* reference to end in dfaexec().  */
 
 #endif /* MBS_SUPPORT */
 
+#ifndef MIN
+# define MIN(a,b) ((a) < (b) ? (a) : (b))
+#endif
+
 typedef int predicate (int);
 
 /* The following list maps the names of the Posix named character classes
@@ -1328,90 +1332,53 @@ lex (void)
           if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart)
             goto normal_char;
 
-          if (syntax_bits & RE_NO_BK_BRACES)
-            {
-              /* Scan ahead for a valid interval; if it's not valid,
-                 treat it as a literal '{'.  */
-              int lo = -1, hi = -1;
-              char const *p = lexptr;
-              char const *lim = p + lexleft;
-              for (; p != lim && ISASCIIDIGIT (*p); p++)
-                {
-                  if (lo < 0)
-                    lo = *p - '0';
-                  else
-                    {
-                      lo = lo * 10 + *p - '0';
-                      if (RE_DUP_MAX < lo)
-                        goto normal_char;
-                    }
-                }
-              if (p != lim && *p == ',')
-                while (++p != lim && ISASCIIDIGIT (*p))
-                  {
-                    if (hi < 0)
-                      hi = *p - '0';
-                    else
-                      {
-                        hi = hi * 10 + *p - '0';
-                        if (RE_DUP_MAX < hi)
-                          goto normal_char;
-                      }
-                  }
-              else
-                hi = lo;
-              if (p == lim || *p != '}' || lo < 0 || (0 <= hi && hi < lo))
-                goto normal_char;
-            }
-
-          minrep = 0;
           /* Cases:
              {M} - exact count
              {M,} - minimum count, maximum is infinity
+             {,N} - 0 through N
+             {,} - 0 to infinity (same as '*')
              {M,N} - M through N */
-          FETCH (c, _("unfinished repeat count"));
-          if (ISASCIIDIGIT (c))
-            {
-              minrep = c - '0';
-              for (;;)
-                {
-                  FETCH (c, _("unfinished repeat count"));
-                  if (!ISASCIIDIGIT (c))
-                    break;
-                  minrep = 10 * minrep + c - '0';
-                }
-            }
-          else
-            dfaerror (_("malformed repeat count"));
-          if (c == ',')
-            {
-              FETCH (c, _("unfinished repeat count"));
-              if (!ISASCIIDIGIT (c))
-                maxrep = -1;
-              else
-                {
-                  maxrep = c - '0';
-                  for (;;)
-                    {
-                      FETCH (c, _("unfinished repeat count"));
-                      if (!ISASCIIDIGIT (c))
-                        break;
-                      maxrep = 10 * maxrep + c - '0';
-                    }
-                  if (0 <= maxrep && maxrep < minrep)
-                    dfaerror (_("malformed repeat count"));
-                }
-            }
-          else
-            maxrep = minrep;
-          if (!(syntax_bits & RE_NO_BK_BRACES))
-            {
-              if (c != '\\')
-                dfaerror (_("malformed repeat count"));
-              FETCH (c, _("unfinished repeat count"));
-            }
-          if (c != '}')
-            dfaerror (_("malformed repeat count"));
+          {
+            char const *p = lexptr;
+            char const *lim = p + lexleft;
+            minrep = maxrep = -1;
+            for (; p != lim && ISASCIIDIGIT (*p); p++)
+              {
+                if (minrep < 0)
+                  minrep = *p - '0';
+                else
+                  minrep = MIN (RE_DUP_MAX + 1, minrep * 10 + *p - '0');
+              }
+            if (p != lim)
+              {
+                if (*p != ',')
+                  maxrep = minrep;
+                else
+                  {
+                    if (minrep < 0)
+                      minrep = 0;
+                    while (++p != lim && ISASCIIDIGIT (*p))
+                      {
+                        if (maxrep < 0)
+                          maxrep = *p - '0';
+                        else
+                          maxrep = MIN (RE_DUP_MAX + 1, maxrep * 10 + *p - '0');
+                      }
+                  }
+              }
+            if (! ((! backslash || (p != lim && *p++ == '\\'))
+                   && p != lim && *p++ == '}'
+                   && 0 <= minrep && (maxrep < 0 || minrep <= maxrep)))
+              {
+                if (syntax_bits & RE_INVALID_INTERVAL_ORD)
+                  goto normal_char;
+                dfaerror (_("Invalid content of \\{\\}"));
+              }
+            if (RE_DUP_MAX < maxrep)
+              dfaerror (_("Regular expression too big"));
+            lexptr = p;
+            lexleft = lim - p;
+          }
           laststart = 0;
           return lasttok = REPMN;
 
diff --git a/tests/bre.tests b/tests/bre.tests
index 60ff1b5..9d01a3c 100644
--- a/tests/bre.tests
+++ b/tests/bre.tests
@@ -42,8 +42,9 @@
 address@hidden@EBRACE
 address@hidden@EBRACE
 address@hidden@BADBR
address@hidden,address@hidden
address@hidden,address@hidden
address@hidden,address@hidden,2\}
address@hidden,address@hidden,\}
address@hidden@BADBR
 address@hidden,address@hidden
 address@hidden,address@hidden
 address@hidden@BADBR
@@ -60,3 +61,4 @@
 address@hidden@address@hidden CORRECT
 address@hidden(b\)address@hidden
 address@hidden,address@hidden
address@hidden@ESIZE
diff --git a/tests/ere.tests b/tests/ere.tests
index 08b3dba..e0aad2a 100644
--- a/tests/ere.tests
+++ b/tests/ere.tests
@@ -76,6 +76,7 @@
 address@hidden@a{1a}
 address@hidden,address@hidden,2}
 address@hidden,address@hidden,}
address@hidden@BADBR
 address@hidden,address@hidden,,,}
 address@hidden,address@hidden@TO CORRECT
 address@hidden@address@hidden CORRECT
@@ -213,3 +214,4 @@
 address@hidden@abcdefghijklmnopqrstuv
 address@hidden|address@hidden
 address@hidden@ab
address@hidden@ESIZE
diff --git a/tests/repetition-overflow b/tests/repetition-overflow
index c92de23..66a44a6 100755
--- a/tests/repetition-overflow
+++ b/tests/repetition-overflow
@@ -11,9 +11,9 @@ fail=0
 # range of "unsigned int" would silently wrap around.   Hence, 2^32+1
 # would be treated just like "1", and both of these would mistakenly match.
 
-echo abc  | grep -E "b{$xp1}"   > out 2>&1; test $? = 1 || fail=1
+echo abc  | grep -E "b{$xp1}"   > out 2> /dev/null; test $? = 2 || fail=1
 compare out /dev/null || fail=1
-echo abbc | grep -E "b{1,$xp2}" > out 2>&1; test $? = 1 || fail=1
+echo abbc | grep -E "b{1,$xp2}" > out 2> /dev/null; test $? = 2 || fail=1
 compare out /dev/null || fail=1
 
 Exit $fail

-----------------------------------------------------------------------

Summary of changes:
 NEWS                      |    3 +
 cfg.mk                    |    1 +
 src/dfa.c                 |  127 +++++++++++++++++----------------------------
 tests/bre.tests           |    6 ++-
 tests/ere.tests           |    2 +
 tests/repetition-overflow |    4 +-
 6 files changed, 59 insertions(+), 84 deletions(-)


hooks/post-receive
-- 
grep
[Prev in Thread]
Current Thread
[Next in Thread]
grep branch, master, updated. v2.11-9-ga48ed5b, Paul Eggert <=
Prev by Date: grep branch, master, updated. v2.11-8-gc6e3ea6
Previous by thread: grep branch, master, updated. v2.11-8-gc6e3ea6
Index(es):
- Date
- Thread