bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: sync dfaexec from gawk: fixes real bugs in grep


From: Jim Meyering
Subject: Re: sync dfaexec from gawk: fixes real bugs in grep
Date: Thu, 11 Mar 2010 17:16:27 +0100

> Here are the one-line summaries:
>
>   dfa: manually merge gawk's dfaexec
>   dfa: make search.c use the new dfaexec API
>   tests: add test cases for dfaexec bug
>
> I've just noticed that part of the 2nd change (dfa.h prototype tweak)
> snuck into the first one, so the comment in 2nd isn't right.
> I'll move param-name addition to the 2nd, where it belongs.

The body of the commit log for 2/3 was outdated.
Here's the same series, but with the above change and a corrected log comment.

>From 4b5d6197e98450eb01efb301aabea907a96cb36f Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Tue, 9 Mar 2010 15:31:22 +0100
Subject: [PATCH 1/3] dfa: manually merge gawk's dfaexec

* src/dfa.c (dfaexec): Adjust API: return pointer, not offset, and
take an "end" pointer parameter, rather than integral "size".
Adjust comment accordingly.
(build_state): Maintain d->newlines.
(copytoks): Update multibyte_prop indices.
(SKIP_REMAINS_MB_IF_INITIAL_STATE): Update a cast.
Return NULL, rather than (size_t) -1.
(realloc_trans_if_necessary): Realloc d->newlines.
* src/dfa.h (struct dfa): New member, "newlines".
(struct dfa) [GAWK]: New member, "broken".
(dfaexec): Update prototype and copy the new comment from dfa.c.
---
 src/dfa.c |  150 ++++++++++++++++++++++++++++++++++++++++---------------------
 src/dfa.h |   23 ++++++++--
 2 files changed, 117 insertions(+), 56 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 780b782..76c6a0e 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -1295,7 +1295,14 @@ copytoks (int tindex, int ntokens)
   int i;

   for (i = 0; i < ntokens; ++i)
-    addtok(dfa->tokens[tindex + i]);
+    {
+      addtok(dfa->tokens[tindex + i]);
+#ifdef MBS_SUPPORT
+      /* Update index into multibyte csets.  */
+      if (MB_CUR_MAX > 1 && dfa->tokens[tindex + i] == MBCSET)
+       dfa->multibyte_prop[dfa->tindex - 1] = dfa->multibyte_prop[tindex + i];
+#endif
+    }
 }

 static void
@@ -2291,6 +2298,7 @@ build_state (int s, struct dfa *d)
        d->trans = d->realtrans + 1;
        REALLOC(d->fails, int *, d->tralloc);
        REALLOC(d->success, int, d->tralloc);
+       REALLOC(d->newlines, int, d->tralloc);
        while (oldalloc < d->tralloc)
          {
            d->trans[oldalloc] = NULL;
@@ -2298,7 +2306,9 @@ build_state (int s, struct dfa *d)
          }
       }

-  /* Newline is a sentinel.  */
+  /* Keep the newline transition in a special place so we can use it as
+     a sentinel. */
+  d->newlines[s] = trans[eolbyte];
   trans[eolbyte] = -1;

   if (ACCEPTING(s, *d))
@@ -2316,6 +2326,7 @@ build_state_zero (struct dfa *d)
   d->trans = d->realtrans + 1;
   CALLOC(d->fails, int *, d->tralloc);
   MALLOC(d->success, int, d->tralloc);
+  MALLOC(d->newlines, int, d->tralloc);
   build_state(0, d);
 }

@@ -2336,11 +2347,11 @@ build_state_zero (struct dfa *d)
             && mblen_buf[p - buf_begin] > 0            \
            && p < buf_end)                             \
         ++p;                                           \
-      if (p >= end)                                    \
+      if ((char const *) p >= end)                     \
        {                                               \
           free(mblen_buf);                             \
           free(inputwcs);                              \
-         return (size_t) -1;                           \
+         return NULL;                                  \
        }                                               \
     }

@@ -2359,6 +2370,7 @@ realloc_trans_if_necessary(struct dfa *d, int new_state)
       d->trans = d->realtrans + 1;
       REALLOC(d->fails, int *, d->tralloc);
       REALLOC(d->success, int, d->tralloc);
+      REALLOC(d->newlines, int, d->tralloc);
       while (oldalloc < d->tralloc)
        {
          d->trans[oldalloc] = NULL;
@@ -2746,21 +2758,26 @@ transit_state (struct dfa *d, int s, unsigned char 
const **pp)

 /* Search through a buffer looking for a match to the given struct dfa.
    Find the first occurrence of a string matching the regexp in the buffer,
-   and the shortest possible version thereof.  Return the offset of the first
-   character after the match, or (size_t) -1 if none is found.  BEGIN points to
-   the beginning of the buffer, and SIZE is the size of the buffer.  If SIZE
-   is nonzero, BEGIN[SIZE - 1] must be a newline.  BACKREF points to a place
+   and the shortest possible version thereof.  Return a pointer to the first
+   character after the match, or NULL if none is found.  Begin points to
+   the beginning of the buffer, and end points to the first character after
+   its end.  We store a newline in *end to act as a sentinel, so end had
+   better point somewhere valid.  Newline is a flag indicating whether to
+   allow newlines to be in the matching string.  If count is non-
+   NULL it points to a place we're supposed to increment every time we
+   see a newline.  Finally, if backref is non-NULL it points to a place
    where we're supposed to store a 1 if backreferencing happened and the
    match needs to be verified by a backtracking matcher.  Otherwise
    we store a 0 in *backref. */
-size_t
-dfaexec (struct dfa *d, char const *begin, size_t size, int *backref)
+char *
+dfaexec (struct dfa *d, char const *begin, char *end,
+        int newline, int *count, int *backref)
 {
-  int s;       /* Current state. */
+  int s, s1, tmp;      /* Current state. */
   unsigned char const *p; /* Current input character. */
-  unsigned char const *end; /* One past the last input character.  */
-  int **trans, *t;     /* Local copy of d->trans. */
-  unsigned char eol = eolbyte; /* Hoping it goes into a register.  */
+  int **trans, *t;     /* Copy of d->trans so it can be optimized
+                                  into a register. */
+  unsigned char eol = eolbyte; /* Likewise for eolbyte.  */
   static int sbit[NOTCHAR];    /* Table for anding with d->success. */
   static int sbit_init;

@@ -2777,31 +2794,31 @@ dfaexec (struct dfa *d, char const *begin, size_t size, 
int *backref)
   if (! d->tralloc)
     build_state_zero(d);

-  s = 0;
+  s = s1 = 0;
   p = (unsigned char const *) begin;
-  end = p + size;
   trans = d->trans;
+  *end = eol;

 #ifdef MBS_SUPPORT
   if (MB_CUR_MAX > 1)
     {
       int remain_bytes, i;
       buf_begin = (unsigned char const *) begin;
-      buf_end = end;
+      buf_end = (unsigned char const *) end;

       /* initialize mblen_buf, and inputwcs.  */
-      MALLOC(mblen_buf, unsigned char, end - (unsigned char const *)begin + 2);
-      MALLOC(inputwcs, wchar_t, end - (unsigned char const *)begin + 2);
+      MALLOC(mblen_buf, unsigned char, end - begin + 2);
+      MALLOC(inputwcs, wchar_t, end - begin + 2);
       memset(&mbs, 0, sizeof(mbstate_t));
       remain_bytes = 0;
-      for (i = 0; i < end - (unsigned char const *)begin; i++)
+      for (i = 0; i < end - begin + 1; i++)
        {
          if (remain_bytes == 0)
            {
              remain_bytes
-               = mbrtowc(inputwcs + i, begin + i,
-                         end - (unsigned char const *)begin - i, &mbs);
-             if (remain_bytes <= 1)
+               = mbrtowc(inputwcs + i, begin + i, end - begin - i + 1, &mbs);
+             if (remain_bytes < 1
+               || (remain_bytes == 1 && inputwcs[i] == (wchar_t)begin[i]))
                {
                  remain_bytes = 0;
                  inputwcs[i] = (wchar_t)begin[i];
@@ -2831,6 +2848,9 @@ dfaexec (struct dfa *d, char const *begin, size_t size, 
int *backref)
       if (MB_CUR_MAX > 1)
        while ((t = trans[s]))
          {
+           if ((char *) p > end)
+             break;
+           s1 = s;
            SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);

            if (d->states[s].mbps.nelem == 0)
@@ -2846,25 +2866,16 @@ dfaexec (struct dfa *d, char const *begin, size_t size, 
int *backref)
          }
       else
 #endif /* MBS_SUPPORT */
-        while ((t = trans[s]))
-         s = t[*p++];
-
-      if (s < 0)
-       {
-         if (p == end)
-           {
-#ifdef MBS_SUPPORT
-             if (MB_CUR_MAX > 1)
-               {
-                 free(mblen_buf);
-                 free(inputwcs);
-               }
-#endif /* MBS_SUPPORT */
-             return (size_t) -1;
-           }
-         s = 0;
+      while ((t = trans[s]) != 0) { /* hand-optimized loop */
+       s1 = t[*p++];
+       if ((t = trans[s1]) == 0) {
+         tmp = s ; s = s1 ; s1 = tmp ; /* swap */
+         break;
        }
-      else if ((t = d->fails[s]))
+       s = t[*p++];
+      }
+
+      if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
        {
          if (d->success[s] & sbit[*p])
            {
@@ -2877,19 +2888,13 @@ dfaexec (struct dfa *d, char const *begin, size_t size, 
int *backref)
                  free(inputwcs);
                }
 #endif /* MBS_SUPPORT */
-             return (char const *) p - begin;
+             return (char *) p;
            }

+         s1 = s;
 #ifdef MBS_SUPPORT
          if (MB_CUR_MAX > 1)
            {
-              SKIP_REMAINS_MB_IF_INITIAL_STATE(s, p);
-              if (d->states[s].mbps.nelem == 0)
-                {
-                  s = t[*p++];
-                  continue;
-                }
-
               /* Can match with a multibyte character (and multicharacter
                  collating element).  Transition table might be updated.  */
               s = transit_state(d, s, &p);
@@ -2897,13 +2902,41 @@ dfaexec (struct dfa *d, char const *begin, size_t size, 
int *backref)
             }
          else
 #endif /* MBS_SUPPORT */
-         s = t[*p++];
+         s = d->fails[s][*p++];
+         continue;
        }
-      else
+
+      /* If the previous character was a newline, count it. */
+      if (count && (char *) p <= end && p[-1] == eol)
+       ++*count;
+
+      /* Check if we've run off the end of the buffer. */
+      if ((char *) p > end)
+       {
+#ifdef MBS_SUPPORT
+         if (MB_CUR_MAX > 1)
+           {
+             free(mblen_buf);
+             free(inputwcs);
+           }
+#endif /* MBS_SUPPORT */
+         return NULL;
+       }
+
+      if (s >= 0)
        {
          build_state(s, d);
          trans = d->trans;
+         continue;
        }
+
+      if (p[-1] == eol && newline)
+       {
+         s = d->newlines[s1];
+         continue;
+       }
+
+      s = 0;
     }
 }

@@ -2934,6 +2967,13 @@ dfainit (struct dfa *d)
   d->tralloc = 0;

   d->musts = 0;
+  d->realtrans = 0;
+  d->fails = 0;
+  d->newlines = 0;
+  d->success = 0;
+#ifdef GAWK
+  d->broken = 0;
+#endif
 }

 /* Parse and analyze a single string of the given length. */
@@ -3016,8 +3056,13 @@ dfafree (struct dfa *d)
     }
 #endif /* MBS_SUPPORT */

-  for (i = 0; i < d->sindex; ++i)
+  for (i = 0; i < d->sindex; ++i) {
     free((ptr_t) d->states[i].elems.elems);
+#ifdef MBS_SUPPORT
+    if (d->states[i].mbps.nelem > 0)
+      free((ptr_t) d->states[i].mbps.elems);
+#endif /* MBS_SUPPORT */
+  }
   free((ptr_t) d->states);
   for (i = 0; i < d->tindex; ++i)
     if (d->follows[i].elems)
@@ -3030,6 +3075,7 @@ dfafree (struct dfa *d)
       free((ptr_t) d->fails[i]);
   if (d->realtrans) free((ptr_t) d->realtrans);
   if (d->fails) free((ptr_t) d->fails);
+  if (d->newlines) free((ptr_t) d->newlines);
   if (d->success) free((ptr_t) d->success);
   for (dm = d->musts; dm; dm = ndm)
     {
diff --git a/src/dfa.h b/src/dfa.h
index ce8b951..df558b6 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -355,9 +355,19 @@ struct dfa
                                   on a state that potentially could do so. */
   int *success;                        /* Table of acceptance conditions used 
in
                                   dfaexec and computed in build_state. */
+  int *newlines;               /* Transitions on newlines.  The entry for a
+                                  newline in any transition table is always
+                                  -1 so we can count lines without wasting
+                                  too many cycles.  The transition for a
+                                  newline is stored separately and handled
+                                  as a special case. */
   struct dfamust *musts;       /* List of strings, at least one of which
                                   is known to appear in any r.e. matching
                                   the dfa. */
+#ifdef GAWK
+  int broken;                  /* True if using a feature where there
+                                  are bugs and gawk should use regex. */
+#endif
 };

 /* Some macros for user access to dfa internals. */
@@ -389,13 +399,18 @@ extern void dfasyntax (reg_syntax_t, int, unsigned char);
 extern void dfacomp (char const *, size_t, struct dfa *, int);

 /* Execute the given struct dfa on the buffer of characters.  The
-   last byte of the buffer must equal the end-of-line byte.
-   The final argument points to a flag that will
+   first char * points to the beginning, and the second points to the
+   first character after the end of the buffer, which must be a writable
+   place so a sentinel end-of-buffer marker can be stored there.  The
+   second-to-last argument is a flag telling whether to allow newlines to
+   be part of a string matching the regexp.  The next-to-last argument,
+   if non-NULL, points to a place to increment every time we see a
+   newline.  The final argument, if non-NULL, points to a flag that will
    be set if further examination by a backtracking matcher is needed in
    order to verify backreferencing; otherwise the flag will be cleared.
-   Returns (size_t) -1 if no match is found, or the offset of the first
+   Returns NULL if no match is found, or a pointer to the first
    character after the first & shortest matching string in the buffer. */
-extern size_t dfaexec (struct dfa *, char const *, size_t, int *);
+extern char *dfaexec (struct dfa *, char const *, char *, int , int *, int *);

 /* Free the storage held by the components of a struct dfa. */
 extern void dfafree (struct dfa *);
--
1.7.0.2.393.gfb6b


>From 4f406ca9d86326220b699b6b93b81e47a8483f23 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Sun, 7 Mar 2010 10:38:29 +0100
Subject: [PATCH 2/3] dfa: make search.c use the new dfaexec API

* src/search.c: Adjust to new dfaexec API.
Now, dfaexec returns a pointer, not an integer,
and the third parameter is END, not buffer size.
Rewrite the function's comment.
* src/dfa.h (dfaexec): Update comment; include parameter names.
---
 src/dfa.c    |   43 ++++++++++++++++++++++++-------------------
 src/dfa.h    |   27 ++++++++++++++-------------
 src/search.c |   12 +++++++-----
 3 files changed, 45 insertions(+), 37 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 76c6a0e..d4663e3 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -2345,12 +2345,13 @@ build_state_zero (struct dfa *d)
     {                                                  \
       while (inputwcs[p - buf_begin] == 0              \
             && mblen_buf[p - buf_begin] > 0            \
-           && p < buf_end)                             \
+            && (unsigned char const *) p < buf_end)    \
         ++p;                                           \
-      if ((char const *) p >= end)                     \
+      if ((char *) p >= end)                           \
        {                                               \
           free(mblen_buf);                             \
           free(inputwcs);                              \
+         *end = saved_end;                             \
          return NULL;                                  \
        }                                               \
     }
@@ -2413,8 +2414,10 @@ transit_state_singlebyte (struct dfa *d, int s, unsigned 
char const *p,
       else if (works < 0)
        {
          if (p == buf_end)
-           /* At the moment, it must not happen.  */
-           return TRANSIT_STATE_END_BUFFER;
+           {
+             /* At the moment, it must not happen.  */
+             abort ();
+           }
          works = 0;
        }
       else if (d->fails[works])
@@ -2757,18 +2760,17 @@ transit_state (struct dfa *d, int s, unsigned char 
const **pp)
 #endif /* MBS_SUPPORT */

 /* Search through a buffer looking for a match to the given struct dfa.
-   Find the first occurrence of a string matching the regexp in the buffer,
-   and the shortest possible version thereof.  Return a pointer to the first
-   character after the match, or NULL if none is found.  Begin points to
-   the beginning of the buffer, and end points to the first character after
-   its end.  We store a newline in *end to act as a sentinel, so end had
-   better point somewhere valid.  Newline is a flag indicating whether to
-   allow newlines to be in the matching string.  If count is non-
-   NULL it points to a place we're supposed to increment every time we
-   see a newline.  Finally, if backref is non-NULL it points to a place
-   where we're supposed to store a 1 if backreferencing happened and the
-   match needs to be verified by a backtracking matcher.  Otherwise
-   we store a 0 in *backref. */
+   Find the first occurrence of a string matching the regexp in the
+   buffer, and the shortest possible version thereof.  Return a pointer to
+   the first character after the match, or NULL if none is found.  BEGIN
+   points to the beginning of the buffer, and END points to the first byte
+   after its end.  Note however that we store a sentinel byte (usually
+   newline) in *END, so the actual buffer must be one byte longer.
+   When NEWLINE is nonzero, newlines may appear in the matching string.
+   If COUNT is non-NULL, increment *COUNT once for each newline processed.
+   Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
+   encountered a back-reference (1) or not (0).  The caller may use this
+   to decide whether to fall back on a backtracking matcher. */
 char *
 dfaexec (struct dfa *d, char const *begin, char *end,
         int newline, int *count, int *backref)
@@ -2797,14 +2799,15 @@ dfaexec (struct dfa *d, char const *begin, char *end,
   s = s1 = 0;
   p = (unsigned char const *) begin;
   trans = d->trans;
+  unsigned char saved_end = *(unsigned char *) end;
   *end = eol;

 #ifdef MBS_SUPPORT
   if (MB_CUR_MAX > 1)
     {
       int remain_bytes, i;
-      buf_begin = (unsigned char const *) begin;
-      buf_end = (unsigned char const *) end;
+      buf_begin = (unsigned char *) begin;
+      buf_end = (unsigned char *) end;

       /* initialize mblen_buf, and inputwcs.  */
       MALLOC(mblen_buf, unsigned char, end - begin + 2);
@@ -2875,7 +2878,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
        s = t[*p++];
       }

-      if (s >= 0 && p <= (unsigned char *) end && d->fails[s])
+      if (s >= 0 && (char *) p <= end && d->fails[s])
        {
          if (d->success[s] & sbit[*p])
            {
@@ -2888,6 +2891,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
                  free(inputwcs);
                }
 #endif /* MBS_SUPPORT */
+             *end = saved_end;
              return (char *) p;
            }

@@ -2920,6 +2924,7 @@ dfaexec (struct dfa *d, char const *begin, char *end,
              free(inputwcs);
            }
 #endif /* MBS_SUPPORT */
+         *end = saved_end;
          return NULL;
        }

diff --git a/src/dfa.h b/src/dfa.h
index df558b6..f13a257 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -398,19 +398,20 @@ extern void dfasyntax (reg_syntax_t, int, unsigned char);
    exact matcher. */
 extern void dfacomp (char const *, size_t, struct dfa *, int);

-/* Execute the given struct dfa on the buffer of characters.  The
-   first char * points to the beginning, and the second points to the
-   first character after the end of the buffer, which must be a writable
-   place so a sentinel end-of-buffer marker can be stored there.  The
-   second-to-last argument is a flag telling whether to allow newlines to
-   be part of a string matching the regexp.  The next-to-last argument,
-   if non-NULL, points to a place to increment every time we see a
-   newline.  The final argument, if non-NULL, points to a flag that will
-   be set if further examination by a backtracking matcher is needed in
-   order to verify backreferencing; otherwise the flag will be cleared.
-   Returns NULL if no match is found, or a pointer to the first
-   character after the first & shortest matching string in the buffer. */
-extern char *dfaexec (struct dfa *, char const *, char *, int , int *, int *);
+/* Search through a buffer looking for a match to the given struct dfa.
+   Find the first occurrence of a string matching the regexp in the
+   buffer, and the shortest possible version thereof.  Return a pointer to
+   the first character after the match, or NULL if none is found.  BEGIN
+   points to the beginning of the buffer, and END points to the first byte
+   after its end.  Note however that we store a sentinel byte (usually
+   newline) in *END, so the actual buffer must be one byte longer.
+   When NEWLINE is nonzero, newlines may appear in the matching string.
+   If COUNT is non-NULL, increment *COUNT once for each newline processed.
+   Finally, if BACKREF is non-NULL set *BACKREF to indicate whether we
+   encountered a back-reference (1) or not (0).  The caller may use this
+   to decide whether to fall back on a backtracking matcher. */
+extern char *dfaexec (struct dfa *d, char const *begin, char *end,
+                     int newline, int *count, int *backref);

 /* Free the storage held by the components of a struct dfa. */
 extern void dfafree (struct dfa *);
diff --git a/src/search.c b/src/search.c
index bd44696..e565746 100644
--- a/src/search.c
+++ b/src/search.c
@@ -333,7 +333,8 @@ EXECUTE_FCT(EGexecute)
     {
       if (match_icase)
         {
-          char *case_buf = xmalloc(size);
+          /* Add one for the sentinel byte dfaexec may add.  */
+          char *case_buf = xmalloc(size + 1);
           memcpy(case_buf, buf, size);
          if (start_ptr)
            start_ptr = case_buf + (start_ptr - buf);
@@ -375,17 +376,18 @@ EXECUTE_FCT(EGexecute)
                --beg;
              if (kwsm.index < kwset_exact_matches)
                goto success;
-             if (dfaexec (&dfa, beg, end - beg, &backref) == (size_t) -1)
+             if (dfaexec (&dfa, beg, (char *) end, 0, NULL, &backref) == NULL)
                continue;
            }
          else
            {
              /* No good fixed strings; start with DFA. */
-             size_t offset = dfaexec (&dfa, beg, buflim - beg, &backref);
-             if (offset == (size_t) -1)
+             char const *next_beg = dfaexec (&dfa, beg, (char *) buflim,
+                                             0, NULL, &backref);
+             if (next_beg == NULL)
                break;
              /* Narrow down to the line we've found. */
-             beg += offset;
+             beg = next_beg;
              if ((end = memchr(beg, eol, buflim - beg)) != NULL)
                end++;
               else
--
1.7.0.2.393.gfb6b


>From 704fa4539a4e17a6930fc51e5b47fc0b4efbc859 Mon Sep 17 00:00:00 2001
From: Jim Meyering <address@hidden>
Date: Sun, 7 Mar 2010 20:07:30 +0100
Subject: [PATCH 3/3] tests: add test cases for dfaexec bug

* tests/dfaexec-multibyte: New test.
* tests/Makefile.am (TESTS): Add it.
Reported by Paolo Bonzini in http://bugzilla.redhat.com/544407
and http://bugzilla.redhat.com/544406 .
---
 tests/Makefile.am       |    1 +
 tests/dfaexec-multibyte |   30 ++++++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 0 deletions(-)
 create mode 100644 tests/dfaexec-multibyte

diff --git a/tests/Makefile.am b/tests/Makefile.am
index 1f54ec7..06460bc 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -19,6 +19,7 @@ TESTS =                                               \
   bre.sh                                       \
   case-fold-char-class                         \
   case-fold-char-type                          \
+  dfaexec-multibyte                            \
   empty.sh                                     \
   ere.sh                                       \
   file.sh                                      \
diff --git a/tests/dfaexec-multibyte b/tests/dfaexec-multibyte
new file mode 100644
index 0000000..47db42b
--- /dev/null
+++ b/tests/dfaexec-multibyte
@@ -0,0 +1,30 @@
+#!/bin/sh
+# This would fail for grep-2.5.3
+: ${srcdir=.}
+. "$srcdir/init.sh"; path_prepend_ ../src
+
+printf 'aa\n' > exp-aa || framework_failure
+printf 'ab\n' > exp-ab || framework_failure
+printf '1 2 3\n' > exp-123 || framework_failure
+
+fail=0
+
+for LOC in en_US.UTF-8 zh_CN $LOCALE_FR_UTF8; do
+  LC_ALL=$LOC grep -E '([a]|[b]){2}' < exp-aa > out || fail=1
+  compare out exp-aa || fail=1
+
+  LC_ALL=$LOC grep -E '([b]|[a]){2}' < exp-aa > out || fail=1
+  compare out exp-aa || fail=1
+
+  LC_ALL=$LOC grep -E '([b]|[a]){2}' < exp-ab > out || fail=1
+  compare out exp-ab || fail=1
+
+  LC_ALL=$LOC grep -E '([a]|[b]){2}' < exp-ab > out || fail=1
+  compare out exp-ab || fail=1
+
+  LC_ALL=$LOC grep -E '^([[:digit:]]+[[:space:]]+){2}' < exp-123 > out || 
fail=1
+  compare out exp-123 || fail=1
+
+done
+
+Exit $fail
--
1.7.0.2.393.gfb6b




reply via email to

[Prev in Thread] Current Thread [Next in Thread]