[PATCH] RFC: rewrite handling of multibyte case folding

bug-grep
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[PATCH] RFC: rewrite handling of multibyte case folding

From:	Paolo Bonzini
Subject:	[PATCH] RFC: rewrite handling of multibyte case folding
Date:	Fri, 5 Mar 2010 17:39:16 +0100
This is a relatively big patch.  It passes the testsuite, it fixes bugs
(the handling of \W when doing multibyte matches), it removes the need
for two out of three Debian patches to parse_bracket_exp_mb (the one that
remains is the one changing [[:upper:]] and [[:lower:]] to [[:alpha:]])
and it removes more code than it adds, so I think it is the right way to
do it.  However, it needs more reviewing and testing because of its size.

The idea is to let dfacomp do the folding to lowercase of multibyte
input strings, and remove it from grep.c.  Input strings to kwset.c are
still folded outside kwset.c; this is left as a future cleanup since it
is not needed to fix bugs.

The less nice part of this is that callers of dfaexec have to do the
lowercasing themselves.  This is okay for grep, but it might be a bit
ugly for gawk.

While doing this I also rewrote the lexing of multibyte characters
in dfa.c.  The new code is simpler and makes it easier to fit in the
calls to towlower.

Comments?

Paolo

* src/dfa.c (setbit_case_fold): Assert it is only called for SBCS.
(wctok): New.
(update_mb_len_index): Remove.
(FETCH): Do not call it.
(lex): Call fetch_wc in the main loop for MB_CUR_MAX > 1.  Rewrite
normal_char label.
(atom): Handle WCHAR instead of treating multibyte characters specially.
(dfacomp): Remove case_fold special casing.
* src/dfa.h (WCHAR): New.
* src/grep.c (mb_icase_keys): Remove.
(main): Do not call it.
* src/search.c (kwsinit): Init transition table only for MB_CUR_MAX == 1.
(mbtolower): New.
(kwsincr_case): New.
(kwsmusts): Call it instead of kwsincr.
(check_multibyte_string): Remove.
(check_multibyte_string_no_icase): Rename to check_multibyte_string.
(GEAcompile, EGexecute, Fcompile): Use mbtolower instead of the old
check_multibyte_string.
* tests/fedora.sh: Add test for case-insensitive matching in brackets.
* tests/foad1.sh: Enable failing tests.
---
 src/dfa.c       |  180 +++++++++++++++++++++-------------------------------
 src/dfa.h       |    3 +
 src/grep.c      |   68 --------------------
 src/search.c    |  189 +++++++++++++++++++++++++++++++++----------------------
 tests/fedora.sh |    4 +
 tests/foad1.sh  |   10 +--
 6 files changed, 197 insertions(+), 257 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 60ec372..ce61e00 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -317,6 +317,7 @@ dfasyntax (reg_syntax_t bits, int fold, unsigned char eol)
 static void
 setbit_case_fold (unsigned b, charclass c)
 {
+  assert (MB_CUR_MAX == 1);
   setbit (b, c);
   if (case_fold)
     {
@@ -344,17 +345,11 @@ static int hard_LC_COLLATE;       /* Nonzero if LC_COLLATE is hard.  */
 #ifdef MBS_SUPPORT
 /* These variables are used only if (MB_CUR_MAX > 1).  */
 static mbstate_t mbs;          /* Mbstate for mbrlen().  */
-static int cur_mb_len;         /* Byte length of the current scanning
+static wchar_t wctok;          /* Wide character representation of the current
                                   multibyte character.  */
-static int cur_mb_index;        /* Byte index of the current scanning multibyte
-                                   character.
-
-                                  single byte character : cur_mb_index = 0
-                                  multibyte character
-                                      1st byte : cur_mb_index = 1
-                                      2nd byte : cur_mb_index = 2
-                                        ...
-                                      nth byte : cur_mb_index = n  */
+static size_t cur_mb_len;      /* Length of the multibyte representation of
+                                  wctok.  */
+static size_t cur_mb_index;    /* Passed to addtok to prepare mbprops.  */
 static unsigned char *mblen_buf;/* Correspond to the input buffer in dfaexec().
                                   Each element store the amount of remain
                                   byte of corresponding multibyte character
@@ -376,38 +371,6 @@ static unsigned char const *buf_end;       /* reference to end in dfaexec().  */
 #endif /* MBS_SUPPORT  */
 
 #ifdef MBS_SUPPORT
-/* This function update cur_mb_len, and cur_mb_index.
-   p points current lexptr, len is the remaining buffer length.  */
-static void
-update_mb_len_index (char const *p, int len)
-{
-  /* If last character is a part of a multibyte character,
-     we update cur_mb_index.  */
-  if (cur_mb_index)
-    cur_mb_index = (cur_mb_index >= cur_mb_len)? 0
-                       : cur_mb_index + 1;
-
-  /* If last character is a single byte character, or the
-     last portion of a multibyte character, we check whether
-     next character is a multibyte character or not.  */
-  if (! cur_mb_index)
-    {
-      cur_mb_len = mbrlen(p, len, &mbs);
-      if (cur_mb_len > 1)
-       /* It is a multibyte character.
-          cur_mb_len was already set by mbrlen().  */
-       cur_mb_index = 1;
-      else if (cur_mb_len < 1)
-       /* Invalid sequence.  We treat it as a single byte character.
-          cur_mb_index is aleady 0.  */
-       cur_mb_len = 1;
-      /* Otherwise, cur_mb_len == 1, it is a single byte character.
-        cur_mb_index is aleady 0.  */
-    }
-}
-#endif /* MBS_SUPPORT */
-
-#ifdef MBS_SUPPORT
 /* Note that characters become unsigned here. */
 # define FETCH(c, eoferr)                      \
   {                                            \
@@ -418,8 +381,6 @@ update_mb_len_index (char const *p, int len)
        else                                    \
          return lasttok = END;                 \
       }                                                \
-    if (MB_CUR_MAX > 1)                                \
-      update_mb_len_index(lexptr, lexleft);    \
     (c) = (unsigned char) *lexptr++;           \
     --lexleft;                                 \
   }
@@ -736,15 +697,20 @@ lex (void)
      "if (backslash) ...".  */
   for (i = 0; i < 2; ++i)
     {
-      FETCH(c, 0);
 #ifdef MBS_SUPPORT
-      if (MB_CUR_MAX > 1 && cur_mb_index)
-       /* If this is a part of a multi-byte character, we must treat
-          this byte data as a normal character.
-          e.g. In case of SJIS encoding, some character contains '\',
-               but they must not be backslash.  */
-       goto normal_char;
+      if (MB_CUR_MAX > 1)
+        {
+          wint_t wi = fetch_wc (NULL);
+          if (wi == WEOF)
+            return lasttok = EOF;
+          wctok = wi, c = wctob (wi);
+          if ((int)c == EOF)
+            goto normal_char;
+        }
+      else
 #endif /* MBS_SUPPORT  */
+        FETCH(c, NULL);
+
       switch (c)
        {
        case '\\':
@@ -1104,12 +1070,32 @@ lex (void)
        default:
        normal_char:
          laststart = 0;
-         if (case_fold && ISALPHA(c))
-           {
-             zeroset(ccl);
-             setbit_case_fold (c, ccl);
-             return lasttok = CSET + charclass_index(ccl);
-           }
+         if (case_fold)
+            {
+#ifdef MBS_SUPPORT
+              /* For multibyte character sets, folding is done before dfaexec
+                 here so we do not need to make a CSET.  */
+              if (MB_CUR_MAX > 1)
+                {
+                  wctok = towlower (wctok);
+                  c = wctob (wctok);
+                }
+              else
+#endif
+                {
+                  if (ISALPHA(c))
+                    {
+                      zeroset(ccl);
+                      setbit_case_fold (c, ccl);
+                      return lasttok = CSET + charclass_index(ccl);
+                    }
+                }
+            }
+
+#ifdef MBS_SUPPORT
+          if ((int)c == EOF)
+            return lasttok = WCHAR;
+#endif
          return lasttok = c;
        }
     }
@@ -1219,6 +1205,31 @@ addtok (token t)
 static void
 atom (void)
 {
+#ifdef MBS_SUPPORT
+  /* We treat a multibyte character as a single atom, so that DFA
+     can treat a multibyte character as a single expression.
+
+     e.g. We construct following tree from "<mb1><mb2>".
+     <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
+     <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT>
+     */
+  if (tok == WCHAR)
+    {
+      unsigned char buf[16];
+      mbstate_t s;
+      memset (&s, 0, sizeof(s));
+      cur_mb_len = wcrtomb ((char *) buf, wctok, &s);
+      addtok(buf[0]);
+      for (cur_mb_index = 1; cur_mb_index < cur_mb_len; cur_mb_index++)
+        {
+          addtok(buf[cur_mb_index]);
+          addtok(CAT);
+        }
+      tok = lex();
+      return;
+    }
+#endif /* MBS_SUPPORT  */
+
   if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF
       || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD
 #ifdef MBS_SUPPORT
@@ -1228,24 +1239,6 @@ atom (void)
     {
       addtok(tok);
       tok = lex();
-#ifdef MBS_SUPPORT
-      /* We treat a multibyte character as a single atom, so that DFA
-        can treat a multibyte character as a single expression.
-
-         e.g. We construct following tree from "<mb1><mb2>".
-              <mb1(1st-byte)><mb1(2nd-byte)><CAT><mb1(3rd-byte)><CAT>
-              <mb2(1st-byte)><mb2(2nd-byte)><CAT><mb2(3rd-byte)><CAT><CAT>
-      */
-      if (MB_CUR_MAX > 1)
-       {
-         while (cur_mb_index > 1 && tok >= 0 && tok < NOTCHAR)
-           {
-             addtok(tok);
-             addtok(CAT);
-             tok = lex();
-           }
-       }
-#endif /* MBS_SUPPORT  */
     }
   else if (tok == CRANGE)
     {
@@ -2964,39 +2957,10 @@ dfainit (struct dfa *d)
 void
 dfacomp (char const *s, size_t len, struct dfa *d, int searchflag)
 {
-  if (case_fold)       /* dummy folding in service of dfamust() */
-    {
-      char *lcopy;
-      int i;
-
-      lcopy = malloc(len);
-      if (!lcopy)
-       dfaerror(_("memory exhausted"));
-
-      /* This is a kludge. */
-      case_fold = 0;
-      for (i = 0; i < len; ++i)
-       if (ISUPPER ((unsigned char) s[i]))
-         lcopy[i] = tolower ((unsigned char) s[i]);
-       else
-         lcopy[i] = s[i];
-
-      dfainit(d);
-      dfaparse(lcopy, len, d);
-      free(lcopy);
-      dfamust(d);
-      d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0;
-      case_fold = 1;
-      dfaparse(s, len, d);
-      dfaanalyze(d, searchflag);
-    }
-  else
-    {
-        dfainit(d);
-        dfaparse(s, len, d);
-       dfamust(d);
-        dfaanalyze(d, searchflag);
-    }
+  dfainit(d);
+  dfaparse(s, len, d);
+  dfamust(d);
+  dfaanalyze(d, searchflag);
 }
 
 /* Free the storage held by the components of a dfa. */
diff --git a/src/dfa.h b/src/dfa.h
index dbac197..a9bf08c 100644
--- a/src/dfa.h
+++ b/src/dfa.h
@@ -152,6 +152,9 @@ typedef enum
 
   MBCSET,                      /* MBCSET is similar to CSET, but for
                                   multibyte characters.  */
+
+  WCHAR,                       /* Only returned by lex.  wctok contains
+                                  the wide character representation.  */
 #endif /* MBS_SUPPORT */
 
   CSET                         /* CSET and (and any value greater) is a
diff --git a/src/grep.c b/src/grep.c
index a7f0f7c..290ca86 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -1781,69 +1781,6 @@ parse_grep_colors (void)
                "at remaining substring \"%s\"."), p, q);
 }
 
-/* mb_icase_keys() is called by main() to convert its "keys" string with
-   strlen() "len" to lowercase if match_icase is true.  Pointers are used
-   to implement in-out call-by-reference parameters.  */
-#ifdef MBS_SUPPORT
-static void
-mb_icase_keys (char **keys, size_t *len)
-{
-  wchar_t wc;
-  mbstate_t sti, stj;          /* i for input/old, j for output/new.  */
-  size_t i, j, li, lj;         /* l for total string length (minus '\0').  */
-  char *ki, *kj;               /* k for keys.  */
-  int mcm;
-
-  if ((mcm = MB_CUR_MAX) == 1)
-    return;
-
-  li = *len;
-  ki = *keys;
-  /* We use a new buffer because some multi-octet characters change
-     length through a lower-case conversion.  For example:
-       len(U+0049)=1 --> len(U+0131)=2   under tr_TR.UTF-8
-       len(U+0130)=2 --> len(U+0069)=1   under en_US.UTF-8
-       len(U+2126)=3 --> len(U+03C9)=2   under en_US.UTF-8
-       len(U+212A)=3 --> len(U+006B)=1   under en_US.UTF-8
-       len(U+212B)=3 --> len(U+00E5)=2   under en_US.UTF-8  */
-  lj = li + mcm;
-  kj = xmalloc(lj + 1);
-
-  memset(&sti, 0, sizeof(mbstate_t));
-  memset(&stj, 0, sizeof(mbstate_t));
-  for (i = j = 0; i < li ;)
-    {
-      size_t mbclen;
-      mbclen = mbrtowc(&wc, ki + i, li - i, &sti);
-      if (lj < j + mcm)
-       {
-         lj += mcm;
-         kj = xrealloc(kj, lj + 1);
-       }
-      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
-       {
-         /* An invalid sequence, or a truncated multi-octet character.
-            We treat it as a single-octet character.  */
-         kj[j++] = ki[i++];
-       }
-      else
-       {
-         /* Doing towupper() before towlower() helps a few hairy cases and is
-            not too costly since this is the PATTERN and is done only once.  */
-         wc = towupper((wint_t)wc);
-         wc = towlower((wint_t)wc);
-         j += wcrtomb(kj + j, wc, &stj);
-         i += mbclen;
-       }
-    }
-  kj[j] = '\0';
-
-  free(ki);
-  *keys = kj;
-  *len = j;
-}
-#endif /* MBS_SUPPORT */
-
 int
 main (int argc, char **argv)
 {
@@ -2261,11 +2198,6 @@ There is NO WARRANTY, to the extent permitted by law.\n"),
 
   set_limits();
 
-#ifdef MBS_SUPPORT
-  if (match_icase)
-    mb_icase_keys (&keys, &keycc);
-#endif /* MBS_SUPPORT */
-
   compile(keys, keycc);
 
   if ((argc - optind > 1 && !no_filenames) || with_filenames)
diff --git a/src/search.c b/src/search.c
index c4e5149..4074765 100644
--- a/src/search.c
+++ b/src/search.c
@@ -59,14 +59,82 @@ kwsinit (void)
   static char trans[NCHAR];
   int i;
 
-  if (match_icase)
-    for (i = 0; i < NCHAR; ++i)
-      trans[i] = TOLOWER (i);
+  if (match_icase && MB_CUR_MAX == 1)
+    {
+      for (i = 0; i < NCHAR; ++i)
+        trans[i] = TOLOWER (i);
+
+      kwset = kwsalloc (trans);
+    }
+  else
+    kwset = kwsalloc (NULL);
 
-  if (!(kwset = kwsalloc (match_icase ? trans : (char *) 0)))
+  if (!kwset)
     xalloc_die ();
 }
 
+#ifdef MBS_SUPPORT
+/* Convert the string from BEG to N to lowercase.  Overwrite N
+   with the length of the new string, and return a pointer to
+   the lowercase string.  Successive calls to mbtolower will
+   rewrite the output buffer.  */
+static char *
+mbtolower (const char *beg, size_t *n)
+{
+  static char *out;
+  static size_t outlen;
+  mbstate_t is, os;
+  const char *end;
+  char *p;
+  size_t free;
+
+  if (*n > outlen)
+    {
+      /* Get some additional room since we're resizing.  */
+      outlen = *n * 2 + MB_CUR_MAX + 1;
+      out = xrealloc (out, outlen);
+    }
+
+  memset (&is, 0, sizeof (is));
+  memset (&os, 0, sizeof (os));
+  end = beg + *n;
+  p = out;
+  free = outlen - MB_CUR_MAX;
+  while (beg < end)
+    {
+      wchar_t wc;
+      size_t mbclen = mbrtowc(&wc, beg, end - beg, &is);
+      if (free < 0)
+        {
+          free += outlen;
+          outlen *= 2;
+          out = xrealloc (out, outlen);
+        }
+
+      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
+        {
+          /* An invalid sequence, or a truncated multi-octet character.
+             We treat it as a single-octet character.  */
+          *p++ = *beg++;
+          memset (&is, 0, sizeof (is));
+          memset (&os, 0, sizeof (os));
+        }
+      else
+        {
+          beg += mbclen;
+          mbclen = wcrtomb (p, towlower ((wint_t) wc), &os);
+          p += mbclen;
+          free -= mbclen;
+        }
+    }
+
+  *n = p - out;
+  *p++ = 0;
+  return out;
+}
+#endif
+
+
 #ifndef FGREP_PROGRAM
 /* DFA compiled regexp. */
 static struct dfa dfa;
@@ -94,6 +162,22 @@ dfaerror (char const *mesg)
    call the regexp matcher at all. */
 static int kwset_exact_matches;
 
+static char const *
+kwsincr_case (const char *must)
+{
+  const char *buf;
+  size_t n;
+
+  n = strlen (must);
+#ifdef MBS_SUPPORT
+  if (match_icase && MB_CUR_MAX > 1)
+    buf = mbtolower (must, &n);
+  else
+#endif
+    buf = must;
+  return kwsincr (kwset, buf, n);
+}
+
 /* If the DFA turns out to have some set of fixed strings one of
    which must occur in the match, then we build a kwset matcher
    to find those strings, and thus quickly filter out impossible
@@ -102,7 +186,7 @@ static void
 kwsmusts (void)
 {
   struct dfamust const *dm;
-  char const *err;
+  char const *err, *buf;
 
   if (dfa.musts)
     {
@@ -115,7 +199,7 @@ kwsmusts (void)
          if (!dm->exact)
            continue;
          ++kwset_exact_matches;
-         if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != NULL)
+         if ((err = kwsincr_case (dm->must)) != NULL)
            error (EXIT_TROUBLE, 0, "%s", err);
        }
       /* Now, we compile the substrings that will require
@@ -124,7 +208,7 @@ kwsmusts (void)
        {
          if (dm->exact)
            continue;
-         if ((err = kwsincr (kwset, dm->must, strlen (dm->must))) != NULL)
+         if ((err = kwsincr_case (dm->must)) != NULL)
            error (EXIT_TROUBLE, 0, "%s", err);
        }
       if ((err = kwsprep (kwset)) != NULL)
@@ -134,48 +218,9 @@ kwsmusts (void)
 #endif /* !FGREP_PROGRAM */
 
 #ifdef MBS_SUPPORT
-/* This function allocate the array which correspond to "buf".
-   Then this check multibyte string and mark on the positions which
-   are not single byte character nor the first byte of a multibyte
-   character.  Caller must free the array.  */
-static char*
-check_multibyte_string(char *buf, size_t size)
-{
-  char *mb_properties = xcalloc(size, 1);
-  mbstate_t cur_state;
-  wchar_t wc;
-  int i;
-
-  memset(&cur_state, 0, sizeof(mbstate_t));
-
-  for (i = 0; i < size ;)
-    {
-      size_t mbclen;
-      mbclen = mbrtowc(&wc, buf + i, size - i, &cur_state);
-
-      if (mbclen == (size_t) -1 || mbclen == (size_t) -2 || mbclen == 0)
-       {
-         /* An invalid sequence, or a truncated multibyte character.
-            We treat it as a single byte character.  */
-         mbclen = 1;
-       }
-      else if (match_icase)
-       {
-         if (iswupper((wint_t)wc))
-           {
-             wc = towlower((wint_t)wc);
-             ignore_value (wcrtomb(buf + i, wc, &cur_state));
-           }
-       }
-      mb_properties[i] = mbclen;
-      i += mbclen;
-    }
-
-  return mb_properties;
-}
 
 static char*
-check_multibyte_string_no_icase(const char *buf, size_t size)
+check_multibyte_string(const char *buf, size_t size)
 {
   char *mb_properties = xcalloc(size, 1);
   mbstate_t cur_state;
@@ -219,10 +264,8 @@ GEAcompile (char const *pattern, size_t size, reg_syntax_t syntax_bits)
   size_t total = size;
   char const *motif = pattern;
 
-#if 0
   if (match_icase)
     syntax_bits |= RE_ICASE;
-#endif
   re_set_syntax (syntax_bits);
   dfasyntax (syntax_bits, match_icase, eolbyte);
 
@@ -333,17 +376,14 @@ EXECUTE_FCT(EGexecute)
     {
       if (match_icase)
         {
-          char *case_buf = xmalloc(size);
-          memcpy(case_buf, buf, size);
+          char *case_buf = mbtolower (buf, &size);
          if (start_ptr)
            start_ptr = case_buf + (start_ptr - buf);
-         if (kwset)
-           mb_properties = check_multibyte_string(case_buf, size);
           buf = case_buf;
         }
-      else
-       if (kwset)
-         mb_properties = check_multibyte_string_no_icase(buf, size);
+
+      if (kwset)
+        mb_properties = check_multibyte_string(buf, size);
     }
 #endif /* MBS_SUPPORT */
 
@@ -509,11 +549,7 @@ EXECUTE_FCT(EGexecute)
  out:
 #ifdef MBS_SUPPORT
   if (MB_CUR_MAX > 1)
-    {
-      if (match_icase)
-        free ((char *) buf);
-      free (mb_properties);
-    }
+    free (mb_properties);
 #endif /* MBS_SUPPORT */
   return ret_val;
 }
@@ -522,16 +558,23 @@ EXECUTE_FCT(EGexecute)
 #if defined(GREP_PROGRAM) || defined(FGREP_PROGRAM)
 COMPILE_FCT(Fcompile)
 {
-  char const *beg, *end, *lim, *err;
+  char const *beg, *end, *lim, *err, *pat;
+  size_t psize;
 
   kwsinit ();
-  beg = pattern;
+  psize = size;
+  if (match_icase && MB_CUR_MAX > 1)
+    pat = mbtolower (pattern, &psize);
+  else
+    pat = pattern;
+
+  beg = pat;
   do
     {
       for (lim = beg;; ++lim)
        {
          end = lim;
-         if (lim >= pattern + size)
+         if (lim >= pat + psize)
            break;
         if (*lim == '\n')
           {
@@ -539,18 +582,19 @@ COMPILE_FCT(Fcompile)
             break;
           }
 #if HAVE_DOS_FILE_CONTENTS
-        if (*lim == '\r' && lim + 1 < pattern + size && lim[1] == '\n')
+        if (*lim == '\r' && lim + 1 < pat + psize && lim[1] == '\n')
           {
             lim += 2;
             break;
           }
 #endif
        }
+
       if ((err = kwsincr (kwset, beg, end - beg)) != NULL)
        error (EXIT_TROUBLE, 0, "%s", err);
       beg = lim;
     }
-  while (beg < pattern + size);
+  while (beg < pat + psize);
 
   if ((err = kwsprep (kwset)) != NULL)
     error (EXIT_TROUBLE, 0, "%s", err);
@@ -569,14 +613,13 @@ EXECUTE_FCT(Fexecute)
     {
       if (match_icase)
         {
-          char *case_buf = xmemdup (buf, size);
+          char *case_buf = mbtolower (buf, &size);
          if (start_ptr)
            start_ptr = case_buf + (start_ptr - buf);
-         mb_properties = check_multibyte_string(case_buf, size);
           buf = case_buf;
         }
-      else
-       mb_properties = check_multibyte_string_no_icase(buf, size);
+
+      mb_properties = check_multibyte_string(buf, size);
     }
 #endif /* MBS_SUPPORT */
 
@@ -641,11 +684,7 @@ EXECUTE_FCT(Fexecute)
  out:
 #ifdef MBS_SUPPORT
   if (MB_CUR_MAX > 1)
-    {
-      if (match_icase)
-        free ((char *) buf);
-      free (mb_properties);
-    }
+    free (mb_properties);
 #endif /* MBS_SUPPORT */
   return ret_val;
 }
diff --git a/tests/fedora.sh b/tests/fedora.sh
index a179271..37a2d5b 100644
--- a/tests/fedora.sh
+++ b/tests/fedora.sh
@@ -71,6 +71,10 @@ echo -n "grep -w broken in non-utf8 multibyte locales: "
 echo za a > 179698.out
 LANG=ja_JP.eucjp ${GREP} -w a 179698.out | diff - 179698.out && ok || fail
 
+U=https://bugzilla.redhat.com/show_bug.cgi?id=123363
+echo -n "bad handling of case-insensitive match in brackets: "
+echo Y | LANG=en_US.UTF-8 ${GREP} -i '[y]' >/dev/null && ok || fail
+
 # Skip the rest of tests in compiled without PCRE
 echo a |grep -P a >/dev/null || exit $failures
 
diff --git a/tests/foad1.sh b/tests/foad1.sh
index 7c16d00..68acc77 100755
--- a/tests/foad1.sh
+++ b/tests/foad1.sh
@@ -42,9 +42,8 @@ grep_test ()
 
 # "-o" with "-i" should output an exact copy of the matching input text.
 grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "word" -o -i
-# Comment out cases that are known to fail. These should be uncommented after the 2.5.4 release. TAA.
-#grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "Word" -o -i
-#grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "WORD" -o -i
+grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "Word" -o -i
+grep_test "WordA/wordB/WORDC/" "Word/word/WORD/" "WORD" -o -i
 
 # Should display the line number (-n), octet offset (-b), or file name
 # (-H) of every match, not just of the first match on each input line.
@@ -82,9 +81,8 @@ CE="[m[K"
 
 # "--color" with "-i" should output an exact copy of the matching input text.
 grep_test "WordA/wordb/WORDC/" "${CB}Word${CE}A/${CB}word${CE}b/${CB}WORD${CE}C/" "word" --color=always -i
-# Comment out cases that are known to fail. These should be uncommented after the 2.5.4 release. TAA.
-#grep_test "WordA/wordb/WORDC/" "${CB}Word${CE}A/${CB}word${CE}b/${CB}WORD${CE}C/" "Word" --color=always -i
-#grep_test "WordA/wordb/WORDC/" "${CB}Word${CE}A/${CB}word${CE}b/${CB}WORD${CE}C/" "WORD" --color=always -i
+grep_test "WordA/wordb/WORDC/" "${CB}Word${CE}A/${CB}word${CE}b/${CB}WORD${CE}C/" "Word" --color=always -i
+grep_test "WordA/wordb/WORDC/" "${CB}Word${CE}A/${CB}word${CE}b/${CB}WORD${CE}C/" "WORD" --color=always -i
 
 # End of a previous match should not match a "start of ..." expression.
 grep_test "word_word/" "${CB}word_${CE}word/" "^word_*" --color=always
-- 
1.6.6
[Prev in Thread]
Current Thread
[Next in Thread]
[PATCH] RFC: rewrite handling of multibyte case folding, Paolo Bonzini <=
Prev by Date: Re: Debian patches
Next by Date: [PATCH] grep(1) man page: touchup --label option
Previous by thread: [PATCH 0/6] *** SUBJECT HERE ***
Next by thread: [PATCH] grep(1) man page: touchup --label option
Index(es):
- Date
- Thread