bug-grep
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#17221: [PATCH] grep: port better to hosts with nonstandard nl_langinfo


From: Paul Eggert
Subject: bug#17221: [PATCH] grep: port better to hosts with nonstandard nl_langinfo
Date: Tue, 8 Apr 2014 08:04:36 -0700

On some hosts, nl_langinfo returns strings other than "UTF-8" when
UTF-8 is used, and (worse) returns "UTF-8" even if the encoding is
single-byte.  Work around these problems by trying a sample
character instead.
* src/dfa.c, src/pcresearch.c, src/searchutils.c:
Don't include <langinfo.h>.
* src/dfa.c (using_utf8): Test for UTF-8 by trying a character
rather than by invoking nl_langinfo (CODESET); this is more
portable in practice, and removes a dependency on
HAVE_LANGINFO_CODESET.
* src/pcresearch.c: Include dfa.h, for using_utf8.
(Pcompile): Use using_utf8 rather than nl_langinfo.
---
 src/dfa.c         | 14 +++++---------
 src/pcresearch.c  |  8 ++------
 src/searchutils.c |  3 ---
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/src/dfa.c b/src/dfa.c
index 76f7e79..34f230e 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -60,10 +60,6 @@ typedef bool bool_bf;
 #include <wchar.h>
 #include <wctype.h>
 
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
-
 #include "xalloc.h"
 
 /* HPUX defines these as macros in sys/param.h.  */
@@ -819,14 +815,14 @@ setbit_case_fold_c (int b, charclass c)
 int
 using_utf8 (void)
 {
-#ifdef HAVE_LANGINFO_CODESET
   static int utf8 = -1;
   if (utf8 < 0)
-    utf8 = STREQ (nl_langinfo (CODESET), "UTF-8");
+    {
+      wchar_t wc;
+      mbstate_t mbs = { 0 };
+      utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+    }
   return utf8;
-#else
-  return 0;
-#endif
 }
 
 /* Return true if the current locale is known to be a unibyte locale
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 319155f..a5e953f 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -20,14 +20,12 @@
 
 #include <config.h>
 #include "search.h"
+#include "dfa.h"
 #if HAVE_PCRE_H
 # include <pcre.h>
 #elif HAVE_PCRE_PCRE_H
 # include <pcre/pcre.h>
 #endif
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
 
 #if HAVE_LIBPCRE
 /* Compiled internal form of a Perl regular expression.  */
@@ -60,14 +58,12 @@ Pcompile (char const *pattern, size_t size)
   char const *p;
   char const *pnul;
 
-# if defined HAVE_LANGINFO_CODESET
-  if (STREQ (nl_langinfo (CODESET), "UTF-8"))
+  if (using_utf8 ())
     {
       /* Enable PCRE's UTF-8 matching.  Note also the use of
          PCRE_NO_UTF8_CHECK when calling pcre_extra, below.   */
       flags |= PCRE_UTF8;
     }
-# endif
 
   /* FIXME: Remove these restrictions.  */
   if (memchr (pattern, '\n', size))
diff --git a/src/searchutils.c b/src/searchutils.c
index 6749945..6440f07 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -20,9 +20,6 @@
 #include <assert.h>
 #include "search.h"
 #include "dfa.h"
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
 
 #define NCHAR (UCHAR_MAX + 1)
 
-- 
1.9.0






reply via email to

[Prev in Thread] Current Thread [Next in Thread]