[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#17221: [PATCH] grep: port better to hosts with nonstandard nl_langinfo
From: Paul Eggert
Subject: bug#17221: [PATCH] grep: port better to hosts with nonstandard nl_langinfo
Date: Tue, 8 Apr 2014 08:04:36 -0700
On some hosts, nl_langinfo returns strings other than "UTF-8" when
UTF-8 is used, and (worse) returns "UTF-8" even if the encoding is
single-byte. Work around these problems by trying a sample
character instead.
* src/dfa.c, src/pcresearch.c, src/searchutils.c:
Don't include <langinfo.h>.
* src/dfa.c (using_utf8): Test for UTF-8 by trying a character
rather than by invoking nl_langinfo (CODESET); this is more
portable in practice, and removes a dependency on
HAVE_LANGINFO_CODESET.
* src/pcresearch.c: Include dfa.h, for using_utf8.
(Pcompile): Use using_utf8 rather than nl_langinfo.
---
src/dfa.c | 14 +++++---------
src/pcresearch.c | 8 ++------
src/searchutils.c | 3 ---
3 files changed, 7 insertions(+), 18 deletions(-)
diff --git a/src/dfa.c b/src/dfa.c
index 76f7e79..34f230e 100644
--- a/src/dfa.c
+++ b/src/dfa.c
@@ -60,10 +60,6 @@ typedef bool bool_bf;
#include <wchar.h>
#include <wctype.h>
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
-
#include "xalloc.h"
/* HPUX defines these as macros in sys/param.h. */
@@ -819,14 +815,14 @@ setbit_case_fold_c (int b, charclass c)
int
using_utf8 (void)
{
-#ifdef HAVE_LANGINFO_CODESET
static int utf8 = -1;
if (utf8 < 0)
- utf8 = STREQ (nl_langinfo (CODESET), "UTF-8");
+ {
+ wchar_t wc;
+ mbstate_t mbs = { 0 };
+ utf8 = mbrtowc (&wc, "\xc4\x80", 2, &mbs) == 2 && wc == 0x100;
+ }
return utf8;
-#else
- return 0;
-#endif
}
/* Return true if the current locale is known to be a unibyte locale
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 319155f..a5e953f 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -20,14 +20,12 @@
#include <config.h>
#include "search.h"
+#include "dfa.h"
#if HAVE_PCRE_H
# include <pcre.h>
#elif HAVE_PCRE_PCRE_H
# include <pcre/pcre.h>
#endif
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
#if HAVE_LIBPCRE
/* Compiled internal form of a Perl regular expression. */
@@ -60,14 +58,12 @@ Pcompile (char const *pattern, size_t size)
char const *p;
char const *pnul;
-# if defined HAVE_LANGINFO_CODESET
- if (STREQ (nl_langinfo (CODESET), "UTF-8"))
+ if (using_utf8 ())
{
/* Enable PCRE's UTF-8 matching. Note also the use of
PCRE_NO_UTF8_CHECK when calling pcre_extra, below. */
flags |= PCRE_UTF8;
}
-# endif
/* FIXME: Remove these restrictions. */
if (memchr (pattern, '\n', size))
diff --git a/src/searchutils.c b/src/searchutils.c
index 6749945..6440f07 100644
--- a/src/searchutils.c
+++ b/src/searchutils.c
@@ -20,9 +20,6 @@
#include <assert.h>
#include "search.h"
#include "dfa.h"
-#if HAVE_LANGINFO_CODESET
-# include <langinfo.h>
-#endif
#define NCHAR (UCHAR_MAX + 1)
--
1.9.0
- bug#17221: [PATCH] grep: port better to hosts with nonstandard nl_langinfo,
Paul Eggert <=