bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: new module 'mbrlen'


From: Bruno Haible
Subject: Re: new module 'mbrlen'
Date: Tue, 23 Dec 2008 00:33:33 +0100
User-agent: KMail/1.9.9

>       New module 'mbrlen'.

mbrlen() has essentially the same set of platform dependent bugs as mbrtowc().
This patch provides a workaround:


2008-12-22  Bruno Haible  <address@hidden>

        Work around mbrlen() bugs on AIX, HP-UX, OSF/1, Solaris.
        * m4/mbrlen.m4 (gl_FUNC_MBRLEN): Set REPLACE_MBRLEN if mbrtowc is
        being overridden.
        (gl_MBRLEN_INCOMPLETE_STATE, gl_MBRLEN_RETVAL, gl_MBRLEN_NUL_RETVAL):
        New macros.
        * lib/wchar.in.h (mbrlen): Override if REPLACE_MBRLEN is set.
        * m4/wchar.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_MBRLEN.
        * modules/wchar (Makefile.am): Substitute REPLACE_MBRLEN.
        * doc/posix-functions/mbrlen.texi: Mention the various platform bugs.

--- doc/posix-functions/mbrlen.texi.orig        2008-12-23 00:27:48.000000000 
+0100
+++ doc/posix-functions/mbrlen.texi     2008-12-22 23:53:43.000000000 +0100
@@ -11,6 +11,18 @@
 @item
 This function is missing on some platforms:
 HP-UX 11.00, IRIX 6.5, Solaris 2.6, mingw, Interix 3.5.
address@hidden
+This function does not put the state into non-initial state when parsing an
+incomplete multibyte character on some platforms:
+AIX 5.1, OSF/1 5.1.
address@hidden
+This function returns the total number of bytes that make up the multibyte
+character, not the number of bytes that were needed to complete the multibyte
+character, on some platforms:
+HP-UX 11.11, Solaris 10.
address@hidden
+This function may not return 0 when parsing the NUL character on some 
platforms:
+Solaris 9.
 @end itemize
 
 Portability problems not fixed by Gnulib:
--- lib/wchar.in.h.orig 2008-12-23 00:27:48.000000000 +0100
+++ lib/wchar.in.h      2008-12-22 13:34:03.000000000 +0100
@@ -157,7 +157,11 @@
 
 /* Recognize a multibyte character.  */
 #if @GNULIB_MBRLEN@
-# if address@hidden@
+# if @REPLACE_MBRLEN@
+#  undef mbrlen
+#  define mbrlen rpl_mbrlen
+# endif
+# if address@hidden@ || @REPLACE_MBRLEN@
 extern size_t mbrlen (const char *s, size_t n, mbstate_t *ps);
 # endif
 #elif defined GNULIB_POSIXCHECK
--- m4/mbrlen.m4.orig   2008-12-23 00:27:48.000000000 +0100
+++ m4/mbrlen.m4        2008-12-23 00:25:05.000000000 +0100
@@ -1,4 +1,4 @@
-# mbrlen.m4 serial 1
+# mbrlen.m4 serial 2
 dnl Copyright (C) 2008 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -9,15 +9,188 @@
   AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
 
   AC_REQUIRE([AC_TYPE_MBSTATE_T])
+  AC_REQUIRE([gl_FUNC_MBRTOWC])
   AC_CHECK_FUNCS_ONCE([mbrlen])
   if test $ac_cv_func_mbrlen = no; then
     HAVE_MBRLEN=0
+  else
+    dnl Most bugs affecting the system's mbrtowc function also affect the
+    dnl mbrlen function. So override mbrlen whenever mbrtowc is overridden.
+    dnl We could also run the individual tests below; the results would be
+    dnl the same.
+    if test $REPLACE_MBRTOWC = 1; then
+      REPLACE_MBRLEN=1
+    fi
+  fi
+  if test $HAVE_MBRLEN = 0 || test $REPLACE_MBRLEN = 1; then
     gl_REPLACE_WCHAR_H
     AC_LIBOBJ([mbrlen])
     gl_PREREQ_MBRLEN
   fi
 ])
 
+dnl Test whether mbrlen puts the state into non-initial state when parsing an
+dnl incomplete multibyte character.
+dnl Result is gl_cv_func_mbrlen_incomplete_state.
+
+AC_DEFUN([gl_MBRLEN_INCOMPLETE_STATE],
+[
+  AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([gt_LOCALE_JA])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_CACHE_CHECK([whether mbrlen handles incomplete characters],
+    [gl_cv_func_mbrlen_incomplete_state],
+    [
+      dnl Initial guess, used when cross-compiling or when no suitable locale
+      dnl is present.
+changequote(,)dnl
+      case "$host_os" in
+              # Guess no on AIX and OSF/1.
+        osf*) gl_cv_func_mbrlen_incomplete_state="guessing no" ;;
+              # Guess yes otherwise.
+        *)    gl_cv_func_mbrlen_incomplete_state="guessing yes" ;;
+      esac
+changequote([,])dnl
+      if test $LOCALE_JA != none; then
+        AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+  if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
+    {
+      const char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+        if (mbsinit (&state))
+          return 1;
+    }
+  return 0;
+}],
+          [gl_cv_func_mbrlen_incomplete_state=yes],
+          [gl_cv_func_mbrlen_incomplete_state=no],
+          [])
+      fi
+    ])
+])
+
+dnl Test whether mbrlen, when parsing the end of a multibyte character,
+dnl correctly returns the number of bytes that were needed to complete the
+dnl character (not the total number of bytes of the multibyte character).
+dnl Result is gl_cv_func_mbrlen_retval.
+
+AC_DEFUN([gl_MBRLEN_RETVAL],
+[
+  AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([gt_LOCALE_FR_UTF8])
+  AC_REQUIRE([gt_LOCALE_JA])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_CACHE_CHECK([whether mbrlen has a correct return value],
+    [gl_cv_func_mbrlen_retval],
+    [
+      dnl Initial guess, used when cross-compiling or when no suitable locale
+      dnl is present.
+changequote(,)dnl
+      case "$host_os" in
+                          # Guess no on HP-UX and Solaris.
+        hpux* | solaris*) gl_cv_func_mbrlen_retval="guessing no" ;;
+                          # Guess yes otherwise.
+        *)                gl_cv_func_mbrlen_retval="guessing yes" ;;
+      esac
+changequote([,])dnl
+      if test $LOCALE_FR_UTF8 != none || test $LOCALE_JA != none; then
+        AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+  /* This fails on Solaris.  */
+  if (setlocale (LC_ALL, "$LOCALE_FR_UTF8") != NULL)
+    {
+      char input[] = "B\303\274\303\237er"; /* "Büßer" */
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+        {
+          input[1] = '\0';
+          if (mbrlen (input + 2, 5, &state) != 1)
+            return 1;
+        }
+    }
+  /* This fails on HP-UX 11.11.  */
+  if (setlocale (LC_ALL, "$LOCALE_JA") != NULL)
+    {
+      char input[] = "B\217\253\344\217\251\316er"; /* "Büßer" */
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrlen (input + 1, 1, &state) == (size_t)(-2))
+        {
+          input[1] = '\0';
+          if (mbrlen (input + 2, 5, &state) != 2)
+            return 1;
+        }
+    }
+  return 0;
+}],
+          [gl_cv_func_mbrlen_retval=yes],
+          [gl_cv_func_mbrlen_retval=no],
+          [])
+      fi
+    ])
+])
+
+dnl Test whether mbrlen, when parsing a NUL character, correctly returns 0.
+dnl Result is gl_cv_func_mbrlen_nul_retval.
+
+AC_DEFUN([gl_MBRLEN_NUL_RETVAL],
+[
+  AC_REQUIRE([AC_PROG_CC])
+  AC_REQUIRE([gt_LOCALE_ZH_CN])
+  AC_REQUIRE([AC_CANONICAL_HOST]) dnl for cross-compiles
+  AC_CACHE_CHECK([whether mbrlen returns 0 when parsing a NUL character],
+    [gl_cv_func_mbrlen_nul_retval],
+    [
+      dnl Initial guess, used when cross-compiling or when no suitable locale
+      dnl is present.
+changequote(,)dnl
+      case "$host_os" in
+                    # Guess no on Solaris 9.
+        solaris2.9) gl_cv_func_mbrlen_nul_retval="guessing no" ;;
+                    # Guess yes otherwise.
+        *)          gl_cv_func_mbrlen_nul_retval="guessing yes" ;;
+      esac
+changequote([,])dnl
+      if test $LOCALE_ZH_CN != none; then
+        AC_TRY_RUN([
+#include <locale.h>
+#include <string.h>
+#include <wchar.h>
+int main ()
+{
+  /* This crashes on Solaris 9 inside __mbrtowc_dense_gb18030.  */
+  if (setlocale (LC_ALL, "$LOCALE_ZH_CN") != NULL)
+    {
+      mbstate_t state;
+
+      memset (&state, '\0', sizeof (mbstate_t));
+      if (mbrlen ("", 1, &state) != 0)
+        return 1;
+    }
+  return 0;
+}],
+          [gl_cv_func_mbrlen_nul_retval=yes],
+          [gl_cv_func_mbrlen_nul_retval=no],
+          [])
+      fi
+    ])
+])
+
 # Prerequisites of lib/mbrlen.c.
 AC_DEFUN([gl_PREREQ_MBRLEN], [
   :
--- m4/wchar.m4.orig    2008-12-23 00:27:48.000000000 +0100
+++ m4/wchar.m4 2008-12-22 13:34:42.000000000 +0100
@@ -7,7 +7,7 @@
 
 dnl Written by Eric Blake.
 
-# wchar.m4 serial 21
+# wchar.m4 serial 22
 
 AC_DEFUN([gl_WCHAR_H],
 [
@@ -89,6 +89,7 @@
   REPLACE_WCTOB=0;     AC_SUBST([REPLACE_WCTOB])
   REPLACE_MBSINIT=0;   AC_SUBST([REPLACE_MBSINIT])
   REPLACE_MBRTOWC=0;   AC_SUBST([REPLACE_MBRTOWC])
+  REPLACE_MBRLEN=0;    AC_SUBST([REPLACE_MBRLEN])
   REPLACE_MBSRTOWCS=0; AC_SUBST([REPLACE_MBSRTOWCS])
   REPLACE_MBSNRTOWCS=0;AC_SUBST([REPLACE_MBSNRTOWCS])
   REPLACE_WCRTOMB=0;   AC_SUBST([REPLACE_WCRTOMB])
--- modules/wchar.orig  2008-12-23 00:27:48.000000000 +0100
+++ modules/wchar       2008-12-22 13:34:56.000000000 +0100
@@ -53,6 +53,7 @@
              -e 's|@''REPLACE_WCTOB''@|$(REPLACE_WCTOB)|g' \
              -e 's|@''REPLACE_MBSINIT''@|$(REPLACE_MBSINIT)|g' \
              -e 's|@''REPLACE_MBRTOWC''@|$(REPLACE_MBRTOWC)|g' \
+             -e 's|@''REPLACE_MBRLEN''@|$(REPLACE_MBRLEN)|g' \
              -e 's|@''REPLACE_MBSRTOWCS''@|$(REPLACE_MBSRTOWCS)|g' \
              -e 's|@''REPLACE_MBSNRTOWCS''@|$(REPLACE_MBSNRTOWCS)|g' \
              -e 's|@''REPLACE_WCRTOMB''@|$(REPLACE_WCRTOMB)|g' \




reply via email to

[Prev in Thread] Current Thread [Next in Thread]