bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

striconveha: add support for transliteration


From: Bruno Haible
Subject: striconveha: add support for transliteration
Date: Wed, 24 Jan 2007 01:55:43 +0100 (MET)
User-agent: KMail/1.5.4

This adds one more option to the string iconv modules: support for
transliteration, as implemented in glibc and GNU libiconv.

2007-01-23  Bruno Haible  <address@hidden>

        * lib/striconveha.h: Include <stdbool.h>.
        (mem_iconveha, str_iconveha): Add 'transliterate' argument.
        * lib/striconveha.c: Include allocsa.h, strdup.h, c-strcase.h.
        (mem_iconveha_notranslit): Renamed from mem_iconveha.
        (mem_iconveha): New function.
        (str_iconveha_notranslit): Renamed from str_iconveha.
        (str_iconveha): New function.
        * modules/striconveha (Depends-on): Add stdbool, allocsa, strdup,
        c-strcase.

*** lib/striconveha.h   23 Jan 2007 01:17:42 -0000      1.3
--- lib/striconveha.h   24 Jan 2007 00:49:48 -0000
***************
*** 19,24 ****
--- 19,26 ----
  #ifndef _STRICONVEHA_H
  #define _STRICONVEHA_H
  
+ #include <stdbool.h>
+ 
  #include "striconveh.h"
  
  
***************
*** 30,35 ****
--- 32,40 ----
  /* Convert an entire string from one encoding to another, using iconv.
     The original string is at [SRC,...,SRC+SRCLEN-1].
     The "from" encoding can also be a name defined for autodetection.
+    If TRANSLITERATE is true, transliteration will be attempted to avoid conversion
+    errors, for iconv implementations that support this.  Usually you'll choose
+    TRANSLITERATE = true if HANDLER != iconveh_error.
     If OFFSETS is not NULL, it should point to an array of SRCLEN integers; this
     array is filled with offsets into the result, i.e. the character starting
     at SRC[i] corresponds to the character starting at (*RESULTP)[OFFSETS[i]],
***************
*** 44,49 ****
--- 49,55 ----
  extern int
         mem_iconveha (const char *src, size_t srclen,
                     const char *from_codeset, const char *to_codeset,
+                    bool transliterate,
                     enum iconv_ilseq_handler handler,
                     size_t *offsets,
                     char **resultp, size_t *lengthp);
***************
*** 53,64 ****
--- 59,74 ----
     Both the "from" and the "to" encoding must use a single NUL byte at the
     end of the string (i.e. not UCS-2, UCS-4, UTF-16, UTF-32).
     The "from" encoding can also be a name defined for autodetection.
+    If TRANSLITERATE is true, transliteration will be attempted to avoid conversion
+    errors, for iconv implementations that support this.  Usually you'll choose
+    TRANSLITERATE = true if HANDLER != iconveh_error.
     Allocate a malloced memory block for the result.
     Return value: the freshly allocated resulting NUL-terminated string if
     successful, otherwise NULL and errno set.  */
  extern char *
         str_iconveha (const char *src,
                     const char *from_codeset, const char *to_codeset,
+                    bool transliterate,
                     enum iconv_ilseq_handler handler);
  
  
*** lib/striconveha.c   24 Jan 2007 00:48:01 -0000      1.3
--- lib/striconveha.c   24 Jan 2007 00:49:48 -0000
***************
*** 25,30 ****
--- 25,34 ----
  #include <stdlib.h>
  #include <string.h>
  
+ #include "allocsa.h"
+ #include "strdup.h"
+ #include "c-strcase.h"
+ 
  #define SIZEOF(a) (sizeof(a)/sizeof(a[0]))
  
  
***************
*** 143,154 ****
      }
  }
  
! int
! mem_iconveha (const char *src, size_t srclen,
!             const char *from_codeset, const char *to_codeset,
!             enum iconv_ilseq_handler handler,
!             size_t *offsets,
!             char **resultp, size_t *lengthp)
  {
    int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
                            offsets, resultp, lengthp);
--- 147,159 ----
      }
  }
  
! /* Like mem_iconveha, except no handling of transliteration.  */
! static int
! mem_iconveha_notranslit (const char *src, size_t srclen,
!                        const char *from_codeset, const char *to_codeset,
!                        enum iconv_ilseq_handler handler,
!                        size_t *offsets,
!                        char **resultp, size_t *lengthp)
  {
    int retval = mem_iconveh (src, srclen, from_codeset, to_codeset, handler,
                            offsets, resultp, lengthp);
***************
*** 171,180 ****
                encodings = alias->encodings_to_try;
                do
                  {
!                   retval = mem_iconveha (src, srclen,
!                                          *encodings, to_codeset,
!                                          iconveh_error, offsets,
!                                          resultp, lengthp);
                    if (!(retval < 0 && errno == EILSEQ))
                      return retval;
                    encodings++;
--- 176,185 ----
                encodings = alias->encodings_to_try;
                do
                  {
!                   retval = mem_iconveha_notranslit (src, srclen,
!                                                     *encodings, to_codeset,
!                                                     iconveh_error, offsets,
!                                                     resultp, lengthp);
                    if (!(retval < 0 && errno == EILSEQ))
                      return retval;
                    encodings++;
***************
*** 185,194 ****
            encodings = alias->encodings_to_try;
            do
              {
!               retval = mem_iconveha (src, srclen,
!                                      *encodings, to_codeset,
!                                      handler, offsets,
!                                      resultp, lengthp);
                if (!(retval < 0 && errno == EILSEQ))
                  return retval;
                encodings++;
--- 190,199 ----
            encodings = alias->encodings_to_try;
            do
              {
!               retval = mem_iconveha_notranslit (src, srclen,
!                                                 *encodings, to_codeset,
!                                                 handler, offsets,
!                                                 resultp, lengthp);
                if (!(retval < 0 && errno == EILSEQ))
                  return retval;
                encodings++;
***************
*** 205,214 ****
      }
  }
  
! char *
! str_iconveha (const char *src,
              const char *from_codeset, const char *to_codeset,
!             enum iconv_ilseq_handler handler)
  {
    char *result = str_iconveh (src, from_codeset, to_codeset, handler);
  
--- 210,261 ----
      }
  }
  
! int
! mem_iconveha (const char *src, size_t srclen,
              const char *from_codeset, const char *to_codeset,
!             bool transliterate,
!             enum iconv_ilseq_handler handler,
!             size_t *offsets,
!             char **resultp, size_t *lengthp)
! {
!   if (srclen == 0)
!     {
!       /* Nothing to convert.  */
!       *lengthp = 0;
!       return 0;
!     }
! 
!   /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
!      we want to use transliteration.  */
! #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
!   if (transliterate)
!     {
!       int retval;
!       size_t len = strlen (to_codeset);
!       char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1);
!       memcpy (to_codeset_suffixed, to_codeset, len);
!       memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
! 
!       retval = mem_iconveha_notranslit (src, srclen,
!                                       from_codeset, to_codeset_suffixed,
!                                       handler, offsets, resultp, lengthp);
! 
!       freesa (to_codeset_suffixed);
! 
!       return retval;
!     }
!   else
! #endif
!     return mem_iconveha_notranslit (src, srclen,
!                                   from_codeset, to_codeset,
!                                   handler, offsets, resultp, lengthp);
! }
! 
! /* Like str_iconveha, except no handling of transliteration.  */
! static char *
! str_iconveha_notranslit (const char *src,
!                        const char *from_codeset, const char *to_codeset,
!                        enum iconv_ilseq_handler handler)
  {
    char *result = str_iconveh (src, from_codeset, to_codeset, handler);
  
***************
*** 231,239 ****
                encodings = alias->encodings_to_try;
                do
                  {
!                   result = str_iconveha (src,
!                                          *encodings, to_codeset,
!                                          iconveh_error);
                    if (!(result == NULL && errno == EILSEQ))
                      return result;
                    encodings++;
--- 278,286 ----
                encodings = alias->encodings_to_try;
                do
                  {
!                   result = str_iconveha_notranslit (src,
!                                                     *encodings, to_codeset,
!                                                     iconveh_error);
                    if (!(result == NULL && errno == EILSEQ))
                      return result;
                    encodings++;
***************
*** 244,252 ****
            encodings = alias->encodings_to_try;
            do
              {
!               result = str_iconveha (src,
!                                      *encodings, to_codeset,
!                                      handler);
                if (!(result == NULL && errno == EILSEQ))
                  return result;
                encodings++;
--- 291,299 ----
            encodings = alias->encodings_to_try;
            do
              {
!               result = str_iconveha_notranslit (src,
!                                                 *encodings, to_codeset,
!                                                 handler);
                if (!(result == NULL && errno == EILSEQ))
                  return result;
                encodings++;
***************
*** 262,264 ****
--- 309,349 ----
        return NULL;
      }
  }
+ 
+ char *
+ str_iconveha (const char *src,
+             const char *from_codeset, const char *to_codeset,
+             bool transliterate,
+             enum iconv_ilseq_handler handler)
+ {
+   if (*src == '\0' || c_strcasecmp (from_codeset, to_codeset) == 0)
+     {
+       char *result = strdup (src);
+ 
+       if (result == NULL)
+       errno = ENOMEM;
+       return result;
+     }
+ 
+   /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
+      we want to use transliteration.  */
+ #if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 || _LIBICONV_VERSION >= 0x0105
+   if (transliterate)
+     {
+       char *result;
+       size_t len = strlen (to_codeset);
+       char *to_codeset_suffixed = (char *) allocsa (len + 10 + 1);
+       memcpy (to_codeset_suffixed, to_codeset, len);
+       memcpy (to_codeset_suffixed + len, "//TRANSLIT", 10 + 1);
+ 
+       result = str_iconveha_notranslit (src, from_codeset, to_codeset_suffixed,
+                                       handler);
+ 
+       freesa (to_codeset_suffixed);
+ 
+       return result;
+     }
+   else
+ #endif
+     return str_iconveha_notranslit (src, from_codeset, to_codeset, handler);
+ }
*** modules/striconveha 21 Jan 2007 22:59:19 -0000      1.1
--- modules/striconveha 24 Jan 2007 00:49:49 -0000
***************
*** 7,13 ****
--- 7,17 ----
  lib/striconveha.c
  
  Depends-on:
+ stdbool
  striconveh
+ allocsa
+ strdup
+ c-strcase
  
  configure.ac:
  





reply via email to

[Prev in Thread] Current Thread [Next in Thread]