bug-gnulib
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

case-mapping part of a Unicode string


From: Bruno Haible
Subject: case-mapping part of a Unicode string
Date: Tue, 30 Jun 2009 00:43:23 +0200
User-agent: KMail/1.9.9

About the <unicase.h> functions, Paolo Bonzini wrote in
<http://lists.gnu.org/archive/html/bug-libunistring/2009-04/msg00001.html>:
> It seems to me that there is a limitation, in that you cannot turn to
> lowercase/uppercase/titlecase parts of a string; for that you have to
> use uc_toupper/lower/title and forget about the locale-specific mappings.

This change resolves that limitation. I'm adding functions to case-convert part
of a string, within the context of the entire string. The main change is
below. For the documentation, please look in libunistring:
<http://git.savannah.gnu.org/gitweb/?p=libunistring.git;a=commitdiff;h=116c15a81610cdeab05883d1a8e5149c1964ab20#patch5>


*** lib/unicase.h.orig  2009-06-29 21:47:28.000000000 +0200
--- lib/unicase.h       2009-06-29 21:47:00.000000000 +0200
***************
*** 134,139 ****
--- 134,269 ----
                    uninorm_t nf,
                    uint32_t *resultbuf, size_t *lengthp);
  
+ /* The case-mapping context given by a prefix string.  */
+ typedef struct casing_prefix_context
+       {
+         /* These fields are private, undocumented.  */
+         uint32_t last_char_except_ignorable;
+         uint32_t last_char_normal_or_above;
+       }
+       casing_prefix_context_t;
+ /* The case-mapping context of the empty prefix string.  */
+ extern const casing_prefix_context_t unicase_empty_prefix_context;
+ /* Return the case-mapping context of a given prefix string.  */
+ extern casing_prefix_context_t
+        u8_casing_prefix_context (const uint8_t *s, size_t n);
+ extern casing_prefix_context_t
+        u16_casing_prefix_context (const uint16_t *s, size_t n);
+ extern casing_prefix_context_t
+        u32_casing_prefix_context (const uint32_t *s, size_t n);
+ /* Return the case-mapping context of the prefix concat(A, S), given the
+    case-mapping context of the prefix A.  */
+ extern casing_prefix_context_t
+        u8_casing_prefixes_context (const uint8_t *s, size_t n,
+                                  casing_prefix_context_t a_context);
+ extern casing_prefix_context_t
+        u16_casing_prefixes_context (const uint16_t *s, size_t n,
+                                   casing_prefix_context_t a_context);
+ extern casing_prefix_context_t
+        u32_casing_prefixes_context (const uint32_t *s, size_t n,
+                                   casing_prefix_context_t a_context);
+ 
+ /* The case-mapping context given by a suffix string.  */
+ typedef struct casing_suffix_context
+       {
+         /* These fields are private, undocumented.  */
+         uint32_t bits;
+         uint32_t unused_bits;
+       }
+       casing_suffix_context_t;
+ /* The case-mapping context of the empty suffix string.  */
+ extern const casing_suffix_context_t unicase_empty_suffix_context;
+ /* Return the case-mapping context of a given suffix string.  */
+ extern casing_suffix_context_t
+        u8_casing_suffix_context (const uint8_t *s, size_t n);
+ extern casing_suffix_context_t
+        u16_casing_suffix_context (const uint16_t *s, size_t n);
+ extern casing_suffix_context_t
+        u32_casing_suffix_context (const uint32_t *s, size_t n);
+ /* Return the case-mapping context of the suffix concat(S, A), given the
+    case-mapping context of the suffix A.  */
+ extern casing_suffix_context_t
+        u8_casing_suffixes_context (const uint8_t *s, size_t n,
+                                  casing_suffix_context_t a_context);
+ extern casing_suffix_context_t
+        u16_casing_suffixes_context (const uint16_t *s, size_t n,
+                                   casing_suffix_context_t a_context);
+ extern casing_suffix_context_t
+        u32_casing_suffixes_context (const uint32_t *s, size_t n,
+                                   casing_suffix_context_t a_context);
+ 
+ /* Return the uppercase mapping of a string that is surrounded by a prefix
+    and a suffix.  */
+ extern uint8_t *
+        u8_ct_toupper (const uint8_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint8_t *resultbuf, size_t *lengthp);
+ extern uint16_t *
+        u16_ct_toupper (const uint16_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint16_t *resultbuf, size_t *lengthp);
+ extern uint32_t *
+        u32_ct_toupper (const uint32_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint32_t *resultbuf, size_t *lengthp);
+ 
+ /* Return the lowercase mapping of a string that is surrounded by a prefix
+    and a suffix.  */
+ extern uint8_t *
+        u8_ct_tolower (const uint8_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint8_t *resultbuf, size_t *lengthp);
+ extern uint16_t *
+        u16_ct_tolower (const uint16_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint16_t *resultbuf, size_t *lengthp);
+ extern uint32_t *
+        u32_ct_tolower (const uint32_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint32_t *resultbuf, size_t *lengthp);
+ 
+ /* Return the titlecase mapping of a string that is surrounded by a prefix
+    and a suffix.  */
+ extern uint8_t *
+        u8_ct_totitle (const uint8_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint8_t *resultbuf, size_t *lengthp);
+ extern uint16_t *
+        u16_ct_totitle (const uint16_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint16_t *resultbuf, size_t *lengthp);
+ extern uint32_t *
+        u32_ct_totitle (const uint32_t *s, size_t n,
+                     casing_prefix_context_t prefix_context,
+                     casing_suffix_context_t suffix_context,
+                     const char *iso639_language,
+                     uninorm_t nf,
+                     uint32_t *resultbuf, size_t *lengthp);
+ 
  /* Return the case folded string.
     Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
     to comparing S1 and S2 with uN_casecmp().
***************
*** 151,156 ****
--- 281,308 ----
         u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
                     uninorm_t nf,
                     uint32_t *resultbuf, size_t *lengthp);
+ /* Likewise, for a string that is surrounded by a prefix and a suffix.  */
+ extern uint8_t *
+        u8_ct_casefold (const uint8_t *s, size_t n,
+                      casing_prefix_context_t prefix_context,
+                      casing_suffix_context_t suffix_context,
+                      const char *iso639_language,
+                      uninorm_t nf,
+                      uint8_t *resultbuf, size_t *lengthp);
+ extern uint16_t *
+        u16_ct_casefold (const uint16_t *s, size_t n,
+                       casing_prefix_context_t prefix_context,
+                       casing_suffix_context_t suffix_context,
+                       const char *iso639_language,
+                       uninorm_t nf,
+                       uint16_t *resultbuf, size_t *lengthp);
+ extern uint32_t *
+        u32_ct_casefold (const uint32_t *s, size_t n,
+                       casing_prefix_context_t prefix_context,
+                       casing_suffix_context_t suffix_context,
+                       const char *iso639_language,
+                       uninorm_t nf,
+                       uint32_t *resultbuf, size_t *lengthp);
  
  /* Compare S1 and S2, ignoring differences in case and normalization.
     The nf argument identifies the normalization form to apply after the


2009-06-29  Bruno Haible  <address@hidden>

        Define u32_casefold as a wrapper around u32_ct_casefold.
        * lib/unicase/u32-casefold.c: Update.
        * modules/unicase/u32-casefold (Depends-on): Add
        unicase/u32-ct-casefold, unicase/empty-prefix-context,
        unicase/empty-suffix-context. Clean up.

        Define u16_casefold as a wrapper around u16_ct_casefold.
        * lib/unicase/u16-casefold.c: Update.
        * modules/unicase/u16-casefold (Depends-on): Add
        unicase/u16-ct-casefold, unicase/empty-prefix-context,
        unicase/empty-suffix-context. Clean up.

        Define u8_casefold as a wrapper around u8_ct_casefold.
        * lib/unicase/u-casefold.h (FUNC): Delegate to U_CT_CASEFOLD.
        * lib/unicase/u8-casefold.c: Update.
        * modules/unicase/u8-casefold (Depends-on): Add unicase/u8-ct-casefold,
        unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up.

        Define u32_totitle as a wrapper around u32_ct_totitle.
        * lib/unicase/u32-totitle.c: Update.
        * modules/unicase/u32-totitle (Depends-on): Add unicase/u32-ct-totitle,
        unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up.

        Define u16_totitle as a wrapper around u16_ct_totitle.
        * lib/unicase/u16-totitle.c: Update.
        * modules/unicase/u16-totitle (Depends-on): Add unicase/u16-ct-totitle,
        unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up.

        Define u8_totitle as a wrapper around u8_ct_totitle.
        * lib/unicase/u-totitle.h (is_cased, is_case_ignorable): Remove
        functions.
        (FUNC): Delegate to U_CT_TOTITLE.
        * lib/unicase/u8-totitle.c: Update.
        * modules/unicase/u8-totitle (Depends-on): Add unicase/u8-ct-totitle,
        unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up.

        * lib/unicase/u32-tolower.c (u32_tolower): Update u32_casemap
        invocation.
        * modules/unicase/u32-tolower (Depends-on): Add
        unicase/empty-prefix-context, unicase/empty-suffix-context.

        * lib/unicase/u16-tolower.c (u16_tolower): Update u16_casemap
        invocation.
        * modules/unicase/u16-tolower (Depends-on): Add
        unicase/empty-prefix-context, unicase/empty-suffix-context.

        * lib/unicase/u8-tolower.c (u8_tolower): Update u8_casemap invocation.
        * modules/unicase/u8-tolower (Depends-on): Add
        unicase/empty-prefix-context, unicase/empty-suffix-context.

        * lib/unicase/u32-toupper.c (u32_toupper): Update u32_casemap
        invocation.
        * modules/unicase/u32-toupper (Depends-on): Add
        unicase/empty-prefix-context, unicase/empty-suffix-context.

        * lib/unicase/u16-toupper.c (u16_toupper): Update u16_casemap
        invocation.
        * modules/unicase/u16-toupper (Depends-on): Add
        unicase/empty-prefix-context, unicase/empty-suffix-context.

        * lib/unicase/u8-toupper.c (u8_toupper): Update u8_casemap invocation.
        * modules/unicase/u8-toupper (Depends-on): Add
        unicase/empty-prefix-context, unicase/empty-suffix-context.

        New module 'unicase/u32-ct-casefold'.
        * lib/unicase/u32-ct-casefold.c: New file.
        * modules/unicase/u32-ct-casefold: New file.

        New module 'unicase/u16-ct-casefold'.
        * lib/unicase/u16-ct-casefold.c: New file.
        * modules/unicase/u16-ct-casefold: New file.

        New module 'unicase/u8-ct-casefold'.
        * lib/unicase/u8-ct-casefold.c: New file.
        * lib/unicase/u-ct-casefold.h: New file, derived from
        lib/unicase/u-casefold.h.
        * modules/unicase/u8-ct-casefold: New file.

        New module 'unicase/u32-ct-totitle'.
        * lib/unicase/u32-ct-totitle.c: New file.
        * modules/unicase/u32-ct-totitle: New file.

        New module 'unicase/u16-ct-totitle'.
        * lib/unicase/u16-ct-totitle.c: New file.
        * modules/unicase/u16-ct-totitle: New file.

        New module 'unicase/u8-ct-totitle'.
        * lib/unicase/u8-ct-totitle.c: New file.
        * lib/unicase/u-ct-totitle.h: New file, derived from
        lib/unicase/u-totitle.h.
        * modules/unicase/u8-ct-totitle: New file.

        New module 'unicase/u32-ct-tolower'.
        * lib/unicase/u32-ct-tolower.c: New file.
        * modules/unicase/u32-ct-tolower: New file.

        New module 'unicase/u16-ct-tolower'.
        * lib/unicase/u16-ct-tolower.c: New file.
        * modules/unicase/u16-ct-tolower: New file.

        New module 'unicase/u8-ct-tolower'.
        * lib/unicase/u8-ct-tolower.c: New file.
        * modules/unicase/u8-ct-tolower: New file.

        New module 'unicase/u32-ct-toupper'.
        * lib/unicase/u32-ct-toupper.c: New file.
        * modules/unicase/u32-ct-toupper: New file.

        New module 'unicase/u16-ct-toupper'.
        * lib/unicase/u16-ct-toupper.c: New file.
        * modules/unicase/u16-ct-toupper: New file.

        New module 'unicase/u8-ct-toupper'.
        * lib/unicase/u8-ct-toupper.c: New file.
        * modules/unicase/u8-ct-toupper: New file.

        Add context arguments to u*_casemap functions.
        * lib/unicase/unicasemap.h: Include unicase.h.
        (u8_casemap, u16_casemap, u32_casemap): Add prefix_context and
        suffix_context arguments.
        * lib/unicase/u-casemap.h (is_cased, is_case_ignorable): Remove
        functions.
        (FUNC): Add prefix_context and suffix_context arguments. Use
        uc_is_cased and uc_is_case_ignorable.
        * lib/unicase/u8-casemap.c: Include caseprop.h and context.h.
        * lib/unicase/u16-casemap.c: Likewise.
        * lib/unicase/u32-casemap.c: Likewise.
        * modules/unicase/u8-casemap (Files): Add lib/unicase/context.h.
        (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
        * modules/unicase/u16-casemap (Files): Add lib/unicase/context.h.
        (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
        * modules/unicase/u32-casemap (Files): Add lib/unicase/context.h.
        (Depends-on): Add unicase/cased, unicase/ignorable. Clean up.

        New module 'unicase/u32-suffix-context'.
        * lib/unicase/u32-suffix-context.c: New file.
        * modules/unicase/u32-suffix-context: New file.

        New module 'unicase/u16-suffix-context'.
        * lib/unicase/u16-suffix-context.c: New file.
        * modules/unicase/u16-suffix-context: New file.

        New module 'unicase/u8-suffix-context'.
        * lib/unicase/u8-suffix-context.c: New file.
        * lib/unicase/u-suffix-context.h: New file.
        * modules/unicase/u8-suffix-context: New file.

        New module 'unicase/empty-suffix-context'.
        * lib/unicase/empty-suffix-context.c: New file.
        * modules/unicase/empty-suffix-context: New file.

        New module 'unicase/u32-prefix-context'.
        * lib/unicase/u32-prefix-context.c: New file.
        * modules/unicase/u32-prefix-context: New file.

        New module 'unicase/u16-prefix-context'.
        * lib/unicase/u16-prefix-context.c: New file.
        * modules/unicase/u16-prefix-context: New file.

        New module 'unicase/u8-prefix-context'.
        * lib/unicase/u8-prefix-context.c: New file.
        * lib/unicase/u-prefix-context.h: New file.
        * lib/unicase/context.h: New file.
        * modules/unicase/u8-prefix-context: New file.

        New module 'unicase/empty-prefix-context'.
        * lib/unicase/empty-prefix-context.c: New file.
        * modules/unicase/empty-prefix-context: New file.

        New module 'unicase/ignorable'.
        * lib/unicase/ignorable.c: New file.
        * modules/unicase/ignorable: New file.

        New module 'unicase/cased'.
        * lib/unicase/caseprop.h: New file.
        * lib/unicase/cased.c: New file.
        * modules/unicase/cased: New file.

        New functions for case mapping of substrings.
        * lib/unicase.h (casing_prefix_context_t): New type.
        (unicase_empty_prefix_context): New variable.
        (u8_casing_prefix_context, u16_casing_prefix_context,
        u32_casing_prefix_context, u8_casing_prefixes_context,
        u16_casing_prefixes_context, u32_casing_prefixes_context): New
        declarations.
        (casing_suffix_context_t): New type.
        (unicase_empty_suffix_context): New variable.
        (u8_casing_suffix_context, u16_casing_suffix_context,
        u32_casing_suffix_context, u8_casing_suffixes_context,
        u16_casing_suffixes_context, u32_casing_suffixes_context,
        u8_ct_toupper, u16_ct_toupper, u32_ct_toupper, u8_ct_tolower,
        u16_ct_tolower, u32_ct_tolower, u8_ct_totitle, u16_ct_totitle,
        u32_ct_totitle, u8_ct_casefold, u16_ct_casefold, u32_ct_casefold): New
        declarations.





reply via email to

[Prev in Thread] Current Thread [Next in Thread]