[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
case-mapping part of a Unicode string
From: |
Bruno Haible |
Subject: |
case-mapping part of a Unicode string |
Date: |
Tue, 30 Jun 2009 00:43:23 +0200 |
User-agent: |
KMail/1.9.9 |
About the <unicase.h> functions, Paolo Bonzini wrote in
<http://lists.gnu.org/archive/html/bug-libunistring/2009-04/msg00001.html>:
> It seems to me that there is a limitation, in that you cannot turn to
> lowercase/uppercase/titlecase parts of a string; for that you have to
> use uc_toupper/lower/title and forget about the locale-specific mappings.
The change below removes this limitation. I'm adding functions to case-convert
part of a string, within the context of the entire string. The main change is
below. For the documentation, please look in libunistring:
<http://git.savannah.gnu.org/gitweb/?p=libunistring.git;a=commitdiff;h=116c15a81610cdeab05883d1a8e5149c1964ab20#patch5>
*** lib/unicase.h.orig 2009-06-29 21:47:28.000000000 +0200
--- lib/unicase.h 2009-06-29 21:47:00.000000000 +0200
***************
*** 134,139 ****
--- 134,269 ----
uninorm_t nf,
uint32_t *resultbuf, size_t *lengthp);
+ /* The case-mapping context given by a prefix string. */
+ typedef struct casing_prefix_context
+ {
+ /* These fields are private, undocumented. */
+ uint32_t last_char_except_ignorable;
+ uint32_t last_char_normal_or_above;
+ }
+ casing_prefix_context_t;
+ /* The case-mapping context of the empty prefix string. */
+ extern const casing_prefix_context_t unicase_empty_prefix_context;
+ /* Return the case-mapping context of a given prefix string. */
+ extern casing_prefix_context_t
+ u8_casing_prefix_context (const uint8_t *s, size_t n);
+ extern casing_prefix_context_t
+ u16_casing_prefix_context (const uint16_t *s, size_t n);
+ extern casing_prefix_context_t
+ u32_casing_prefix_context (const uint32_t *s, size_t n);
+ /* Return the case-mapping context of the prefix concat(A, S), given the
+ case-mapping context of the prefix A. */
+ extern casing_prefix_context_t
+ u8_casing_prefixes_context (const uint8_t *s, size_t n,
+ casing_prefix_context_t a_context);
+ extern casing_prefix_context_t
+ u16_casing_prefixes_context (const uint16_t *s, size_t n,
+ casing_prefix_context_t a_context);
+ extern casing_prefix_context_t
+ u32_casing_prefixes_context (const uint32_t *s, size_t n,
+ casing_prefix_context_t a_context);
+
+ /* The case-mapping context given by a suffix string. */
+ typedef struct casing_suffix_context
+ {
+ /* These fields are private, undocumented. */
+ uint32_t bits;
+ uint32_t unused_bits;
+ }
+ casing_suffix_context_t;
+ /* The case-mapping context of the empty suffix string. */
+ extern const casing_suffix_context_t unicase_empty_suffix_context;
+ /* Return the case-mapping context of a given suffix string. */
+ extern casing_suffix_context_t
+ u8_casing_suffix_context (const uint8_t *s, size_t n);
+ extern casing_suffix_context_t
+ u16_casing_suffix_context (const uint16_t *s, size_t n);
+ extern casing_suffix_context_t
+ u32_casing_suffix_context (const uint32_t *s, size_t n);
+ /* Return the case-mapping context of the suffix concat(S, A), given the
+ case-mapping context of the suffix A. */
+ extern casing_suffix_context_t
+ u8_casing_suffixes_context (const uint8_t *s, size_t n,
+ casing_suffix_context_t a_context);
+ extern casing_suffix_context_t
+ u16_casing_suffixes_context (const uint16_t *s, size_t n,
+ casing_suffix_context_t a_context);
+ extern casing_suffix_context_t
+ u32_casing_suffixes_context (const uint32_t *s, size_t n,
+ casing_suffix_context_t a_context);
+
+ /* Return the uppercase mapping of a string that is surrounded by a prefix
+ and a suffix. */
+ extern uint8_t *
+ u8_ct_toupper (const uint8_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint8_t *resultbuf, size_t *lengthp);
+ extern uint16_t *
+ u16_ct_toupper (const uint16_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint16_t *resultbuf, size_t *lengthp);
+ extern uint32_t *
+ u32_ct_toupper (const uint32_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint32_t *resultbuf, size_t *lengthp);
+
+ /* Return the lowercase mapping of a string that is surrounded by a prefix
+ and a suffix. */
+ extern uint8_t *
+ u8_ct_tolower (const uint8_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint8_t *resultbuf, size_t *lengthp);
+ extern uint16_t *
+ u16_ct_tolower (const uint16_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint16_t *resultbuf, size_t *lengthp);
+ extern uint32_t *
+ u32_ct_tolower (const uint32_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint32_t *resultbuf, size_t *lengthp);
+
+ /* Return the titlecase mapping of a string that is surrounded by a prefix
+ and a suffix. */
+ extern uint8_t *
+ u8_ct_totitle (const uint8_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint8_t *resultbuf, size_t *lengthp);
+ extern uint16_t *
+ u16_ct_totitle (const uint16_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint16_t *resultbuf, size_t *lengthp);
+ extern uint32_t *
+ u32_ct_totitle (const uint32_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint32_t *resultbuf, size_t *lengthp);
+
/* Return the case folded string.
Comparing uN_casefold (S1) and uN_casefold (S2) with uN_cmp2() is equivalent
to comparing S1 and S2 with uN_casecmp().
***************
*** 151,156 ****
--- 281,308 ----
u32_casefold (const uint32_t *s, size_t n, const char *iso639_language,
uninorm_t nf,
uint32_t *resultbuf, size_t *lengthp);
+ /* Likewise, for a string that is surrounded by a prefix and a suffix. */
+ extern uint8_t *
+ u8_ct_casefold (const uint8_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint8_t *resultbuf, size_t *lengthp);
+ extern uint16_t *
+ u16_ct_casefold (const uint16_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint16_t *resultbuf, size_t *lengthp);
+ extern uint32_t *
+ u32_ct_casefold (const uint32_t *s, size_t n,
+ casing_prefix_context_t prefix_context,
+ casing_suffix_context_t suffix_context,
+ const char *iso639_language,
+ uninorm_t nf,
+ uint32_t *resultbuf, size_t *lengthp);
/* Compare S1 and S2, ignoring differences in case and normalization.
The nf argument identifies the normalization form to apply after the
2009-06-29 Bruno Haible <address@hidden>
Define u32_casefold as a wrapper around u32_ct_casefold.
* lib/unicase/u32-casefold.c: Update.
* modules/unicase/u32-casefold (Depends-on): Add
unicase/u32-ct-casefold, unicase/empty-prefix-context,
unicase/empty-suffix-context. Clean up.
Define u16_casefold as a wrapper around u16_ct_casefold.
* lib/unicase/u16-casefold.c: Update.
* modules/unicase/u16-casefold (Depends-on): Add
unicase/u16-ct-casefold, unicase/empty-prefix-context,
unicase/empty-suffix-context. Clean up.
Define u8_casefold as a wrapper around u8_ct_casefold.
* lib/unicase/u-casefold.h (FUNC): Delegate to U_CT_CASEFOLD.
* lib/unicase/u8-casefold.c: Update.
* modules/unicase/u8-casefold (Depends-on): Add unicase/u8-ct-casefold,
unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up.
Define u32_totitle as a wrapper around u32_ct_totitle.
* lib/unicase/u32-totitle.c: Update.
* modules/unicase/u32-totitle (Depends-on): Add unicase/u32-ct-totitle,
unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up.
Define u16_totitle as a wrapper around u16_ct_totitle.
* lib/unicase/u16-totitle.c: Update.
* modules/unicase/u16-totitle (Depends-on): Add unicase/u16-ct-totitle,
unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up.
Define u8_totitle as a wrapper around u8_ct_totitle.
* lib/unicase/u-totitle.h (is_cased, is_case_ignorable): Remove
functions.
(FUNC): Delegate to U_CT_TOTITLE.
* lib/unicase/u8-totitle.c: Update.
* modules/unicase/u8-totitle (Depends-on): Add unicase/u8-ct-totitle,
unicase/empty-prefix-context, unicase/empty-suffix-context. Clean up.
* lib/unicase/u32-tolower.c (u32_tolower): Update u32_casemap
invocation.
* modules/unicase/u32-tolower (Depends-on): Add
unicase/empty-prefix-context, unicase/empty-suffix-context.
* lib/unicase/u16-tolower.c (u16_tolower): Update u16_casemap
invocation.
* modules/unicase/u16-tolower (Depends-on): Add
unicase/empty-prefix-context, unicase/empty-suffix-context.
* lib/unicase/u8-tolower.c (u8_tolower): Update u8_casemap invocation.
* modules/unicase/u8-tolower (Depends-on): Add
unicase/empty-prefix-context, unicase/empty-suffix-context.
* lib/unicase/u32-toupper.c (u32_toupper): Update u32_casemap
invocation.
* modules/unicase/u32-toupper (Depends-on): Add
unicase/empty-prefix-context, unicase/empty-suffix-context.
* lib/unicase/u16-toupper.c (u16_toupper): Update u16_casemap
invocation.
* modules/unicase/u16-toupper (Depends-on): Add
unicase/empty-prefix-context, unicase/empty-suffix-context.
* lib/unicase/u8-toupper.c (u8_toupper): Update u8_casemap invocation.
* modules/unicase/u8-toupper (Depends-on): Add
unicase/empty-prefix-context, unicase/empty-suffix-context.
New module 'unicase/u32-ct-casefold'.
* lib/unicase/u32-ct-casefold.c: New file.
* modules/unicase/u32-ct-casefold: New file.
New module 'unicase/u16-ct-casefold'.
* lib/unicase/u16-ct-casefold.c: New file.
* modules/unicase/u16-ct-casefold: New file.
New module 'unicase/u8-ct-casefold'.
* lib/unicase/u8-ct-casefold.c: New file.
* lib/unicase/u-ct-casefold.h: New file, derived from
lib/unicase/u-casefold.h.
* modules/unicase/u8-ct-casefold: New file.
New module 'unicase/u32-ct-totitle'.
* lib/unicase/u32-ct-totitle.c: New file.
* modules/unicase/u32-ct-totitle: New file.
New module 'unicase/u16-ct-totitle'.
* lib/unicase/u16-ct-totitle.c: New file.
* modules/unicase/u16-ct-totitle: New file.
New module 'unicase/u8-ct-totitle'.
* lib/unicase/u8-ct-totitle.c: New file.
* lib/unicase/u-ct-totitle.h: New file, derived from
lib/unicase/u-totitle.h.
* modules/unicase/u8-ct-totitle: New file.
New module 'unicase/u32-ct-tolower'.
* lib/unicase/u32-ct-tolower.c: New file.
* modules/unicase/u32-ct-tolower: New file.
New module 'unicase/u16-ct-tolower'.
* lib/unicase/u16-ct-tolower.c: New file.
* modules/unicase/u16-ct-tolower: New file.
New module 'unicase/u8-ct-tolower'.
* lib/unicase/u8-ct-tolower.c: New file.
* modules/unicase/u8-ct-tolower: New file.
New module 'unicase/u32-ct-toupper'.
* lib/unicase/u32-ct-toupper.c: New file.
* modules/unicase/u32-ct-toupper: New file.
New module 'unicase/u16-ct-toupper'.
* lib/unicase/u16-ct-toupper.c: New file.
* modules/unicase/u16-ct-toupper: New file.
New module 'unicase/u8-ct-toupper'.
* lib/unicase/u8-ct-toupper.c: New file.
* modules/unicase/u8-ct-toupper: New file.
Add context arguments to u*_casemap functions.
* lib/unicase/unicasemap.h: Include unicase.h.
(u8_casemap, u16_casemap, u32_casemap): Add prefix_context and
suffix_context arguments.
* lib/unicase/u-casemap.h (is_cased, is_case_ignorable): Remove
functions.
(FUNC): Add prefix_context and suffix_context arguments. Use
uc_is_cased and uc_is_case_ignorable.
* lib/unicase/u8-casemap.c: Include caseprop.h and context.h.
* lib/unicase/u16-casemap.c: Likewise.
* lib/unicase/u32-casemap.c: Likewise.
* modules/unicase/u8-casemap (Files): Add lib/unicase/context.h.
(Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
* modules/unicase/u16-casemap (Files): Add lib/unicase/context.h.
(Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
* modules/unicase/u32-casemap (Files): Add lib/unicase/context.h.
(Depends-on): Add unicase/cased, unicase/ignorable. Clean up.
New module 'unicase/u32-suffix-context'.
* lib/unicase/u32-suffix-context.c: New file.
* modules/unicase/u32-suffix-context: New file.
New module 'unicase/u16-suffix-context'.
* lib/unicase/u16-suffix-context.c: New file.
* modules/unicase/u16-suffix-context: New file.
New module 'unicase/u8-suffix-context'.
* lib/unicase/u8-suffix-context.c: New file.
* lib/unicase/u-suffix-context.h: New file.
* modules/unicase/u8-suffix-context: New file.
New module 'unicase/empty-suffix-context'.
* lib/unicase/empty-suffix-context.c: New file.
* modules/unicase/empty-suffix-context: New file.
New module 'unicase/u32-prefix-context'.
* lib/unicase/u32-prefix-context.c: New file.
* modules/unicase/u32-prefix-context: New file.
New module 'unicase/u16-prefix-context'.
* lib/unicase/u16-prefix-context.c: New file.
* modules/unicase/u16-prefix-context: New file.
New module 'unicase/u8-prefix-context'.
* lib/unicase/u8-prefix-context.c: New file.
* lib/unicase/u-prefix-context.h: New file.
* lib/unicase/context.h: New file.
* modules/unicase/u8-prefix-context: New file.
New module 'unicase/empty-prefix-context'.
* lib/unicase/empty-prefix-context.c: New file.
* modules/unicase/empty-prefix-context: New file.
New module 'unicase/ignorable'.
* lib/unicase/ignorable.c: New file.
* modules/unicase/ignorable: New file.
New module 'unicase/cased'.
* lib/unicase/caseprop.h: New file.
* lib/unicase/cased.c: New file.
* modules/unicase/cased: New file.
New functions for case mapping of substrings.
* lib/unicase.h (casing_prefix_context_t): New type.
(unicase_empty_prefix_context): New variable.
(u8_casing_prefix_context, u16_casing_prefix_context,
u32_casing_prefix_context, u8_casing_prefixes_context,
u16_casing_prefixes_context, u32_casing_prefixes_context): New
declarations.
(casing_suffix_context_t): New type.
(unicase_empty_suffix_context): New variable.
(u8_casing_suffix_context, u16_casing_suffix_context,
u32_casing_suffix_context, u8_casing_suffixes_context,
u16_casing_suffixes_context, u32_casing_suffixes_context,
u8_ct_toupper, u16_ct_toupper, u32_ct_toupper, u8_ct_tolower,
u16_ct_tolower, u32_ct_tolower, u8_ct_totitle, u16_ct_totitle,
u32_ct_totitle, u8_ct_casefold, u16_ct_casefold, u32_ct_casefold): New
declarations.
- case-mapping part of a Unicode string,
Bruno Haible <=