From f8fef903d535fa9ceb2677ab0c7dacc7692ea0f3 Mon Sep 17 00:00:00 2001 From: Julian Graham Date: Thu, 24 Dec 2009 00:25:19 -0500 Subject: [PATCH] Support for Unicode general categories * libguile/chars.c, libguile/chars.h (scm_char_general_category): New function. * test-suite/tests/chars.test: Unit tests for `char-general-category'. * doc/ref/api-data.texi (Characters): Documentation for `char-general-category'. --- doc/ref/api-data.texi | 91 +++++++++++++++++++++++++++++++++++++++++++ libguile/chars.c | 20 +++++++++ libguile/chars.h | 1 + test-suite/tests/chars.test | 7 +++- 4 files changed, 118 insertions(+), 1 deletions(-) diff --git a/doc/ref/api-data.texi b/doc/ref/api-data.texi index 6721b12..df5db48 100755 --- a/doc/ref/api-data.texi +++ b/doc/ref/api-data.texi @@ -1875,6 +1875,97 @@ Return @code{#t} iff @var{chr} is either uppercase or lowercase, else @code{#f}. @end deffn +@deffn {Scheme Procedure} char-general-category chr +@deffnx {C Function} scm_char_general_category (chr) +Return a symbol giving the one- or two-letter name of the Unicode +general category assigned to @var{chr} or @code{#f} if no named category +is assigned. The following table provides a list of category names +along with their meanings.
+ +@multitable @columnfractions .1 .4 .1 .4 +@item L + @tab Letter + @tab Pf + @tab Final quote punctuation +@item Lu + @tab Uppercase letter + @tab Po + @tab Other punctuation +@item Ll + @tab Lowercase letter + @tab S + @tab Symbol +@item Lt + @tab Titlecase letter + @tab Sm + @tab Math symbol +@item Lm + @tab Modifier letter + @tab Sc + @tab Currency symbol +@item Lo + @tab Other letter + @tab Sk + @tab Modifier symbol +@item M + @tab Mark + @tab So + @tab Other symbol +@item Mn + @tab Non-spacing mark + @tab Z + @tab Separator +@item Mc + @tab Combining spacing mark + @tab Zs + @tab Space separator +@item Me + @tab Enclosing mark + @tab Zl + @tab Line separator +@item N + @tab Number + @tab Zp + @tab Paragraph separator +@item Nd + @tab Decimal digit number + @tab C + @tab Other +@item Nl + @tab Letter number + @tab Cc + @tab Control +@item No + @tab Other number + @tab Cf + @tab Format +@item P + @tab Punctuation + @tab Cs + @tab Surrogate +@item Pc + @tab Connector punctuation + @tab Co + @tab Private use +@item Pd + @tab Dash punctuation + @tab Cn + @tab Unassigned +@item Ps + @tab Open punctuation + @tab + @tab +@item Pe + @tab Close punctuation + @tab + @tab +@item Pi + @tab Initial quote punctuation + @tab + @tab +@end multitable +@end deffn + @rnindex char->integer @deffn {Scheme Procedure} char->integer chr @deffnx {C Function} scm_char_to_integer (chr) diff --git a/libguile/chars.c b/libguile/chars.c index 1c4d106..36cb08d 100644 --- a/libguile/chars.c +++ b/libguile/chars.c @@ -25,6 +25,7 @@ #include #include #include +#include <unictype.h> #include "libguile/_scm.h" #include "libguile/validate.h" @@ -467,6 +468,25 @@ SCM_DEFINE (scm_char_titlecase, "char-titlecase", 1, 0, 0, } #undef FUNC_NAME +SCM_DEFINE (scm_char_general_category, "char-general-category", 1, 0, 0, + (SCM chr), +
"Return a symbol representing the Unicode general category of " + "@var{chr} or @code{#f} if a named category cannot be found.") +#define FUNC_NAME s_scm_char_general_category +{ + const char *sym; /* category name; NULL when the category has no name */ + uc_general_category_t cat; + + SCM_VALIDATE_CHAR (1, chr); + cat = uc_general_category (SCM_CHAR (chr)); + sym = uc_general_category_name (cat); + + if (sym != NULL) + return scm_from_locale_symbol (sym); + return SCM_BOOL_F; /* no named category for this character */ +} +#undef FUNC_NAME + diff --git a/libguile/chars.h b/libguile/chars.h index 2b00645..488dd25 100644 --- a/libguile/chars.h +++ b/libguile/chars.h @@ -81,6 +81,7 @@ SCM_API SCM scm_integer_to_char (SCM n); SCM_API SCM scm_char_upcase (SCM chr); SCM_API SCM scm_char_downcase (SCM chr); SCM_API SCM scm_char_titlecase (SCM chr); +SCM_API SCM scm_char_general_category (SCM chr); SCM_API scm_t_wchar scm_c_upcase (scm_t_wchar c); SCM_API scm_t_wchar scm_c_downcase (scm_t_wchar c); SCM_API scm_t_wchar scm_c_titlecase (scm_t_wchar c); diff --git a/test-suite/tests/chars.test b/test-suite/tests/chars.test index 72805d1..cd1572f 100644 --- a/test-suite/tests/chars.test +++ b/test-suite/tests/chars.test @@ -210,7 +210,12 @@ (not (char-is-both? #\newline)) (char-is-both? #\a) (char-is-both? #\Z) - (not (char-is-both? #\1))))) + (not (char-is-both? #\1)))) + + (pass-if "char-general-category" + (and (eq? (char-general-category #\a) 'Ll) + (eq? (char-general-category #\A) 'Lu) + (eq? (char-general-category #\762) 'Lt)))) (with-test-prefix "integer" -- 1.6.3.3