uc_width now takes a precomputed is_cjk_encoding flag instead of an encoding
name, the u8/u16/u32 width declarations are dropped from uniwidth.h, and
rpl_wcwidth caches whether the locale's codeset is UTF-8 and/or CJK so that
the classification is not redone on every call.

Note: the lib/wcwidth.c hunk was revised after the index line was generated
(<string.h> include, NUL-terminated cache key with a validity flag, (void)
prototypes), so the post-image blob id on that index line is not
authoritative.

diff --git a/lib/uniwidth.h b/lib/uniwidth.h
index cdc7d96..3979187 100644
--- a/lib/uniwidth.h
+++ b/lib/uniwidth.h
@@ -37,25 +37,7 @@ extern "C" {
 
 /* Determine number of column positions required for UC.  */
 extern int
-       uc_width (ucs4_t uc, const char *encoding);
-
-/* Determine number of column positions required for first N units
-   (or fewer if S ends before this) in S.  */
-extern int
-       u8_width (const uint8_t *s, size_t n, const char *encoding);
-extern int
-       u16_width (const uint16_t *s, size_t n, const char *encoding);
-extern int
-       u32_width (const uint32_t *s, size_t n, const char *encoding);
-
-/* Determine number of column positions required for S.  */
-extern int
-       u8_strwidth (const uint8_t *s, const char *encoding);
-extern int
-       u16_strwidth (const uint16_t *s, const char *encoding);
-extern int
-       u32_strwidth (const uint32_t *s, const char *encoding);
-
+       uc_width (ucs4_t uc, int is_cjk_encoding);
 
 #ifdef __cplusplus
 }
diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c
index 4161c26..e43d789 100644
--- a/lib/uniwidth/width.c
+++ b/lib/uniwidth/width.c
@@ -20,8 +20,6 @@
 /* Specification.  */
 #include "uniwidth.h"
 
-#include "cjk.h"
-
 /*
  * Non-spacing attribute table.
  * Consists of:
@@ -267,7 +265,7 @@ static const signed char nonspacing_table_ind[240] = {
 
 /* Determine number of column positions required for UC.  */
 int
-uc_width (ucs4_t uc, const char *encoding)
+uc_width (ucs4_t uc, int is_cjk_encoding)
 {
   /* Test for non-spacing or control character.  */
   if ((uc >> 9) < 240)
@@ -317,7 +315,7 @@ uc_width (ucs4_t uc, const char *encoding)
   /* In ancient CJK encodings, Cyrillic and most other characters are
      double-width as well.  */
   if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
-      && is_cjk_encoding (encoding))
+      && is_cjk_encoding)
     return 2;
   return 1;
 }
diff --git a/lib/wcwidth.c b/lib/wcwidth.c
index 4885071..c2de83b 100644
--- a/lib/wcwidth.c
+++ b/lib/wcwidth.c
@@ -22,22 +22,75 @@
 /* Get iswprint.  */
 #include <wctype.h>
 
+/* Get strcmp, strlen, memcpy.  */
+#include <string.h>
+
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+
 #include "localcharset.h"
 #include "streq.h"
 #include "uniwidth.h"
 
 #undef wcwidth
 
+#include "uniwidth/cjk.h"
+
+/* Codeset name the flags below were computed for.  Meaningful only while
+   cached_encoding_valid is nonzero, and then always NUL-terminated.  */
+static char cached_encoding[32];
+static int cached_encoding_valid;
+static int cached_is_cjk_encoding;
+static int cached_is_utf8_encoding;
+
+/* Return the codeset name used as the cache key.  */
+static const char *
+locale_charset_simple (void)
+{
+#if HAVE_LANGINFO_CODESET
+  /* Most systems support nl_langinfo (CODESET) nowadays.  */
+  return nl_langinfo (CODESET);
+#else
+  /* Do the complex case.  */
+  return locale_charset ();
+#endif
+}
+
+/* Recompute the cached_is_*_encoding flags unless the codeset name is the
+   one they were last computed for.  */
+static void
+cache_encoding (void)
+{
+  const char *encoding = locale_charset_simple ();
+  size_t len;
+
+  if (cached_encoding_valid && strcmp (encoding, cached_encoding) == 0)
+    return;
+
+  /* Remember the name only if it fits entirely: a truncated copy could
+     compare equal to a different codeset sharing the same prefix.  */
+  len = strlen (encoding);
+  cached_encoding_valid = len < sizeof (cached_encoding);
+  if (cached_encoding_valid)
+    memcpy (cached_encoding, encoding, len + 1);
+
+  encoding = locale_charset ();
+  cached_is_utf8_encoding =
+    STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0);
+  cached_is_cjk_encoding = is_cjk_encoding (encoding);
+}
+
 int
 rpl_wcwidth (wchar_t wc)
 {
-  /* In UTF-8 locales, use a Unicode aware width function.  */
-  const char *encoding = locale_charset ();
-  if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0))
+  cache_encoding ();
+  /* In UTF-8 and CJK locales, use a Unicode aware width function.  */
+  if (cached_is_utf8_encoding || cached_is_cjk_encoding)
     {
-      /* We assume that in a UTF-8 locale, a wide character is the same as a
-         Unicode character.  */
-      return uc_width (wc, encoding);
+      /* We assume that in these locales, a wide character is the same as a
+         Unicode character.  */
+      return uc_width (wc, cached_is_cjk_encoding);
     }
   else
     {