diff --git a/lib/uniwidth.in.h b/lib/uniwidth.in.h
index e806744..8b962ef 100644
--- a/lib/uniwidth.in.h
+++ b/lib/uniwidth.in.h
@@ -38,7 +38,7 @@ extern "C" {
 
 /* Determine number of column positions required for UC.  */
 extern int
-       uc_width (ucs4_t uc, const char *encoding)
+       uc_width (ucs4_t uc, int is_cjk_encoding)
 #if __GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 96)
        __attribute__ ((__pure__))
 #endif
diff --git a/lib/uniwidth/u16-width.c b/lib/uniwidth/u16-width.c
index f8008f2..64cb66f 100644
--- a/lib/uniwidth/u16-width.c
+++ b/lib/uniwidth/u16-width.c
@@ -27,6 +27,7 @@ u16_width (const uint16_t *s, size_t n, const char *encoding)
 {
   const uint16_t *s_end = s + n;
   int width = 0;
+  int is_cjk = is_cjk_encoding (encoding);
 
   while (s < s_end)
     {
@@ -38,7 +39,7 @@ u16_width (const uint16_t *s, size_t n, const char *encoding)
       if (uc == 0)
         break; /* end of string reached */
 
-      w = uc_width (uc, encoding);
+      w = uc_width (uc, is_cjk);
       if (w >= 0) /* ignore control characters in the string */
         width += w;
     }
diff --git a/lib/uniwidth/u32-width.c b/lib/uniwidth/u32-width.c
index 60b5a35..55bbd3a 100644
--- a/lib/uniwidth/u32-width.c
+++ b/lib/uniwidth/u32-width.c
@@ -25,6 +25,7 @@ u32_width (const uint32_t *s, size_t n, const char *encoding)
 {
   const uint32_t *s_end = s + n;
   int width = 0;
+  int is_cjk = is_cjk_encoding (encoding);
 
   while (s < s_end)
     {
@@ -34,7 +35,7 @@ u32_width (const uint32_t *s, size_t n, const char *encoding)
       if (uc == 0)
         break; /* end of string reached */
 
-      w = uc_width (uc, encoding);
+      w = uc_width (uc, is_cjk);
       if (w >= 0) /* ignore control characters in the string */
         width += w;
     }
diff --git a/lib/uniwidth/u8-width.c b/lib/uniwidth/u8-width.c
index 96e5ea4..49e063e 100644
--- a/lib/uniwidth/u8-width.c
+++ b/lib/uniwidth/u8-width.c
@@ -27,6 +27,7 @@ u8_width (const uint8_t *s, size_t n, const char *encoding)
 {
   const uint8_t *s_end = s + n;
   int width = 0;
+  int is_cjk = is_cjk_encoding (encoding);
 
   while (s < s_end)
     {
@@ -38,7 +39,7 @@ u8_width (const uint8_t *s, size_t n, const char *encoding)
       if (uc == 0)
         break; /* end of string reached */
 
-      w = uc_width (uc, encoding);
+      w = uc_width (uc, is_cjk);
       if (w >= 0) /* ignore control characters in the string */
         width += w;
     }
diff --git a/lib/uniwidth/width.c b/lib/uniwidth/width.c
index a314e71..8eb2eff 100644
--- a/lib/uniwidth/width.c
+++ b/lib/uniwidth/width.c
@@ -20,8 +20,6 @@
 /* Specification.  */
 #include "uniwidth.h"
 
-#include "cjk.h"
-
 /*
  * Non-spacing attribute table.
  * Consists of:
@@ -312,7 +310,7 @@ static const signed char nonspacing_table_ind[240] = {
 
 /* Determine number of column positions required for UC.  */
 int
-uc_width (ucs4_t uc, const char *encoding)
+uc_width (ucs4_t uc, int is_cjk_encoding)
 {
   /* Test for non-spacing or control character.  */
   if ((uc >> 9) < 240)
@@ -361,8 +359,7 @@ uc_width (ucs4_t uc, const char *encoding)
     return 2;
   /* In ancient CJK encodings, Cyrillic and most other characters are
      double-width as well.  */
-  if (uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9
-      && is_cjk_encoding (encoding))
+  if (is_cjk_encoding && uc >= 0x00A1 && uc < 0xFF61 && uc != 0x20A9)
     return 2;
   return 1;
 }
diff --git a/lib/wcwidth.c b/lib/wcwidth.c
index a006ca7..49378bd 100644
--- a/lib/wcwidth.c
+++ b/lib/wcwidth.c
@@ -22,21 +22,66 @@
 /* Get iswprint.  */
 #include <wctype.h>
 
+#if HAVE_LANGINFO_CODESET
+# include <langinfo.h>
+#endif
+
 #include "localcharset.h"
 #include "streq.h"
 #include "uniwidth.h"
+#include "uniwidth/cjk.h"
+
+/* Most recently seen locale encoding name, and the properties derived
+   from it, so that wcwidth need not re-classify the encoding on every
+   call.  */
+static char cached_encoding[32];
+static int cached_is_cjk_encoding;
+static int cached_is_utf8_encoding;
+
+/* Return the current locale's encoding name: nl_langinfo (CODESET) where
+   available, otherwise locale_charset ().  */
+static const char *
+locale_charset_simple (void)
+{
+#if HAVE_LANGINFO_CODESET
+  /* Most systems support nl_langinfo (CODESET) nowadays.  */
+  return nl_langinfo (CODESET);
+#else
+  /* Do the complex case.  */
+  return locale_charset ();
+#endif
+}
+
+/* Recompute the cached encoding properties when the locale's encoding
+   name differs from the one recorded by the previous call.  */
+static void
+cache_encoding (void)
+{
+  const char *encoding = locale_charset_simple ();
+  if (!strncmp (encoding, cached_encoding, sizeof (cached_encoding)))
+    return;
+  /* Copy at most sizeof - 1 bytes: the final byte of the static buffer is
+     never written, so the cached name stays NUL-terminated.  */
+  strncpy (cached_encoding, encoding, sizeof (cached_encoding) - 1);
+  encoding = locale_charset ();
+  cached_is_utf8_encoding = STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0);
+  cached_is_cjk_encoding = is_cjk_encoding (encoding);
+}
+
 
 int
 wcwidth (wchar_t wc)
 #undef wcwidth
 {
-  /* In UTF-8 locales, use a Unicode aware width function.  */
-  const char *encoding = locale_charset ();
-  if (STREQ (encoding, "UTF-8", 'U', 'T', 'F', '-', '8', 0, 0, 0 ,0))
+  cache_encoding ();
+  /* In UTF-8 and legacy CJK locales, use a Unicode aware width function.  */
+  if (cached_is_utf8_encoding || cached_is_cjk_encoding)
     {
-      /* We assume that in a UTF-8 locale, a wide character is the same as a
-         Unicode character.  */
-      return uc_width (wc, encoding);
+      /* We assume that in these locales a wide character is the same as a
+         Unicode character.
+         NOTE(review): this used to be assumed for UTF-8 locales only;
+         confirm that it also holds for CJK locales on all targets.  */
+      return uc_width (wc, cached_is_cjk_encoding);
     }
   else
     {