emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] trunk r117735: Implement locale-sensitive string collation


From: Eli Zaretskii
Subject: [Emacs-diffs] trunk r117735: Implement locale-sensitive string collation for MS-Windows. (Bug#18051)
Date: Mon, 25 Aug 2014 15:56:56 +0000
User-agent: Bazaar (2.6b2)

------------------------------------------------------------
revno: 117735
revision-id: address@hidden
parent: address@hidden
fixes bug: http://debbugs.gnu.org/18051
committer: Eli Zaretskii <address@hidden>
branch nick: trunk
timestamp: Mon 2014-08-25 18:55:46 +0300
message:
  Implement locale-sensitive string collation for MS-Windows.  (Bug#18051)
  
   src/w32proc.c (get_lcid_callback, get_lcid, w32_compare_strings):
   New functions.
   src/w32.h (w32_compare_strings): Add prototype.
   src/w32.c <g_b_init_compare_string_w>: New global flag.
   (globals_of_w32): Initialize it.
   src/sysdep.c (str_collate) [WINDOWSNT]: Implementation for MS-Windows.
   src/fns.c (Fstring_collate_lessp, Fstring_collate_equalp)
   [WINDOWSNT]: Call str_collate on MS-Windows.
  
   etc/NEWS: Mention that string-collate-* functions are supported on
   MS-Windows as well.
modified:
  etc/ChangeLog                  changelog-20091113204419-o5vbwnq5f7feedwu-1485
  etc/NEWS                       news-20100311060928-aoit31wvzf25yr1z-1
  src/ChangeLog                  changelog-20091113204419-o5vbwnq5f7feedwu-1438
  src/fns.c                      fns.c-20091113204419-o5vbwnq5f7feedwu-203
  src/sysdep.c                   sysdep.c-20091113204419-o5vbwnq5f7feedwu-448
  src/w32.c                      w32.c-20091113204419-o5vbwnq5f7feedwu-808
  src/w32.h                      w32.h-20091113204419-o5vbwnq5f7feedwu-809
  src/w32proc.c                  w32proc.c-20091113204419-o5vbwnq5f7feedwu-814
=== modified file 'etc/ChangeLog'
--- a/etc/ChangeLog     2014-08-13 19:17:21 +0000
+++ b/etc/ChangeLog     2014-08-25 15:55:46 +0000
@@ -1,3 +1,8 @@
+2014-08-25  Eli Zaretskii  <address@hidden>
+
+       * NEWS: Mention that string-collate-* functions are supported on
+       MS-Windows as well.
+
 2014-08-08  Jan Nieuwenhuizen  <address@hidden>
 
        * compilation.txt (file): Add Guile backtrace example.

=== modified file 'etc/NEWS'
--- a/etc/NEWS  2014-08-24 15:47:06 +0000
+++ b/etc/NEWS  2014-08-25 15:55:46 +0000
@@ -68,9 +68,9 @@
 
 ** The new functions `string-collate-lessp' and `string-collate-equalp'
 preserve the collation order as defined by the system's locale(1)
-environment.  For the time being this is implemented for POSIX systems
-only, for other systems they fall back to their counterparts
-`string-lessp' and `string-equal'.
+environment.  For the time being this is implemented for modern POSIX
+systems and for MS-Windows; on other systems they fall back to their
+counterparts `string-lessp' and `string-equal'.
 
 
 * Editing Changes in Emacs 24.5

=== modified file 'src/ChangeLog'
--- a/src/ChangeLog     2014-08-25 07:00:42 +0000
+++ b/src/ChangeLog     2014-08-25 15:55:46 +0000
@@ -1,3 +1,19 @@
+2014-08-25  Eli Zaretskii  <address@hidden>
+
+       Implement locale-sensitive string collation for MS-Windows.
+       * w32proc.c (get_lcid_callback, get_lcid, w32_compare_strings):
+       New functions.  (Bug#18051)
+
+       * w32.h (w32_compare_strings): Add prototype.
+
+       * w32.c <g_b_init_compare_string_w>: New global flag.
+       (globals_of_w32): Initialize it.
+
+       * sysdep.c (str_collate) [WINDOWSNT]: Implementation for MS-Windows.
+
+       * fns.c (Fstring_collate_lessp, Fstring_collate_equalp)
+       [WINDOWSNT]: Call str_collate on MS-Windows.
+
 2014-08-25  Dmitry Antipov  <address@hidden>
 
        One more minor cleanup of font subsystem.

=== modified file 'src/fns.c'
--- a/src/fns.c 2014-08-25 05:44:57 +0000
+++ b/src/fns.c 2014-08-25 15:55:46 +0000
@@ -364,7 +364,7 @@
 it overrides the setting of your current locale.  */)
   (Lisp_Object s1, Lisp_Object s2)
 {
-#ifdef __STDC_ISO_10646__
+#if defined __STDC_ISO_10646__ || defined WINDOWSNT
   /* Check parameters.  */
   if (SYMBOLP (s1))
     s1 = SYMBOL_NAME (s1);
@@ -375,9 +375,9 @@
 
   return (str_collate (s1, s2) < 0) ? Qt : Qnil;
 
-#else
+#else  /* !__STDC_ISO_10646__, !WINDOWSNT */
   return Fstring_lessp (s1, s2);
-#endif /* __STDC_ISO_10646__ */
+#endif /* !__STDC_ISO_10646__, !WINDOWSNT */
 }
 
 DEFUN ("string-collate-equalp", Fstring_collate_equalp, Sstring_collate_equalp, 2, 2, 0,
@@ -401,7 +401,7 @@
 it overrides the setting of your current locale.  */)
   (Lisp_Object s1, Lisp_Object s2)
 {
-#ifdef __STDC_ISO_10646__
+#if defined __STDC_ISO_10646__ || defined WINDOWSNT
   /* Check parameters.  */
   if (SYMBOLP (s1))
     s1 = SYMBOL_NAME (s1);
@@ -412,9 +412,9 @@
 
   return (str_collate (s1, s2) == 0) ? Qt : Qnil;
 
-#else
+#else  /* !__STDC_ISO_10646__, !WINDOWSNT */
   return Fstring_equal (s1, s2);
-#endif /* __STDC_ISO_10646__ */
+#endif /* !__STDC_ISO_10646__, !WINDOWSNT */
 }
 
 static Lisp_Object concat (ptrdiff_t nargs, Lisp_Object *args,

=== modified file 'src/sysdep.c'
--- a/src/sysdep.c      2014-08-25 05:44:57 +0000
+++ b/src/sysdep.c      2014-08-25 15:55:46 +0000
@@ -3592,3 +3592,15 @@
   return res;
 }
 #endif /* __STDC_ISO_10646__ */
+
+#ifdef WINDOWSNT
+/* Compare the Lisp strings S1 and S2 by calling w32_compare_strings
+   on their data.  The locale name passed along is the value of
+   LC_COLLATE as looked up by Fgetenv_internal in
+   Vprocess_environment; if that lookup does not yield a string, NULL
+   is passed instead.  Return whatever w32_compare_strings returns,
+   including its error value.  */
+ptrdiff_t
+str_collate (Lisp_Object s1, Lisp_Object s2)
+{
+  Lisp_Object lc_collate =
+    Fgetenv_internal (build_string ("LC_COLLATE"), Vprocess_environment);
+  char *loc = STRINGP (lc_collate) ? SSDATA (lc_collate) : NULL;
+
+  /* Use SSDATA, as for LC_COLLATE above: w32_compare_strings takes
+     'const char *' arguments, and SDATA yields 'unsigned char *'.  */
+  return w32_compare_strings (SSDATA (s1), SSDATA (s2), loc);
+}
+#endif /* WINDOWSNT */

=== modified file 'src/w32.c'
--- a/src/w32.c 2014-07-14 19:23:18 +0000
+++ b/src/w32.c 2014-08-25 15:55:46 +0000
@@ -309,6 +309,8 @@
 static BOOL g_b_init_set_named_security_info_a;
 static BOOL g_b_init_get_adapters_info;
 
+BOOL g_b_init_compare_string_w;  /* Not static, unlike the flags above:
+                                     w32_compare_strings in w32proc.c
+                                     declares it extern and sets it once
+                                     its CompareStringW pointer is set.  */
+
 /*
   BEGIN: Wrapper functions around OpenProcessToken
   and other functions in advapi32.dll that are only
@@ -9068,6 +9070,7 @@
   g_b_init_set_named_security_info_w = 0;
   g_b_init_set_named_security_info_a = 0;
   g_b_init_get_adapters_info = 0;
+  g_b_init_compare_string_w = 0;
   num_of_processors = 0;
   /* The following sets a handler for shutdown notifications for
      console apps. This actually applies to Emacs in both console and

=== modified file 'src/w32.h'
--- a/src/w32.h 2014-07-10 19:09:26 +0000
+++ b/src/w32.h 2014-08-25 15:55:46 +0000
@@ -210,6 +210,9 @@
 extern int w32_memory_info (unsigned long long *, unsigned long long *,
                            unsigned long long *, unsigned long long *);
 
+/* Compare 2 UTF-8 strings in locale-dependent fashion.  The third
+   argument is the name of the locale to use; if it is NULL, or no
+   supported locale matches the name, the current thread's locale is
+   used.  The value is the result of CompareStringW minus 2.  On
+   failure, errno is set to EINVAL and _NSLCMPERROR is returned.  */
+extern int w32_compare_strings (const char *, const char *, char *);
+
 #ifdef HAVE_GNUTLS
 #include <gnutls/gnutls.h>
 

=== modified file 'src/w32proc.c'
--- a/src/w32proc.c     2014-03-26 10:21:55 +0000
+++ b/src/w32proc.c     2014-08-25 15:55:46 +0000
@@ -32,6 +32,7 @@
 #include <signal.h>
 #include <sys/file.h>
 #include <mbstring.h>
+#include <locale.h>
 
 /* must include CRT headers *before* config.h */
 #include <config.h>
@@ -3144,6 +3145,159 @@
   return Fw32_get_keyboard_layout ();
 }
 
+/* Two variables to interface between get_lcid and the
+   EnumSystemLocales callback function below.  get_lcid copies the
+   locale name to look for into LNAME and resets FOUND_LCID to zero
+   before starting the enumeration; the callback stores in FOUND_LCID
+   the LCID of the first enumerated locale whose name matches
+   LNAME.  */
+#ifndef LOCALE_NAME_MAX_LENGTH
+# define LOCALE_NAME_MAX_LENGTH 85
+#endif
+static LCID found_lcid;
+static char lname[3 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
+
+/* Callback function for EnumSystemLocales.  LOCALE_NUM_STR is the
+   candidate locale's ID written as a hexadecimal number.  Build the
+   string "LANG_CTRY" from the candidate's abbreviated language and
+   country names and compare it, ignoring letter-case, with the
+   beginning of LNAME; the match must be followed in LNAME by either
+   a '.' or the terminating null.  On a match, record the candidate
+   in FOUND_LCID and return FALSE, which stops the enumeration;
+   otherwise return TRUE to be called with the next locale.  */
+static BOOL CALLBACK
+get_lcid_callback (LPTSTR locale_num_str)
+{
+  char *num_end;
+  char abbrev[2 * LOCALE_NAME_MAX_LENGTH + 1 + 1];
+  LCID candidate = strtoul (locale_num_str, &num_end, 16);
+  size_t abbrev_len;
+
+  /* The language part of the name.  */
+  if (!GetLocaleInfo (candidate, LOCALE_SABBREVLANGNAME,
+                      abbrev, LOCALE_NAME_MAX_LENGTH))
+    return TRUE;
+
+  /* The country part, appended after the separator.  */
+  strcat (abbrev, "_");
+  if (!GetLocaleInfo (candidate, LOCALE_SABBREVCTRYNAME,
+                      abbrev + strlen (abbrev), LOCALE_NAME_MAX_LENGTH))
+    return TRUE;
+
+  abbrev_len = strlen (abbrev);
+  if (strnicmp (abbrev, lname, abbrev_len) != 0)
+    return TRUE;
+  /* Anything after the match other than a codeset suffix or the end
+     of the name means LNAME names a different locale.  */
+  if (lname[abbrev_len] != '.' && lname[abbrev_len] != '\0')
+    return TRUE;
+
+  found_lcid = candidate;
+  return FALSE;
+}
+
+/* Return the Locale ID (LCID) number given the locale's name, a
+   string, in LOCALE_NAME.  This works by enumerating all the locales
+   supported by the system, until we find one whose name matches
+   LOCALE_NAME.  Return zero if no locale matches.  The result of
+   the last successful lookup is cached, so repeated calls with the
+   same name do not enumerate the locales again.  */
+static LCID
+get_lcid (const char *locale_name)
+{
+  /* A simple cache.  */
+  static LCID last_lcid;
+  static char last_locale[1000];
+
+  /* The code below is not thread-safe, as it uses static variables.
+     But this function is called only from the Lisp thread.  */
+  if (last_lcid > 0 && strcmp (locale_name, last_locale) == 0)
+    return last_lcid;
+
+  /* Only the beginning of the name takes part in the match, so
+     truncating an overlong name here is harmless.  */
+  strncpy (lname, locale_name, sizeof (lname) - 1);
+  lname[sizeof (lname) - 1] = '\0';
+  found_lcid = 0;
+  EnumSystemLocales (get_lcid_callback, LCID_SUPPORTED);
+  /* LOCALE_NAME can match even when it is longer than LAST_LOCALE,
+     because the match above looks only at its truncated copy.  Cache
+     it only when it fits, to avoid overrunning the buffer.  */
+  if (found_lcid > 0 && strlen (locale_name) < sizeof (last_locale))
+    {
+      last_lcid = found_lcid;
+      strcpy (last_locale, locale_name);
+    }
+  return found_lcid;
+}
+
+#ifndef _NSLCMPERROR
+# define _NSLCMPERROR INT_MAX
+#endif
+
+/* Compare the null-terminated UTF-8 strings S1 and S2 using the
+   collation rules of a Windows locale.  LOCNAME, if non-NULL, is the
+   name of the locale to use; if it is NULL, or if get_lcid finds no
+   locale of that name, the current thread's locale is used.
+
+   Both strings are converted to wide characters and passed to
+   CompareStringW.  The value returned is the result of
+   CompareStringW minus 2.  If either string cannot be converted, if
+   CompareStringW cannot be found on Windows 9X, or if the comparison
+   itself fails, set errno to EINVAL and return _NSLCMPERROR.  */
+int
+w32_compare_strings (const char *s1, const char *s2, char *locname)
+{
+  LCID lcid = GetThreadLocale ();
+  wchar_t *string1_w, *string2_w;
+  int val, needed;
+  extern BOOL g_b_init_compare_string_w;
+  static int (WINAPI *pCompareStringW)(LCID, DWORD, LPCWSTR, int, LPCWSTR, int);
+
+  USE_SAFE_ALLOCA;
+
+  /* Set up the pointer to CompareStringW on the first call.  The
+     flag is set only on success, so a failed lookup is retried on
+     the next call.  */
+  if (!g_b_init_compare_string_w)
+    {
+      if (os_subtype == OS_9X)
+        {
+          pCompareStringW = GetProcAddress (LoadLibrary ("Unicows.dll"),
+                                            "CompareStringW");
+          if (!pCompareStringW)
+            {
+              errno = EINVAL;
+              /* This return value is compatible with wcscoll and
+                 other MS CRT functions.  */
+              return _NSLCMPERROR;
+            }
+        }
+      else
+        pCompareStringW = CompareStringW;
+
+      g_b_init_compare_string_w = 1;
+    }
+
+  /* Convert S1: the first call computes the size of the buffer, the
+     second call does the conversion.  */
+  needed = pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s1, -1, NULL, 0);
+  if (needed > 0)
+    {
+      SAFE_NALLOCA (string1_w, 1, needed + 1);
+      pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s1, -1,
+                            string1_w, needed);
+    }
+  else
+    {
+      errno = EINVAL;
+      return _NSLCMPERROR;
+    }
+
+  /* Likewise for S2.  */
+  needed = pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s2, -1, NULL, 0);
+  if (needed > 0)
+    {
+      SAFE_NALLOCA (string2_w, 1, needed + 1);
+      pMultiByteToWideChar (CP_UTF8, MB_ERR_INVALID_CHARS, s2, -1,
+                            string2_w, needed);
+    }
+  else
+    {
+      /* The buffer for S1 was already allocated.  */
+      SAFE_FREE ();
+      errno = EINVAL;
+      return _NSLCMPERROR;
+    }
+
+  if (locname)
+    {
+      /* Convert locale name string to LCID.  We don't want to use
+         LocaleNameToLCID because (a) it is only available since
+         Vista, and (b) it doesn't accept locale names returned by
+         'setlocale' and 'GetLocaleInfo'.  */
+      LCID new_lcid = get_lcid (locname);
+
+      /* Keep the thread's locale if the name was not found.  */
+      if (new_lcid > 0)
+        lcid = new_lcid;
+    }
+
+  /* FIXME: Need a way to control the FLAGS argument, perhaps via the
+     CODESET part of LOCNAME.  In particular, ls-lisp will want
+     NORM_IGNORESYMBOLS and sometimes LINGUISTIC_IGNORECASE or
+     NORM_IGNORECASE.  */
+  val = pCompareStringW (lcid, 0, string1_w, -1, string2_w, -1);
+  SAFE_FREE ();
+  /* A zero value means the comparison failed.  */
+  if (!val)
+    {
+      errno = EINVAL;
+      return _NSLCMPERROR;
+    }
+  return val - 2;
+}
+
+
 
 void
 syms_of_ntproc (void)


reply via email to

[Prev in Thread] Current Thread [Next in Thread]