bug-gnu-emacs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

bug#12055: Re: bug#12055: 24.1.50; Characters "á" and "é" are not correc


From: Eli Zaretskii
Subject: bug#12055: Re: bug#12055: 24.1.50; Characters "á" and "é" are not correctly displayed on a Windows terminal
Date: Sat, 28 Jul 2012 13:06:30 +0300

> Date: Sat, 28 Jul 2012 11:04:29 +0300
> From: Eli Zaretskii <address@hidden>
> Cc: address@hidden, address@hidden
> 
> > Date: Sat, 28 Jul 2012 03:12:12 +0200
> > From: Dani Moncayo <address@hidden>
> > Cc: address@hidden, address@hidden
> > 
> > > Please
> > > post here the exact output, and please tell for each pair of such
> > > messages which character did you type.
> > 
> > Sorry for the delay.  I've not had time until now.
> > 
> > Here is my data:
> 
> Thanks to both of you.  Now I see that my theory is correct, and I can
> sit down and code the solution for this problem.

Please try the patch below.  It works for me.

Please try it also when Unicode input is not used (it is by default on
Windows NT and later, as a result of this patch).  You can do that by
forcing w32_console_unicode_input to zero (either by modifying the
source of w32console.c and rebuilding, or by setting the variable's
value in GDB).

TIA


=== modified file 'lisp/international/mule-cmds.el'
--- lisp/international/mule-cmds.el     2012-07-25 23:11:23 +0000
+++ lisp/international/mule-cmds.el     2012-07-28 09:43:40 +0000
@@ -2655,23 +2655,29 @@ See also `locale-charset-language-names'
 
     ;; On Windows, override locale-coding-system,
     ;; default-file-name-coding-system, keyboard-coding-system,
-    ;; terminal-coding-system with system codepage.
+    ;; terminal-coding-system with the appropriate codepages.
     (when (boundp 'w32-ansi-code-page)
-      (let ((code-page-coding (intern (format "cp%d" w32-ansi-code-page))))
-       (when (coding-system-p code-page-coding)
-         (unless frame (setq locale-coding-system code-page-coding))
-         (set-keyboard-coding-system code-page-coding frame)
-         (set-terminal-coding-system code-page-coding frame)
-         ;; Set default-file-name-coding-system last, so that Emacs
-         ;; doesn't try to use cpNNNN when it defines keyboard and
-         ;; terminal encoding.  That's because the above two lines
-         ;; will want to load code-pages.el, where cpNNNN are
-         ;; defined; if default-file-name-coding-system were set to
-         ;; cpNNNN while these two lines run, Emacs will want to use
-         ;; it for encoding the file name it wants to load.  And that
-         ;; will fail, since cpNNNN is not yet usable until
-         ;; code-pages.el finishes loading.
-         (setq default-file-name-coding-system code-page-coding))))
+      (let ((ansi-code-page-coding (intern (format "cp%d" w32-ansi-code-page)))
+           (oem-code-page-coding
+            (intern (format "cp%d" (w32-get-console-codepage))))
+           ansi-cs-p oem-cs-p)
+       (and (coding-system-p ansi-code-page-coding)
+            (setq ansi-cs-p t))
+       (and (coding-system-p oem-code-page-coding)
+            (setq oem-cs-p t))
+       ;; Set the keyboard and display encoding to either the current
+       ;; ANSI codepage or the OEM codepage, depending on whether
+       ;; this is a GUI or a TTY frame.
+       (when ansi-cs-p
+         (unless frame (setq locale-coding-system ansi-code-page-coding))
+         (when (display-graphic-p frame)
+           (set-keyboard-coding-system ansi-code-page-coding frame)
+           (set-terminal-coding-system ansi-code-page-coding frame))
+         (setq default-file-name-coding-system ansi-code-page-coding))
+       (when oem-cs-p
+         (unless (display-graphic-p frame)
+           (set-keyboard-coding-system oem-code-page-coding frame)
+           (set-terminal-coding-system oem-code-page-coding frame)))))
 
     (when (eq system-type 'darwin)
       ;; On Darwin, file names are always encoded in utf-8, no matter

=== modified file 'src/w32console.c'
--- src/w32console.c    2012-06-28 07:50:27 +0000
+++ src/w32console.c    2012-07-28 09:48:41 +0000
@@ -37,6 +37,7 @@ along with GNU Emacs.  If not, see <http
 #include "termhooks.h"
 #include "termchar.h"
 #include "dispextern.h"
+#include "w32heap.h"   /* for os_subtype */
 #include "w32inevt.h"
 
 /* from window.c */
@@ -67,6 +68,7 @@ static CONSOLE_CURSOR_INFO prev_console_
 #endif
 
 HANDLE  keyboard_handle;
+int w32_console_unicode_input;
 
 
 /* Setting this as the ctrl handler prevents emacs from being killed when
@@ -786,6 +788,11 @@ initialize_w32_display (struct terminal 
                       info.srWindow.Left);
     }
 
+  if (os_subtype == OS_NT)
+    w32_console_unicode_input = 1;
+  else
+    w32_console_unicode_input = 0;
+
   /* Setup w32_display_info structure for this frame. */
 
   w32_initialize_display_info (build_string ("Console"));

=== modified file 'src/w32inevt.c'
--- src/w32inevt.c      2012-05-26 11:58:19 +0000
+++ src/w32inevt.c      2012-07-28 09:57:11 +0000
@@ -41,6 +41,7 @@ along with GNU Emacs.  If not, see <http
 #include "termchar.h"
 #include "w32heap.h"
 #include "w32term.h"
+#include "w32inevt.h"
 
 /* stdin, from w32console.c */
 extern HANDLE keyboard_handle;
@@ -61,6 +62,15 @@ static INPUT_RECORD *queue_ptr = event_q
 /* Temporarily store lead byte of DBCS input sequences.  */
 static char dbcs_lead = 0;
 
+static inline BOOL
+w32_read_console_input (HANDLE h, INPUT_RECORD *rec, DWORD recsize,
+                       DWORD *waiting)
+{
+  return (w32_console_unicode_input
+         ? ReadConsoleInputW (h, rec, recsize, waiting)
+         : ReadConsoleInputA (h, rec, recsize, waiting));
+}
+
 static int
 fill_queue (BOOL block)
 {
@@ -80,8 +90,8 @@ fill_queue (BOOL block)
        return 0;
     }
 
-  rc = ReadConsoleInput (keyboard_handle, event_queue, EVENT_QUEUE_SIZE,
-                        &events_waiting);
+  rc = w32_read_console_input (keyboard_handle, event_queue, EVENT_QUEUE_SIZE,
+                              &events_waiting);
   if (!rc)
     return -1;
   queue_ptr = event_queue;
@@ -224,7 +234,7 @@ w32_kbd_patch_key (KEY_EVENT_RECORD *eve
 #endif
 
   /* On NT, call ToUnicode instead and then convert to the current
-     locale's default codepage.  */
+     console input codepage.  */
   if (os_subtype == OS_NT)
     {
       WCHAR buf[128];
@@ -233,14 +243,9 @@ w32_kbd_patch_key (KEY_EVENT_RECORD *eve
                          keystate, buf, 128, 0);
       if (isdead > 0)
        {
-         char cp[20];
-         int cpId;
+         int cpId = GetConsoleCP ();
 
          event->uChar.UnicodeChar = buf[isdead - 1];
-
-         GetLocaleInfo (GetThreadLocale (),
-                        LOCALE_IDEFAULTANSICODEPAGE, cp, 20);
-         cpId = atoi (cp);
          isdead = WideCharToMultiByte (cpId, 0, buf, isdead,
                                        ansi_code, 4, NULL, NULL);
        }
@@ -447,26 +452,34 @@ key_event (KEY_EVENT_RECORD *event, stru
        }
       else if (event->uChar.AsciiChar > 0)
        {
+         /* Pure ASCII characters < 128.  */
          emacs_ev->kind = ASCII_KEYSTROKE_EVENT;
          emacs_ev->code = event->uChar.AsciiChar;
        }
-      else if (event->uChar.UnicodeChar > 0)
+      else if (event->uChar.UnicodeChar > 0
+              && w32_console_unicode_input)
        {
+         /* Unicode codepoint; only valid if we are using Unicode
+            console input mode.  */
          emacs_ev->kind = MULTIBYTE_CHAR_KEYSTROKE_EVENT;
          emacs_ev->code = event->uChar.UnicodeChar;
        }
       else
        {
-         /* Fallback for non-Unicode versions of Windows.  */
+         /* Fallback handling of non-ASCII characters for non-Unicode
+            versions of Windows, and for non-Unicode input on NT
+            family of Windows.  Only characters in the current
+            console codepage are supported by this fallback.  */
          wchar_t code;
          char dbcs[2];
-          char cp[20];
           int cpId;
 
-         /* Get the codepage to interpret this key with.  */
-          GetLocaleInfo (GetThreadLocale (),
-                        LOCALE_IDEFAULTANSICODEPAGE, cp, 20);
-          cpId = atoi (cp);
+         /* Get the current console input codepage to interpret this
+            key with.  Note that the system defaults for the OEM
+            codepage could have been changed by calling SetConsoleCP
+            or w32-set-console-codepage, so using GetLocaleInfo to
+            get LOCALE_IDEFAULTCODEPAGE is not TRT here.  */
+          cpId = GetConsoleCP ();
 
          dbcs[0] = dbcs_lead;
          dbcs[1] = event->uChar.AsciiChar;
@@ -501,6 +514,7 @@ key_event (KEY_EVENT_RECORD *event, stru
     }
   else
     {
+      /* Function keys and other non-character keys.  */
       emacs_ev->kind = NON_ASCII_KEYSTROKE_EVENT;
       emacs_ev->code = event->wVirtualKeyCode;
     }

=== modified file 'src/w32inevt.h'
--- src/w32inevt.h      2012-01-19 07:21:25 +0000
+++ src/w32inevt.h      2012-07-28 08:39:49 +0000
@@ -19,6 +19,8 @@ along with GNU Emacs.  If not, see <http
 #ifndef EMACS_W32INEVT_H
 #define EMACS_W32INEVT_H
 
+extern int w32_console_unicode_input;
+
 extern int w32_console_read_socket (struct terminal *term, int numchars,
                                    struct input_event *hold_quit);
 extern void w32_console_mouse_position (FRAME_PTR *f, int insist,






reply via email to

[Prev in Thread] Current Thread [Next in Thread]