emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Bugfix for utf-8 XTerm/MinTTY and (set-input-meta-mode t)


From: Max Mikhanosha
Subject: Bugfix for utf-8 XTerm/MinTTY and (set-input-meta-mode t)
Date: Tue, 01 Jun 2021 16:19:40 +0000

Emacs incorrectly handles (set-input-meta-mode t) (meta sent in the 8th bit of
input) when the terminal is in UTF-8 mode.

Both XTerm and MinTTY, when configured to send the meta modifier as the 8th bit
while in UTF-8 mode, first set the 8th bit and then encode the resulting
character as UTF-8. For example, Meta-x is sent as ?x + 128 = 120 + 128 = code
point 248 (U+00F8), which is UTF-8 encoded as 0xc3,0xb8.

But Emacs handles meta modifier in the 8th bit in tty_read_avail_input, before 
decoding the raw keyboard input.

So it erroneously treats 0xc3,0xb8 input as two ordinary ASCII characters with 
meta modifier set, stripping the 8th bit and garbling the input.

This problem has existed for a long time and has frustrated at least a few
hundred people, as can be seen from the view count on the Stack Overflow
article that comes up when googling "emacs utf8 xterm".

The patch below fixes this bug by making 8th-bit meta key handling work
correctly in UTF-8 mode.

I have tested it with xterm and mintty; meta keys and meta-control keys now
work correctly regardless of whether the terminal is in UTF-8 mode.

Diff against Emacs-26 branch pasted below

diff --git a/src/coding.c b/src/coding.c
index 078c1c4e6a..743fceb32c 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -5989,6 +5989,11 @@ raw_text_coding_system_p (struct coding_system *coding)
          && coding->encoder == encode_coding_raw_text) ? true : false;
 }

+bool utf_8_input_coding_system_p(struct coding_system *coding)
+{
+  return (coding->decoder == decode_coding_utf_8) ? true : false;
+}
+

 /* If CODING_SYSTEM doesn't specify end-of-line format, return one of
    the subsidiary that has the same eol-spec as PARENT (if it is not
diff --git a/src/coding.h b/src/coding.h
index aab8c2d438..6124330a1f 100644
--- a/src/coding.h
+++ b/src/coding.h
@@ -702,6 +702,7 @@ extern Lisp_Object encode_file_name (Lisp_Object);
 extern Lisp_Object decode_file_name (Lisp_Object);
 extern Lisp_Object raw_text_coding_system (Lisp_Object);
 extern bool raw_text_coding_system_p (struct coding_system *);
+extern bool utf_8_input_coding_system_p (struct coding_system *);
 extern Lisp_Object coding_inherit_eol_type (Lisp_Object, Lisp_Object);
 extern Lisp_Object complement_process_encoding_system (Lisp_Object);

diff --git a/src/keyboard.c b/src/keyboard.c
index aa3448439b..84acf4a998 100644
--- a/src/keyboard.c
+++ b/src/keyboard.c
@@ -2235,14 +2235,16 @@ read_decoded_event_from_main_queue (struct timespec 
*end_time,
        return nextevt;         /* No decoding needed.  */
       else
        {
+         struct coding_system *coding = TERMINAL_KEYBOARD_CODING (terminal);
+         bool utf8_input_terminal = utf_8_input_coding_system_p (coding);
          int meta_key = terminal->display_info.tty->meta_key;
+
          eassert (n < MAX_ENCODED_BYTES);
          events[n++] = nextevt;
+
          if (NATNUMP (nextevt)
-             && XINT (nextevt) < (meta_key == 1 ? 0x80 : 0x100))
+             && XINT (nextevt) < ((meta_key == 1 && !utf8_input_terminal) ? 
0x80 : 0x100))
            { /* An encoded byte sequence, let's try to decode it.  */
-             struct coding_system *coding
-               = TERMINAL_KEYBOARD_CODING (terminal);

              if (raw_text_coding_system_p (coding))
                {
@@ -2253,12 +2255,13 @@ read_decoded_event_from_main_queue (struct timespec 
*end_time,
                }
              else
                {
+
                  unsigned char src[MAX_ENCODED_BYTES];
                  unsigned char dest[MAX_ENCODED_BYTES * MAX_MULTIBYTE_LENGTH];
                  int i;
                  for (i = 0; i < n; i++)
                    src[i] = XINT (events[i]);
-                 if (meta_key != 2)
+                 if (!utf8_input_terminal && meta_key != 2)
                    for (i = 0; i < n; i++)
                      src[i] &= ~0x80;
                  coding->destination = dest;
@@ -2275,8 +2278,21 @@ read_decoded_event_from_main_queue (struct timespec 
*end_time,
                      const unsigned char *p = coding->destination;
                      eassert (coding->carryover_bytes == 0);
                      n = 0;
-                     while (n < coding->produced_char)
-                       events[n++] = make_number (STRING_CHAR_ADVANCE (p));
+                      while (n < coding->produced_char)
+                        {
+                          int c = STRING_CHAR_ADVANCE (p);
+                         if (utf8_input_terminal)
+                           {
+                             /* put meta modifier on the key */
+                             int modifier = 0;
+                             if (meta_key == 1 && c < 0x100 && (c & 0x80))
+                               modifier = meta_modifier;
+                             if (meta_key != 2)
+                               c &= ~0x80;
+                             c |= modifier;
+                           }
+                         events[n++] = make_number (c);
+                        }
                    }
                }
            }
@@ -7118,16 +7134,31 @@ tty_read_avail_input (struct terminal *terminal,
 #endif /* not MSDOS */
 #endif /* not WINDOWSNT */

+  bool utf8_input_terminal = utf_8_input_coding_system_p 
(TERMINAL_KEYBOARD_CODING(terminal));
+
   for (i = 0; i < nread; i++)
     {
       struct input_event buf;
       EVENT_INIT (buf);
       buf.kind = ASCII_KEYSTROKE_EVENT;
       buf.modifiers = 0;
-      if (tty->meta_key == 1 && (cbuf[i] & 0x80))
-        buf.modifiers = meta_modifier;
-      if (tty->meta_key != 2)
-        cbuf[i] &= ~0x80;
+
+      /* Both XTerm and MinTTY in utf8:true + MetaSendEscape:false mode
+         send Meta + ASCII letters by first adding 0x80, and then UTF-8
+         encoding the result.
+
+         Therefore trying to detect 0x80 meta key flag now not only
+         confuses meta key with UTF-8 encoding, but also loses
+         information by stripping the 8th bit from UTF-8 input before
+         decoding
+      */
+      if (!utf8_input_terminal)
+       {
+         if (tty->meta_key == 1 && (cbuf[i] & 0x80))
+           buf.modifiers = meta_modifier;
+         if (tty->meta_key != 2)
+           cbuf[i] &= ~0x80;
+       }

       buf.code = cbuf[i];
       /* Set the frame corresponding to the active tty.  Note that the





reply via email to

[Prev in Thread] Current Thread [Next in Thread]