[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#3745: 23.0.95; emacs-23.0.95: unibyte-display-via-language-environment
From: Kenichi Handa
Subject: bug#3745: 23.0.95; emacs-23.0.95: unibyte-display-via-language-environment
Date: Mon, 06 Jul 2009 15:50:58 +0900
In article <tl74otqk501.fsf@m17n.org>, Kenichi Handa <handa@m17n.org> writes:
> But, using unibyte_char_to_multibyte here is a clear bug.
> If the overhead of DECODE_CHAR is intolerable (I don't
> believe it), we can do this:
> (1) modify unibyte_char_to_multibyte to use BYTE8_TO_CHAR
> instead of the table unibyte_to_multibyte_table.
> (2) Set up unibyte_to_multibyte_table for unibyte_charset.
> (3) Just lookup that table in x_produce_glyphs.
To minimize the changes, I made the attached patch. It
doesn't touch unibyte_to_multibyte_table; instead, it introduces
charset_unibyte_decoder[128]. I confirmed that it doesn't make
the display code slower.
---
Kenichi Handa
handa@m17n.org
Index: character.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/character.c,v
retrieving revision 1.24
diff -u -r1.24 character.c
--- character.c 5 Feb 2009 08:46:52 -0000 1.24
+++ character.c 6 Jul 2009 06:42:31 -0000
@@ -90,9 +90,9 @@
/* Mapping table from unibyte chars to multibyte chars. */
int unibyte_to_multibyte_table[256];
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
- char. */
-char unibyte_has_multibyte_table[256];
+/* Decoding table for 8-bit byte codes of the charset charset_unibyte.
+ Nth element is for the code (N-0x80). */
+int charset_unibyte_decoder[128];
@@ -270,9 +270,8 @@
return c;
}
-/* Convert the multibyte character C to unibyte 8-bit character based
- on the current value of charset_unibyte. If dimension of
- charset_unibyte is more than one, return (C & 0xFF).
+/* Convert ASCII or 8-bit character C to unibyte. If C is none of
+ them, return (C & 0xFF).
The argument REV_TBL is now ignored. It will be removed in the
future. */
@@ -282,14 +281,11 @@
int c;
Lisp_Object rev_tbl;
{
- struct charset *charset;
- unsigned c1;
-
+ if (c < 0x80)
+ return c;
if (CHAR_BYTE8_P (c))
return CHAR_TO_BYTE8 (c);
- charset = CHARSET_FROM_ID (charset_unibyte);
- c1 = ENCODE_CHAR (charset, c);
- return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
+ return (c & 0xFF);
}
/* Like multibyte_char_to_unibyte, but return -1 if C is not supported
@@ -302,11 +298,11 @@
struct charset *charset;
unsigned c1;
+ if (c < 0x80)
+ return c;
if (CHAR_BYTE8_P (c))
return CHAR_TO_BYTE8 (c);
- charset = CHARSET_FROM_ID (charset_unibyte);
- c1 = ENCODE_CHAR (charset, c);
- return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1);
+ return -1;
}
DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
@@ -337,10 +333,8 @@
c = XFASTINT (ch);
if (c >= 0400)
error ("Invalid unibyte character: %d", c);
- charset = CHARSET_FROM_ID (charset_unibyte);
- c = DECODE_CHAR (charset, c);
- if (c < 0)
- c = BYTE8_TO_CHAR (XFASTINT (ch));
+ if (c >= 0x80)
+ c = BYTE8_TO_CHAR (c);
return make_number (c);
}
Index: character.h
===================================================================
RCS file: /cvsroot/emacs/emacs/src/character.h,v
retrieving revision 1.15
diff -u -r1.15 character.h
--- character.h 8 Jan 2009 03:15:27 -0000 1.15
+++ character.h 6 Jul 2009 06:42:31 -0000
@@ -87,11 +87,15 @@
#define unibyte_char_to_multibyte(c) \
((c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
- char. */
-extern char unibyte_has_multibyte_table[256];
-
-#define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
+/* Decoding table for 8-bit byte codes of the charset charset_unibyte.
+ Nth element is for the code (N-0x80). */
+extern int charset_unibyte_decoder[128];
+
+/* Return a character corresponding to the code BYTE of
+ charset_unibyte. BYTE must be a byte; i.e. less than 0x100. If
+ BYTE is not a valid code of charset_unibyte, return -1. */
+#define DECODE_UNIBYTE(BYTE) \
+ ((BYTE) < 0x80 ? (int) (BYTE) : charset_unibyte_decoder[(BYTE) - 0x80])
/* If C is not ASCII, make it unibyte. */
#define MAKE_CHAR_UNIBYTE(c) \
Index: charset.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/charset.c,v
retrieving revision 1.179
diff -u -r1.179 charset.c
--- charset.c 9 Jun 2009 02:53:07 -0000 1.179
+++ charset.c 6 Jul 2009 06:42:32 -0000
@@ -2260,6 +2260,7 @@
Vcharset_ordered_list = Fnconc (2, arglist);
charset_ordered_list_tick++;
+ charset_unibyte = -1;
for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
CONSP (old_list); old_list = XCDR (old_list))
{
@@ -2267,9 +2268,25 @@
list_2022 = Fcons (XCAR (old_list), list_2022);
if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
+ if (charset_unibyte < 0)
+ {
+ struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
+
+ if (CHARSET_DIMENSION (charset) == 1
+ && CHARSET_ASCII_COMPATIBLE_P (charset)
+ && CHARSET_MAX_CHAR (charset) >= 0x80)
+ charset_unibyte = CHARSET_ID (charset);
+ }
}
Viso_2022_charset_list = Fnreverse (list_2022);
Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
+ if (charset_unibyte < 0)
+ charset_unibyte = charset_iso_8859_1;
+ {
+ struct charset *charset = CHARSET_FROM_ID (charset_unibyte);
+ for (i = 128; i < 256; i++)
+ charset_unibyte_decoder[i - 128] = DECODE_CHAR (charset, i);
+ }
return Qnil;
}
@@ -2328,6 +2345,10 @@
unibyte_to_multibyte_table[i] = i;
for (; i < 256; i++)
unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
+ for (i = 0; i < 32; i++)
+ charset_unibyte_decoder[i] = -1;
+ for (; i < 128; i++)
+ charset_unibyte_decoder[i] = 128 + i;
}
#ifdef emacs
@@ -2429,6 +2450,7 @@
= define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
128, 255, -1, 0, -1, 0, 1,
MAX_5_BYTE_CHAR + 1);
+ charset_unibyte = charset_iso_8859_1;
}
#endif /* emacs */
Index: xdisp.c
===================================================================
RCS file: /cvsroot/emacs/emacs/src/xdisp.c,v
retrieving revision 1.1288
diff -u -r1.1288 xdisp.c
--- xdisp.c 18 Jun 2009 09:49:07 -0000 1.1288
+++ xdisp.c 6 Jul 2009 06:42:34 -0000
@@ -5743,7 +5743,7 @@
|| it->c == 0xAD /* SOFT HYPHEN */)))
: (it->c >= 127
&& (! unibyte_display_via_language_environment
- || (UNIBYTE_CHAR_HAS_MULTIBYTE_P (it->c)))))))
+ || (DECODE_UNIBYTE (it->c) <= 0xA0))))))
{
/* IT->c is a control character which must be displayed
either as '\003' or as `^C' where the '\\' and '^'
@@ -21196,9 +21196,8 @@
{
if (SINGLE_BYTE_CHAR_P (it->c)
&& unibyte_display_via_language_environment)
- it->char_to_display = unibyte_char_to_multibyte (it->c);
- if (! SINGLE_BYTE_CHAR_P (it->char_to_display))
{
+ it->char_to_display = DECODE_UNIBYTE (it->c);
it->multibyte_p = 1;
it->face_id = FACE_FOR_CHAR (it->f, face, it->char_to_display,
-1, Qnil);
bug#3745: 23.0.95; emacs-23.0.95: unibyte-display-via-language-environment, Chong Yidong, 2009/07/03