From 97a0e969903bd799ee7cdf5a6f230596637f8d79 Mon Sep 17 00:00:00 2001 From: Hin-Tak Leung Date: Mon, 19 Dec 2016 13:02:05 +0000 Subject: [PATCH] [cjk-enc.el] Updated to work with Emacs 25 and later. In batch mode, `message' in Emacs 25+ obeys coding-system-for-write. [1] `cjk-encode' can mis-behave and, at worst, crash from `message' recursing. [2] The charset priority system introduced with Emacs 23+ changes further in Emacs 25+. Charset `big5' has been updated (together with `big5-hkscs') [3] and now takes priority over the historical `chinese-big5-2' and `chinese-big5-1'. Also added a verbose warning where it was silently mis-behaving. Thanks for advice from Eli Zaretskii. Also worth noting is that "Wrote output.cjk" is no longer seen with Emacs 25+ [4]; see also the previous change regarding Emacs 23+ update. Tested against examples/{Big5.tex, Big5vert.tex, CJKbabel.tex, muletest.tex, rubytest.tex, thai.tex}, and LANG'ed non-'coding:'ed big5 example, with emacs 21.4, 22.3, 23.3, 24.5 and 25.1. * utils/lisp/emacs/cjk-enc.el [emacs version >= 25]: Protect `message' by resetting to terminal-coding-system temporarily. Treat `big5' the same as `chinese-big5-2' and `chinese-big5-1', and tuning priority system slightly. Emit warning message where it was silently mis-behaving. 
* examples/{Big5.tex, Big5vert.tex}: Added comment about cjk-enc.el [1] From: Eli Zaretskii Date: Wed, 6 Jan 2016 20:25:45 +0200 Subject: Obey coding-system-for-write when writing stdout/stderr in batch http://git.savannah.gnu.org/cgit/emacs.git/commit/?h=emacs-25&id=c63246628461f748d66a8a07ba008de2e00fd33a [2] Bug #25203 "25.1; crash during message, infinite recursion" https://debbugs.gnu.org/cgi/bugreport.cgi?bug=25203 [3] Possibly these two changes, plus before and after: From: Glenn Morris Date: Sat, 23 May 2015 16:38:13 -0700 Subject: Remove charset map files from repository, generate in first bootstrap http://git.savannah.gnu.org/cgit/emacs.git/commit/?h=emacs-25&id=9f89ea1f84dbf708228ced0202774b36f30f2b01 From: Glenn Morris Date: Sat, 23 May 2015 11:07:40 -0700 Subject: * admin/charsets/glibc/: New directory, imported from glibc 2.21. http://git.savannah.gnu.org/cgit/emacs.git/commit/?h=emacs-25&id=49fa1919e9be863f22ec8042704bf3436d0ec26d [4] From: Paul Eggert Date: Sat, 3 Jan 2015 17:48:23 -0800 Subject: batch write-region no longer says "Wrote FOO" http://git.savannah.gnu.org/cgit/emacs.git/commit/?h=emacs-25&id=d20f82e6f1ab902469723ebd530ca21bea9cc41b Signed-off-by: Hin-Tak Leung --- ChangeLog | 26 ++++++++++++++++++++++++++ examples/Big5.tex | 2 +- examples/Big5vert.tex | 2 +- utils/lisp/emacs/cjk-enc.el | 34 +++++++++++++++++++++++++++------- 4 files changed, 55 insertions(+), 9 deletions(-) diff --git a/ChangeLog b/ChangeLog index f943ead..3606e16 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,29 @@ +2016-12-19 Hin-Tak Leung + + [cjk-enc.el] Updated to work with Emacs 25 and later. + + In batch mode, `message' in Emacs 25+ obeys coding-system-for-write. + `cjk-encode' can mis-behave and, at worst, crash from `message' + recursing. + + The charset priority system introduced with Emacs 23+ changes + further in Emacs 25+. 
Charset `big5' has been updated (together with + `big5-hkscs') and now takes priority over the historical + `chinese-big5-2' and `chinese-big5-1'. + + Thanks for advice from Eli Zaretskii. + + * utils/lisp/emacs/cjk-enc.el [emacs version >= 25]: + Protect `message' by resetting to terminal-coding-system + temporarily. + + Treat `big5' the same as `chinese-big5-2' and `chinese-big5-1', + and tuning priority system slightly. + + Emit warning message, where it was silently mis-behaving. + + * examples/{Big5.tex, Big5vert.tex}: Added comment about cjk-enc.el + 2015-04-18 Werner LEMBERG Version 4.8.4 released diff --git a/examples/Big5.tex b/examples/Big5.tex index 9f6ce11..283ee3f 100644 --- a/examples/Big5.tex +++ b/examples/Big5.tex @@ -6,7 +6,7 @@ % Version 4.8.4 (18-Apr-2015) % % -% process this file with bg5latex +% process this file with bg5latex (or cjk-enc.el; see cjk-enc.txt) \documentclass[12pt]{article} diff --git a/examples/Big5vert.tex b/examples/Big5vert.tex index a2523a3..ba127bc 100644 --- a/examples/Big5vert.tex +++ b/examples/Big5vert.tex @@ -1,7 +1,7 @@ % This is the file Big5vert.tex of the CJK package % for testing vertical typesetting (in Big 5 encoding). % -% process this file with bg5latex +% process this file with bg5latex (or cjk-enc.el; see cjk-enc.txt) % % written by Werner Lemberg % diff --git a/utils/lisp/emacs/cjk-enc.el b/utils/lisp/emacs/cjk-enc.el index da2052e..7556e51 100644 --- a/utils/lisp/emacs/cjk-enc.el +++ b/utils/lisp/emacs/cjk-enc.el @@ -444,6 +444,7 @@ (korean-ksc5601 . KS) (chinese-big5-1 . Bg5) (chinese-big5-2 . Bg5) + (big5 . Bg5) (chinese-cns11643-1 . CNS1) (chinese-cns11643-2 . CNS2) (chinese-cns11643-3 . CNS3) @@ -505,6 +506,7 @@ (chinese-gb2312 . ("\17767\177\177" . "\177%c\177%d\177")) (chinese-big5-1 . ("\17768\177\177" . "\177%c\177%d\177")) (chinese-big5-2 . ("\17768\177\177" . "\177%c\177%d\177")) + (big5 . ("\17768\177\177" . "\177%c\177%d\177")) (korean-ksc5601 . ("\17769\177\177" . 
"\177%c\177%d\177")) ;; Cdr part is a formatter string FORMAT. Each character is @@ -629,7 +631,8 @@ ch ch1 ch2 format-spec (skipped-whitespace nil) - (last-pos 0)) + (last-pos 0) + (unsupported-charset-warned nil)) ;; Now we go to beginning of TEMP-BUF and start the loop. (goto-char (point-min)) (setq prev-charset 'ascii) @@ -653,6 +656,13 @@ (if (eq charset 'tis620-2533) (setq charset (char-charset ch '(thai-tis620 ascii)))) + ;; emacs 25+: Language-detection heuristics changed. + ;; `big5' is preferred over `chinese-big5-1' and `chinese-big5-1'. + ;; If not over-riden, can also swallow LaTeX (ascii) instructions + ;; afterwards. + (if (eq charset 'big5) + (setq charset (char-charset ch '(ascii big5)))) + ;; Check whether we have Unicode based input. (if (eq charset 'unicode) (let ((l (split-char ch))) @@ -685,8 +695,8 @@ ;; CH1 -- first character code ;; CH2 -- second character code (of two-byte characters) ;; if any - (if (or (eq charset 'chinese-big5-1) - (eq charset 'chinese-big5-2)) + (if (or (eq charset 'big5) (or (eq charset 'chinese-big5-1) + (eq charset 'chinese-big5-2))) ;; Emacs uses two special character sets for Big5 ;; characters. We must decode the current character to ;; get the real Big5 character code. @@ -712,8 +722,14 @@ ;; FORMAT-SPEC tells how to encode this character. (setq format-spec (cdr (assq charset cjk-format-spec-table))) (if (null format-spec) - ;; Unsupported character set. Do nothing. - nil + ;; Unsupported character set. + (when (not unsupported-charset-warned) + (let ((coding-system-for-write (terminal-coding-system))) + (message "WARNING: Detected unsupported character set %s" + charset) + (message "Consider using \`coding:\' or setting LANG.")) + ;; Suppress further warning in the same lang block. + (setq unsupported-charset-warned t)) ;; Ok, it is supported. If this character set is a CJK ;; character set (i.e., it is in CJK-ENC-TABLE), we need a ;; special header at the beginning of the output file. 
@@ -765,6 +781,9 @@ (re-search-forward "\\ct+" nil t) (setq end (point-marker)) (goto-char start) + ;; THAI-BREAK-WORDS is in `thai-word.el', + ;; shipped with emacs 22+ onwards. + ;; loads from cjk for emacs 21 or below. (thai-break-words "|" end) ;; Extract this run. (setq str (buffer-substring start end) @@ -879,8 +898,9 @@ (if (> (- (point) last-pos) 1000) (progn (setq last-pos (point)) - (message "Converting: %2d%%" - (/ (* 100 (point)) (point-max))))) + (let ((coding-system-for-write (terminal-coding-system))) + (message "Converting: %2d%%" + (/ (* 100 (point)) (point-max)))))) ;; Advance to the next character and loop. (forward-char 1)) -- 2.9.3