>From f63b918057f7eaf6f8eebb28071ac17dd5ab3ff1 Mon Sep 17 00:00:00 2001
From: Eric Abrahamsen
Date: Sat, 26 Jan 2019 20:11:23 -0800
Subject: [PATCH] New constant chinese-pinyin-character-map

* lisp/language/china-util.el (chinese-pinyin-character-map): Constant
holding an alist built from the pinyin-to-character mapping provided
in the file pinyin.map.
---
 lisp/language/china-util.el | 33 ++++++++++++++++++++++++++++++++-
 1 file changed, 32 insertions(+), 1 deletion(-)

diff --git a/lisp/language/china-util.el b/lisp/language/china-util.el
index 70710bac18..cdbd8e322f 100644
--- a/lisp/language/china-util.el
+++ b/lisp/language/china-util.el
@@ -30,7 +30,7 @@
 
 ;;; Code:
 
-;; Hz/ZW/EUC-TW encoding stuff
+;; Hz/ZW/EUC-TW encoding stuff, also a pinyin-to-character mapping.
 
 ;; HZ is an encoding method for Chinese character set GB2312 used
 ;; widely in Internet. It is very similar to 7-bit environment of
@@ -202,6 +202,37 @@ pre-write-encode-hz
     (let (last-coding-system-used)
       (encode-hz-region 1 (point-max)))
     nil))
+
+;;; Elisp-accessible version of the pinyin-to-character mapping
+;;; provided in leim/MISC-DIC/pinyin.map, which is otherwise only
+;;; exposed to the quail input method.
+
+(defconst chinese-pinyin-character-map
+  ;; Build the alist while byte-compiling, so the .elc carries it as
+  ;; a literal.  Evaluating this at load time instead would re-read
+  ;; pinyin.map on every load, and signal an error wherever that
+  ;; source-tree file is missing.
+  (eval-when-compile
+    (let ((py-file (expand-file-name
+                    "leim/MISC-DIC/pinyin.map"
+                    source-directory))
+          alst)
+      (with-temp-buffer
+        (insert-file-contents py-file)
+        ;; Move to the first line that does not start with "%".
+        (re-search-forward "^[^%]" (point-max) t)
+        (beginning-of-line)
+        ;; Collect every "ASCII-TEXT<TAB>CHINESE-CHARS" line, in order.
+        (while (re-search-forward "^\\([[:ascii:]]+\\)\t\\(\\cc+\\)$"
+                                  (point-max) t)
+          (push (cons (match-string-no-properties 1)
+                      (match-string-no-properties 2))
+                alst))
+        (nreverse alst))))
+  "An alist mapping pinyin syllables to Chinese characters.
+Each element is a cons (SYLLABLE . CHARACTERS) of two strings.
+Produced from data in pinyin.map.")
+
 ;;
 (provide 'china-util)
 
-- 
2.20.1