emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/language/china-util.el


From: Dave Love
Subject: [Emacs-diffs] Changes to emacs/lisp/language/china-util.el
Date: Sun, 02 Jun 2002 16:27:27 -0400

Index: emacs/lisp/language/china-util.el
diff -c emacs/lisp/language/china-util.el:1.20 
emacs/lisp/language/china-util.el:1.21
*** emacs/lisp/language/china-util.el:1.20      Sun Jul 15 15:53:53 2001
--- emacs/lisp/language/china-util.el   Tue Dec 18 12:52:17 2001
***************
*** 1,7 ****
! ;;; china-util.el --- utilities for Chinese
  
  ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
  ;; Licensed to the Free Software Foundation.
  
  ;; Keywords: mule, multilingual, Chinese
  
--- 1,8 ----
! ;;; china-util.el --- utilities for Chinese  -*- coding: iso-2022-7bit -*-
  
  ;; Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
  ;; Licensed to the Free Software Foundation.
+ ;; Copyright (C) 1995, 2001 Free Software Foundation, Inc.
  
  ;; Keywords: mule, multilingual, Chinese
  
***************
*** 26,32 ****
  
  ;;; Code:
  
! ;; Hz/ZW encoding stuffs
  
  ;; HZ is an encoding method for Chinese character set GB2312 used
  ;; widely in Internet.  It is very similar to 7-bit environment of
--- 27,33 ----
  
  ;;; Code:
  
! ;; Hz/ZW/EUC-TW encoding stuff
  
  ;; HZ is an encoding method for Chinese character set GB2312 used
  ;; widely in Internet.  It is very similar to 7-bit environment of
***************
*** 38,43 ****
--- 39,51 ----
  ;; encodes Chinese characters line by line by starting each line with
  ;; the sequence "zW".  It also uses only 7-bit as HZ.
  
+ ;; EUC-TW is similar to EUC-KS or EUC-JP.  Its main character set is
+ ;; plane 1 of CNS 11643; characters of planes 2 to 7 are accessed with
+ ;; a single shift escape followed by three bytes: the first gives the
+ ;; plane, the second and third the character code.  Note that characters
+ ;; of plane 1 are (redundantly) accessible with a single shift escape
+ ;; also.
+ 
  ;; ISO-2022 escape sequence to designate GB2312.
  (defvar iso2022-gb-designation "\e$A")
  ;; HZ escape sequence to designate GB2312.
***************
*** 155,160 ****
--- 163,427 ----
    "Encode the text in the current buffer to HZ."
    (interactive)
    (encode-hz-region (point-min) (point-max)))
+ 
+ ;; The following sets up a translation table (big5-to-cns) from Big 5
+ ;; to CNS encoding, using some auxiliary functions to make the code
+ ;; more readable.
+ 
+ ;; Many kudos to Himi!  The used code has been adapted from his
+ ;; mule-ucs package.
+ 
+ (defun big5-to-flat-code (num)
+   "Convert NUM in Big 5 encoding to a `flat code'.
+ 0xA140 will be mapped to position 0, 0xA141 to position 1, etc.
+ There are no gaps in the flat code."
+ 
+   (let ((hi (/ num 256))
+         (lo (% num 256)))
+     (+ (* 157 (- hi #xa1))
+        (- lo (if (>= lo #xa1) 98 64)))))
+ 
+ (defun flat-code-to-big5 (num)
+   "Convert NUM from a `flat code' to Big 5 encoding.
+ This is the inverse function of `big5-to-flat-code'."
+ 
+   (let ((hi (/ num 157))
+         (lo (% num 157)))
+     (+ (* 256 (+ hi #xa1))
+        (+ lo (if (< lo 63) 64 98)))))
+ 
+ (defun euc-to-flat-code (num)
+   "Convert NUM in EUC encoding (in GL representation) to a `flat code'.
+ 0x2121 will be mapped to position 0, 0x2122 to position 1, etc.
+ There are no gaps in the flat code."
+ 
+   (let ((hi (/ num 256))
+         (lo (% num 256)))
+     (+ (* 94 (- hi #x21))
+        (- lo #x21))))
+ 
+ (defun flat-code-to-euc (num)
+   "Convert NUM from a `flat code' to EUC encoding (in GL representation).
+ The inverse function of `euc-to-flat-code'.  The high and low bytes are
+ returned in a list."
+ 
+   (let ((hi (/ num 94))
+         (lo (% num 94)))
+     (list (+ hi #x21) (+ lo #x21))))
+ 
+ (defun expand-euc-big5-alist (alist)
+   "Create a translation table and fills it with data given in ALIST.
+ Elements of ALIST can be either given as
+ 
+   ((euc-charset . startchar) . (big5-range-begin . big5-range-end))
+ 
+ or as
+ 
+   (euc-character . big5-charcode)
+ 
+ The former maps a range of glyphs in an EUC charset (where STARTCHAR
+ is in GL representation) to a certain range of Big 5 encoded
+ characters, the latter maps a single glyph.  Glyphs which can't be
+ mapped will be represented with the byte 0xFF.
+ 
+ The return value is the filled translation table."
+ 
+   (let (chartable
+         elem
+         result
+         char
+         big5
+         i
+         end
+         codepoint
+         charset)
+     (setq chartable (make-char-table 'translation-table #xFF))
+     (while alist
+       (setq elem (car alist)
+             char (car elem)
+             big5 (cdr elem)
+             alist (cdr alist))
+       (cond ((and (consp char)
+                   (consp big5))
+                (setq i (big5-to-flat-code (car big5))
+                      end (big5-to-flat-code (cdr big5))
+                      codepoint (euc-to-flat-code (cdr char))
+                      charset (car char))
+                (while (>= end i)
+                  (aset chartable
+                        (decode-big5-char (flat-code-to-big5 i))
+                        (apply (function make-char)
+                               charset
+                               (flat-code-to-euc codepoint)))
+                  (setq i (1+ i)
+                        codepoint (1+ codepoint)))
+             )
+             ((and (char-valid-p char)
+                   (numberp big5))
+                (setq i (decode-big5-char big5))
+                (aset chartable i char)
+             )
+             (t
+              (error "Unknown slot type: %S" elem)
+             )
+       )
+     )
+     ;; the return value
+     chartable
+   )
+ )
+ 
+ ;; All non-CNS encodings are commented out.
+ 
+ (define-translation-table 'big5-to-cns
+   (expand-euc-big5-alist
+    '(
+      ;; Symbols
+      ((chinese-cns11643-1 . #x2121) . (#xA140 . #xA1F5))
+      (?$(G"X(B . #xA1F6)
+      (?$(G"W(B . #xA1F7)
+      ((chinese-cns11643-1 . #x2259) . (#xA1F8 . #xA2AE))
+      ((chinese-cns11643-1 . #x2421) . (#xA2AF . #xA3BF))
+      ;; Control codes (vendor dependent)
+      ((chinese-cns11643-1 . #x4221) . (#xA3C0 . #xA3E0))
+      ;; Level 1 Ideographs
+      ((chinese-cns11643-1 . #x4421) . (#xA440 . #xACFD))
+      (?$(GWS(B . #xACFE)
+      ((chinese-cns11643-1 . #x5323) . (#xAD40 . #xAFCF))
+      ((chinese-cns11643-1 . #x5754) . (#xAFD0 . #xBBC7))
+      ((chinese-cns11643-1 . #x6B51) . (#xBBC8 . #xBE51))
+      (?$(GkP(B . #xBE52)
+      ((chinese-cns11643-1 . #x6F5C) . (#xBE53 . #xC1AA))
+      ((chinese-cns11643-1 . #x7536) . (#xC1AB . #xC2CA))
+      (?$(Gu5(B . #xC2CB)
+      ((chinese-cns11643-1 . #x7737) . (#xC2CC . #xC360))
+      ((chinese-cns11643-1 . #x782E) . (#xC361 . #xC3B8))
+      (?$(Gxe(B . #xC3B9)
+      (?$(Gxd(B . #xC3BA)
+      ((chinese-cns11643-1 . #x7866) . (#xC3BB . #xC455))
+      (?$(Gx-(B . #xC456)
+      ((chinese-cns11643-1 . #x7962) . (#xC457 . #xC67E))
+      ;; Symbols
+      ((chinese-cns11643-1 . #x2621) . (#xC6A1 . #xC6BE))
+      ;; Radicals
+      (?$(G'#(B . #xC6BF)
+      (?$(G'$(B . #xC6C0)
+      (?$(G'&(B . #xC6C1)
+      (?$(G'((B . #xC6C2)
+      (?$(G'-(B . #xC6C3)
+      (?$(G'.(B . #xC6C4)
+      (?$(G'/(B . #xC6C5)
+      (?$(G'4(B . #xC6C6)
+      (?$(G'7(B . #xC6C7)
+      (?$(G':(B . #xC6C8)
+      (?$(G'<(B . #xC6C9)
+      (?$(G'B(B . #xC6CA)
+      (?$(G'G(B . #xC6CB)
+      (?$(G'N(B . #xC6CC)
+      (?$(G'S(B . #xC6CD)
+      (?$(G'T(B . #xC6CE)
+      (?$(G'U(B . #xC6CF)
+      (?$(G'Y(B . #xC6D0)
+      (?$(G'Z(B . #xC6D1)
+      (?$(G'a(B . #xC6D2)
+      (?$(G'f(B . #xC6D3)
+      (?$(G()(B . #xC6D4)
+      (?$(G(*(B . #xC6D5)
+      (?$(G(c(B . #xC6D6)
+      (?$(G(l(B . #xC6D7)
+      ;; Diacritical Marks
+      ; ((japanese-jisx0208 . #x212F) . (#xC6D8 . #xC6D9))
+      ;; Japanese Kana Supplement
+      ; ((japanese-jisx0208 . #x2133) . (#xC6DA . #xC6E3))
+      ;; Japanese Hiragana
+      ; ((japanese-jisx0208 . #x2421) . (#xC6E7 . #xC77A))
+      ;; Japanese Katakana
+      ; ((japanese-jisx0208 . #x2521) . (#xC77B . #xC7F2))
+      ;; Cyrillic Characters
+      ; ((japanese-jisx0208 . #x2721) . (#xC7F3 . #xC854))
+      ; ((japanese-jisx0208 . #x2751) . (#xC855 . #xC875))
+      ;; Special Chinese Characters
+      (?$(J!#(B . #xC879)
+      (?$(J!$(B . #xC87B)
+      (?$(J!*(B . #xC87D)
+      (?$(J!R(B . #xC8A2)
+ 
+      ;; JIS X 0208 NOT SIGN (cf. U+00AC)
+      ; (?$B"L(B . #xC8CD)
+      ;; JIS X 0212 BROKEN BAR (cf. U+00A6)
+      ; (?$(D"C(B . #xC8CE)
+ 
+      ;; GB 2312 characters
+      ; (?$A!d(B . #xC8CF)
+      ; (?$A!e(B . #xC8D0)
+         ;;;;; C8D1 - Japanese `($B3t(B)'
+      ; (?$A!m(B . #xC8D2)
+         ;;;;; C8D2 - Tel.
+ 
+      ;; Level 2 Ideographs
+      ((chinese-cns11643-2 . #x2121) . (#xC940 . #xC949))
+      (?$(GDB(B . #xC94A);; a duplicate of #xA461
+      ((chinese-cns11643-2 . #x212B) . (#xC94B . #xC96B))
+      ((chinese-cns11643-2 . #x214D) . (#xC96C . #xC9BD))
+      (?$(H!L(B . #xC9BE)
+      ((chinese-cns11643-2 . #x217D) . (#xC9BF . #xC9EC))
+      ((chinese-cns11643-2 . #x224E) . (#xC9ED . #xCAF6))
+      (?$(H"M(B . #xCAF7)
+      ((chinese-cns11643-2 . #x2439) . (#xCAF8 . #xD6CB))
+      (?$(H>c(B . #xD6CC)
+      ((chinese-cns11643-2 . #x3770) . (#xD6CD . #xD779))
+      (?$(H?j(B . #xD77A)
+      ((chinese-cns11643-2 . #x387E) . (#xD77B . #xDADE))
+      (?$(H7o(B . #xDADF)
+      ((chinese-cns11643-2 . #x3E64) . (#xDAE0 . #xDBA6))
+      ((chinese-cns11643-2 . #x3F6B) . (#xDBA7 . #xDDFB))
+      (?$(HAv(B . #xDDFC);; a duplicate of #xDCD1
+      ((chinese-cns11643-2 . #x4424) . (#xDDFD . #xE8A2))
+      ((chinese-cns11643-2 . #x554C) . (#xE8A3 . #xE975))
+      ((chinese-cns11643-2 . #x5723) . (#xE976 . #xEB5A))
+      ((chinese-cns11643-2 . #x5A29) . (#xEB5B . #xEBF0))
+      (?$(HUK(B . #xEBF1)
+      ((chinese-cns11643-2 . #x5B3F) . (#xEBF2 . #xECDD))
+      (?$(HW"(B . #xECDE)
+      ((chinese-cns11643-2 . #x5C6A) . (#xECDF . #xEDA9))
+      ((chinese-cns11643-2 . #x5D75) . (#xEDAA . #xEEEA))
+      (?$(Hd/(B . #xEEEB)
+      ((chinese-cns11643-2 . #x6039) . (#xEEEC . #xF055))
+      (?$(H]t(B . #xF056)
+      ((chinese-cns11643-2 . #x6243) . (#xF057 . #xF0CA))
+      (?$(HZ((B . #xF0CB)
+      ((chinese-cns11643-2 . #x6337) . (#xF0CC . #xF162))
+      ((chinese-cns11643-2 . #x6430) . (#xF163 . #xF16A))
+      (?$(Hga(B . #xF16B)
+      ((chinese-cns11643-2 . #x6438) . (#xF16C . #xF267))
+      (?$(Hi4(B . #xF268)
+      ((chinese-cns11643-2 . #x6573) . (#xF269 . #xF2C2))
+      ((chinese-cns11643-2 . #x664E) . (#xF2C3 . #xF374))
+      ((chinese-cns11643-2 . #x6762) . (#xF375 . #xF465))
+      ((chinese-cns11643-2 . #x6935) . (#xF466 . #xF4B4))
+      (?$(HfM(B . #xF4B5)
+      ((chinese-cns11643-2 . #x6962) . (#xF4B6 . #xF4FC))
+      ((chinese-cns11643-2 . #x6A4C) . (#xF4FD . #xF662))
+      (?$(HjK(B . #xF663)
+      ((chinese-cns11643-2 . #x6C52) . (#xF664 . #xF976))
+      ((chinese-cns11643-2 . #x7167) . (#xF977 . #xF9C3))
+      (?$(Hqf(B . #xF9C4)
+      (?$(Hr4(B . #xF9C5)
+      (?$(address@hidden(B . #xF9C6)
+      ((chinese-cns11643-2 . #x7235) . (#xF9C7 . #xF9D1))
+      ((chinese-cns11643-2 . #x7241) . (#xF9D2 . #xF9D5))
+ 
+      ;; Additional Ideographs
+      (?$(IC7(B . #xF9D6)
+      (?$(IOP(B . #xF9D7)
+      (?$(IDN(B . #xF9D8)
+      (?$(IPJ(B . #xF9D9)
+      (?$(I,](B . #xF9DA)
+      (?$(I=~(B . #xF9DB)
+      (?$(IK\(B . #xF9DC)
+     )
+   )
+ )
  
  ;;
  (provide 'china-util)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]