From 0b655da7d98d6ff5c6211d1a56e879ac291f9c34 Mon Sep 17 00:00:00 2001 From: James Thomas Date: Mon, 27 Apr 2020 08:06:48 +0530 Subject: [PATCH] Improve Malayalam language transliteration The existing ITRANS scheme does not support some characters and language quirks like 'chillu's. The Inscript method has errors. * lisp/language/ind-util.el (indian-mlm-base-table): Add archaic chars and Mozhi combos; clean up. (indian-mlm-mozhi-table): For new scheme Mozhi. * lisp/leim/quail/indian.el (inscript-mlm-keytable): Correct errors. Add Inscript chillus & zero-width chars, Mozhi scheme. * etc/NEWS: Mention change. Add a sufficient implementation of the Mozhi scheme. Complete Inscript implementation. --- etc/NEWS | 7 +++ lisp/language/ind-util.el | 40 +++++++++++++++--- lisp/leim/quail/indian.el | 89 +++++++++++++++++++++++++++++++++++---- 3 files changed, 122 insertions(+), 14 deletions(-) diff --git a/etc/NEWS b/etc/NEWS index 025d5c14a7..aa551177d1 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -288,6 +288,13 @@ prefix on the Subject line in various languages. These new navigation commands are bound to 'n' and 'p' in 'apropos-mode'. +** Quail + +--- +*** Improved Malayalam language transliteration +Added new Mozhi scheme. The inapplicable ITRANS scheme is now +deprecated. Errors in Inscript method corrected. 
+ * New Modes and Packages in Emacs 28.1 diff --git a/lisp/language/ind-util.el b/lisp/language/ind-util.el index 4319e5537e..62885227f1 100644 --- a/lisp/language/ind-util.el +++ b/lisp/language/ind-util.el @@ -232,8 +232,8 @@ indian-mlm-base-table '( (;; VOWELS (?അ nil) (?ആ ?ാ) (?ഇ ?ി) (?ഈ ?ീ) (?ഉ ?ു) (?ഊ ?ൂ) - (?ഋ ?ൃ) (?ഌ nil) nil (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ) - nil (?ഓ ?ോ) (?ഒ ?ൊ) (?ഔ ?ൌ) nil nil) + (?ഋ ?ൃ) (?ഌ ?ൢ) (?ൡ ?ൣ) (?ഏ ?േ) (?എ ?െ) (?ഐ ?ൈ) + nil (?ഒ ?ൊ) (?ഓ ?ോ) (?ഔ ?ൗ) (?് ?്) (?ൠ ?ൄ)) (;; CONSONANTS ?ക ?ഖ ?ഗ ?ഘ ?ങ ;; GUTTRULS ?ച ?ഛ ?ജ ?ഝ ?ഞ ;; PALATALS @@ -243,13 +243,16 @@ indian-mlm-base-table ?യ ?ര ?റ ?ല ?ള ?ഴ ?വ ;; SEMIVOWELS ?ശ ?ഷ ?സ ?ഹ ;; SIBILANTS nil nil nil nil nil nil nil nil ;; NUKTAS - "ജ്ഞ" "ക്ഷ") + "ജ്ഞ" "ക്ഷ" + "റ്റ" "ന്റ" "ത്ത" "ത്ഥ" "ഞ്ഞ" "ങ്ങ" "ന്ന" + "ഞ്ച" "ന്ക" "ങ്ക" "ച്ച" "ച്ഛ" "ക്ക" + "ബ്ബ" "ക്ക" "ഗ്ഗ" "ജ്ജ" "മ്മ" "പ്പ" "വ്വ" "ക്സ" "ശ്ശ") (;; Misc Symbols nil ?ം ?ഃ nil ?് nil nil) (;; Digits ?൦ ?൧ ?൨ ?൩ ?൪ ?൫ ?൬ ?൭ ?൮ ?൯) - (;; Inscript-extra (4) (#, $, ^, *, ]) - "്ര" "ര്" "ത്ര" "ശ്ര" nil))) + (;; Chillus + "ണ്" ?ൺ "ന്" ?ൻ "ര്" ?ർ "ല്" ?ൽ "ള്" ?ൾ))) (defvar indian-tml-base-table '( @@ -323,6 +326,29 @@ indian-itrans-v5-table-for-tamil (;; misc -- 7 ".N" (".n" "M") "H" ".a" ".h" ("AUM" "OM") ".."))) +(defvar indian-mlm-mozhi-table + '(;; for encode/decode + (;; vowels -- 18 + "a" ("aa" "A") "i" ("ii" "I") "u" ("uu" "U") + "R" "Ll" "Lll" ("E" "ae") "e" "ai" + nil "o" "O" "au" "~" "RR") + (;; consonants -- 40 + ("k" "c") "kh" "g" "gh" "ng" + "ch" ("Ch" "chh") "j" "jh" "nj" + "T" "Th" "D" "Dh" "N" + "th" "thh" "d" "dh" "n" nil + "p" ("ph" "f") "b" "bh" "m" + "y" "r" "rr" "l" "L" "zh" ("v" "w") + ("S" "z") "sh" "s" "h" + nil nil nil nil nil nil nil nil + nil "X" + ;; some of these are extra to Mozhi + ("t" "tt") "nt" "tth" "tthh" "nnj" "nng" "nn" + "nch" "nc" "nk" "cch" "cchh" "cc" + "B" ("C" "K" "q") "G" "J" "M" "P" "V" "x" "Z") + (;; misc -- 7 + nil nil "H"))) + (defvar indian-kyoto-harvard-table '(;; for encode/decode (;; vowel @@ -524,6 
+550,10 @@ indian-mlm-itrans-v5-hash (indian-make-hash indian-mlm-base-table indian-itrans-v5-table)) +(defvar indian-mlm-mozhi-hash + (indian-make-hash indian-mlm-base-table + indian-mlm-mozhi-table)) + (defvar indian-tml-itrans-v5-hash (indian-make-hash indian-tml-base-table indian-itrans-v5-table-for-tamil)) diff --git a/lisp/leim/quail/indian.el b/lisp/leim/quail/indian.el index 2681eab0e5..100ae63f6a 100644 --- a/lisp/leim/quail/indian.el +++ b/lisp/leim/quail/indian.el @@ -117,6 +117,7 @@ "\\''" indian-knd-itrans-v5-hash "kannada-itrans" "Kannada" "KndIT" "Kannada transliteration by ITRANS method.") +;; ITRANS not applicable to Malayalam & could be removed eventually (if nil (quail-define-package "malayalam-itrans" "Malayalam" "MlmIT" t "Malayalam ITRANS")) (quail-define-indian-trans-package @@ -358,24 +359,23 @@ inscript-mlm-keytable '( (;; VOWELS (18) (?D nil) (?E ?e) (?F ?f) (?R ?r) (?G ?g) (?T ?t) - (?+ ?=) ("F]" "f]") (?! ?@) (?S ?s) (?Z ?z) (?W ?w) - (?| ?\\) (?~ ?`) (?A ?a) (?Q ?q) ("+]" "=]") ("R]" "r]")) + (?= ?+) nil nil (?S ?s) (?Z ?z) (?W ?w) + nil (?~ ?`) (?A ?a) (?Q ?q)) (;; CONSONANTS (42) ?k ?K ?i ?I ?U ;; GRUTTALS ?\; ?: ?p ?P ?} ;; PALATALS ?' 
?\" ?\[ ?{ ?C ;; CEREBRALS - ?l ?L ?o ?O ?v ?V ;; DENTALS + ?l ?L ?o ?O ?v nil ;; DENTALS ?h ?H ?y ?Y ?c ;; LABIALS - ?/ ?j ?J ?n ?N "N]" ?b ;; SEMIVOWELS + ?/ ?j ?J ?n ?N ?B ?b ;; SEMIVOWELS ?M ?< ?m ?u ;; SIBILANTS - "k]" "K]" "i]" "p]" "[]" "{]" "H]" "/]" ;; NUKTAS - ?% ?&) + nil nil nil nil nil nil nil nil nil) ;; NUKTAS (;; Misc Symbols (7) - ?X ?x ?_ ">]" ?d "X]" ?>) + nil ?x ?_ nil ?d) (;; Digits ?0 ?1 ?2 ?3 ?4 ?5 ?6 ?7 ?8 ?9) - (;; Inscripts - ?# ?$ ?^ ?* ?\]))) + (;; Chillus + "Cd" "Cd]" "vd" "vd]" "jd" "jd]" "nd" "nd]" "Nd" "Nd]"))) (defvar inscript-tml-keytable '( @@ -463,6 +463,9 @@ inscript-tml-keytable "malayalam-inscript" "Malayalam" "MlmIS" "Malayalam keyboard Inscript.") +(quail-defrule "\\" ?‌) +(quail-defrule "X" ?​) + (if nil (quail-define-package "tamil-inscript" "Tamil" "TmlIS" t "Tamil keyboard Inscript")) (quail-define-inscript-package @@ -571,4 +574,72 @@ inscript-tml-keytable ("?" ?\?) ("/" ?্)) +(defun indian-mlm-mozhi-update-translation (control-flag) + (let ((len (length quail-current-key)) chillu + (vowels '(?a ?e ?i ?o ?u ?A ?E ?I ?O ?U ?R))) + (cond ((numberp control-flag) + (progn (if (= control-flag 0) + (setq quail-current-str quail-current-key) + (cond (input-method-exit-on-first-char) + ((and (memq (aref quail-current-key + (1- control-flag)) + vowels) + (setq chillu (cl-position + (aref quail-current-key + control-flag) + '(?m ?N ?n ?r ?l ?L)))) + ;; conditions for putting chillu + (and (or (and (= control-flag (1- len)) + (not (setq control-flag nil))) + (and (= control-flag (- len 2)) + (let ((temp (aref quail-current-key + (1- len)))) + ;; is it last char of word? 
+ (not + (or (and (>= temp ?a) (<= temp ?z)) + (and (>= temp ?A) (<= temp ?Z)) + (eq temp ?~)))) + (setq control-flag (1+ control-flag)))) + (setq quail-current-str ;; put chillu + (concat (if (not (stringp + quail-current-str)) + (string quail-current-str) + quail-current-str) + (string + (nth chillu '(?ം ?ൺ ?ൻ ?ർ ?ൽ ?ൾ))))))))) + (and (not input-method-exit-on-first-char) control-flag + (while (> len control-flag) + (setq len (1- len)) + (setq unread-command-events + (cons (aref quail-current-key len) + unread-command-events)))) + )) + ((null control-flag) + (unless quail-current-str + (setq quail-current-str quail-current-key) + )) + ((equal control-flag t) + (if (memq (aref quail-current-key (1- len)) ;; If vowel ending, + vowels) ;; may have to put + (setq control-flag nil))))) ;; chillu. So don't + control-flag) ;; end translation + +(quail-define-package "malayalam-mozhi" "Malayalam" "MlmMI" t + "Malayalam transliteration by Mozhi method." + nil nil t nil nil nil t nil + 'indian-mlm-mozhi-update-translation) + +(maphash + (lambda (key val) + (quail-defrule key (if (= (length val) 1) + (string-to-char val) + (vector val)))) + (cdr indian-mlm-mozhi-hash)) + +(defun indian-mlm-mozhi-underscore (key len) (throw 'quail-tag nil)) + +(quail-defrule "_" 'indian-mlm-mozhi-underscore) +(quail-defrule "|" ?‌) +(quail-defrule "||" ?​) + ;;; indian.el ends here -- 2.20.1