[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
bug#28339: 25.2; Emacs shows ZWNJ character (Zero Width non-Joiner) as S
From: |
handa |
Subject: |
bug#28339: 25.2; Emacs shows ZWNJ character (Zero Width non-Joiner) as Space |
Date: |
Fri, 06 Oct 2017 19:05:41 +0900 |
In article
<CALp2H_2MWgjoEEm6Rp5+5uOdMk-RbFWzaCrweo=pbdzAaq8btA@mail.gmail.com>, Nima
Aryan <nimawebgard@gmail.com> writes:
> As a user I prefer absorb mode by default but some times thin-space (and
> not simple space) might be a good option to consider.
Attached patch introduces a customizable variable
arabic-shaper-ZWNJ-handling. Shall I install it?
---
K. Handa
handa@gnu.org
------------------------------------------------------------
diff --git a/lisp/composite.el b/lisp/composite.el
index ab39e08..72b0ffc 100644
--- a/lisp/composite.el
+++ b/lisp/composite.el
@@ -442,8 +442,10 @@ lglyph-set-width
(defsubst lglyph-set-adjustment (glyph &optional xoff yoff wadjust)
(aset glyph 9 (vector (or xoff 0) (or yoff 0) (or wadjust 0))))
+;; Return the shallow Copy of GLYPH.
(defsubst lglyph-copy (glyph) (copy-sequence glyph))
+;; Insert GLYPH at the index IDX of GSTRING.
(defun lgstring-insert-glyph (gstring idx glyph)
(let ((nglyphs (lgstring-glyph-len gstring))
(i idx))
@@ -459,6 +461,18 @@ lgstring-insert-glyph
(lgstring-set-glyph gstring i glyph)
gstring))
+;; Remove glyph at IDX from GSTRING.
+(defun lgstring-remove-glyph (gstring idx)
+ (setq gstring (copy-sequence gstring))
+ (lgstring-set-id gstring nil)
+ (let ((len (length gstring)))
+ (setq idx (+ idx 3))
+ (while (< idx len)
+ (aset gstring (1- idx) (aref gstring idx))
+ (setq idx (1+ idx)))
+ (aset gstring (1- len) nil))
+ gstring)
+
(defun compose-glyph-string (gstring from to)
(let ((glyph (lgstring-glyph gstring from))
from-pos to-pos)
diff --git a/lisp/language/misc-lang.el b/lisp/language/misc-lang.el
index 2843c7c..4e10227 100644
--- a/lisp/language/misc-lang.el
+++ b/lisp/language/misc-lang.el
@@ -75,12 +75,72 @@ 'cp1256
(sample-text . "Persian فارسی")
(documentation . "Bidirectional editing is supported.")))
+(defcustom arabic-shaper-ZWNJ-handling nil
+ "How to handle ZWMJ in Arabic text renderling.
+This variable controls the way to handle a glyph for ZWNJ
+returned by the underling shaping engine.
+
+The default value is nil, which means that the ZWNJ glyph is
+displayed as is.
+
+If the value is `absorb', ZWNJ is absorbed into the previous
+grapheme cluster, and not displayed.
+
+If the value is `as-space', the glyph is displayed by a
+thin (i.e. 1-dot width) space.
+
+Customizing the value takes effect when you start Emacs next time."
+ :group 'mule
+ :version "27.1"
+ :type '(choice
+ (const :tag "default" nil)
+ (const :tag "as space" as-space)
+ (const :tag "absorb" absorb)))
+
+(defvar arabic-shape-log nil)
+
+(defun arabic-shape-gstring (gstring)
+ (setq gstring (font-shape-gstring gstring))
+ (push arabic-shaper-ZWNJ-handling arabic-shape-log)
+ (condition-case err
+ (when arabic-shaper-ZWNJ-handling
+ (let ((font (lgstring-font gstring))
+ (i 1)
+ (len (lgstring-glyph-len gstring))
+ (modified nil))
+ (while (< i len)
+ (let ((glyph (lgstring-glyph gstring i)))
+ (when (eq (lglyph-char glyph) #x200c)
+ (cond
+ ((eq arabic-shaper-ZWNJ-handling 'as-space)
+ (if (> (- (lglyph-rbearing glyph) (lglyph-lbearing glyph)) 0)
+ (let ((space-glyph (aref (font-get-glyphs font 0 1 " ")
0)))
+ (when space-glyph
+ (lglyph-set-code glyph (aref space-glyph 3))
+ (lglyph-set-width glyph (aref space-glyph 4)))))
+ (lglyph-set-adjustment glyph 0 0 1)
+ (setq modified t))
+ ((eq arabic-shaper-ZWNJ-handling 'absorb)
+ (let ((prev (lgstring-glyph gstring (1- i))))
+ (lglyph-set-from-to prev (lglyph-from prev) (lglyph-to
glyph))
+ (push (cons "remove" (lgstring-glyph gstring i))
+ arabic-shape-log)
+ (setq gstring (lgstring-remove-glyph gstring i))
+ (setq len (1- len)))
+ (setq modified t)))))
+ (setq i (1+ i)))
+ (if modified
+ (lgstring-set-id gstring nil))))
+ (error (push err arabic-shape-log)))
+ gstring)
+
(set-char-table-range
composition-function-table
'(#x600 . #x74F)
- (list (vector "[\u0600-\u074F\u200C\u200D]+" 0 'font-shape-gstring)
- (vector "[\u200C\u200D][\u0600-\u074F\u200C\u200D]+"
- 1 'font-shape-gstring)))
+ (list (vector "[\u0600-\u074F\u200C\u200D]+" 0
+ 'arabic-shape-gstring)
+ (vector "[\u200C\u200D][\u0600-\u074F\u200C\u200D]+" 1
+ 'arabic-shape-gstring)))
(provide 'misc-lang)
- bug#28339: 25.2; Emacs shows ZWNJ character (Zero Width non-Joiner) as Space,
handa <=