emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master d259328: Further Unicode restrictive fixups


From: Lars Ingebrigtsen
Subject: [Emacs-diffs] master d259328: Further Unicode restrictive fixups
Date: Tue, 29 Dec 2015 16:46:21 +0000

branch: master
commit d259328fb87db8cc67d52771efcfa653e52c5b71
Author: Lars Ingebrigtsen <address@hidden>
Commit: Lars Ingebrigtsen <address@hidden>

    Further Unicode restrictive fixups
    
    * puny.el (puny-highly-restrictive-p): Include the extra
    identifier characters from table 3.
---
 lisp/net/puny.el |   31 +++++++++++++++++++++++++++----
 1 files changed, 27 insertions(+), 4 deletions(-)

diff --git a/lisp/net/puny.el b/lisp/net/puny.el
index 08da51b..ac47e13 100644
--- a/lisp/net/puny.el
+++ b/lisp/net/puny.el
@@ -191,13 +191,36 @@ For instance \"xn--bcher-kva\" => \"bücher\"."
     (buffer-string)))
 
 ;; http://www.unicode.org/reports/tr39/#Restriction_Level_Detection
+;; http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Inclusion_in_Identifiers
 
 (defun puny-highly-restrictive-p (string)
   (let ((scripts
-         (seq-uniq
-          (seq-map (lambda (char)
-                     (aref char-script-table char))
-                   string))))
+         (delq
+          t
+          (seq-uniq
+           (seq-map (lambda (char)
+                      (if (memq char
+                                ;; These characters are always allowed
+                                ;; in any string.
+                                '(#x0027 ; APOSTROPHE
+                                  #x002D ; HYPHEN-MINUS
+                                  #x002E ; FULL STOP
+                                  #x003A ; COLON
+                                  #x00B7 ; MIDDLE DOT
+                                  #x058A ; ARMENIAN HYPHEN
+                                  #x05F3 ; HEBREW PUNCTUATION GERESH
+                                  #x05F4 ; HEBREW PUNCTUATION GERSHAYIM
+                                  #x0F0B ; TIBETAN MARK INTERSYLLABIC TSHEG
+                                  #x200C ; ZERO WIDTH NON-JOINER*
+                                  #x200D ; ZERO WIDTH JOINER*
+                                  #x2010 ; HYPHEN
+                                  #x2019 ; RIGHT SINGLE QUOTATION MARK
+                                  #x2027 ; HYPHENATION POINT
+                                  #x30A0 ; KATAKANA-HIRAGANA DOUBLE HYPHEN
+                                  #x30FB)) ; KATAKANA MIDDLE DOT
+                          t
+                        (aref char-script-table char)))
+                    string)))))
     (or
      ;; Every character uses the same script.
      (= (length scripts) 1)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]