emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master c7a19e0: * lisp/isearch.el: Fold many unicode characters to ASCII


From: Artur Malabarba
Subject: [Emacs-diffs] master c7a19e0: * lisp/isearch.el: Fold many unicode characters to ASCII
Date: Tue, 23 Jun 2015 19:09:48 +0000

branch: master
commit c7a19e0c80ec6134ab6fb1950d3e1ac59a7b986f
Author: Artur Malabarba <address@hidden>
Commit: Artur Malabarba <address@hidden>

    * lisp/isearch.el: Fold many unicode characters to ASCII
    
    (isearch-character-fold-search, isearch--character-fold-extras)
    (isearch--character-fold-table): New variable.
    (isearch--character-folded-regexp): New function.
    (isearch-search-fun-default): Use them.
    
    * lisp/replace.el (replace-character-fold): New variable.
    (replace-search): Use it.
    
    * etc/NEWS: Document it.
---
 etc/NEWS        |   14 ++++++++++
 lisp/isearch.el |   78 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lisp/replace.el |    9 ++++++
 3 files changed, 101 insertions(+), 0 deletions(-)

diff --git a/etc/NEWS b/etc/NEWS
index 31055ac..e4cf2d6 100644
--- a/etc/NEWS
+++ b/etc/NEWS
@@ -84,6 +84,20 @@ command line when `initial-buffer-choice' is non-nil.
 
 * Changes in Emacs 25.1
 
+** `isearch' and `query-replace' now perform character folding in matches.
+This is analogous to case-folding, but applies between Unicode
+characters and their ASCII counterparts. This means many characters
+will match entire groups of characters.
+
+For instance, the character " will match all variants of Unicode double quotes
+(like “ and ”), and the letter a will match all of its accented
+cousins, even those composed of multiple characters, as well as many
+other symbols like ℀, ℁, ⒜, and ⓐ.
+
+** New function `isearch--character-folded-regexp' can be used
+by searching commands to produce a regexp matching anything that
+character-folds into STRING.
+
 ** New command `checkdoc-package-keywords' checks if the
 current package keywords are recognized. Set the new option
 `checkdoc-package-keywords-flag' to non-nil to make
diff --git a/lisp/isearch.el b/lisp/isearch.el
index d1b92bd..44ce902 100644
--- a/lisp/isearch.el
+++ b/lisp/isearch.el
@@ -272,6 +272,79 @@ Default value, nil, means edit the string instead."
   :version "23.1"
   :group 'isearch)
 
+(defvar isearch-character-fold-search t
+  "Non-nil if isearch should fold similar characters.
+This means some characters will match entire groups of characters.
+For instance, \" will match all variants of double quotes, and
+the letter a will match all of its accented versions (and then
+some).")
+
+(defconst isearch--character-fold-extras
+  '((?\" """ "“" "”" "”" "„" "⹂" "〞" "‟" "‟" "❞" "❝" "❠" "“" "„" "〝" "〟" "🙷" "🙶" "🙸" "«" "»")
+    (?' "❟" "❛" "❜" "‘" "’" "‚" "‛" "‚" "󠀢" "❮" "❯" "‹" "›")
+    (?` "❛" "‘" "‛" "󠀢" "❮" "‹")
+    ;; `isearch-character-fold-search' doesn't interact with
+    ;; `isearch-lax-whitespace' yet.  So we need to add this here.
+    (?\s "     " "\r" "\n"))
+  "Extra entries to add to `isearch--character-fold-table'.
+Used to specify character folding not covered by unicode
+decomposition.  Each car is a character and each cdr is a list of
+strings that it should match (itself excluded).")
+
+(defvar isearch--character-fold-table
+  (eval-when-compile
+    (require 'subr-x)
+    (let ((equiv (make-char-table 'character-fold-table)))
+      ;; Compile a list of all complex characters that each simple
+      ;; character should match.
+      (dotimes (i (length equiv))
+        (let ((dd (get-char-code-property i 'decomposition))
+              d k found)
+          ;; Skip trivial cases (?a decomposes to (?a)).
+          (unless (and (eq i (car dd)))
+            ;; Discard a possible formatting tag.
+            (when (symbolp (car-safe dd))
+              (setq dd (cdr dd)))
+            ;; Is k a number or letter, per unicode standard?
+            (setq d dd)
+            (while (and d (not found))
+              (setq k (pop d))
+              (setq found (and (characterp k)
+                               (memq (get-char-code-property k 'general-category)
+                                     '(Lu Ll Lt Lm Lo Nd Nl No)))))
+            ;; If there's no number or letter on the
+            ;; decomposition, find the first character in it.
+            (setq d dd)
+            (while (and d (not found))
+              (setq k (pop d))
+              (setq found (characterp k)))
+            ;; Add i to the list of characters that k can
+            ;; represent. Also add its decomposition, so we can
+            ;; match multi-char representations like (format "a%c" 769)
+            (when (and found (not (eq i k)))
+              (aset equiv k (cons (apply #'string dd)
+                                  (cons (char-to-string i)
+                                        (aref equiv k))))))))
+      (dotimes (i (length equiv))
+        (when-let ((chars (append (cdr (assq i isearch--character-fold-extras))
+                                  (aref equiv i))))
+          (aset equiv i (regexp-opt (cons (char-to-string i) chars)))))
+      equiv))
+  "Used for folding characters of the same group during search.")
+
+(defun isearch--character-folded-regexp (string)
+  "Return a regexp matching anything that character-folds into STRING.
+If `isearch-character-fold-search' is nil, `regexp-quote' string.
+Otherwise, any character in STRING that has an entry in
+`isearch--character-fold-table' is replaced with that entry
+\(which is a regexp) and other characters are `regexp-quote'd."
+  (if isearch-character-fold-search
+      (apply #'concat
+        (mapcar (lambda (c) (or (aref isearch--character-fold-table c)
+                           (regexp-quote (string c))))
+                string))
+    (regexp-quote string)))
+
 (defcustom isearch-lazy-highlight t
   "Controls the lazy-highlighting during incremental search.
 When non-nil, all text in the buffer matching the current search
@@ -2607,6 +2680,11 @@ Can be changed via `isearch-search-fun-function' for special needs."
       're-search-backward-lax-whitespace))
    (isearch-regexp
     (if isearch-forward 're-search-forward 're-search-backward))
+   (isearch-character-fold-search
+    (lambda (string &optional bound noerror count)
+      (funcall (if isearch-forward #'re-search-forward #'re-search-backward)
+        (isearch--character-folded-regexp string)
+        bound noerror count)))
    ((and isearch-lax-whitespace search-whitespace-regexp)
     (if isearch-forward
        'search-forward-lax-whitespace
diff --git a/lisp/replace.el b/lisp/replace.el
index 74909ef..5e3ddc5 100644
--- a/lisp/replace.el
+++ b/lisp/replace.el
@@ -33,6 +33,14 @@
   :type 'boolean
   :group 'matching)
 
+(defcustom replace-character-fold t
+  "Non-nil means `query-replace' should do character folding in matches.
+This means, for instance, that ' will match a large variety of
+unicode quotes."
+  :type 'boolean
+  :group 'matching
+  :version "25.1")
+
 (defcustom replace-lax-whitespace nil
   "Non-nil means `query-replace' matches a sequence of whitespace chars.
 When you enter a space or spaces in the strings to be replaced,
@@ -2005,6 +2013,7 @@ It is called with three arguments, as if it were
   ;; used after `recursive-edit' might override them.
   (let* ((isearch-regexp regexp-flag)
         (isearch-word delimited-flag)
+         (isearch-character-fold-search replace-character-fold)
         (isearch-lax-whitespace
          replace-lax-whitespace)
         (isearch-regexp-lax-whitespace



reply via email to

[Prev in Thread] Current Thread [Next in Thread]