[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Emacs-diffs] emacs-25 61a4b57: * lisp/character-fold.el: Add back multi
From: |
Artur Malabarba |
Subject: |
[Emacs-diffs] emacs-25 61a4b57: * lisp/character-fold.el: Add back multi-char matching |
Date: |
Tue, 01 Dec 2015 13:55:55 +0000 |
branch: emacs-25
commit 61a4b57f1d90f81fb491abfebd94a5a6389db62f
Author: Artur Malabarba <address@hidden>
Commit: Artur Malabarba <address@hidden>
* lisp/character-fold.el: Add back multi-char matching
(character-fold-to-regexp): Uncomment recently commented code
and make the algorithm "dummer" by not checking every possible
combination. This will miss some possible matches, but it
greatly reduces regexp size.
* test/automated/character-fold-tests.el
(character-fold--test-fold-to-regexp): Comment out test of
functionality no longer supported.
---
lisp/character-fold.el | 68 +++++++++++++++++--------------
test/automated/character-fold-tests.el | 5 ++-
2 files changed, 41 insertions(+), 32 deletions(-)
diff --git a/lisp/character-fold.el b/lisp/character-fold.el
index 07537d8..fb28bae 100644
--- a/lisp/character-fold.el
+++ b/lisp/character-fold.el
@@ -180,43 +180,49 @@ from which to start."
(regexp-quote (string c))))
(alist nil))
;; Long string. The regexp would probably be too long.
- ;; (unless (> end 50)
- ;; (setq alist (aref multi-char-table c))
- ;; (when case-fold-search
- ;; (let ((other-c (aref lower-case-table c)))
- ;; (when (or (not other-c)
- ;; (eq other-c c))
- ;; (setq other-c (aref upper-case-table c)))
- ;; (when other-c
- ;; (setq alist (append alist (aref multi-char-table
other-c)))
- ;; (setq regexp (concat "\\(?:" regexp "\\|"
- ;; (or (aref character-fold-table
other-c)
- ;; (regexp-quote (string
other-c)))
- ;; "\\)"))))))
- (push (let ((alist-out '("\\)")))
- (pcase-dolist (`(,suffix . ,out-regexp) alist)
- (let ((len-suf (length suffix)))
+ (unless (> end 50)
+ (setq alist (aref multi-char-table c))
+ (when case-fold-search
+ (let ((other-c (aref lower-case-table c)))
+ (when (or (not other-c)
+ (eq other-c c))
+ (setq other-c (aref upper-case-table c)))
+ (when other-c
+ (setq alist (append alist (aref multi-char-table
other-c)))
+ (setq regexp (concat "\\(?:" regexp "\\|"
+ (or (aref character-fold-table
other-c)
+ (regexp-quote (string other-c)))
+ "\\)"))))))
+ (push (let ((matched-entries nil)
+ (max-length 0))
+ (dolist (entry alist)
+ (let* ((suffix (car entry))
+ (len-suf (length suffix)))
(when (eq (compare-strings suffix 0 nil
string (1+ i) (+ i 1
len-suf)
nil)
t)
- ;; FIXME: If N suffixes match, we "branch"
- ;; out into N+1 executions for the rest of
- ;; the string. This involves redundant
- ;; work and makes a huge regexp.
- (push (concat "\\|" out-regexp
- (character-fold-to-regexp
- string nil (+ i 1 len-suf)))
- alist-out))))
+ (push (cons len-suf (cdr entry)) matched-entries)
+ (setq max-length (max max-length len-suf)))))
;; If no suffixes matched, just go on.
- (if (not (cdr alist-out))
+ (if (not matched-entries)
regexp
- ;; Otherwise, add a branch for the
- ;; no-suffix case, and stop the loop here.
- (prog1 (apply #'concat "\\(?:" regexp
- (character-fold-to-regexp string nil (1+
i))
- alist-out)
- (setq i end))))
+;;; If N suffixes match, we "branch" out into N+1 executions for the
+;;; length of the longest match. This means "fix" will match "fix" but
+;;; not "fⅸ", but it's necessary to keep the regexp size from scaling
+;;; exponentially. See
https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html
+ (let ((subs (substring string (1+ i) (+ i 1
max-length))))
+ ;; `i' is still going to inc by 1 below.
+ (setq i (+ i max-length))
+ (concat
+ "\\(?:"
+ (mapconcat (lambda (entry)
+ (let ((length (car entry))
+ (suffix-regexp (cdr entry)))
+ (concat suffix-regexp
+ (character-fold-to-regexp
subs nil length))))
+ `((0 . ,regexp) . ,matched-entries) "\\|")
+ "\\)"))))
out))))
(setq i (1+ i)))
(when (> spaces 0)
diff --git a/test/automated/character-fold-tests.el
b/test/automated/character-fold-tests.el
index cf19584..40735e5 100644
--- a/test/automated/character-fold-tests.el
+++ b/test/automated/character-fold-tests.el
@@ -93,7 +93,10 @@
(aset multi ?1 '(("2" . "yy")))
(character-fold--test-match-exactly "a1" "xx44" "99")
(character-fold--test-match-exactly "a12" "77" "xx442" "992")
- (character-fold--test-match-exactly "a12" "xxyy")))
+ ;; Support for this case is disabled. See function definition or:
+ ;; https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html
+ ;; (character-fold--test-match-exactly "a12" "xxyy")
+ ))
(provide 'character-fold-tests)
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- [Emacs-diffs] emacs-25 61a4b57: * lisp/character-fold.el: Add back multi-char matching,
Artur Malabarba <=