[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/trie 2a9d7ec 099/111: Port efficiency improvements to t
From: |
Stefan Monnier |
Subject: |
[elpa] externals/trie 2a9d7ec 099/111: Port efficiency improvements to trie-fuzzy-match. |
Date: |
Mon, 14 Dec 2020 11:35:29 -0500 (EST) |
branch: externals/trie
commit 2a9d7ec2295d301662eef82dcb1ae3c4ddc2f22c
Author: Toby S. Cubitt <toby-predictive@dr-qubit.org>
Commit: Toby S. Cubitt <toby-predictive@dr-qubit.org>
Port efficiency improvements to trie-fuzzy-match.
Make argument-passing to trie-fuzzy-[complete|match] cleaner.
---
trie.el | 197 +++++++++++++++++++++++++++++++++++++++++++++-------------------
1 file changed, 138 insertions(+), 59 deletions(-)
diff --git a/trie.el b/trie.el
index 22a5d0c..300691d 100644
--- a/trie.el
+++ b/trie.el
@@ -232,14 +232,32 @@
(cond
((< (cdar a) (cdar b)) t)
((> (cdar a) (cdar b)) nil)
- (t (funcall compfun (nth 0 (car a)) (nth 0 (car b))))))))
+ (t (funcall compfun (caar a) (caar b)))))))
(defun trie--construct-fuzzy-match-rankfun (comparison-function)
`(lambda (a b)
(cond
((< (cdar a) (cdar b)) t)
((> (cdar a) (cdar b)) nil)
(t ,(trie-construct-sortfun comparison-function)
- (nth 0 (car a)) (nth 0 (car b)))))))
+ (caar a) (caar b))))))
+
+
+(trie--if-lexical-binding
+ (defun trie--construct-fuzzy-match-dist-rankfun (rankfun)
+ (lambda (a b)
+ (cond
+ ((< (cdar a) (cdar b)) t)
+ ((> (cdar a) (cdar b)) nil)
+ (t (funcall rankfun
+ (cons (caar a) (cdr a))
+ (cons (caar b) (cdr b)))))))
+ (defun trie--construct-fuzzy-match-dist-rankfun (rankfun)
+ `(lambda (a b)
+ (cond
+ ((< (cdar a) (cdar b)) t)
+ ((> (cdar a) (cdar b)) nil)
+ (t (,rankfun (cons (caar a) (cdr a))
+ (cons (caar b) (cdr b))))))))
(trie--if-lexical-binding
@@ -249,14 +267,33 @@
(cond
((< (nth 1 (car a)) (nth 1 (car b))) t)
((> (nth 1 (car a)) (nth 1 (car b))) nil)
- (t (funcall compfun (nth 0 (car a)) (nth 0 (car b))))))))
+ (t (funcall compfun (caar a) (caar b)))))))
(defun trie--construct-fuzzy-complete-rankfun (comparison-function)
`(lambda (a b)
(cond
((< (nth 1 (car a)) (nth 1 (car b))) t)
((> (nth 1 (car a)) (nth 1 (car b))) nil)
(t ,(trie-construct-sortfun comparison-function)
- (nth 0 (car a)) (nth 0 (car b)))))))
+ (caar a) (caar b))))))
+
+
+(trie--if-lexical-binding
+ (defun trie--construct-fuzzy-complete-dist-rankfun (rankfun)
+ (lambda (a b)
+ (cond
+ ((< (nth 1 (car a)) (nth 1 (car b))) t)
+ ((> (nth 1 (car a)) (nth 1 (car b))) nil)
+ (t (funcall rankfun
+ (cons (caar a) (cdr a))
+ (cons (caar b) (cdr b)))))))
+ (defun trie--construct-fuzzy-complete-dist-rankfun (rankfun)
+ `(lambda (a b)
+ (cond
+ ((< (nth 1 (car a)) (nth 1 (car b))) t)
+ ((> (nth 1 (car a)) (nth 1 (car b))) nil)
+ (t (,rankfun
+ (cons (caar a) (cdr a))
+ (cons (caar b) (cdr b))))))))
@@ -2218,35 +2255,48 @@ of the default key-dist-data list."
;; convert trie from print-form if necessary
(trie-transform-from-read-warn trie)
- ;; construct rankfun to sort by Lewenstein distance if requested
- (when (eq rankfun t)
- (setq rankfun (trie--construct-fuzzy-match-rankfun
- (trie--comparison-function trie))))
+ (let ((equalfun (trie--construct-equality-function
+ (trie--comparison-function trie)))
+ ranked-by-dist stats)
+ ;; construct rankfun to sort by Lewenstein distance if requested
+ (cond
+ ((eq rankfun t)
+ (setq rankfun (trie--construct-fuzzy-match-rankfun
+ (trie--comparison-function trie))
+ ranked-by-dist 'dist-only))
+ ((eq (car-safe rankfun) t)
+ (setq rankfun (trie--construct-fuzzy-match-dist-rankfun
+ (cdr rankfun))
+ ranked-by-dist t)))
+ (when ranked-by-dist (setq stats (make-list (1+ distance) 0)))
- ;; accumulate results
- (trie--accumulate-results
- rankfun maxnum reverse filter resultfun accumulator nil
- (funcall (trie--mapfun trie)
- (lambda (node)
- (trie--do-fuzzy-match
- node
- (apply #'vector (number-sequence 0 (length string)))
- (cond ((stringp string) "") ((listp string) ()) (t []))
- ;; FIXME: Would it pay to replace these arguments with
- ;; dynamically-scoped variables, to save stack space?
- string distance (if maxnum reverse (not reverse))
- (trie--comparison-function trie)
- (trie--construct-equality-function
- (trie--comparison-function trie))
- (trie--lookupfun trie)
- (trie--mapfun trie)
- accumulator))
- (trie--node-subtree (trie--root trie))
- (if maxnum reverse (not reverse)))))
+ ;; accumulate results
+ (trie--accumulate-results
+ rankfun maxnum reverse filter resultfun accumulator nil
+ (funcall (trie--mapfun trie)
+ (lambda (node)
+ (trie--do-fuzzy-match
+ node
+ (apply #'vector (number-sequence 0 (length string)))
+ (cond ((stringp string) "") ((listp string) ()) (t []))
+ ;; FIXME: Would it pay to replace these arguments with
+ ;; dynamically-scoped variables, to save stack space?
+ string distance (if maxnum reverse (not reverse))
+ (trie--comparison-function trie)
+ equalfun
+ (trie--lookupfun trie)
+ (trie--mapfun trie)
+ accumulator
+ ranked-by-dist
+ (and ranked-by-dist maxnum)
+ (and ranked-by-dist maxnum stats)))
+ (trie--node-subtree (trie--root trie))
+ (if maxnum reverse (not reverse))))))
(defun trie--do-fuzzy-match (node row seq string distance reverse
- cmpfun equalfun lookupfun mapfun accumulator)
+ cmpfun equalfun lookupfun mapfun accumulator
+ ranked-by-dist maxnum stats)
;; Search everything below NODE for matches within Lewenstein distance
;; DISTANCE of STRING. ROW is the previous row of the Lewenstein table. SEQ
;; is the sequence corresponding to NODE. Remaining arguments are
@@ -2256,24 +2306,36 @@ of the default key-dist-data list."
;; entry of row is <= DISTANCE), accumulate result
(if (trie--node-data-p node)
(when (<= (aref row (1- (length row))) distance)
- (funcall accumulator
- (cons seq (aref row (1- (length row))))
- (trie--node-data node)))
+ (let ((dist (aref row (1- (length row)))))
+ (funcall accumulator (cons seq dist) (trie--node-data node))
+ (and stats
+ (incf (nth dist stats))
+ (eq ranked-by-dist 'dist-only)
+ (>= (nth 0 stats) maxnum)
+ (throw 'trie--accumulate-done nil))))
;; build next row of Lewenstein table
(setq row (Lewenstein--next-row
row string (trie--node-split node) equalfun)
seq (trie--seq-append seq (trie--node-split node)))
- ;; as long as some row entry is <= DISTANCE, recursively search below NODE
- (when (<= (apply #'min (append row nil)) distance)
- (funcall mapfun
- (lambda (n)
- (trie--do-fuzzy-match
- n row seq string distance reverse
- cmpfun equalfun lookupfun mapfun accumulator))
- (trie--node-subtree node)
- reverse))))
+ ;; MIN = minimum possible prefix cost for any continuation of SEQ
+ ;; NUM = number of guaranteed-better matches already accumulated
+ (let* ((min (apply #'min (append row nil)))
+ (num (and ranked-by-dist
+ (apply #'+ (cl-subseq stats 0 min)))))
+ ;; skip subtree if we already have enough guaranteed-better completions
+ (when (or (null ranked-by-dist) (< num maxnum))
+ ;; as long as some row entry is <= DISTANCE, recursively search below
NODE
+ (when (<= min distance)
+ (funcall mapfun
+ (lambda (n)
+ (trie--do-fuzzy-match
+ n row seq string distance reverse
+ cmpfun equalfun lookupfun mapfun accumulator
+ ranked-by-dist maxnum stats))
+ (trie--node-subtree node)
+ reverse))))))
@@ -2434,8 +2496,7 @@ results\)."
;; Fuzzy completing
(defun trie-fuzzy-complete
- (trie prefix distance &optional rankfun maxnum reverse filter resultfun
- ranked-by-dist)
+ (trie prefix distance &optional rankfun maxnum reverse filter resultfun)
"Return completions of prefixes within Lewenstein DISTANCE of PREFIX
along with their associated data, in the order defined by
RANKFUN, defaulting to \"lexicographic\" order \(i.e. the order
@@ -2465,10 +2526,24 @@ The optional integer argument MAXNUM limits the results
to the
first MAXNUM matches. Otherwise, all matches are returned.
-RANKFUN overrides the default ordering of the results. If it is t,
-matches are instead ordered by increasing Lewenstein distance of
-their prefix \(with same-distance prefixes ordered
-lexicographically\).
+RANKFUN overrides the default lexicographic ordering of the
+results. If it is t, matches are instead ordered by increasing
+Lewenstein distance of their prefix, with same-distance prefixes
+ordered lexicographically.
+
+If RANKFUN is a cons cell of the form
+
+ (t . FUNCTION)
+
+then matches are again ordered by increasing Lewenstein distance
+of their prefix, but with same-distance prefixes ordered by
+FUNCTION. This should take two arguments, both of the form
+
+ (KEY . DATA)
+
+where KEY is a key from the trie and DATA is its associated data.
+FUNCTION should return non-nil if first argument is ranked
+strictly higher than the second, nil otherwise.
If RANKFUN is a function, it must accept two arguments, both of
the form:
@@ -2481,6 +2556,7 @@ return non-nil if first argument is ranked strictly
higher than
the second, nil otherwise.
+
The FILTER argument sets a filter function for the matches. If
supplied, it is called for each possible match with two
arguments: a (KEY DIST PFXLEN) list, and DATA. If the filter
@@ -2496,18 +2572,21 @@ of the default key-dist-data list."
;; convert trie from print-form if necessary
(trie-transform-from-read-warn trie)
- ;; construct rankfun to sort by Lewenstein distance if requested
- (cond
- ((eq rankfun t)
- (setq rankfun (trie--construct-fuzzy-complete-rankfun
- (trie--comparison-function trie))
- ranked-by-dist 'dist-only))
- ((null rankfun) (setq ranked-by-dist nil))
- (ranked-by-dist (setq ranked-by-dist t)))
-
(let ((equalfun (trie--construct-equality-function
(trie--comparison-function trie)))
- (stats (make-list (1+ distance) 0)))
+ ranked-by-dist stats)
+ ;; construct rankfun to sort by Lewenstein distance if requested
+ (cond
+ ((eq rankfun t)
+ (setq rankfun (trie--construct-fuzzy-complete-rankfun
+ (trie--comparison-function trie))
+ ranked-by-dist 'dist-only))
+ ((eq (car-safe rankfun) t)
+ (setq rankfun (trie--construct-fuzzy-complete-dist-rankfun
+ (cdr rankfun))
+ ranked-by-dist t)))
+ (when ranked-by-dist (setq stats (make-list (1+ distance) 0)))
+
;; accumulate results
(trie--accumulate-results
rankfun maxnum reverse filter resultfun accumulator nil
@@ -2562,8 +2641,8 @@ of the default key-dist-data list."
(setq pfxcost (aref row (1- (length row)))
pfxlen (length seq)))
- ;; min = minimum possible prefix cost for any continnuation of seq
- ;; num = number of guaranteed-better completions already accumulated
+ ;; MIN = minimum possible prefix cost for any continuation of SEQ
+ ;; NUM = number of guaranteed-better completions already accumulated
(let* ((min (apply #'min (append row nil)))
(num (and ranked-by-dist
(apply #'+ (cl-subseq stats 0 (min pfxcost min))))))
- [elpa] externals/trie c2b5e26 105/111: Myriad bug fixes and code refactoring in new fuzzy and ngram completion., (continued)
- [elpa] externals/trie c2b5e26 105/111: Myriad bug fixes and code refactoring in new fuzzy and ngram completion., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 63da3b1 111/111: * trie.el: Fix header which likely suffered a `M-q` accident, Stefan Monnier, 2020/12/14
- [elpa] externals/trie ff5e05f 040/111: Bumped copyright year, Stefan Monnier, 2020/12/14
- [elpa] externals/trie 2281926 020/111: Minor code reformatting and rearrangement, Stefan Monnier, 2020/12/14
- [elpa] externals/trie d99fb00 055/111: Simplified advice-based edebug pretty-printing of tries and dictionaries., Stefan Monnier, 2020/12/14
- [elpa] externals/trie b4d81bf 064/111: Trivial whitespace tidying., Stefan Monnier, 2020/12/14
- [elpa] externals/trie d45e9d5 062/111: Added autoload cookies., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 1c2790d 038/111: Replaced wildcard searches with more powerful and efficient regexp searches., Stefan Monnier, 2020/12/14
- [elpa] externals/trie bbfecae 085/111: Do lexbind test at compile-time instead of load-time., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 5e8e73f 081/111: Fix data wrapping handling in fuzzy query functions., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 2a9d7ec 099/111: Port efficiency improvements to trie-fuzzy-match.,
Stefan Monnier <=
- [elpa] externals/trie a2554d6 094/111: Fix function symbol quoting., Stefan Monnier, 2020/12/14
- [elpa] externals/trie c6ddbb9 096/111: Bump version numbers., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 94a1a86 087/111: Bump version numbers since we've added iterator generators., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 4001f61 097/111: Fix corresponding bug in trie-fuzzy-complete-stack., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 91d299c 104/111: Pretty-print trie nodes in edebug., Stefan Monnier, 2020/12/14
- [elpa] externals/trie fc9b218 032/111: Removed support for non-terminal * wildcards, Stefan Monnier, 2020/12/14
- [elpa] externals/trie 5a064c0 092/111: Fix bug in trie-delete return value., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 9f49d95 086/111: Implement iterator generators on collection data structures., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 2957aec 103/111: Fix bugs in trie-fuzzy-match/complete., Stefan Monnier, 2020/12/14
- [elpa] externals/trie 3a734c3 077/111: Implement trie-fuzzy-match and trie-fuzzy-complete functions., Stefan Monnier, 2020/12/14