emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/org 962b796900 2/2: org-element: Implement tree search


From: ELPA Syncer
Subject: [elpa] externals/org 962b796900 2/2: org-element: Implement tree search caching
Date: Thu, 2 Jun 2022 22:57:47 -0400 (EDT)

branch: externals/org
commit 962b7969004af11a7d7739f0c2e914cfd8cbc350
Author: Ihor Radchenko <yantar92@gmail.com>
Commit: Ihor Radchenko <yantar92@gmail.com>

    org-element: Implement tree search caching
    
    * lisp/org-element.el (org-element--cache-hash-size):
    (org-element--cache-hash-statistics):
    (org-element--cache-hash-nocache):
    (org-element--cache-hash-size):
    (org-element--cache-hash-left):
    (org-element--cache-hash-right): Implement recent search cache for
    `org-element--cache-find'.  The cache stores recent cache tree queries
    as a vector with O(1) access time.
    
    (org-element--cache-find): Make use of `org-element--cache-hash-left'
    and `org-element--cache-hash-right' when cached query element starts
    at POS and SIDE is nil.  Record statistics.
    
    (org-element-cache-reset): Reset search cache on init.
    
    (org-element-cache-hash-show-statistics): Provide a command reporting
    the new caching efficiency.  Can be used for debugging/survey
    purposes.
    
    * lisp/org-macs.el (org-knuth-hash): Implement multiplicative hash
    function.
    
    Preliminary testing reveals that this simple strategy can reduce query
    time from O(Log N) down to O(1) for ~30%-50% cache queries.
---
 lisp/org-element.el | 169 +++++++++++++++++++++++++++++++++++++---------------
 lisp/org-macs.el    |   7 +++
 2 files changed, 128 insertions(+), 48 deletions(-)

diff --git a/lisp/org-element.el b/lisp/org-element.el
index d96ea3b6d6..017c354561 100644
--- a/lisp/org-element.el
+++ b/lisp/org-element.el
@@ -5366,6 +5366,34 @@ Each node of the tree contains an element.  Comparison 
is done
 with `org-element--cache-compare'.  This cache is used in
 `org-element-cache-map'.")
 
+(defconst org-element--cache-hash-size 16
+  "Cache size for recent cached calls to `org-element--cache-find'.
+
+This extra caching is based on the following paper:
+Pugh [Information Processing Letters] (1990) Slow optimally balanced
+ search strategies vs. cached fast uniformly balanced search
+ strategies.  http://dx.doi.org/10.1016/0020-0190(90)90130-P
+ 
+Also, see `org-element--cache-hash-left' and `org-element--cache-hash-right'.")
+(defvar-local org-element--cache-hash-left nil
+  "Cached elements from `org-element--cache' for fast O(1) lookup.
+When non-nil, it should be a vector representing POS arguments of
+`org-element--cache-find' called with nil SIDE argument.
+Also, see `org-element--cache-hash-size'.")
+(defvar-local org-element--cache-hash-right nil
+  "Cached elements from `org-element--cache' for fast O(1) lookup.
+When non-nil, it should be a vector representing POS arguments of
+`org-element--cache-find' called with non-nil, non-`both' SIDE argument.
+Also, see `org-element--cache-hash-size'.")
+
+(defvar org-element--cache-hash-statistics '(0 . 0)
+  "Cons cell storing how Org makes use of `org-element--cache-find' caching.
+The car is the number of successful uses and cdr is the total calls to
+`org-element--cache-find'.")
+(defvar org-element--cache-hash-nocache 0
+  "Number of calls to `org-element--cache-has' with `both' SIDE argument.
+These calls are not cached by hash.  See `org-element--cache-hash-size'.")
+
 (defvar-local org-element--cache-size 0
   "Size of the `org-element--cache'.
 
@@ -5683,6 +5711,25 @@ This function assumes `org-element--headline-cache' is a 
valid AVL tree."
             (memq #'org-element--cache-after-change after-change-functions))
            (eq org-element--cache-change-tic (buffer-chars-modified-tick)))))
 
+;; FIXME: Remove after we establish that hashing app
+(defun org-element-cache-hash-show-statistics ()
+  "Display efficiency of O(1) query cache for `org-element--cache-find'.
+
+This extra caching is based on the following paper:
+Pugh [Information Processing Letters] (1990) Slow optimally balanced
+ search strategies vs. cached fast uniformly balanced search
+ strategies.  http://dx.doi.org/10.1016/0020-0190(90)90130-P
+ 
+Also, see `org-element--cache-size'."
+  (interactive)
+  (message "%.2f%% of cache searches hashed, %.2f%% non-hashable."
+          (* 100
+             (/ (float (car org-element--cache-hash-statistics))
+                (cdr org-element--cache-hash-statistics)))
+          (* 100
+             (/ (float org-element--cache-hash-nocache)
+                (cdr org-element--cache-hash-statistics)))))
+
 (defun org-element--cache-find (pos &optional side)
   "Find element in cache starting at POS or before.
 
@@ -5697,54 +5744,78 @@ after POS.
 The function can only find elements in the synchronized part of
 the cache."
   (with-current-buffer (or (buffer-base-buffer) (current-buffer))
-    (let ((limit (and org-element--cache-sync-requests
-                      (org-element--request-key (car 
org-element--cache-sync-requests))))
-         (node (org-element--cache-root))
-         lower upper)
-      (while node
-        (let* ((element (avl-tree--node-data node))
-              (begin (org-element-property :begin element)))
-         (cond
-          ((and limit
-                (not (org-element--cache-key-less-p
-                    (org-element--cache-key element) limit)))
-           (setq node (avl-tree--node-left node)))
-          ((> begin pos)
-           (setq upper element
-                 node (avl-tree--node-left node)))
-          ((or (< begin pos)
-                ;; If the element is section or org-data, we also need
-                ;; to check the following element.
-                (memq (org-element-type element) '(section org-data)))
-           (setq lower element
-                 node (avl-tree--node-right node)))
-          ;; We found an element in cache starting at POS.  If `side'
-          ;; is `both' we also want the next one in order to generate
-          ;; a key in-between.
-          ;;
-          ;; If the element is the first row or item in a table or
-          ;; a plain list, we always return the table or the plain
-          ;; list.
-          ;;
-          ;; In any other case, we return the element found.
-          ((eq side 'both)
-           (setq lower element)
-           (setq node (avl-tree--node-right node)))
-          ((and (memq (org-element-type element) '(item table-row))
-                (let ((parent (org-element-property :parent element)))
-                  (and (= (org-element-property :begin element)
-                          (org-element-property :contents-begin parent))
-                       (setq node nil
-                             lower parent
-                             upper parent)))))
-          (t
-           (setq node nil
-                 lower element
-                 upper element)))))
-      (pcase side
-        (`both (cons lower upper))
-        (`nil lower)
-        (_ upper)))))
+    (let* ((limit (and org-element--cache-sync-requests
+                       (org-element--request-key (car 
org-element--cache-sync-requests))))
+          (node (org-element--cache-root))
+           (hash-pos (unless (eq side 'both)
+                       (mod (org-knuth-hash pos)
+                            org-element--cache-hash-size)))
+           (hashed (if (not side)
+                       (aref org-element--cache-hash-left hash-pos)
+                     (unless (eq side 'both)
+                       (aref org-element--cache-hash-right hash-pos))))
+          lower upper)
+      ;; `org-element--cache-key-less-p' does not accept markers.
+      (when (markerp pos) (setq pos (marker-position pos)))
+      (cl-incf (cdr org-element--cache-hash-statistics))
+      (when (eq side 'both) (cl-incf org-element--cache-hash-nocache))
+      (if (and hashed (not side)
+               (or (not limit)
+                   ;; Limit can be a list key.
+                   (org-element--cache-key-less-p pos limit))
+               (= pos (org-element-property :begin hashed))
+               (org-element-property :cached hashed))
+          (progn
+            (cl-incf (car org-element--cache-hash-statistics))
+            hashed)
+        (while node
+          (let* ((element (avl-tree--node-data node))
+                (begin (org-element-property :begin element)))
+           (cond
+            ((and limit
+                  (not (org-element--cache-key-less-p
+                      (org-element--cache-key element) limit)))
+             (setq node (avl-tree--node-left node)))
+            ((> begin pos)
+             (setq upper element
+                   node (avl-tree--node-left node)))
+            ((or (< begin pos)
+                  ;; If the element is section or org-data, we also need
+                  ;; to check the following element.
+                  (memq (org-element-type element) '(section org-data)))
+             (setq lower element
+                   node (avl-tree--node-right node)))
+            ;; We found an element in cache starting at POS.  If `side'
+            ;; is `both' we also want the next one in order to generate
+            ;; a key in-between.
+            ;;
+            ;; If the element is the first row or item in a table or
+            ;; a plain list, we always return the table or the plain
+            ;; list.
+            ;;
+            ;; In any other case, we return the element found.
+            ((eq side 'both)
+             (setq lower element)
+             (setq node (avl-tree--node-right node)))
+            ((and (memq (org-element-type element) '(item table-row))
+                  (let ((parent (org-element-property :parent element)))
+                    (and (= (org-element-property :begin element)
+                            (org-element-property :contents-begin parent))
+                         (setq node nil
+                               lower parent
+                               upper parent)))))
+            (t
+             (setq node nil
+                   lower element
+                   upper element)))))
+        (if (not side)
+            (aset org-element--cache-hash-left hash-pos lower)
+          (unless (eq side 'both)
+            (aset org-element--cache-hash-right hash-pos lower)))
+        (pcase side
+          (`both (cons lower upper))
+          (`nil lower)
+          (_ upper))))))
 
 (defun org-element--cache-put (element)
   "Store ELEMENT in current buffer's cache, if allowed."
@@ -7192,6 +7263,8 @@ buffers."
                    (avl-tree-create #'org-element--cache-compare))
         (setq-local org-element--headline-cache
                    (avl-tree-create #'org-element--cache-compare))
+        (setq-local org-element--cache-hash-left (make-vector 
org-element--cache-hash-size nil))
+        (setq-local org-element--cache-hash-right (make-vector 
org-element--cache-hash-size nil))
         (setq-local org-element--cache-size 0)
         (setq-local org-element--headline-cache-size 0)
        (setq-local org-element--cache-sync-keys-value 0)
diff --git a/lisp/org-macs.el b/lisp/org-macs.el
index 10eed2686a..bb3689e290 100644
--- a/lisp/org-macs.el
+++ b/lisp/org-macs.el
@@ -1469,6 +1469,13 @@ window."
         (message "Beginning of buffer")
         (sit-for 1))))))
 
+(cl-defun org-knuth-hash (number &optional (base 32))
+  "Calculate Knuth's multiplicative hash for NUMBER.
+BASE is the maximum bitcount.
+Credit: 
https://stackoverflow.com/questions/11871245/knuth-multiplicative-hash#41537995";
+  (cl-assert (and (<= 0 base 32)))
+  (ash (* number 2654435769) (- base 32)))
+
 (provide 'org-macs)
 
 ;; Local variables:



reply via email to

[Prev in Thread] Current Thread [Next in Thread]