emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master 9d79735 1/2: Optimize skkdic conversion (Bug#28043)


From: Noam Postavsky
Subject: [Emacs-diffs] master 9d79735 1/2: Optimize skkdic conversion (Bug#28043)
Date: Mon, 21 Aug 2017 20:51:30 -0400 (EDT)

branch: master
commit 9d7973530f912c6001445ba9b83b7893b466aee8
Author: Noam Postavsky <address@hidden>
Commit: Noam Postavsky <address@hidden>

    Optimize skkdic conversion (Bug#28043)
    
    The primary speedup comes from the optimizing lookup-nested-alist and
    set-nested-alist for the case where the key is a string.  This brings
    the time down to less than half the original.
    
    * lisp/international/mule-util.el (lookup-nested-alist)
    (set-nested-alist): Use `assq' instead of `assoc' when KEYSEQ is a
    string.
    
    * lisp/international/ja-dic-cnv.el (skkdic-collect-okuri-nasi)
    (skkdic-convert-okuri-nasi): Use progress-reporter functions instead
    of calculating ratio of work done inline.
    
    (skkdic-reduced-candidates): Call `char-category-set' on the first
    character of the string directly, instead of using a regexp for the
    character category.
    (skkdic--japanese-category-set): New constant.
    (skkdic-collect-okuri-nasi): Just set
    `skkdic-okuri-nasi-entries-count' at once at the end rather than
    updating it throughout the loop.
    
    (skkdic-convert-postfix skkdic-convert-prefix)
    skkdic-get-candidate-list, skkdic-collect-okuri-nasi)
    (skkdic-extract-conversion-data): Use `match-string-no-properties'
    instead of `match-string'.
---
 lisp/international/ja-dic-cnv.el | 61 +++++++++++++++----------------
 lisp/international/mule-util.el  | 77 ++++++++++++++++++++++++++++------------
 2 files changed, 83 insertions(+), 55 deletions(-)

diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el
index e80b1b2..63eede0 100644
--- a/lisp/international/ja-dic-cnv.el
+++ b/lisp/international/ja-dic-cnv.el
@@ -125,10 +125,10 @@
 
   ;; Search postfix entries.
   (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|$B!<(B\\)+\\) " nil t)
-    (let ((kana (match-string 1))
+    (let ((kana (match-string-no-properties 1))
          str candidates)
       (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/")
-       (setq str (match-string 1))
+        (setq str (match-string-no-properties 1))
        (if (not (member str candidates))
            (setq candidates (cons str candidates)))
        (goto-char (match-end 1)))
@@ -158,10 +158,10 @@
            "(skkdic-set-prefix\n"))
   (save-excursion
     (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\)[<>?] " nil t)
-      (let ((kana (match-string 1))
+      (let ((kana (match-string-no-properties 1))
            str candidates)
        (while (looking-at "/\\([^/\n]+\\)/")
-         (setq str (match-string 1))
+          (setq str (match-string-no-properties 1))
          (if (not (member str candidates))
              (setq candidates (cons str candidates)))
          (goto-char (match-end 1)))
@@ -180,8 +180,8 @@
   (let (candidates)
     (goto-char from)
     (while (re-search-forward "/[^/ \n]+" to t)
-      (setq candidates (cons (buffer-substring (1+ (match-beginning 0))
-                                              (match-end 0))
+      (setq candidates (cons (buffer-substring-no-properties
+                              (1+ (match-beginning 0)) (match-end 0))
                             candidates)))
     candidates))
 
@@ -251,12 +251,16 @@
 ;; Return list of candidates which excludes some from CANDIDATES.
 ;; Excluded candidates can be derived from another entry.
 
+(defconst skkdic--japanese-category-set (make-category-set "j"))
+
 (defun skkdic-reduced-candidates (skkbuf kana candidates)
   (let (elt l)
     (while candidates
       (setq elt (car candidates))
       (if (or (= (length elt) 1)
-             (and (string-match "^\\cj" elt)
+             (and (bool-vector-subsetp
+                    skkdic--japanese-category-set
+                    (char-category-set (aref elt 0)))
                   (not (skkdic-breakup-string skkbuf kana elt 0 (length elt)
                                               'first))))
          (setq l (cons elt l)))
@@ -267,24 +271,18 @@
 (defvar skkdic-okuri-nasi-entries-count 0)
 
 (defun skkdic-collect-okuri-nasi ()
-  (message "Collecting OKURI-NASI entries ...")
   (save-excursion
-    (let ((prev-ratio 0)
-         ratio)
+    (let ((progress (make-progress-reporter "Collecting OKURI-NASI entries"
+                                            (point) (point-max)
+                                            nil 10)))
       (while (re-search-forward "^\\(\\(\\cH\\|$B!<(B\\)+\\) \\(/\\cj.*\\)/$"
                                nil t)
-       (let ((kana (match-string 1))
+        (let ((kana (match-string-no-properties 1))
              (candidates (skkdic-get-candidate-list (match-beginning 3)
                                                     (match-end 3))))
          (setq skkdic-okuri-nasi-entries
-               (cons (cons kana candidates) skkdic-okuri-nasi-entries)
-               skkdic-okuri-nasi-entries-count
-               (1+ skkdic-okuri-nasi-entries-count))
-         (setq ratio (floor (* (point) 100.0) (point-max)))
-         (if (/= (/ prev-ratio 10) (/ ratio 10))
-             (progn
-               (message "collected %2d%% ..." ratio)
-               (setq prev-ratio ratio)))
+               (cons (cons kana candidates) skkdic-okuri-nasi-entries))
+          (progress-reporter-update progress (point))
          (while candidates
            (let ((entry (lookup-nested-alist (car candidates)
                                              skkdic-word-list nil nil t)))
@@ -292,26 +290,24 @@
                  (setcar entry (cons kana (car entry)))
                (set-nested-alist (car candidates) (list kana)
                                  skkdic-word-list)))
-           (setq candidates (cdr candidates))))))))
+            (setq candidates (cdr candidates)))))
+      (setq skkdic-okuri-nasi-entries-count (length skkdic-okuri-nasi-entries))
+      (progress-reporter-done progress))))
 
 (defun skkdic-convert-okuri-nasi (skkbuf buf)
-  (message "Processing OKURI-NASI entries ...")
   (with-current-buffer buf
     (insert ";; Setting okuri-nasi entries.\n"
            "(skkdic-set-okuri-nasi\n")
     (let ((l (nreverse skkdic-okuri-nasi-entries))
-         (count 0)
-         (prev-ratio 0)
-         ratio)
+          (progress (make-progress-reporter "Processing OKURI-NASI entries"
+                                            0 skkdic-okuri-nasi-entries-count
+                                            nil 10))
+          (count 0))
       (while l
        (let ((kana (car (car l)))
              (candidates (cdr (car l))))
-         (setq ratio (floor (* count 100.0) skkdic-okuri-nasi-entries-count)
-               count (1+ count))
-         (if (/= (/ prev-ratio 10) (/ ratio 10))
-             (progn
-               (message "processed %2d%% ..." ratio)
-               (setq prev-ratio ratio)))
+          (setq count (1+ count))
+          (progress-reporter-update progress count)
          (if (setq candidates
                    (skkdic-reduced-candidates skkbuf kana candidates))
              (progn
@@ -320,7 +316,8 @@
                  (insert " " (car candidates))
                  (setq candidates (cdr candidates)))
                (insert "\"\n"))))
-       (setq l (cdr l))))
+       (setq l (cdr l)))
+      (progress-reporter-done progress))
     (insert ")\n\n")))
 
 (defun skkdic-convert (filename &optional dirname)
@@ -467,7 +464,7 @@ To get complete usage, invoke:
        (i (match-end 0))
        candidates)
     (while (string-match "[^ ]+" entry i)
-      (setq candidates (cons (match-string 0 entry) candidates))
+      (setq candidates (cons (match-string-no-properties 0 entry) candidates))
       (setq i (match-end 0)))
     (cons (skkdic-get-kana-compact-codes kana) candidates)))
 
diff --git a/lisp/international/mule-util.el b/lisp/international/mule-util.el
index e34b01c..257f885 100644
--- a/lisp/international/mule-util.el
+++ b/lisp/international/mule-util.el
@@ -143,20 +143,43 @@ longer than KEYSEQ.
 See the documentation of `nested-alist-p' for more detail."
   (or (nested-alist-p alist)
       (error "Invalid argument %s" alist))
-  (let ((islist (listp keyseq))
-       (len (or len (length keyseq)))
-       (i 0)
-       key-elt slot)
-    (while (< i len)
-      (if (null (nested-alist-p alist))
-         (error "Keyseq %s is too long for this nested alist" keyseq))
-      (setq key-elt (if islist (nth i keyseq) (aref keyseq i)))
-      (setq slot (assoc key-elt (cdr alist)))
-      (unless slot
-       (setq slot (cons key-elt (list t)))
-       (setcdr alist (cons slot (cdr alist))))
-      (setq alist (cdr slot))
-      (setq i (1+ i)))
+  (let ((len (or len (length keyseq)))
+       (i 0))
+    (cond
+     ((stringp keyseq)             ; We can use `assq' for characters.
+      (while (< i len)
+        (if (null (nested-alist-p alist))
+            (error "Keyseq %s is too long for this nested alist" keyseq))
+        (let* ((key-elt (aref keyseq i))
+               (slot (assq key-elt (cdr alist))))
+          (unless slot
+            (setq slot (list key-elt t))
+            (push slot (cdr alist)))
+          (setq alist (cdr slot)))
+        (setq i (1+ i))))
+     ((arrayp keyseq)
+      (while (< i len)
+        (if (null (nested-alist-p alist))
+            (error "Keyseq %s is too long for this nested alist" keyseq))
+        (let* ((key-elt (aref keyseq i))
+               (slot (assoc key-elt (cdr alist))))
+          (unless slot
+            (setq slot (list key-elt t))
+            (push slot (cdr alist)))
+          (setq alist (cdr slot)))
+        (setq i (1+ i))))
+     ((listp keyseq)
+      (while (< i len)
+        (if (null (nested-alist-p alist))
+            (error "Keyseq %s is too long for this nested alist" keyseq))
+        (let* ((key-elt (pop keyseq))
+               (slot (assoc key-elt (cdr alist))))
+          (unless slot
+            (setq slot (list key-elt t))
+            (push slot (cdr alist)))
+          (setq alist (cdr slot)))
+        (setq i (1+ i))))
+     (t (signal 'wrong-type-argument (list keyseq))))
     (setcar alist entry)
     (if branches
        (setcdr (last alist) branches))))
@@ -179,15 +202,23 @@ Optional 5th argument NIL-FOR-TOO-LONG non-nil means 
return nil
       (setq len (length keyseq)))
   (let ((i (or start 0)))
     (if (catch 'lookup-nested-alist-tag
-         (if (listp keyseq)
-             (while (< i len)
-               (if (setq alist (cdr (assoc (nth i keyseq) (cdr alist))))
-                   (setq i (1+ i))
-                 (throw 'lookup-nested-alist-tag t))))
-         (while (< i len)
-           (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist))))
-               (setq i (1+ i))
-             (throw 'lookup-nested-alist-tag t))))
+          (cond ((stringp keyseq)  ; We can use `assq' for characters.
+                 (while (< i len)
+                   (if (setq alist (cdr (assq (aref keyseq i) (cdr alist))))
+                       (setq i (1+ i))
+                     (throw 'lookup-nested-alist-tag t))))
+                ((arrayp keyseq)
+                 (while (< i len)
+                   (if (setq alist (cdr (assoc (aref keyseq i) (cdr alist))))
+                       (setq i (1+ i))
+                     (throw 'lookup-nested-alist-tag t))))
+                ((listp keyseq)
+                 (setq keyseq (nthcdr i keyseq))
+                 (while (< i len)
+                   (if (setq alist (cdr (assoc (pop keyseq) (cdr alist))))
+                       (setq i (1+ i))
+                     (throw 'lookup-nested-alist-tag t))))
+                (t (signal 'wrong-type-argument (list keyseq)))))
        ;; KEYSEQ is too long.
        (if nil-for-too-long nil i)
       alist)))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]