emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim b6a8a5c723 1/3: Refactor pyim-cregexp.el


From: ELPA Syncer
Subject: [elpa] externals/pyim b6a8a5c723 1/3: Refactor pyim-cregexp.el
Date: Sun, 5 Jun 2022 00:58:02 -0400 (EDT)

branch: externals/pyim
commit b6a8a5c723f5165ef533292a765a5be42c6b23c2
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    Refactor pyim-cregexp.el
    
        * pyim-cregexp.el (pyim-cregexp-build-from-rx)
        (pyim-cregexp-build-1, pyim-cregexp-build-quanpin)
        (pyim-cregexp-build-xingma): Rename from these ...
    
        * pyim-cregexp.el (pyim-cregexp-build): Use pyim-cregexp-create.
        (pyim-cregexp-create): New function.
        (pyim-cregexp-create-from-rx, pyim-cregexp-create-1)
        (pyim-cregexp-create-from-imobj): ... to these.  The quanpin and
        xingma builders become methods of pyim-cregexp-create-from-imobj.
    
            * pyim-candidates.el (pyim-candidates-create-async): Use pyim-cregexp-create.
---
 pyim-candidates.el  |   2 +-
 pyim-cregexp.el     | 128 +++++++++++++++++++++++++++++-----------------------
 tests/pyim-tests.el |   6 +--
 3 files changed, 76 insertions(+), 60 deletions(-)

diff --git a/pyim-candidates.el b/pyim-candidates.el
index 29f453fe41..dfcd3e2777 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -298,7 +298,7 @@
       `(,(car pyim-candidates)
         ,@(pyim-candidates-cloud-search str scheme)
         ,@(pyim-candidates-search-buffer
-           (pyim-cregexp-build str 3 t scheme))
+           (pyim-cregexp-create str scheme 3 t))
         ,@(cdr pyim-candidates)))))
 
 (cl-defgeneric pyim-candidates-cloud-search (string scheme)
diff --git a/pyim-cregexp.el b/pyim-cregexp.el
index 0769d62d7f..095dba6336 100644
--- a/pyim-cregexp.el
+++ b/pyim-cregexp.el
@@ -52,7 +52,28 @@
       (max (min num 4) 1)
     4))
 
-(defun pyim-cregexp-build (string &optional char-level-num chinese-only scheme)
+(defun pyim-cregexp-build (string &optional char-level-num chinese-only)
+  "根据 STRING 构建一个中文 regexp.
+
+这个函数的功能和 `pyim-cregexp-create' 类似,大多数参数也相同,不
+同点是这个函数没有 scheme 参数,它会根据 `pyim-default-scheme' 和
+`pyim-cregexp-fallback-scheme' 等信息动态的获取 scheme."
+  (let ((scheme (pyim-cregexp-scheme)))
+    (pyim-cregexp-create string scheme char-level-num chinese-only)))
+
+(defun pyim-cregexp-scheme (&optional scheme)
+  "返回一个支持 cregexp 的 scheme.
+
+这个函数同时考虑 SCHEME, current scheme 和
+`pyim-cregexp-fallback-scheme'."
+  (let ((current-scheme (pyim-scheme-current)))
+    (cond
+     ((and scheme (pyim-scheme-cregexp-support-p scheme)) scheme)
+     ((and current-scheme (pyim-scheme-cregexp-support-p current-scheme))
+      current-scheme)
+     (t (pyim-scheme-get pyim-cregexp-fallback-scheme)))))
+
+(defun pyim-cregexp-create (string scheme &optional char-level-num chinese-only)
   "根据 STRING 构建一个中文 regexp, 用于 \"拼音搜索汉字\".
 
 比如:\"nihao\" -> \"[你呢...][好号...] \\| nihao\"
@@ -70,37 +91,28 @@ CHAR-LEVEL-NUM 代表汉字常用级别,pyim 中根据汉字的使用频率,
 regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子的时候,
 就无法搜索生僻字了。"
   ;; NOTE: (rx-to-string "") will return "\\(?:\\)",
-  ;; While I want (pyim-cregexp-build "") return just "".
-  (setq scheme (pyim-cregexp-scheme scheme))
-  (if (equal string "")
-      string
-    (let ((num (pyim-cregexp-char-level-num char-level-num))
-          rx-string)
-      (while (not (pyim-cregexp-valid-p rx-string))
-        (setq rx-string
-              (or (ignore-errors
-                    (rx-to-string
-                     (pyim-cregexp-build-from-rx
-                      (lambda (x)
-                        (if (stringp x)
-                            (xr (pyim-cregexp-build-1 x num chinese-only scheme))
-                          x))
-                      (xr string))))
-                  string))
-        (setq num (1- num)))
-      rx-string)))
-
-(defun pyim-cregexp-scheme (&optional scheme)
-  "返回一个支持 cregexp 的 scheme.
-
-这个函数同时考虑 SCHEME, current scheme 和
-`pyim-cregexp-fallback-scheme'."
-  (let ((current-scheme (pyim-scheme-current)))
-    (cond
-     ((and scheme (pyim-scheme-cregexp-support-p scheme)) scheme)
-     ((and current-scheme (pyim-scheme-cregexp-support-p current-scheme))
-      current-scheme)
-     (t (pyim-scheme-get pyim-cregexp-fallback-scheme)))))
+  ;; While I want (pyim-cregexp-create "") return just "".
+  (if (and string scheme
+           (stringp string)
+           (> (length string) 0)
+           (pyim-scheme-p scheme)
+           (pyim-scheme-cregexp-support-p scheme))
+      (let ((num (pyim-cregexp-char-level-num char-level-num))
+            rx-string)
+        (while (not (pyim-cregexp-valid-p rx-string))
+          (setq rx-string
+                (or (ignore-errors
+                      (rx-to-string
+                       (pyim-cregexp-create-from-rx
+                        (lambda (x)
+                          (if (stringp x)
+                              (xr (pyim-cregexp-create-1 x scheme num chinese-only))
+                            x))
+                        (xr string))))
+                    string))
+          (setq num (1- num)))
+        rx-string)
+    string))
 
 (defun pyim-cregexp-valid-p (cregexp)
   "Return t when cregexp is a valid regexp."
@@ -111,20 +123,19 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
          ;; FIXME: Emacs can't handle regexps whose length is too big :-(
          (error nil))))
 
-(defun pyim-cregexp-build-from-rx (fn rx-form)
+(defun pyim-cregexp-create-from-rx (fn rx-form)
   (pcase rx-form
     ('nil nil)
     (`(,form) (funcall fn form))
     (`(any . ,_) rx-form)
     (`(,_ . ,_)
      (mapcar (lambda (x)
-               (pyim-cregexp-build-from-rx fn x))
+               (pyim-cregexp-create-from-rx fn x))
              rx-form))
     (_ (funcall fn rx-form))))
 
-(defun pyim-cregexp-build-1 (str &optional char-level-num chinese-only scheme)
+(defun pyim-cregexp-create-1 (str scheme &optional char-level-num chinese-only)
   (let* ((num (pyim-cregexp-char-level-num char-level-num))
-         (code-prefix (pyim-scheme-code-prefix scheme))
          (sep "#####&&&&#####")
          (lst (remove "" (split-string
                           (replace-regexp-in-string
@@ -140,12 +151,9 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
          (let* ((string1 (replace-regexp-in-string "'" "" string))
                 (imobjs (pyim-imobjs-create string1 scheme))
                 (regexp-list
-                 (mapcar
-                  (lambda (imobj)
-                    (if (pyim-scheme-xingma-p scheme)
-                        (pyim-cregexp-build-xingma imobj nil nil nil code-prefix)
-                      (pyim-cregexp-build-quanpin imobj nil nil nil num)))
-                  imobjs))
+                 (mapcar (lambda (imobj)
+                           (pyim-cregexp-create-from-imobj imobj scheme nil nil nil num))
+                         imobjs))
                 (regexp
                  (when regexp-list
                    (string-join (delq nil regexp-list) "\\|")))
@@ -160,9 +168,15 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
            (format "\\(?:%s\\)" regexp))))
      lst "")))
 
-(defun pyim-cregexp-build-quanpin (imobj &optional match-beginning
-                                         first-equal all-equal char-level-num)
-  "从 IMOBJ 创建一个搜索中文的 regexp."
+(cl-defgeneric pyim-cregexp-create-from-imobj
+    (imobj _scheme &optional match-beginning
+           first-equal all-equal char-level-num)
+  "从 IMOBJ 创建一个搜索中文的 regexp.")
+
+(cl-defmethod pyim-cregexp-create-from-imobj
+  (imobj (_scheme pyim-scheme-quanpin)
+         &optional match-beginning first-equal all-equal char-level-num)
+  "从 IMOBJ 创建一个搜索中文的 regexp, 适用于全拼输入法。"
   (let* ((num (pyim-cregexp-char-level-num char-level-num))
          (imobj (mapcar (lambda (x)
                           (concat (nth 0 x) (nth 1 x)))
@@ -192,9 +206,10 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
     (unless (equal regexp "")
       (concat (if match-beginning "^" "") regexp))))
 
-(defun pyim-cregexp-build-xingma (imobj &optional match-beginning
-                                        first-equal _all-equal code-prefix)
-  "从 IMOBJ 创建一个搜索中文的 regexp."
+(cl-defmethod pyim-cregexp-create-from-imobj
+  (imobj (scheme pyim-scheme-xingma)
+         &optional match-beginning first-equal _all-equal _char-level-num)
+  "从 IMOBJ 创建一个搜索中文的 regexp, 适用于形码输入法。"
   (cl-flet ((build-regexp
               (list)
               (let* ((n (apply #'max (mapcar #'length list)))
@@ -211,14 +226,15 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
                                 (if (> i 0) "?" ""))
                         results))
                 (string-join (reverse results)))))
-    (let ((regexp (mapconcat
-                   (lambda (x)
-                     (let ((code (concat (or code-prefix "")
-                                         (if first-equal
-                                             (substring x 0 1)
-                                           x))))
-                       (build-regexp (pyim-dcache-get code '(code2word)))))
-                   imobj "")))
+    (let* ((code-prefix (pyim-scheme-code-prefix scheme))
+           (regexp (mapconcat
+                    (lambda (x)
+                      (let ((code (concat (or code-prefix "")
+                                          (if first-equal
+                                              (substring x 0 1)
+                                            x))))
+                        (build-regexp (pyim-dcache-get code '(code2word)))))
+                    imobj "")))
       (unless (equal regexp "")
         (concat (if match-beginning "^" "") regexp)))))
 
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 153d4b8946..2be4b29d51 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -873,8 +873,8 @@
 
   (let* ((str (nth 2 (split-string (car (pyim-pymap-py2cchar-get "wang" t)) "|")))
          (quanpin (pyim-scheme-get 'quanpin))
-         (regexp1 (pyim-cregexp-build-1 "wang" 3 nil quanpin))
-         (regexp2 (pyim-cregexp-build-1 "wang" 2 nil quanpin)))
+         (regexp1 (pyim-cregexp-create-1 "wang" quanpin 3 nil))
+         (regexp2 (pyim-cregexp-create-1 "wang" quanpin 2)))
     (should (string-match-p regexp1 str))
     (should-not (string-match-p regexp2 str)))
 
@@ -895,7 +895,7 @@
     (should (equal (pyim-cregexp-build "adww") "\\(?:adww\\|[其匧惹斯欺歁莢蒙][人古]?人?\\)"))
     (should (equal (pyim-cregexp-build "aaaa'aaaa")
                    "\\(?:\\(?:aaaa'\\|aaaa\\|[工恭]恭?敬?敬?\\)\\(?:aaaa\\|[工恭]恭?敬?敬?\\)\\)"))
-    (should (equal (pyim-cregexp-build-1 "aaaa'aaaa" nil nil wubi)
+    (should (equal (pyim-cregexp-create-1 "aaaa'aaaa" wubi)
                    "\\(?:aaaa'\\|aaaa\\|[工恭][恭]?[敬]?[敬]?\\)\\(?:aaaa\\|[工恭][恭]?[敬]?[敬]?\\)")))
 
   (with-temp-buffer



reply via email to

[Prev in Thread] Current Thread [Next in Thread]