[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim b6a8a5c723 1/3: Refactor pyim-cregexp.el
From: |
ELPA Syncer |
Subject: |
[elpa] externals/pyim b6a8a5c723 1/3: Refactor pyim-cregexp.el |
Date: |
Sun, 5 Jun 2022 00:58:02 -0400 (EDT) |
branch: externals/pyim
commit b6a8a5c723f5165ef533292a765a5be42c6b23c2
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>
Refactor pyim-cregexp.el
* pyim-cregexp.el (pyim-cregexp-build-from-rx)
(pyim-cregexp-build-1, pyim-cregexp-build-quanpin)
(pyim-cregexp-build-xingma): Rename from ...
* pyim-cregexp.el (pyim-cregexp-build): Use pyim-cregexp-create.
(pyim-cregexp-create): New function.
(pyim-cregexp-create-from-rx)
(pyim-cregexp-create-1, pyim-cregexp-create-quanpin)
(pyim-cregexp-create-from-imobjs): to ...
* pyim-candidates.el (pyim-candidates-create-async): Use
pyim-cregexp-create.
---
pyim-candidates.el | 2 +-
pyim-cregexp.el | 128 +++++++++++++++++++++++++++++-----------------------
tests/pyim-tests.el | 6 +--
3 files changed, 76 insertions(+), 60 deletions(-)
diff --git a/pyim-candidates.el b/pyim-candidates.el
index 29f453fe41..dfcd3e2777 100644
--- a/pyim-candidates.el
+++ b/pyim-candidates.el
@@ -298,7 +298,7 @@
`(,(car pyim-candidates)
,@(pyim-candidates-cloud-search str scheme)
,@(pyim-candidates-search-buffer
- (pyim-cregexp-build str 3 t scheme))
+ (pyim-cregexp-create str scheme 3 t))
,@(cdr pyim-candidates)))))
(cl-defgeneric pyim-candidates-cloud-search (string scheme)
diff --git a/pyim-cregexp.el b/pyim-cregexp.el
index 0769d62d7f..095dba6336 100644
--- a/pyim-cregexp.el
+++ b/pyim-cregexp.el
@@ -52,7 +52,28 @@
(max (min num 4) 1)
4))
-(defun pyim-cregexp-build (string &optional char-level-num chinese-only scheme)
+(defun pyim-cregexp-build (string &optional char-level-num chinese-only)
+ "根据 STRING 构建一个中文 regexp.
+
+这个函数的功能和 `pyim-cregexp-build' 类似,大多数参数也相同,不
+同点是这个函数没有 scheme 参数,它会根据 `pyim-default-scheme' 和
+`pyim-cregexp-fallback-scheme' 等信息动态的获取 scheme."
+ (let ((scheme (pyim-cregexp-scheme)))
+ (pyim-cregexp-create string scheme char-level-num chinese-only)))
+
+(defun pyim-cregexp-scheme (&optional scheme)
+ "返回一个支持 cregexp 的 scheme.
+
+这个函数同时考虑 SCHEME, current scheme 和
+`pyim-cregexp-fallback-scheme'."
+ (let ((current-scheme (pyim-scheme-current)))
+ (cond
+ ((and scheme (pyim-scheme-cregexp-support-p scheme)) scheme)
+ ((and current-scheme (pyim-scheme-cregexp-support-p current-scheme))
+ current-scheme)
+ (t (pyim-scheme-get pyim-cregexp-fallback-scheme)))))
+
+(defun pyim-cregexp-create (string scheme &optional char-level-num
chinese-only)
"根据 STRING 构建一个中文 regexp, 用于 \"拼音搜索汉字\".
比如:\"nihao\" -> \"[你呢...][好号...] \\| nihao\"
@@ -70,37 +91,28 @@ CHAR-LEVEL-NUM 代表汉字常用级别,pyim 中根据汉字的使用频率,
regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子的时候,
就无法搜索生僻字了。"
;; NOTE: (rx-to-string "") will return "\\(?:\\)",
- ;; While I want (pyim-cregexp-build "") return just "".
- (setq scheme (pyim-cregexp-scheme scheme))
- (if (equal string "")
- string
- (let ((num (pyim-cregexp-char-level-num char-level-num))
- rx-string)
- (while (not (pyim-cregexp-valid-p rx-string))
- (setq rx-string
- (or (ignore-errors
- (rx-to-string
- (pyim-cregexp-build-from-rx
- (lambda (x)
- (if (stringp x)
- (xr (pyim-cregexp-build-1 x num chinese-only
scheme))
- x))
- (xr string))))
- string))
- (setq num (1- num)))
- rx-string)))
-
-(defun pyim-cregexp-scheme (&optional scheme)
- "返回一个支持 cregexp 的 scheme.
-
-这个函数同时考虑 SCHEME, current scheme 和
-`pyim-cregexp-fallback-scheme'."
- (let ((current-scheme (pyim-scheme-current)))
- (cond
- ((and scheme (pyim-scheme-cregexp-support-p scheme)) scheme)
- ((and current-scheme (pyim-scheme-cregexp-support-p current-scheme))
- current-scheme)
- (t (pyim-scheme-get pyim-cregexp-fallback-scheme)))))
+ ;; While I want (pyim-cregexp-create "") return just "".
+ (if (and string scheme
+ (stringp string)
+ (> (length string) 0)
+ (pyim-scheme-p scheme)
+ (pyim-scheme-cregexp-support-p scheme))
+ (let ((num (pyim-cregexp-char-level-num char-level-num))
+ rx-string)
+ (while (not (pyim-cregexp-valid-p rx-string))
+ (setq rx-string
+ (or (ignore-errors
+ (rx-to-string
+ (pyim-cregexp-create-from-rx
+ (lambda (x)
+ (if (stringp x)
+ (xr (pyim-cregexp-create-1 x scheme num
chinese-only))
+ x))
+ (xr string))))
+ string))
+ (setq num (1- num)))
+ rx-string)
+ string))
(defun pyim-cregexp-valid-p (cregexp)
"Return t when cregexp is a valid regexp."
@@ -111,20 +123,19 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
;; FIXME: Emacs can't handle regexps whose length is too big :-(
(error nil))))
-(defun pyim-cregexp-build-from-rx (fn rx-form)
+(defun pyim-cregexp-create-from-rx (fn rx-form)
(pcase rx-form
('nil nil)
(`(,form) (funcall fn form))
(`(any . ,_) rx-form)
(`(,_ . ,_)
(mapcar (lambda (x)
- (pyim-cregexp-build-from-rx fn x))
+ (pyim-cregexp-create-from-rx fn x))
rx-form))
(_ (funcall fn rx-form))))
-(defun pyim-cregexp-build-1 (str &optional char-level-num chinese-only scheme)
+(defun pyim-cregexp-create-1 (str scheme &optional char-level-num chinese-only)
(let* ((num (pyim-cregexp-char-level-num char-level-num))
- (code-prefix (pyim-scheme-code-prefix scheme))
(sep "#####&&&&#####")
(lst (remove "" (split-string
(replace-regexp-in-string
@@ -140,12 +151,9 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
(let* ((string1 (replace-regexp-in-string "'" "" string))
(imobjs (pyim-imobjs-create string1 scheme))
(regexp-list
- (mapcar
- (lambda (imobj)
- (if (pyim-scheme-xingma-p scheme)
- (pyim-cregexp-build-xingma imobj nil nil nil
code-prefix)
- (pyim-cregexp-build-quanpin imobj nil nil nil num)))
- imobjs))
+ (mapcar (lambda (imobj)
+ (pyim-cregexp-create-from-imobj imobj scheme nil
nil nil num))
+ imobjs))
(regexp
(when regexp-list
(string-join (delq nil regexp-list) "\\|")))
@@ -160,9 +168,15 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
(format "\\(?:%s\\)" regexp))))
lst "")))
-(defun pyim-cregexp-build-quanpin (imobj &optional match-beginning
- first-equal all-equal char-level-num)
- "从 IMOBJ 创建一个搜索中文的 regexp."
+(cl-defgeneric pyim-cregexp-create-from-imobj
+ (imobj _scheme &optional match-beginning
+ first-equal all-equal char-level-num)
+ "从 IMOBJ 创建一个搜索中文的 regexp.")
+
+(cl-defmethod pyim-cregexp-create-from-imobj
+ (imobj (_scheme pyim-scheme-quanpin)
+ &optional match-beginning first-equal all-equal char-level-num)
+ "从 IMOBJ 创建一个搜索中文的 regexp, 适用于全拼输入法。"
(let* ((num (pyim-cregexp-char-level-num char-level-num))
(imobj (mapcar (lambda (x)
(concat (nth 0 x) (nth 1 x)))
@@ -192,9 +206,10 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
(unless (equal regexp "")
(concat (if match-beginning "^" "") regexp))))
-(defun pyim-cregexp-build-xingma (imobj &optional match-beginning
- first-equal _all-equal code-prefix)
- "从 IMOBJ 创建一个搜索中文的 regexp."
+(cl-defmethod pyim-cregexp-create-from-imobj
+ (imobj (scheme pyim-scheme-xingma)
+ &optional match-beginning first-equal _all-equal _char-level-num)
+ "从 IMOBJ 创建一个搜索中文的 regexp, 适用于形码输入法。"
(cl-flet ((build-regexp
(list)
(let* ((n (apply #'max (mapcar #'length list)))
@@ -211,14 +226,15 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
(if (> i 0) "?" ""))
results))
(string-join (reverse results)))))
- (let ((regexp (mapconcat
- (lambda (x)
- (let ((code (concat (or code-prefix "")
- (if first-equal
- (substring x 0 1)
- x))))
- (build-regexp (pyim-dcache-get code '(code2word)))))
- imobj "")))
+ (let* ((code-prefix (pyim-scheme-code-prefix scheme))
+ (regexp (mapconcat
+ (lambda (x)
+ (let ((code (concat (or code-prefix "")
+ (if first-equal
+ (substring x 0 1)
+ x))))
+ (build-regexp (pyim-dcache-get code '(code2word)))))
+ imobj "")))
(unless (equal regexp "")
(concat (if match-beginning "^" "") regexp)))))
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 153d4b8946..2be4b29d51 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -873,8 +873,8 @@
(let* ((str (nth 2 (split-string (car (pyim-pymap-py2cchar-get "wang" t))
"|")))
(quanpin (pyim-scheme-get 'quanpin))
- (regexp1 (pyim-cregexp-build-1 "wang" 3 nil quanpin))
- (regexp2 (pyim-cregexp-build-1 "wang" 2 nil quanpin)))
+ (regexp1 (pyim-cregexp-create-1 "wang" quanpin 3 nil))
+ (regexp2 (pyim-cregexp-create-1 "wang" quanpin 2)))
(should (string-match-p regexp1 str))
(should-not (string-match-p regexp2 str)))
@@ -895,7 +895,7 @@
(should (equal (pyim-cregexp-build "adww")
"\\(?:adww\\|[其匧惹斯欺歁莢蒙][人古]?人?\\)"))
(should (equal (pyim-cregexp-build "aaaa'aaaa")
"\\(?:\\(?:aaaa'\\|aaaa\\|[工恭]恭?敬?敬?\\)\\(?:aaaa\\|[工恭]恭?敬?敬?\\)\\)"))
- (should (equal (pyim-cregexp-build-1 "aaaa'aaaa" nil nil wubi)
+ (should (equal (pyim-cregexp-create-1 "aaaa'aaaa" wubi)
"\\(?:aaaa'\\|aaaa\\|[工恭][恭]?[敬]?[敬]?\\)\\(?:aaaa\\|[工恭][恭]?[敬]?[敬]?\\)")))
(with-temp-buffer