[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim 5973ac084d 2/4: Refactor pyim-cregexp-create.
From: ELPA Syncer
Subject: [elpa] externals/pyim 5973ac084d 2/4: Refactor pyim-cregexp-create.
Date: Wed, 22 Jun 2022 02:57:55 -0400 (EDT)
branch: externals/pyim
commit 5973ac084dc673e8286dcf0b2b327b3f660510a4
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>
Refactor pyim-cregexp-create.
---
pyim-cregexp.el | 54 +++++++++++++++++++++++++++++++++--------------------
tests/pyim-tests.el | 6 +++---
2 files changed, 37 insertions(+), 23 deletions(-)
diff --git a/pyim-cregexp.el b/pyim-cregexp.el
index 90266a8c07..be81fdac37 100644
--- a/pyim-cregexp.el
+++ b/pyim-cregexp.el
@@ -90,30 +90,27 @@ CHAR-LEVEL-NUM 代表汉字常用级别,pyim 中根据汉字的使用频率,
会抛弃一些不常用的汉字,重新生成,知道生成一个 Emacs 可以处理的
regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子的时候,
就无法搜索生僻字了。"
- ;; NOTE: (rx-to-string "") will return "\\(?:\\)",
- ;; While I want (pyim-cregexp-create "") return just "".
(if (and string scheme
(stringp string)
(> (length string) 0)
(pyim-scheme-p scheme)
(pyim-scheme-cregexp-support-p scheme))
- (let ((num (pyim-cregexp-char-level-num char-level-num))
- rx-string)
- (while (not (pyim-cregexp-valid-p rx-string))
- (setq rx-string
- (or (ignore-errors
- (rx-to-string
- (pyim-cregexp-create-from-rx
- (lambda (x)
- (if (stringp x)
- (xr (pyim-cregexp-create-1 x scheme num chinese-only))
- x))
- (xr string))))
- string))
- (setq num (1- num)))
- rx-string)
+ (pyim-cregexp-create-valid-cregexp-from-string
+ string scheme char-level-num chinese-only)
string))
+(defun pyim-cregexp-create-valid-cregexp-from-string
+ (string scheme &optional char-level-num chinese-only)
+ "从 STRING 创建一个有效的搜索中文的 regexp."
+ (let ((num (pyim-cregexp-char-level-num char-level-num))
+ rx-string)
+ (while (not (pyim-cregexp-valid-p rx-string))
+ (setq rx-string
+ (pyim-cregexp-create-beautiful-cregexp-from-string
+ string scheme num chinese-only))
+ (setq num (1- num)))
+ rx-string))
+
(defun pyim-cregexp-valid-p (cregexp)
"Return t when cregexp is a valid regexp."
(and cregexp
@@ -123,18 +120,35 @@ regexp, 所以搜索单字的时候一般可以搜到生僻字,但搜索句子
;; FIXME: Emacs can't handle regexps whose length is too big :-(
(error nil))))
-(defun pyim-cregexp-create-from-rx (fn rx-form)
+(defun pyim-cregexp-create-beautiful-cregexp-from-string
+ (string scheme &optional char-level-num chinese-only)
+ "使用 rx 和 xr, 从 STRING 生成一个漂亮的搜索中文的 regexp.
+
+这个 regexp 可能正常使用,也可能长度超出 emacs 的限制。"
+ (or (ignore-errors
+ (rx-to-string
+ (pyim-cregexp-create-cregexp-from-rx
+ (lambda (x)
+ (if (stringp x)
+ (xr (pyim-cregexp-create-cregexp-from-string
+ x scheme char-level-num chinese-only))
+ x))
+ (xr string))))
+ string))
+
+(defun pyim-cregexp-create-cregexp-from-rx (fn rx-form)
(pcase rx-form
('nil nil)
(`(,form) (funcall fn form))
(`(any . ,_) rx-form)
(`(,_ . ,_)
(mapcar (lambda (x)
- (pyim-cregexp-create-from-rx fn x))
+ (pyim-cregexp-create-cregexp-from-rx fn x))
rx-form))
(_ (funcall fn rx-form))))
-(defun pyim-cregexp-create-1 (str scheme &optional char-level-num chinese-only)
+(defun pyim-cregexp-create-cregexp-from-string
+ (str scheme &optional char-level-num chinese-only)
(let* ((num (pyim-cregexp-char-level-num char-level-num))
(sep "#####&&&&#####")
(lst (remove "" (split-string
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 163d5ec928..4af2a6fd6d 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -951,8 +951,8 @@
(let* ((str (nth 2 (split-string (car (pyim-pymap-py2cchar-get "wang" t))
"|")))
(quanpin (pyim-scheme-get 'quanpin))
- (regexp1 (pyim-cregexp-create-1 "wang" quanpin 3 nil))
- (regexp2 (pyim-cregexp-create-1 "wang" quanpin 2)))
+ (regexp1 (pyim-cregexp-create-cregexp-from-string "wang" quanpin 3 nil))
+ (regexp2 (pyim-cregexp-create-cregexp-from-string "wang" quanpin 2)))
(should (string-match-p regexp1 str))
(should-not (string-match-p regexp2 str)))
@@ -974,7 +974,7 @@
(should (equal (pyim-cregexp-build "adww")
"\\(?:adww\\|[其匧惹斯欺歁莢蒙][人古]?人?\\)"))
(should (equal (pyim-cregexp-build "aaaa'aaaa")
"\\(?:\\(?:aaaa'\\|aaaa\\|[工恭]恭?敬?敬?\\)\\(?:aaaa\\|[工恭]恭?敬?敬?\\)\\)"))
- (should (equal (pyim-cregexp-create-1 "aaaa'aaaa" wubi)
+ (should (equal (pyim-cregexp-create-cregexp-from-string "aaaa'aaaa" wubi)
"\\(?:aaaa'\\|aaaa\\|[工恭][恭]?[敬]?[敬]?\\)\\(?:aaaa\\|[工恭][恭]?[敬]?[敬]?\\)"))
(should (equal (pyim-cregexp-build-xingma-regexp-from-words '("工" "恭恭敬敬"))
"[工恭][恭]?[敬]?[敬]?"))