emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim f26fecc 03/36: New file: pyim-cregexp.el


From: ELPA Syncer
Subject: [elpa] externals/pyim f26fecc 03/36: New file: pyim-cregexp.el
Date: Thu, 22 Apr 2021 22:57:14 -0400 (EDT)

branch: externals/pyim
commit f26fecc4d63b3a98c56b99f48f8b272625d37fa6
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    New file: pyim-cregexp.el
---
 pyim-cregexp.el | 271 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 pyim.el         | 236 ------------------------------------------------
 2 files changed, 271 insertions(+), 236 deletions(-)

diff --git a/pyim-cregexp.el b/pyim-cregexp.el
new file mode 100644
index 0000000..c93416e
--- /dev/null
+++ b/pyim-cregexp.el
@@ -0,0 +1,271 @@
+;;; pyim-cregexp.el --- Chinese regexp tools for pyim.        -*- 
lexical-binding: t; -*-
+
+;; * Header
+;; Copyright (C) 2021 Free Software Foundation, Inc.
+
+;; Author: Feng Shu <tumashu@163.com>
+;; Maintainer: Feng Shu <tumashu@163.com>
+;; URL: https://github.com/tumashu/pyim
+;; Keywords: convenience, Chinese, pinyin, input-method
+;; Package-Requires: ((emacs "24.4") (async "1.6") (xr "1.13"))
+
+;; This file is part of GNU Emacs.
+
+;; GNU Emacs is free software: you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; GNU Emacs is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;;; Code:
+;; * 代码                                                           :code:
+(require 'cl-lib)
+(require 'xr)
+(require 'rx)
+
+(defcustom pyim-cregexp-fallback-scheme 'quanpin
+  "`pyim-cregexp-build' 使用的 Fallback scheme.
+
+如果 `pyim-cregexp-build' 无法支持用户正在使用的 scheme 时,
+将使用这个 scheme."
+  :type 'symbol)
+
+(defun pyim-cregexp-build (string &optional char-level-num)
+  "根据 STRING 构建一个中文 regexp, 用于 \"拼音搜索汉字\".
+比如:\"nihao\" -> \"[你呢...][好号...] \\| nihao\""
+  ;; NOTE: (rx-to-string "") will return "\\(?:\\)",
+  ;; While I want (pyim-cregexp-build "") return just "".
+  (if (equal string "")
+      string
+    (let* ((char-level-num (or char-level-num 3))
+           (rx-string
+            (if (= char-level-num 0)
+                string
+              (ignore-errors
+                (rx-to-string
+                 (pyim-cregexp-build-from-rx
+                  (lambda (x)
+                    (if (stringp x)
+                        (xr (pyim-cregexp-build-1 x char-level-num))
+                      x))
+                  (xr string)))))))
+      (if (and rx-string (stringp rx-string))
+          (if (pyim-cregexp-valid-p rx-string)
+              rx-string
+            (pyim-cregexp-build string (- char-level-num 1)))
+        string))))
+
+(defun pyim-cregexp-valid-p (cregexp)
+  "Return t when cregexp is a valid regexp."
+  (and cregexp
+       (stringp cregexp)
+       (condition-case nil
+           (progn (string-match-p cregexp "") t)
+         ;; FIXME: Emacs can't handle regexps whose length is too big :-(
+         (error nil))))
+
+(defun pyim-cregexp-build-from-rx (fn rx-form)
+  (pcase rx-form
+    ('nil nil)
+    (`(,form) (funcall fn form))
+    (`(any . ,_) rx-form)
+    (`(,_ . ,_)
+     (mapcar (lambda (x)
+               (pyim-cregexp-build-from-rx fn x))
+             rx-form))
+    (_ (funcall fn rx-form))))
+
+(defun pyim-cregexp-build-1 (str &optional char-level-num)
+  (let* ((scheme-name (pyim-scheme-name))
+         (class (pyim-scheme-get-option scheme-name :class))
+         (code-prefix (pyim-scheme-get-option scheme-name :code-prefix))
+         (sep "#####&&&&#####")
+         (lst (remove "" (split-string
+                          (replace-regexp-in-string
+                           "\\([a-z]+'*\\)" (concat sep "\\1" sep) str)
+                          sep)))
+         (char-level-num
+          (cond
+           ((and char-level-num (> char-level-num 3)) 3)
+           ((and char-level-num (< char-level-num 1)) 1)
+           (t char-level-num))))
+    ;; 确保 pyim 词库加载
+    (pyim-dcache-init-variables)
+    ;; pyim 暂时只支持全拼和双拼搜索
+    (when (not (member class '(quanpin shuangpin xingma)))
+      (setq scheme-name pyim-cregexp-fallback-scheme))
+    (mapconcat
+     (lambda (string)
+       (if (or (pyim-string-match-p "[^a-z']+" string)
+               (equal string ""))
+           string
+         (let* ((string1 (replace-regexp-in-string "'" "" string))
+                (imobjs (pyim-imobjs-create string1 scheme-name))
+                (regexp-list
+                 (mapcar
+                  #'(lambda (imobj)
+                      (if (eq class 'xingma)
+                          (pyim-cregexp-build:xingma imobj nil nil nil 
code-prefix)
+                        (pyim-cregexp-build:quanpin imobj nil nil nil 
char-level-num)))
+                  imobjs))
+                (regexp
+                 (when regexp-list
+                   (mapconcat #'identity
+                              (delq nil regexp-list)
+                              "\\|")))
+                (regexp
+                 (if (> (length regexp) 0)
+                     (if (equal string string1)
+                         (concat string "\\|" regexp)
+                       (concat string "\\|" string1 "\\|" regexp))
+                   string)))
+           (format "\\(?:%s\\)" regexp))))
+     lst "")))
+
+(defun pyim-cregexp-build:quanpin (imobj &optional match-beginning
+                                         first-equal all-equal char-level-num)
+  "从 IMOBJ 创建一个搜索中文的 regexp."
+  (let* ((imobj
+          (mapcar #'(lambda (x)
+                      (concat (nth 0 x) (nth 1 x)))
+                  imobj))
+         (cchar-list
+          (let ((n 0) results)
+            (dolist (py imobj)
+              (let* ((equal-match
+                      (or all-equal
+                          (and first-equal (= n 0))))
+                     (cchars
+                      ;; 只取常用字,不常用的汉字忽略,防止生成的
+                      ;; regexp 太长而无法搜索
+                      (mapconcat #'(lambda (x)
+                                     (mapconcat #'identity
+                                                (cl-subseq (split-string x 
"|") 0 char-level-num)
+                                                ""))
+                                 (pyim-pinyin2cchar-get py equal-match nil t) 
"")))
+                (push cchars results))
+              (setq n (+ 1 n)))
+            (nreverse results)))
+         (regexp
+          (mapconcat #'(lambda (x)
+                         (when (pyim-string-match-p "\\cc" x)
+                           (format "[%s]" x)))
+                     cchar-list "")))
+    (unless (equal regexp "")
+      (concat (if match-beginning "^" "") regexp))))
+
+(defun pyim-cregexp-build:xingma (imobj &optional match-beginning
+                                        first-equal _all-equal code-prefix)
+  "从 IMOBJ 创建一个搜索中文的 regexp."
+  (cl-flet ((build-regexp
+             (list)
+             (let* ((n (apply #'max (mapcar #'length list)))
+                    results)
+               (dotimes (i n)
+                 (push (format "[%s]%s"
+                               (mapconcat
+                                (lambda (x)
+                                  (if (> i (- (length x) 1))
+                                      ""
+                                    (char-to-string
+                                     (elt x i))))
+                                list "")
+                               (if (> i 0) "?" ""))
+                       results))
+               (mapconcat #'identity (reverse results) ""))))
+    (let ((regexp (mapconcat
+                   (lambda (x)
+                     (let ((code (concat (or code-prefix "")
+                                         (if first-equal
+                                             (substring x 0 1)
+                                           x))))
+                       (build-regexp (pyim-dcache-get code))))
+                   imobj "")))
+      (unless (equal regexp "")
+        (concat (if match-beginning "^" "") regexp)))))
+
+(defun pyim-convert-cregexp-at-point (&optional insert-only)
+  "将光标前的字符串按拼音的规则转换为一个搜索中文的 regexp.
+用于实现拼音搜索中文的功能。
+
+在 minibuffer 中,这个命令默认会自动运行 `exit-minibuffer'.
+这个可以使用 INSERT-ONLY 参数控制。"
+  (interactive "P")
+  (unless (equal input-method-function 'pyim-input-method)
+    (activate-input-method 'pyim))
+  (let* ((buffer-string
+          (buffer-substring (point-min) (point-max)))
+         (string (if mark-active
+                     (buffer-substring-no-properties
+                      (region-beginning) (region-end))
+                   (buffer-substring
+                    (point)
+                    (save-excursion
+                      (skip-syntax-backward "w")
+                      (point)))))
+         (length (length string))
+         (cregexp (pyim-cregexp-build string)))
+    (delete-char (- 0 length))
+    (cond
+     ;; Deal with `org-search-view'
+     ((and (window-minibuffer-p)
+           (string-match-p
+            (regexp-quote "[+-]Word/{Regexp}") buffer-string))
+      (insert (format "{%s}" cregexp)))
+     (t (insert cregexp)))
+    (when (and (not insert-only)
+               (window-minibuffer-p))
+      (exit-minibuffer))))
+
+(defun pyim-isearch-search-fun ()
+  "这个函数为 isearch 相关命令添加中文拼音搜索功能,
+做为 `isearch-search-fun' 函数的 advice 使用。"
+  (funcall
+   (lambda ()
+     `(lambda (string &optional bound noerror count)
+        (funcall (if ,isearch-forward
+                     're-search-forward
+                   're-search-backward)
+                 (pyim-cregexp-build string) bound noerror count)))))
+
+;;;###autoload
+(define-minor-mode pyim-isearch-mode
+  "这个 mode 为 isearch 添加拼音搜索功能."
+  :global t
+  :require 'pyim
+  :lighter " pyim-isearch"
+  (if pyim-isearch-mode
+      (progn
+        (advice-add 'isearch-search-fun :override #'pyim-isearch-search-fun)
+        (message "PYIM: `pyim-isearch-mode' 已经激活,激活后,一些 isearch 扩展包有可能失效。"))
+    (advice-remove 'isearch-search-fun #'pyim-isearch-search-fun)))
+
+(declare-function ivy--regex-plus "ivy")
+(defun pyim-ivy-cregexp (str)
+  "Let ivy support search Chinese with pinyin feature."
+  (let ((x (ivy--regex-plus str))
+        (case-fold-search nil))
+    (if (listp x)
+        (mapcar (lambda (y)
+                  (if (cdr y)
+                      (list (if (equal (car y) "")
+                                ""
+                              (pyim-cregexp-build (car y)))
+                            (cdr y))
+                    (list (pyim-cregexp-build (car y)))))
+                x)
+      (pyim-cregexp-build x))))
+
+;; * Footer
+(provide 'pyim-cregexp)
+
+;;; pyim-cregexp.el ends here
diff --git a/pyim.el b/pyim.el
index 8226d92..05b0ba4 100644
--- a/pyim.el
+++ b/pyim.el
@@ -581,8 +581,6 @@
 (require 'pyim-pymap)
 (require 'pyim-common)
 (require 'pyim-pinyin)
-(require 'xr) ;Used by pyim-cregexp-build
-(require 'rx) ;Used by pyim-cregexp-build
 
 (defgroup pyim nil
   "Pyim is a Chinese input method support quanpin, shuangpin, wubi and 
cangjie."
@@ -654,13 +652,6 @@ plist 来表示,比如:
 临时激活某种辅助输入法(比如:拼音输入法)来输入汉字。"
   :type 'symbol)
 
-(defcustom pyim-cregexp-fallback-scheme 'quanpin
-  "`pyim-cregexp-build' 使用的 Fallback scheme.
-
-如果 `pyim-cregexp-build' 无法支持用户正在使用的 scheme 时,
-将使用这个 scheme."
-  :type 'symbol)
-
 (defcustom pyim-translate-trigger-char "v"
   "用于触发特殊操作的字符,相当与单字快捷键.
 
@@ -3625,233 +3616,6 @@ PUNCT-LIST 格式类似:
   (setq pyim-input-ascii
         (not pyim-input-ascii)))
 
-;; ** 让 isearch-mode 通过 pinyin 搜索中文
-(defun pyim-cregexp-build (string &optional char-level-num)
-  "根据 STRING 构建一个中文 regexp, 用于 \"拼音搜索汉字\".
-比如:\"nihao\" -> \"[你呢...][好号...] \\| nihao\""
-  ;; NOTE: (rx-to-string "") will return "\\(?:\\)",
-  ;; While I want (pyim-cregexp-build "") return just "".
-  (if (equal string "")
-      string
-    (let* ((char-level-num (or char-level-num 3))
-           (rx-string
-            (if (= char-level-num 0)
-                string
-              (ignore-errors
-                (rx-to-string
-                 (pyim-cregexp-build-from-rx
-                  (lambda (x)
-                    (if (stringp x)
-                        (xr (pyim-cregexp-build-1 x char-level-num))
-                      x))
-                  (xr string)))))))
-      (if (and rx-string (stringp rx-string))
-          (if (pyim-cregexp-valid-p rx-string)
-              rx-string
-            (pyim-cregexp-build string (- char-level-num 1)))
-        string))))
-
-(defun pyim-cregexp-valid-p (cregexp)
-  "Return t when cregexp is a valid regexp."
-  (and cregexp
-       (stringp cregexp)
-       (condition-case nil
-           (progn (string-match-p cregexp "") t)
-         ;; FIXME: Emacs can't handle regexps whose length is too big :-(
-         (error nil))))
-
-(defun pyim-cregexp-build-from-rx (fn rx-form)
-  (pcase rx-form
-    ('nil nil)
-    (`(,form) (funcall fn form))
-    (`(any . ,_) rx-form)
-    (`(,_ . ,_)
-     (mapcar (lambda (x)
-               (pyim-cregexp-build-from-rx fn x))
-             rx-form))
-    (_ (funcall fn rx-form))))
-
-(defun pyim-cregexp-build-1 (str &optional char-level-num)
-  (let* ((scheme-name (pyim-scheme-name))
-         (class (pyim-scheme-get-option scheme-name :class))
-         (code-prefix (pyim-scheme-get-option scheme-name :code-prefix))
-         (sep "#####&&&&#####")
-         (lst (remove "" (split-string
-                          (replace-regexp-in-string
-                           "\\([a-z]+'*\\)" (concat sep "\\1" sep) str)
-                          sep)))
-         (char-level-num
-          (cond
-           ((and char-level-num (> char-level-num 3)) 3)
-           ((and char-level-num (< char-level-num 1)) 1)
-           (t char-level-num))))
-    ;; 确保 pyim 词库加载
-    (pyim-dcache-init-variables)
-    ;; pyim 暂时只支持全拼和双拼搜索
-    (when (not (member class '(quanpin shuangpin xingma)))
-      (setq scheme-name pyim-cregexp-fallback-scheme))
-    (mapconcat
-     (lambda (string)
-       (if (or (pyim-string-match-p "[^a-z']+" string)
-               (equal string ""))
-           string
-         (let* ((string1 (replace-regexp-in-string "'" "" string))
-                (imobjs (pyim-imobjs-create string1 scheme-name))
-                (regexp-list
-                 (mapcar
-                  #'(lambda (imobj)
-                      (if (eq class 'xingma)
-                          (pyim-cregexp-build:xingma imobj nil nil nil 
code-prefix)
-                        (pyim-cregexp-build:quanpin imobj nil nil nil 
char-level-num)))
-                  imobjs))
-                (regexp
-                 (when regexp-list
-                   (mapconcat #'identity
-                              (delq nil regexp-list)
-                              "\\|")))
-                (regexp
-                 (if (> (length regexp) 0)
-                     (if (equal string string1)
-                         (concat string "\\|" regexp)
-                       (concat string "\\|" string1 "\\|" regexp))
-                   string)))
-           (format "\\(?:%s\\)" regexp))))
-     lst "")))
-
-(defun pyim-cregexp-build:quanpin (imobj &optional match-beginning
-                                         first-equal all-equal char-level-num)
-  "从 IMOBJ 创建一个搜索中文的 regexp."
-  (let* ((imobj
-          (mapcar #'(lambda (x)
-                      (concat (nth 0 x) (nth 1 x)))
-                  imobj))
-         (cchar-list
-          (let ((n 0) results)
-            (dolist (py imobj)
-              (let* ((equal-match
-                      (or all-equal
-                          (and first-equal (= n 0))))
-                     (cchars
-                      ;; 只取常用字,不常用的汉字忽略,防止生成的
-                      ;; regexp 太长而无法搜索
-                      (mapconcat #'(lambda (x)
-                                     (mapconcat #'identity
-                                                (cl-subseq (split-string x 
"|") 0 char-level-num)
-                                                ""))
-                                 (pyim-pinyin2cchar-get py equal-match nil t) 
"")))
-                (push cchars results))
-              (setq n (+ 1 n)))
-            (nreverse results)))
-         (regexp
-          (mapconcat #'(lambda (x)
-                         (when (pyim-string-match-p "\\cc" x)
-                           (format "[%s]" x)))
-                     cchar-list "")))
-    (unless (equal regexp "")
-      (concat (if match-beginning "^" "") regexp))))
-
-(defun pyim-cregexp-build:xingma (imobj &optional match-beginning
-                                        first-equal _all-equal code-prefix)
-  "从 IMOBJ 创建一个搜索中文的 regexp."
-  (cl-flet ((build-regexp
-             (list)
-             (let* ((n (apply #'max (mapcar #'length list)))
-                    results)
-               (dotimes (i n)
-                 (push (format "[%s]%s"
-                               (mapconcat
-                                (lambda (x)
-                                  (if (> i (- (length x) 1))
-                                      ""
-                                    (char-to-string
-                                     (elt x i))))
-                                list "")
-                               (if (> i 0) "?" ""))
-                       results))
-               (mapconcat #'identity (reverse results) ""))))
-    (let ((regexp (mapconcat
-                   (lambda (x)
-                     (let ((code (concat (or code-prefix "")
-                                         (if first-equal
-                                             (substring x 0 1)
-                                           x))))
-                       (build-regexp (pyim-dcache-get code))))
-                   imobj "")))
-      (unless (equal regexp "")
-        (concat (if match-beginning "^" "") regexp)))))
-
-(defun pyim-isearch-search-fun ()
-  "这个函数为 isearch 相关命令添加中文拼音搜索功能,
-做为 `isearch-search-fun' 函数的 advice 使用。"
-  (funcall
-   (lambda ()
-     `(lambda (string &optional bound noerror count)
-        (funcall (if ,isearch-forward
-                     're-search-forward
-                   're-search-backward)
-                 (pyim-cregexp-build string) bound noerror count)))))
-
-;;;###autoload
-(define-minor-mode pyim-isearch-mode
-  "这个 mode 为 isearch 添加拼音搜索功能."
-  :global t
-  :require 'pyim
-  :lighter " pyim-isearch"
-  (if pyim-isearch-mode
-      (progn
-        (advice-add 'isearch-search-fun :override #'pyim-isearch-search-fun)
-        (message "PYIM: `pyim-isearch-mode' 已经激活,激活后,一些 isearch 扩展包有可能失效。"))
-    (advice-remove 'isearch-search-fun #'pyim-isearch-search-fun)))
-
-(declare-function ivy--regex-plus "ivy")
-(defun pyim-ivy-cregexp (str)
-  "Let ivy support search Chinese with pinyin feature."
-  (let ((x (ivy--regex-plus str))
-        (case-fold-search nil))
-    (if (listp x)
-        (mapcar (lambda (y)
-                  (if (cdr y)
-                      (list (if (equal (car y) "")
-                                ""
-                              (pyim-cregexp-build (car y)))
-                            (cdr y))
-                    (list (pyim-cregexp-build (car y)))))
-                x)
-      (pyim-cregexp-build x))))
-
-(defun pyim-convert-cregexp-at-point (&optional insert-only)
-  "将光标前的字符串按拼音的规则转换为一个搜索中文的 regexp.
-用于实现拼音搜索中文的功能。
-
-在 minibuffer 中,这个命令默认会自动运行 `exit-minibuffer'.
-这个可以使用 INSERT-ONLY 参数控制。"
-  (interactive "P")
-  (unless (equal input-method-function 'pyim-input-method)
-    (activate-input-method 'pyim))
-  (let* ((buffer-string
-          (buffer-substring (point-min) (point-max)))
-         (string (if mark-active
-                     (buffer-substring-no-properties
-                      (region-beginning) (region-end))
-                   (buffer-substring
-                    (point)
-                    (save-excursion
-                      (skip-syntax-backward "w")
-                      (point)))))
-         (length (length string))
-         (cregexp (pyim-cregexp-build string)))
-    (delete-char (- 0 length))
-    (cond
-     ;; Deal with `org-search-view'
-     ((and (window-minibuffer-p)
-           (string-match-p
-            (regexp-quote "[+-]Word/{Regexp}") buffer-string))
-      (insert (format "{%s}" cregexp)))
-     (t (insert cregexp)))
-    (when (and (not insert-only)
-               (window-minibuffer-p))
-      (exit-minibuffer))))
-
 ;; ** 让 forward/backward 支持中文
 (defun pyim-forward-word (&optional arg)
   "向前移动 ARG 英文或者中文词,向前移动时基于 *最长* 的词移动。"



reply via email to

[Prev in Thread] Current Thread [Next in Thread]