emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/pyim f16729fa58 1/3: pyim-cstring--partition -> pyim-pymap-split-string


From: ELPA Syncer
Subject: [elpa] externals/pyim f16729fa58 1/3: pyim-cstring--partition -> pyim-pymap-split-string
Date: Tue, 17 Jan 2023 23:58:06 -0500 (EST)

branch: externals/pyim
commit f16729fa586df689aaed23c89b0377cef9e1b7c7
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>

    pyim-cstring--partition -> pyim-pymap-split-string
---
 pyim-cstring-utils.el |  2 +-
 pyim-cstring.el       | 22 ++--------------------
 pyim-pymap.el         | 21 +++++++++++++++++++++
 tests/pyim-tests.el   | 34 ++++++++++++++++++++--------------
 4 files changed, 44 insertions(+), 35 deletions(-)

diff --git a/pyim-cstring-utils.el b/pyim-cstring-utils.el
index c25b0a4340..c0fc879888 100644
--- a/pyim-cstring-utils.el
+++ b/pyim-cstring-utils.el
@@ -98,7 +98,7 @@ CHINESE-STRING 分词,得到一个词条 alist,这个 alist 的元素都是
                      (pyim-cstring--split-to-string
                       str prefer-short-word separator max-word-length)
                    str)))
-             (pyim-cstring--partition string) (or separator " ")))
+             (pyim-pymap-split-string string) (or separator " ")))
 
 (defun pyim-cstring--split-to-string (chinese-string &optional prefer-short-word
                                                      separator max-word-length)
diff --git a/pyim-cstring.el b/pyim-cstring.el
index 2d718a72dd..60298d77a1 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring.el
@@ -37,24 +37,6 @@
   "Chinese string tools for pyim."
   :group 'pyim)
 
-(defun pyim-cstring--partition (string &optional to-cchar)
-  "STRING partition.
-
-1. Hello你好 -> (\"Hello\" \"你\" \"好\"), when TO-CCHAR is non-nil.
-2. Hello你好 -> (\"Hello\" \"你好\"), when TO-CCHAR is nil."
-  ;; NOTE: 使用5个\0作为分割符有没有其它副作用?有待观察。
-  (let ((sep (make-string 5 ?\0)))
-    (if (pyim-string-match-p "\\CC" string)
-        ;; 处理中英文混合的情况
-        (remove "" (split-string
-                    (replace-regexp-in-string
-                     (if to-cchar "\\(\\cc\\)" "\\(\\cc+\\)")
-                     (concat sep "\\1" sep) string)
-                    sep))
-      (if to-cchar
-          (cl-mapcar #'char-to-string string)
-        (list string)))))
-
 (defun pyim-cstring--substrings (cstring &optional max-length number)
   "找出 CSTRING 中所有长度不超过 MAX-LENGTH 的子字符串,生成一个 alist。
 
@@ -129,7 +111,7 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
 
 (defun pyim-cstring-to-pinyin--from-dcache (cstring)
   "从 Dcache 中搜索 CSTRING 对应的拼音。"
-  (let* ((string-parts (pyim-cstring--partition cstring))
+  (let* ((string-parts (pyim-pymap-split-string cstring))
          (pinyins-list
           (mapcar #'pyim-cstring--get-pinyin-code
                   string-parts)))
@@ -152,7 +134,7 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
 (defun pyim-cstring-to-pinyin--from-pymap (cstring)
   "使用 PYMAP 提供的工具来搜索 CSTRING 对应的拼音。"
   (pyim-pymap-cchars2pys-get
-   (pyim-cstring--partition cstring t)))
+   (pyim-pymap-split-string cstring t)))
 
 ;;;###autoload
 (defun pyim-cstring-to-pinyin-simple (string &optional shou-zi-mu separator return-list)
diff --git a/pyim-pymap.el b/pyim-pymap.el
index c9448ca6d5..9b36d8211c 100644
--- a/pyim-pymap.el
+++ b/pyim-pymap.el
@@ -1068,6 +1068,27 @@ If FORCE is non-nil, FORCE build."
      (pyim-pymap--adjust-duoyinzi
       cchars pinyins-list))))
 
+(defun pyim-pymap-split-string (string &optional to-cchar)
+  "Split STRING at the boundaries used for Chinese text processing.
+
+1. Hello你好 -> (\"Hello\" \"你\" \"好\"), when TO-CCHAR is non-nil.
+2. Hello你好 -> (\"Hello\" \"你好\"), when TO-CCHAR is nil."
+  (let* ((sep (make-string 5 ?\0)) ; five NUL chars, inserted at each cut point
+         (chars (split-string string "")) ; STRING cut up with an empty separator
+         (chars-with-seps
+          (cl-mapcan (lambda (a b) ; A is an element of CHARS, B the element after it
+                       (let ((x (pyim-pymap-cchar2py-get a))
+                             (y (pyim-pymap-cchar2py-get b)))
+                         (cond ((and x y) ; both A and B have a pinyin code
+                                (if to-cchar
+                                    (list a sep) ; TO-CCHAR: cut between the two
+                                  (list a)))
+                               ((and (not x) (not y)) ; neither has one: no cut
+                                (list a))
+                               (t (list a sep))))) ; only one has a code: cut after A
+                     chars (cdr chars))))
+    (remove "" (split-string (string-join chars-with-seps) sep)))) ; drop empty pieces
+
 (defun pyim-pymap-cchar2py-get (char-or-str)
   "获取字符或者字符串 CHAR-OR-STR 对应的拼音 code.
 
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 1a2d1c759b..45d76c6779 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -269,6 +269,26 @@
   (should (pyim-numbers> '(2) '(1 3))))
 
 ;; ** pyim-pymap 相关单元测试
+(ert-deftest pyim-tests-pyim-pymap-split-string ()
+  (should (equal (pyim-pymap-split-string "你好 hello 你好")
+                 '("你好" " hello " "你好"))) ; spaces stay attached to the "hello" part
+  (should (equal (pyim-pymap-split-string "hello 你好 hello 你好 hello")
+                 '("hello " "你好" " hello " "你好" " hello")))
+  (should (equal (pyim-pymap-split-string "你好 hello 你好@")
+                 '("你好" " hello " "你好" "@"))) ; trailing "@" becomes its own element
+  (should (equal (pyim-pymap-split-string "你好 hello 你好,你好")
+                 '("你好" " hello " "你好" "," "你好"))) ; "," separates the two 你好
+  (should (equal (pyim-pymap-split-string "你好 hello 你好" t)
+                 '("你" "好" " hello " "你" "好"))) ; TO-CCHAR: 你 and 好 split apart
+  (should (equal (pyim-pymap-split-string "你好")
+                 '("你好")))
+  (should (equal (pyim-pymap-split-string "你好" t)
+                 '("你" "好")))
+  (should (equal (pyim-pymap-split-string "hello")
+                 '("hello")))
+  (should (equal (pyim-pymap-split-string "hello" t)
+                 '("hello")))) ; TO-CCHAR leaves "hello" in one piece
+
 (ert-deftest pyim-tests-pyim-pymap ()
   (should-not (cl-find-if-not
                (lambda (x)
@@ -729,20 +749,6 @@
       (should (equal (get-text-property 0 :comment (car words)) "(buf)")))))
 
 ;; ** pyim-cstring 相关单元测试
-(ert-deftest pyim-tests-pyim-cstring--partition ()
-  (should (equal (pyim-cstring--partition "你好 hello 你好")
-                 '("你好" " hello " "你好")))
-  (should (equal (pyim-cstring--partition "你好 hello 你好" t)
-                 '("你" "好" " hello " "你" "好")))
-  (should (equal (pyim-cstring--partition "你好")
-                 '("你好")))
-  (should (equal (pyim-cstring--partition "你好" t)
-                 '("你" "好")))
-  (should (equal (pyim-cstring--partition "hello")
-                 '("hello")))
-  (should (equal (pyim-cstring--partition "hello" t)
-                 '("hello"))))
-
 (ert-deftest pyim-tests-pyim-cstring--substrings ()
   (should (equal (pyim-cstring--substrings "我爱北京")
                  '(("我爱北京" 0 4)



reply via email to

[Prev in Thread] Current Thread [Next in Thread]