[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/pyim f16729fa58 1/3: pyim-cstring--partition -> pyim-pymap-split-string
From: ELPA Syncer
Subject: [elpa] externals/pyim f16729fa58 1/3: pyim-cstring--partition -> pyim-pymap-split-string
Date: Tue, 17 Jan 2023 23:58:06 -0500 (EST)
branch: externals/pyim
commit f16729fa586df689aaed23c89b0377cef9e1b7c7
Author: Feng Shu <tumashu@163.com>
Commit: Feng Shu <tumashu@163.com>
pyim-cstring--partition -> pyim-pymap-split-string
---
pyim-cstring-utils.el | 2 +-
pyim-cstring.el | 22 ++--------------------
pyim-pymap.el | 21 +++++++++++++++++++++
tests/pyim-tests.el | 34 ++++++++++++++++++++--------------
4 files changed, 44 insertions(+), 35 deletions(-)
diff --git a/pyim-cstring-utils.el b/pyim-cstring-utils.el
index c25b0a4340..c0fc879888 100644
--- a/pyim-cstring-utils.el
+++ b/pyim-cstring-utils.el
@@ -98,7 +98,7 @@ CHINESE-STRING 分词,得到一个词条 alist,这个 alist 的元素都是
(pyim-cstring--split-to-string
str prefer-short-word separator max-word-length)
str)))
- (pyim-cstring--partition string) (or separator " ")))
+ (pyim-pymap-split-string string) (or separator " ")))
(defun pyim-cstring--split-to-string (chinese-string &optional
prefer-short-word
separator max-word-length)
diff --git a/pyim-cstring.el b/pyim-cstring.el
index 2d718a72dd..60298d77a1 100644
--- a/pyim-cstring.el
+++ b/pyim-cstring.el
@@ -37,24 +37,6 @@
"Chinese string tools for pyim."
:group 'pyim)
-(defun pyim-cstring--partition (string &optional to-cchar)
- "STRING partition.
-
-1. Hello你好 -> (\"Hello\" \"你\" \"好\"), when TO-CCHAR is non-nil.
-2. Hello你好 -> (\"Hello\" \"你好\"), when TO-CCHAR is nil."
- ;; NOTE: 使用5个\0作为分割符有没有其它副作用?有待观察。
- (let ((sep (make-string 5 ?\0)))
- (if (pyim-string-match-p "\\CC" string)
- ;; 处理中英文混合的情况
- (remove "" (split-string
- (replace-regexp-in-string
- (if to-cchar "\\(\\cc\\)" "\\(\\cc+\\)")
- (concat sep "\\1" sep) string)
- sep))
- (if to-cchar
- (cl-mapcar #'char-to-string string)
- (list string)))))
-
(defun pyim-cstring--substrings (cstring &optional max-length number)
"找出 CSTRING 中所有长度不超过 MAX-LENGTH 的子字符串,生成一个 alist。
@@ -129,7 +111,7 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
(defun pyim-cstring-to-pinyin--from-dcache (cstring)
"从 Dcache 中搜索 CSTRING 对应的拼音。"
- (let* ((string-parts (pyim-cstring--partition cstring))
+ (let* ((string-parts (pyim-pymap-split-string cstring))
(pinyins-list
(mapcar #'pyim-cstring--get-pinyin-code
string-parts)))
@@ -152,7 +134,7 @@ BUG: 当 STRING 中包含其它标点符号,并且设置 SEPERATER 时,结
(defun pyim-cstring-to-pinyin--from-pymap (cstring)
"使用 PYMAP 提供的工具来搜索 CSTRING 对应的拼音。"
(pyim-pymap-cchars2pys-get
- (pyim-cstring--partition cstring t)))
+ (pyim-pymap-split-string cstring t)))
;;;###autoload
(defun pyim-cstring-to-pinyin-simple (string &optional shou-zi-mu separator
return-list)
diff --git a/pyim-pymap.el b/pyim-pymap.el
index c9448ca6d5..9b36d8211c 100644
--- a/pyim-pymap.el
+++ b/pyim-pymap.el
@@ -1068,6 +1068,27 @@ If FORCE is non-nil, FORCE build."
(pyim-pymap--adjust-duoyinzi
cchars pinyins-list))))
+(defun pyim-pymap-split-string (string &optional to-cchar)
+ "将 STRING 按照中文处理的标准切开.
+
+1. Hello你好 -> (\"Hello\" \"你\" \"好\"), when TO-CCHAR is non-nil.
+2. Hello你好 -> (\"Hello\" \"你好\"), when TO-CCHAR is nil."
+ (let* ((sep (make-string 5 ?\0))
+ (chars (split-string string ""))
+ (chars-with-seps
+ (cl-mapcan (lambda (a b)
+ (let ((x (pyim-pymap-cchar2py-get a))
+ (y (pyim-pymap-cchar2py-get b)))
+ (cond ((and x y)
+ (if to-cchar
+ (list a sep)
+ (list a)))
+ ((and (not x) (not y))
+ (list a))
+ (t (list a sep)))))
+ chars (cdr chars))))
+ (remove "" (split-string (string-join chars-with-seps) sep))))
+
(defun pyim-pymap-cchar2py-get (char-or-str)
"获取字符或者字符串 CHAR-OR-STR 对应的拼音 code.
diff --git a/tests/pyim-tests.el b/tests/pyim-tests.el
index 1a2d1c759b..45d76c6779 100644
--- a/tests/pyim-tests.el
+++ b/tests/pyim-tests.el
@@ -269,6 +269,26 @@
(should (pyim-numbers> '(2) '(1 3))))
;; ** pyim-pymap 相关单元测试
+(ert-deftest pyim-tests-pyim-pymap-split-string ()
+ (should (equal (pyim-pymap-split-string "你好 hello 你好")
+ '("你好" " hello " "你好")))
+ (should (equal (pyim-pymap-split-string "hello 你好 hello 你好 hello")
+ '("hello " "你好" " hello " "你好" " hello")))
+ (should (equal (pyim-pymap-split-string "你好 hello 你好@")
+ '("你好" " hello " "你好" "@")))
+ (should (equal (pyim-pymap-split-string "你好 hello 你好,你好")
+ '("你好" " hello " "你好" "," "你好")))
+ (should (equal (pyim-pymap-split-string "你好 hello 你好" t)
+ '("你" "好" " hello " "你" "好")))
+ (should (equal (pyim-pymap-split-string "你好")
+ '("你好")))
+ (should (equal (pyim-pymap-split-string "你好" t)
+ '("你" "好")))
+ (should (equal (pyim-pymap-split-string "hello")
+ '("hello")))
+ (should (equal (pyim-pymap-split-string "hello" t)
+ '("hello"))))
+
(ert-deftest pyim-tests-pyim-pymap ()
(should-not (cl-find-if-not
(lambda (x)
@@ -729,20 +749,6 @@
(should (equal (get-text-property 0 :comment (car words)) "(buf)")))))
;; ** pyim-cstring 相关单元测试
-(ert-deftest pyim-tests-pyim-cstring--partition ()
- (should (equal (pyim-cstring--partition "你好 hello 你好")
- '("你好" " hello " "你好")))
- (should (equal (pyim-cstring--partition "你好 hello 你好" t)
- '("你" "好" " hello " "你" "好")))
- (should (equal (pyim-cstring--partition "你好")
- '("你好")))
- (should (equal (pyim-cstring--partition "你好" t)
- '("你" "好")))
- (should (equal (pyim-cstring--partition "hello")
- '("hello")))
- (should (equal (pyim-cstring--partition "hello" t)
- '("hello"))))
-
(ert-deftest pyim-tests-pyim-cstring--substrings ()
(should (equal (pyim-cstring--substrings "我爱北京")
'(("我爱北京" 0 4)