emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master 05c5e4d 4/4: ; Merge: Update and optimize UCS norma


From: Noam Postavsky
Subject: [Emacs-diffs] master 05c5e4d 4/4: ; Merge: Update and optimize UCS normalization tests
Date: Sat, 8 Jul 2017 14:33:12 -0400 (EDT)

branch: master
commit 05c5e4d8181ee5274885da4ed520bb9874491aab
Merge: efaf148 06ff34c
Author: Noam Postavsky <address@hidden>
Commit: Noam Postavsky <address@hidden>

    ; Merge: Update and optimize UCS normalization tests
---
 test/lisp/international/ucs-normalize-tests.el | 247 +++++++++++++++----------
 1 file changed, 153 insertions(+), 94 deletions(-)

diff --git a/test/lisp/international/ucs-normalize-tests.el 
b/test/lisp/international/ucs-normalize-tests.el
index d85efe2..02a4bba 100644
--- a/test/lisp/international/ucs-normalize-tests.el
+++ b/test/lisp/international/ucs-normalize-tests.el
@@ -26,15 +26,13 @@
 ;; If there are lines marked as failing (see
 ;; `ucs-normalize-tests--failing-lines-part1' and
 ;; `ucs-normalize-tests--failing-lines-part2'), they may need to be
-;; adjusted when NormalizationTest.txt is updated.  To get a list of
-;; currently failing lines, set those 2 variables to nil, run the
-;; tests, and inspect the values of
-;; `ucs-normalize-tests--part1-rule1-failed-lines' and
-;; `ucs-normalize-tests--part1-rule2-failed-chars', respectively.
+;; adjusted when NormalizationTest.txt is updated.  Run the function
+;; `ucs-normalize-check-failing-lines' to see what changes are needed.
 
 ;;; Code:
 
 (eval-when-compile (require 'cl-lib))
+(require 'seq)
 (require 'ert)
 (require 'ucs-normalize)
 
@@ -44,83 +42,98 @@
 (defun ucs-normalize-tests--parse-column ()
   (let ((chars nil)
         (term nil))
-    (while (and (not (equal term ";"))
+    (while (and (not (eq term ?\;))
                 (looking-at "\\([[:xdigit:]]\\{4,6\\}\\)\\([; ]\\)"))
-      (let ((code-point (match-string 1)))
-        (setq term (match-string 2))
+      (let ((code-point (match-string-no-properties 1)))
+        (setq term (char-after (match-beginning 2)))
         (goto-char (match-end 0))
         (push (string-to-number code-point 16) chars)))
-    (nreverse chars)))
+    (apply #'string (nreverse chars))))
 
-(defmacro ucs-normalize-tests--normalize (norm str)
+(defconst ucs-normalize-tests--norm-buf (generate-new-buffer " 
*ucs-normalizing-buffer*"))
+
+(defmacro ucs-normalize-tests--normalization-equal-p (norm str equal-to)
   "Like `ucs-normalize-string' but reuse current buffer for efficiency.
 And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
   (let ((norm-alist '((NFC . ucs-normalize-NFC-region)
                       (NFD . ucs-normalize-NFD-region)
                       (NFKC . ucs-normalize-NFKC-region)
                       (NFKD . ucs-normalize-NFKD-region))))
-    `(save-restriction
-       (narrow-to-region (point) (point))
+    `(with-current-buffer ucs-normalize-tests--norm-buf
+       (erase-buffer)
        (insert ,str)
-       (funcall #',(cdr (assq norm norm-alist)) (point-min) (point-max))
-       (delete-and-extract-region (point-min) (point-max)))))
+       (,(cdr (assq norm norm-alist)) (point-min) (point-max))
+       (goto-char (point-min))
+       (insert ,equal-to)
+       (eq (compare-buffer-substrings nil nil (point) nil (point) nil) 0))))
+
+(defmacro ucs-normalize-tests--normalization-chareq-p (norm char char-eq-to)
+  "Like `ucs-normalize-string' but reuse current buffer for efficiency.
+And NORM is one of the symbols `NFC', `NFD', `NFKC', `NFKD' for brevity."
+  (let ((norm-alist '((NFC . ucs-normalize-NFC-region)
+                      (NFD . ucs-normalize-NFD-region)
+                      (NFKC . ucs-normalize-NFKC-region)
+                      (NFKD . ucs-normalize-NFKD-region))))
+    `(with-current-buffer ucs-normalize-tests--norm-buf
+       (erase-buffer)
+       (insert ,char)
+       (,(cdr (assq norm norm-alist)) (point-min) (point-max))
+       (and (eq (buffer-size) 1)
+            (eq (char-after (point-min)) ,char-eq-to)))))
 
 (defvar ucs-normalize-tests--chars-part1 nil)
 
-(defun ucs-normalize-tests--invariants-hold-p (&rest columns)
+(defsubst ucs-normalize-tests--rule1-holds-p (source nfc nfd nfkc nfkd)
   "Check 1st conformance rule.
 The following invariants must be true for all conformant implementations..."
   (when ucs-normalize-tests--chars-part1
-    ;; See `ucs-normalize-tests--invariants-rule2-hold-p'.
+    ;; See `ucs-normalize-tests--rule2-holds-p'.
     (aset ucs-normalize-tests--chars-part1
-          (caar columns) 1))
-  (cl-destructuring-bind (source nfc nfd nfkc nfkd)
-      (mapcar (lambda (c) (apply #'string c)) columns)
-    (and
-     ;; c2 ==  toNFC(c1) ==  toNFC(c2) ==  toNFC(c3)
-     (equal nfc (ucs-normalize-tests--normalize NFC source))
-     (equal nfc (ucs-normalize-tests--normalize NFC nfc))
-     (equal nfc (ucs-normalize-tests--normalize NFC nfd))
-     ;; c4 ==  toNFC(c4) ==  toNFC(c5)
-     (equal nfkc (ucs-normalize-tests--normalize NFC nfkc))
-     (equal nfkc (ucs-normalize-tests--normalize NFC nfkd))
-
-     ;; c3 ==  toNFD(c1) ==  toNFD(c2) ==  toNFD(c3)
-     (equal nfd (ucs-normalize-tests--normalize NFD source))
-     (equal nfd (ucs-normalize-tests--normalize NFD nfc))
-     (equal nfd (ucs-normalize-tests--normalize NFD nfd))
-     ;; c5 ==  toNFD(c4) ==  toNFD(c5)
-     (equal nfkd (ucs-normalize-tests--normalize NFD nfkc))
-     (equal nfkd (ucs-normalize-tests--normalize NFD nfkd))
-
-     ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == 
toNFKC(c5)
-     (equal nfkc (ucs-normalize-tests--normalize NFKC source))
-     (equal nfkc (ucs-normalize-tests--normalize NFKC nfc))
-     (equal nfkc (ucs-normalize-tests--normalize NFKC nfd))
-     (equal nfkc (ucs-normalize-tests--normalize NFKC nfkc))
-     (equal nfkc (ucs-normalize-tests--normalize NFKC nfkd))
-
-     ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == 
toNFKD(c5)
-     (equal nfkd (ucs-normalize-tests--normalize NFKD source))
-     (equal nfkd (ucs-normalize-tests--normalize NFKD nfc))
-     (equal nfkd (ucs-normalize-tests--normalize NFKD nfd))
-     (equal nfkd (ucs-normalize-tests--normalize NFKD nfkc))
-     (equal nfkd (ucs-normalize-tests--normalize NFKD nfkd)))))
-
-(defun ucs-normalize-tests--invariants-rule2-hold-p (char)
+          (aref source 0) 1))
+  (and
+   ;; c2 ==  toNFC(c1) ==  toNFC(c2) ==  toNFC(c3)
+   (ucs-normalize-tests--normalization-equal-p NFC source nfc)
+   (ucs-normalize-tests--normalization-equal-p NFC nfc nfc)
+   (ucs-normalize-tests--normalization-equal-p NFC nfd nfc)
+   ;; c4 ==  toNFC(c4) ==  toNFC(c5)
+   (ucs-normalize-tests--normalization-equal-p NFC nfkc nfkc)
+   (ucs-normalize-tests--normalization-equal-p NFC nfkd nfkc)
+
+   ;; c3 ==  toNFD(c1) ==  toNFD(c2) ==  toNFD(c3)
+   (ucs-normalize-tests--normalization-equal-p NFD source nfd)
+   (ucs-normalize-tests--normalization-equal-p NFD nfc nfd)
+   (ucs-normalize-tests--normalization-equal-p NFD nfd nfd)
+   ;; c5 ==  toNFD(c4) ==  toNFD(c5)
+   (ucs-normalize-tests--normalization-equal-p NFD nfkc nfkd)
+   (ucs-normalize-tests--normalization-equal-p NFD nfkd nfkd)
+
+   ;; c4 == toNFKC(c1) == toNFKC(c2) == toNFKC(c3) == toNFKC(c4) == toNFKC(c5)
+   (ucs-normalize-tests--normalization-equal-p NFKC source nfkc)
+   (ucs-normalize-tests--normalization-equal-p NFKC nfc nfkc)
+   (ucs-normalize-tests--normalization-equal-p NFKC nfd nfkc)
+   (ucs-normalize-tests--normalization-equal-p NFKC nfkc nfkc)
+   (ucs-normalize-tests--normalization-equal-p NFKC nfkd nfkc)
+
+   ;; c5 == toNFKD(c1) == toNFKD(c2) == toNFKD(c3) == toNFKD(c4) == toNFKD(c5)
+   (ucs-normalize-tests--normalization-equal-p NFKD source nfkd)
+   (ucs-normalize-tests--normalization-equal-p NFKD nfc nfkd)
+   (ucs-normalize-tests--normalization-equal-p NFKD nfd nfkd)
+   (ucs-normalize-tests--normalization-equal-p NFKD nfkc nfkd)
+   (ucs-normalize-tests--normalization-equal-p NFKD nfkd nfkd)))
+
+(defsubst ucs-normalize-tests--rule2-holds-p (X)
  "Check 2nd conformance rule.
 For every code point X assigned in this version of Unicode that is not 
specifically
 listed in Part 1, the following invariants must be true for all conformant
 implementations:
 
   X == toNFC(X) == toNFD(X) == toNFKC(X) == toNFKD(X)"
- (let ((X (string char)))
-   (and (equal X (ucs-normalize-tests--normalize NFC X))
-        (equal X (ucs-normalize-tests--normalize NFD X))
-        (equal X (ucs-normalize-tests--normalize NFKC X))
-        (equal X (ucs-normalize-tests--normalize NFKD X)))))
+ (and (ucs-normalize-tests--normalization-chareq-p NFC X X)
+      (ucs-normalize-tests--normalization-chareq-p NFD X X)
+      (ucs-normalize-tests--normalization-chareq-p NFKC X X)
+      (ucs-normalize-tests--normalization-chareq-p NFKD X X)))
 
-(cl-defun ucs-normalize-tests--invariants-failing-for-part (part &optional 
skip-lines &key progress-str)
+(cl-defun ucs-normalize-tests--rule1-failing-for-partX (part &optional 
skip-lines &key progress-str)
   "Returns a list of failed line numbers."
   (with-temp-buffer
     (insert-file-contents ucs-normalize-test-data-file)
@@ -136,8 +149,8 @@ implementations:
                                                  progress-str beg-line end-line
                                                  0 nil 0.5))
                for line from beg-line to (1- end-line)
-               unless (or (= (following-char) ?#)
-                          (ucs-normalize-tests--invariants-hold-p
+               unless (or (eq (following-char) ?#)
+                          (ucs-normalize-tests--rule1-holds-p
                            (ucs-normalize-tests--parse-column)
                            (ucs-normalize-tests--parse-column)
                            (ucs-normalize-tests--parse-column)
@@ -148,7 +161,7 @@ implementations:
                do (forward-line)
                if reporter do (progress-reporter-update reporter line)))))
 
-(defun ucs-normalize-tests--invariants-failing-for-lines (lines)
+(defun ucs-normalize-tests--rule1-failing-for-lines (lines)
   "Returns a list of failed line numbers."
   (with-temp-buffer
     (insert-file-contents ucs-normalize-test-data-file)
@@ -156,7 +169,7 @@ implementations:
     (cl-loop for prev-line = 1 then line
              for line in lines
              do (forward-line (- line prev-line))
-             unless (ucs-normalize-tests--invariants-hold-p
+             unless (ucs-normalize-tests--rule1-holds-p
                      (ucs-normalize-tests--parse-column)
                      (ucs-normalize-tests--parse-column)
                      (ucs-normalize-tests--parse-column)
@@ -165,7 +178,7 @@ implementations:
              collect line)))
 
 (ert-deftest ucs-normalize-part0 ()
-  (should-not (ucs-normalize-tests--invariants-failing-for-part 0)))
+  (should-not (ucs-normalize-tests--rule1-failing-for-partX 0)))
 
 (defconst ucs-normalize-tests--failing-lines-part1
   (list 15131 15132 15133 15134 15135 15136 15137 15138
@@ -195,6 +208,8 @@ implementations:
   "A list of line numbers.")
 (defvar ucs-normalize-tests--part1-rule2-failed-chars nil
   "A list of code points.")
+(defvar ucs-normalize-tests--part2-rule1-failed-lines nil
+  "A list of line numbers.")
 
 (defun ucs-normalize-tests--part1-rule2 (chars-part1)
   (let ((reporter (make-progress-reporter "UCS Normalize Test Part1, rule 2"
@@ -204,11 +219,11 @@ implementations:
      (lambda (char-range listed-in-part)
        (unless (eq listed-in-part 1)
          (if (characterp char-range)
-             (progn (unless (ucs-normalize-tests--invariants-rule2-hold-p 
char-range)
+             (progn (unless (ucs-normalize-tests--rule2-holds-p char-range)
                       (push char-range failed-chars))
                     (progress-reporter-update reporter char-range))
            (cl-loop for char from (car char-range) to (cdr char-range)
-                    unless (ucs-normalize-tests--invariants-rule2-hold-p char)
+                    unless (ucs-normalize-tests--rule2-holds-p char)
                     do (push char failed-chars)
                     do (progress-reporter-update reporter char)))))
      chars-part1)
@@ -219,59 +234,103 @@ implementations:
   :tags '(:expensive-test)
   ;; This takes a long time, so make sure we're compiled.
   (dolist (fun '(ucs-normalize-tests--part1-rule2
-                 ucs-normalize-tests--invariants-failing-for-part
-                 ucs-normalize-tests--invariants-hold-p
-                 ucs-normalize-tests--invariants-rule2-hold-p))
+                 ucs-normalize-tests--rule1-failing-for-partX
+                 ucs-normalize-tests--rule1-holds-p
+                 ucs-normalize-tests--rule2-holds-p))
     (or (byte-code-function-p (symbol-function fun))
         (byte-compile fun)))
   (let ((ucs-normalize-tests--chars-part1 (make-char-table 
'ucs-normalize-tests t)))
-    (should-not
-     (setq ucs-normalize-tests--part1-rule1-failed-lines
-           (ucs-normalize-tests--invariants-failing-for-part
-            1 ucs-normalize-tests--failing-lines-part1
-            :progress-str "UCS Normalize Test Part1, rule 1")))
-    (should-not (setq ucs-normalize-tests--part1-rule2-failed-chars
-                      (ucs-normalize-tests--part1-rule2
-                       ucs-normalize-tests--chars-part1)))))
+    (setq ucs-normalize-tests--part1-rule1-failed-lines
+          (ucs-normalize-tests--rule1-failing-for-partX
+           1 ucs-normalize-tests--failing-lines-part1
+           :progress-str "UCS Normalize Test Part1, rule 1"))
+    (setq ucs-normalize-tests--part1-rule2-failed-chars
+          (ucs-normalize-tests--part1-rule2
+           ucs-normalize-tests--chars-part1))
+    (should-not ucs-normalize-tests--part1-rule1-failed-lines)
+    (should-not ucs-normalize-tests--part1-rule2-failed-chars)))
 
 (ert-deftest ucs-normalize-part1-failing ()
   :expected-result :failed
   (skip-unless ucs-normalize-tests--failing-lines-part1)
   (should-not
-   (ucs-normalize-tests--invariants-failing-for-lines
+   (ucs-normalize-tests--rule1-failing-for-lines
     ucs-normalize-tests--failing-lines-part1)))
 
 (defconst ucs-normalize-tests--failing-lines-part2
-  (list 18328 18330 18332 18334 18336 18338 18340 18342
-        18344 18346 18348 18350 18352 18354 18356 18358
-        18360 18362 18364 18366 18368 18370 18372 18374
-        18376 18378 18380 18382 18384 18386 18388 18390
-        18392 18394 18396 18398 18400 18402 18404 18406
-        18408 18410 18412 18414 18416 18418 18420 18422
-        18424 18426 18494 18496 18498 18500 18502 18504
-        18506 18508 18510 18512 18514 18516 18518 18520
-        18522 18524 18526 18528 18530 18532 18534 18536
-        18538 18540 18542 18544 18546 18548 18550 18552
-        18554 18556 18558 18560 18562 18564 18566 18568
-        18570 18572 18574 18576 18578 18580 18582 18584
-        18586 18588 18590 18592 18594 18596))
+  (list 17656 17658 18006 18007 18008 18009 18010 18011
+        18012 18340 18342 18344 18346 18348 18350 18352
+        18354 18356 18358 18360 18362 18364 18366 18368
+        18370 18372 18374 18376 18378 18380 18382 18384
+        18386 18388 18390 18392 18394 18396 18398 18400
+        18402 18404 18406 18408 18410 18412 18414 18416
+        18418 18420 18422 18424 18426 18428 18430 18432
+        18434 18436 18438 18440 18442 18444 18446 18448
+        18450 18518 18520 18522 18524 18526 18528 18530
+        18532 18534 18536 18538 18540 18542 18544 18546
+        18548 18550 18552 18554 18556 18558 18560 18562
+        18564 18566 18568 18570 18572 18574 18576 18578
+        18580 18582 18584 18586 18588 18590 18592 18594
+        18596 18598 18600 18602 18604 18606 18608 18610
+        18612 18614 18616 18618 18620))
 
 (ert-deftest ucs-normalize-part2 ()
   :tags '(:expensive-test)
   (should-not
-   (ucs-normalize-tests--invariants-failing-for-part
-    2 ucs-normalize-tests--failing-lines-part2
-    :progress-str "UCS Normalize Test Part2")))
+   (setq ucs-normalize-tests--part2-rule1-failed-lines
+         (ucs-normalize-tests--rule1-failing-for-partX
+          2 ucs-normalize-tests--failing-lines-part2
+          :progress-str "UCS Normalize Test Part2"))))
 
 (ert-deftest ucs-normalize-part2-failing ()
   :expected-result :failed
   (skip-unless ucs-normalize-tests--failing-lines-part2)
   (should-not
-   (ucs-normalize-tests--invariants-failing-for-lines
+   (ucs-normalize-tests--rule1-failing-for-lines
     ucs-normalize-tests--failing-lines-part2)))
 
 (ert-deftest ucs-normalize-part3 ()
   (should-not
-   (ucs-normalize-tests--invariants-failing-for-part 3)))
+   (ucs-normalize-tests--rule1-failing-for-partX 3)))
+
+(defun ucs-normalize-tests--insert-failing-lines (var newval)
+  (insert (format "`%s' should be updated to:\n
+\(defconst %s
+  (list " var var))
+  (dolist (linos (seq-partition newval 8))
+    (insert (mapconcat #'number-to-string linos " ") "\n"))
+  (insert ")\)"))
+
+(defun ucs-normalize-check-failing-lines ()
+  (interactive)
+  (let ((ucs-normalize-tests--failing-lines-part1 nil)
+        (ucs-normalize-tests--failing-lines-part2 nil))
+    (setq ucs-normalize-tests--part1-rule1-failed-lines nil)
+    (setq ucs-normalize-tests--part1-rule2-failed-chars nil)
+    (setq ucs-normalize-tests--part2-rule1-failed-lines nil)
+    (ert "\\`ucs-normalize"))
+
+  (with-current-buffer (get-buffer-create "*ucs normalize change bad lines*")
+    (erase-buffer)
+    (unless (equal ucs-normalize-tests--part1-rule1-failed-lines
+                   ucs-normalize-tests--failing-lines-part1)
+      (ucs-normalize-tests--insert-failing-lines
+       'ucs-normalize-tests--failing-lines-part1
+       ucs-normalize-tests--part1-rule1-failed-lines))
+
+    (when ucs-normalize-tests--part1-rule2-failed-chars
+      (insert (format "Some characters failed rule 2!\n\n%S"
+                      `(list 
,@ucs-normalize-tests--part1-rule2-failed-chars))))
+
+    (unless (equal ucs-normalize-tests--part2-rule1-failed-lines
+                   ucs-normalize-tests--failing-lines-part2)
+      (ucs-normalize-tests--insert-failing-lines
+       'ucs-normalize-tests--failing-lines-part2
+       ucs-normalize-tests--part2-rule1-failed-lines))
+    (if (> (buffer-size) 0)
+        (if noninteractive
+            (princ (buffer-string) standard-output)
+          (display-buffer (current-buffer)))
+      (message "No changes to failing lines needed"))))
 
 ;;; ucs-normalize-tests.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]