emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/relint 8956b21 6/7: Check for mistakes in rx 'any' form


From: Mattias Engdegård
Subject: [elpa] externals/relint 8956b21 6/7: Check for mistakes in rx 'any' forms
Date: Thu, 5 Mar 2020 10:21:26 -0500 (EST)

branch: externals/relint
commit 8956b21a5213efd10f6b80216ae74b45b370cc2d
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>

    Check for mistakes in rx 'any' forms
    
    These checks are similar to those done by xr in string regexps.
---
 README           |   5 ++
 relint.el        | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 test/11.elisp    |  15 ++++++
 test/11.expected |  27 ++++++++++
 4 files changed, 200 insertions(+)

diff --git a/README b/README
index 25431d9..944e665 100644
--- a/README
+++ b/README
@@ -165,6 +165,11 @@ skip-syntax-backward.
     In general, A?, where A matches the empty string, can be
     simplified to just A.
 
+  - Suspect range '+-X' or 'X-+'
+
+    A character range with '+' as one of its endpoints is more often an
+    incorrect attempt to include both '+' and '-' in the set.
+
   - Unnecessarily escaped 'X'
 
     A character is backslash-escaped in a skip set despite not being
diff --git a/relint.el b/relint.el
index 47d9be2..1149fee 100644
--- a/relint.el
+++ b/relint.el
@@ -1306,6 +1306,153 @@ character alternative: `[' followed by a 
regexp-generating expression."
       (setq index (1+ index))
       (setq args (cdr args)))))
 
+(defun relint--pretty-range (from to)
+  (relint--escape-string
+   (if (eq from to)
+       (char-to-string from)
+     (format "%c-%c" from to))
+   t))
+
+(defun relint--intersecting-range (from to ranges)
+  "Return a range in RANGES intersecting [FROM,TO], or nil if none.
+RANGES is a list of (X . Y) representing the interval [X,Y]."
+  (while (and ranges
+              (let ((range (car ranges)))
+                (not (and (<= from (cdr range))
+                          (<= (car range) to)))))
+    (setq ranges (cdr ranges)))
+  (car ranges))
+
+(defun relint--check-rx (item file pos path)
+  "Check the `rx' expression ITEM."
+  (pcase item
+    (`(,(or ': 'seq 'sequence 'and 'or '|
+            'not 'intersection 'repeat '= '>= '**
+            'zero-or-more '0+ '* '*?
+            'one-or-more '1+ '+ '+?
+            'zero-or-one 'opt 'optional '\? ?\s '\?? ??
+            'minimal-match 'maximal-match
+            'group 'submatch
+            'group-n 'submatch-n)
+       . ,args)
+     ;; Form with subforms: recurse.
+     (let ((i 1))
+       (dolist (arg args)
+         (relint--check-rx arg file pos (cons i path))
+         (setq i (1+ i)))))
+
+    (`(,(or 'any 'in 'char 'not-char) . ,args)
+     ;; We don't bother checking for outright errors like "b-a", but
+     ;; look for mistakes that rx itself doesn't complain about. We
+     ;; assume a hand-written rx expression; machine-generated code
+     ;; can break these rules.
+     (let ((i 1)
+           (classes nil)
+           (ranges nil))
+       (dolist (arg args)
+         (cond
+          ((characterp arg)
+           (let ((overlap (relint--intersecting-range arg arg ranges)))
+             (when overlap
+               (relint--warn
+                file pos (cons i path)
+                (if (eq (car overlap) (cdr overlap))
+                    (format-message "Duplicated character `%s'"
+                                    (relint--pretty-range arg arg))
+                  (format-message "Character `%s' included in range `%s'"
+                                  (relint--pretty-range arg arg)
+                                  (relint--pretty-range (car overlap)
+                                                        (cdr overlap)))))))
+           (push (cons arg arg) ranges))
+
+          ((stringp arg)
+           (let ((j 0)
+                 (len (length arg)))
+             (while (< j len)
+               (let ((from (aref arg j)))
+                 (if (and (< (+ j 2) len)
+                          (eq (aref arg (1+ j)) ?-))
+                     (let ((to (aref arg (+ j 2))))
+                       (cond
+                        ;; When people write "+-X" or "X-+" for some
+                        ;; X, they rarely mean a range.
+                        ((or (eq from ?+)
+                             (eq to ?+))
+                         (relint--warn
+                          file pos (cons i path)
+                          (format-message "Suspect range `%s'"
+                                          (relint--pretty-range from to))
+                          arg j))
+                        ((= to from)
+                         (relint--warn
+                          file pos (cons i path)
+                          (format-message
+                           "Single-character range `%s'"
+                           (relint--escape-string (format "%c-%c" from to) t))
+                          arg j))
+                        ((= to (1+ from))
+                         (relint--warn
+                          file pos (cons i path)
+                          (format-message "Two-character range `%s'"
+                                          (relint--pretty-range from to))
+                          arg j)))
+                       (let ((overlap
+                              (relint--intersecting-range from to ranges)))
+                         (when overlap
+                           (relint--warn
+                            file pos (cons i path)
+                            (format-message "Range `%s' overlaps previous `%s'"
+                                            (relint--pretty-range from to)
+                                            (relint--pretty-range
+                                             (car overlap) (cdr overlap)))
+                            arg j)))
+                       (push (cons from to) ranges)
+                       (setq j (+ j 3)))
+                   (when (and (eq from ?-)
+                              (< 0 j (1- len)))
+                     (relint--warn
+                      file pos (cons i path)
+                      (format-message "Literal `-' not first or last")
+                      arg j))
+                   (let ((overlap
+                          (relint--intersecting-range from from ranges)))
+                     (when overlap
+                       (relint--warn
+                        file pos (cons i path)
+                        (if (eq (car overlap) (cdr overlap))
+                            (format-message "Duplicated character `%s'"
+                                            (relint--pretty-range from from))
+                          (format-message
+                           "Character `%s' included in range `%s'"
+                           (relint--pretty-range from from)
+                           (relint--pretty-range (car overlap) (cdr overlap))))
+                        arg j)))
+                   (push (cons from from) ranges)
+                   (setq j (1+ j)))))))
+
+          ((consp arg)
+           (let ((from (car arg))
+                 (to (cdr arg)))
+             (when (and (characterp from) (characterp to)
+                        (<= from to))
+               (let ((overlap
+                      (relint--intersecting-range from to ranges)))
+                 (when overlap
+                   (relint--warn
+                    file pos (cons i path)
+                    (format-message "Range `%s' overlaps previous `%s'"
+                                    (relint--pretty-range from to)
+                                    (relint--pretty-range
+                                     (car overlap) (cdr overlap))))))
+               (push (cons from to) ranges))))
+
+          ((symbolp arg)
+           (when (memq arg classes)
+             (relint--warn file pos (cons i path)
+                           (format-message "Duplicated class `%s'" arg)))
+           (push arg classes)))
+         (setq i (1+ i)))))))
+
 (defun relint--regexp-args-from-doc (doc-string)
   "Extract regexp arguments (as a list of symbols) from DOC-STRING."
   (let ((start 0)
@@ -1755,6 +1902,12 @@ directly."
                                     (cons 'val val))))
                         (list 'expr re-arg))))
               (push (cons name new) relint--variables)))))
+       (`(rx . ,items)
+        (let ((i 1))
+          (while (consp items)
+            (relint--check-rx (car items) file pos (cons i path))
+            (setq items (cdr items))
+            (setq i (1+ i)))))
        (`(font-lock-add-keywords ,_ ,keywords . ,_)
         (relint--check-font-lock-keywords
          keywords (car form) file pos (cons 2 path)))
diff --git a/test/11.elisp b/test/11.elisp
new file mode 100644
index 0000000..23fedb1
--- /dev/null
+++ b/test/11.elisp
@@ -0,0 +1,15 @@
+;;; Relint test file 11          -*- emacs-lisp -*-
+
+;; Test errors in rx
+
+(defun my-fun ()
+  (list
+   (rx nonl (in ?c "abc" ?b))
+   (rx (: (* (not (char "0-9ac-ceg-h3"))
+             (any "a-m" (?f . ?t) "!s")
+             (opt (not-char space "q" digit space)))
+          (any "0-9()-+")
+          (any "0-9+-.")
+          (any "-a-e")
+          (any "k-m-")
+          (any "A-F-K-T")))))
diff --git a/test/11.expected b/test/11.expected
new file mode 100644
index 0000000..bc5d375
--- /dev/null
+++ b/test/11.expected
@@ -0,0 +1,27 @@
+11.elisp:7:23: Duplicated character `c' (pos 2)
+  "abc"
+   ..^
+11.elisp:7:26: Duplicated character `b'
+11.elisp:8:30: Single-character range `c-c' (pos 4)
+  "0-9ac-ceg-h3"
+   ....^
+11.elisp:8:34: Two-character range `g-h' (pos 8)
+  "0-9ac-ceg-h3"
+   ........^
+11.elisp:8:37: Character `3' included in range `0-9' (pos 11)
+  "0-9ac-ceg-h3"
+   ...........^
+11.elisp:9:25: Range `f-t' overlaps previous `a-m'
+11.elisp:9:37: Character `s' included in range `f-t' (pos 1)
+  "!s"
+   .^
+11.elisp:10:45: Duplicated class `space'
+11.elisp:11:21: Suspect range `)-+' (pos 4)
+  "0-9()-+"
+   ....^
+11.elisp:12:20: Suspect range `+-.' (pos 3)
+  "0-9+-."
+   ...^
+11.elisp:15:20: Literal `-' not first or last (pos 3)
+  "A-F-K-T"
+   ...^



reply via email to

[Prev in Thread] Current Thread [Next in Thread]