[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/relint 8956b21 6/7: Check for mistakes in rx 'any' form
From: |
Mattias Engdegård |
Subject: |
[elpa] externals/relint 8956b21 6/7: Check for mistakes in rx 'any' forms |
Date: |
Thu, 5 Mar 2020 10:21:26 -0500 (EST) |
branch: externals/relint
commit 8956b21a5213efd10f6b80216ae74b45b370cc2d
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>
Check for mistakes in rx 'any' forms
These checks are similar to those done by xr in string regexps.
---
README | 5 ++
relint.el | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
test/11.elisp | 15 ++++++
test/11.expected | 27 ++++++++++
4 files changed, 200 insertions(+)
diff --git a/README b/README
index 25431d9..944e665 100644
--- a/README
+++ b/README
@@ -165,6 +165,11 @@ skip-syntax-backward.
In general, A?, where A matches the empty string, can be
simplified to just A.
+ - Suspect range '+-X' or 'X-+'
+
+ A character range with '+' as one of its endpoints is more often an
+ incorrect attempt to include both '+' and '-' in the set.
+
- Unnecessarily escaped 'X'
A character is backslash-escaped in a skip set despite not being
diff --git a/relint.el b/relint.el
index 47d9be2..1149fee 100644
--- a/relint.el
+++ b/relint.el
@@ -1306,6 +1306,153 @@ character alternative: `[' followed by a
regexp-generating expression."
(setq index (1+ index))
(setq args (cdr args)))))
+(defun relint--pretty-range (from to)
+ (relint--escape-string
+ (if (eq from to)
+ (char-to-string from)
+ (format "%c-%c" from to))
+ t))
+
+(defun relint--intersecting-range (from to ranges)
+ "Return a range in RANGES intersecting [FROM,TO], or nil if none.
+RANGES is a list of (X . Y) representing the interval [X,Y]."
+ (while (and ranges
+ (let ((range (car ranges)))
+ (not (and (<= from (cdr range))
+ (<= (car range) to)))))
+ (setq ranges (cdr ranges)))
+ (car ranges))
+
+(defun relint--check-rx (item file pos path)
+ "Check the `rx' expression ITEM."
+ (pcase item
+ (`(,(or ': 'seq 'sequence 'and 'or '|
+ 'not 'intersection 'repeat '= '>= '**
+ 'zero-or-more '0+ '* '*?
+ 'one-or-more '1+ '+ '+?
+ 'zero-or-one 'opt 'optional '\? ?\s '\?? ??
+ 'minimal-match 'maximal-match
+ 'group 'submatch
+ 'group-n 'submatch-n)
+ . ,args)
+ ;; Form with subforms: recurse.
+ (let ((i 1))
+ (dolist (arg args)
+ (relint--check-rx arg file pos (cons i path))
+ (setq i (1+ i)))))
+
+ (`(,(or 'any 'in 'char 'not-char) . ,args)
+ ;; We don't bother checking for outright errors like "b-a", but
+ ;; look for mistakes that rx itself doesn't complain about. We
+ ;; assume a hand-written rx expression; machine-generated code
+ ;; can break these rules.
+ (let ((i 1)
+ (classes nil)
+ (ranges nil))
+ (dolist (arg args)
+ (cond
+ ((characterp arg)
+ (let ((overlap (relint--intersecting-range arg arg ranges)))
+ (when overlap
+ (relint--warn
+ file pos (cons i path)
+ (if (eq (car overlap) (cdr overlap))
+ (format-message "Duplicated character `%s'"
+ (relint--pretty-range arg arg))
+ (format-message "Character `%s' included in range `%s'"
+ (relint--pretty-range arg arg)
+ (relint--pretty-range (car overlap)
+ (cdr overlap)))))))
+ (push (cons arg arg) ranges))
+
+ ((stringp arg)
+ (let ((j 0)
+ (len (length arg)))
+ (while (< j len)
+ (let ((from (aref arg j)))
+ (if (and (< (+ j 2) len)
+ (eq (aref arg (1+ j)) ?-))
+ (let ((to (aref arg (+ j 2))))
+ (cond
+ ;; When people write "+-X" or "X-+" for some
+ ;; X, they rarely mean a range.
+ ((or (eq from ?+)
+ (eq to ?+))
+ (relint--warn
+ file pos (cons i path)
+ (format-message "Suspect range `%s'"
+ (relint--pretty-range from to))
+ arg j))
+ ((= to from)
+ (relint--warn
+ file pos (cons i path)
+ (format-message
+ "Single-character range `%s'"
+ (relint--escape-string (format "%c-%c" from to) t))
+ arg j))
+ ((= to (1+ from))
+ (relint--warn
+ file pos (cons i path)
+ (format-message "Two-character range `%s'"
+ (relint--pretty-range from to))
+ arg j)))
+ (let ((overlap
+ (relint--intersecting-range from to ranges)))
+ (when overlap
+ (relint--warn
+ file pos (cons i path)
+ (format-message "Range `%s' overlaps previous `%s'"
+ (relint--pretty-range from to)
+ (relint--pretty-range
+ (car overlap) (cdr overlap)))
+ arg j)))
+ (push (cons from to) ranges)
+ (setq j (+ j 3)))
+ (when (and (eq from ?-)
+ (< 0 j (1- len)))
+ (relint--warn
+ file pos (cons i path)
+ (format-message "Literal `-' not first or last")
+ arg j))
+ (let ((overlap
+ (relint--intersecting-range from from ranges)))
+ (when overlap
+ (relint--warn
+ file pos (cons i path)
+ (if (eq (car overlap) (cdr overlap))
+ (format-message "Duplicated character `%s'"
+ (relint--pretty-range from from))
+ (format-message
+ "Character `%s' included in range `%s'"
+ (relint--pretty-range from from)
+ (relint--pretty-range (car overlap) (cdr overlap))))
+ arg j)))
+ (push (cons from from) ranges)
+ (setq j (1+ j)))))))
+
+ ((consp arg)
+ (let ((from (car arg))
+ (to (cdr arg)))
+ (when (and (characterp from) (characterp to)
+ (<= from to))
+ (let ((overlap
+ (relint--intersecting-range from to ranges)))
+ (when overlap
+ (relint--warn
+ file pos (cons i path)
+ (format-message "Range `%s' overlaps previous `%s'"
+ (relint--pretty-range from to)
+ (relint--pretty-range
+ (car overlap) (cdr overlap))))))
+ (push (cons from to) ranges))))
+
+ ((symbolp arg)
+ (when (memq arg classes)
+ (relint--warn file pos (cons i path)
+ (format-message "Duplicated class `%s'" arg)))
+ (push arg classes)))
+ (setq i (1+ i)))))))
+
(defun relint--regexp-args-from-doc (doc-string)
"Extract regexp arguments (as a list of symbols) from DOC-STRING."
(let ((start 0)
@@ -1755,6 +1902,12 @@ directly."
(cons 'val val))))
(list 'expr re-arg))))
(push (cons name new) relint--variables)))))
+ (`(rx . ,items)
+ (let ((i 1))
+ (while (consp items)
+ (relint--check-rx (car items) file pos (cons i path))
+ (setq items (cdr items))
+ (setq i (1+ i)))))
(`(font-lock-add-keywords ,_ ,keywords . ,_)
(relint--check-font-lock-keywords
keywords (car form) file pos (cons 2 path)))
diff --git a/test/11.elisp b/test/11.elisp
new file mode 100644
index 0000000..23fedb1
--- /dev/null
+++ b/test/11.elisp
@@ -0,0 +1,15 @@
+;;; Relint test file 11 -*- emacs-lisp -*-
+
+;; Test errors in rx
+
+(defun my-fun ()
+ (list
+ (rx nonl (in ?c "abc" ?b))
+ (rx (: (* (not (char "0-9ac-ceg-h3"))
+ (any "a-m" (?f . ?t) "!s")
+ (opt (not-char space "q" digit space)))
+ (any "0-9()-+")
+ (any "0-9+-.")
+ (any "-a-e")
+ (any "k-m-")
+ (any "A-F-K-T")))))
diff --git a/test/11.expected b/test/11.expected
new file mode 100644
index 0000000..bc5d375
--- /dev/null
+++ b/test/11.expected
@@ -0,0 +1,27 @@
+11.elisp:7:23: Duplicated character `c' (pos 2)
+ "abc"
+ ..^
+11.elisp:7:26: Duplicated character `b'
+11.elisp:8:30: Single-character range `c-c' (pos 4)
+ "0-9ac-ceg-h3"
+ ....^
+11.elisp:8:34: Two-character range `g-h' (pos 8)
+ "0-9ac-ceg-h3"
+ ........^
+11.elisp:8:37: Character `3' included in range `0-9' (pos 11)
+ "0-9ac-ceg-h3"
+ ...........^
+11.elisp:9:25: Range `f-t' overlaps previous `a-m'
+11.elisp:9:37: Character `s' included in range `f-t' (pos 1)
+ "!s"
+ .^
+11.elisp:10:45: Duplicated class `space'
+11.elisp:11:21: Suspect range `)-+' (pos 4)
+ "0-9()-+"
+ ....^
+11.elisp:12:20: Suspect range `+-.' (pos 3)
+ "0-9+-."
+ ...^
+11.elisp:15:20: Literal `-' not first or last (pos 3)
+ "A-F-K-T"
+ ...^
- [elpa] externals/relint updated (c2d3f3b -> 83e677d), Mattias Engdegård, 2020/03/05
- [elpa] externals/relint fd43a5f 2/7: Permit relint-current-buffer in *scratch*, Mattias Engdegård, 2020/03/05
- [elpa] externals/relint 67f4363 1/7: Don't erase the *relint* buffer from relint-buffer, Mattias Engdegård, 2020/03/05
- [elpa] externals/relint 7dab4f2 4/7: Slight macro safety improvement, Mattias Engdegård, 2020/03/05
- [elpa] externals/relint 83e677d 7/7: Increment version to 1.15, Mattias Engdegård, 2020/03/05
- [elpa] externals/relint 8956b21 6/7: Check for mistakes in rx 'any' forms,
Mattias Engdegård <=
- [elpa] externals/relint feba965 3/7: Add severity field to tuple returned from relint-buffer, Mattias Engdegård, 2020/03/05
- [elpa] externals/relint bc7f295 5/7: Better position accuracy in various lists of regexps, Mattias Engdegård, 2020/03/05