[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/xr a248977 3/3: Parse and lint skip set strings
From: |
Mattias Engdegård |
Subject: |
[elpa] externals/xr a248977 3/3: Parse and lint skip set strings |
Date: |
Sun, 17 Mar 2019 09:10:24 -0400 (EDT) |
branch: externals/xr
commit a24897795ed811ce58d593fb2472723268e27391
Author: Mattias Engdegård <address@hidden>
Commit: Mattias Engdegård <address@hidden>
Parse and lint skip set strings
Add functions to parse and lint skip set strings, which are
the arguments to `skip-chars-forward' and `skip-chars-backward':
xr-skip-set
xr-skip-set-pp
xr-skip-set-lint
Increment the version to 1.7.
---
xr-test.el | 53 +++++++++++++++
xr.el | 221 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++----
2 files changed, 261 insertions(+), 13 deletions(-)
diff --git a/xr-test.el b/xr-test.el
index 4d197bf..860070c 100644
--- a/xr-test.el
+++ b/xr-test.el
@@ -362,6 +362,59 @@
nil))
)
+(ert-deftest xr-skip-set ()
+ (should (equal (xr-skip-set "0-9a-fA-F+*")
+ '(any "0-9a-fA-F" "+*")))
+ (should (equal (xr-skip-set "^ab-ex-")
+ '(not (any "b-e" "ax-"))))
+ (should (equal (xr-skip-set "-^][\\")
+ '(any "^][-")))
+ (should (equal (xr-skip-set "\\^a\\-bc-\\fg")
+ '(any "c-f" "^abg-")))
+ (should (equal (xr-skip-set "\\")
+ '(any)))
+ (should (equal (xr-skip-set "--3^Q-\\")
+ '(any "--3Q-\\" "^")))
+ (should (equal (xr-skip-set "^Q-\\c-\\n")
+ '(not (any "Q-c" "n-"))))
+ (should (equal (xr-skip-set "\\\\A-")
+ '(any "\\A-")))
+ (should (equal (xr-skip-set "[a-z]")
+ '(any "a-z" "[]")))
+ (should (equal (xr-skip-set "[:ascii:]-[:digit:]")
+ '(any "-" ascii digit)))
+ (should (equal (xr-skip-set "A-[:blank:]")
+ '(any "A-[" ":blank]")))
+ (should (equal (xr-skip-set "\\[:xdigit:]-b")
+ '(any "]-b" "[:xdigt")))
+ (should (equal (xr-skip-set "^a-z+" 'terse)
+ '(not (in "a-z" "+"))))
+ (should-error (xr-skip-set "[::]"))
+ (should-error (xr-skip-set "[:whitespace:]"))
+ (should (equal (xr-skip-set ".")
+ "\\."))
+ (should (equal (xr-skip-set "^")
+ 'anything))
+ (should (equal (xr-skip-set "^[:print:]")
+ '(not print)))
+ )
+
+(ert-deftest xr-skip-set-lint ()
+ (should (equal (xr-skip-set-lint "A[:ascii:]B[:space:][:ascii:]")
+ '((20 . "Duplicated character class `[:ascii:]'"))))
+ (should (equal (xr-skip-set-lint "a\\bF-AM-M\\")
+ '((1 . "Unnecessarily escaped `b'")
+ (3 . "Reversed range `F-A'")
+ (6 . "Single-element range `M-M'")
+ (9 . "Stray `\\' at end of string"))))
+ (should (equal (xr-skip-set-lint "A-Fa-z3D-K!3-7\\!b")
+ '((7 . "Ranges `A-F' and `D-K' overlap")
+ (11 . "Range `3-7' includes character `3'")
+ (14 . "Duplicated character `!'")
+ (14 . "Unnecessarily escaped `!'")
+ (16 . "Character `b' included in range `a-z'"))))
+ )
+
(provide 'xr-test)
;;; xr-test.el ends here
diff --git a/xr.el b/xr.el
index eba3a10..2d1eff8 100644
--- a/xr.el
+++ b/xr.el
@@ -3,7 +3,7 @@
;; Copyright (C) 2019 Free Software Foundation, Inc.
;; Author: Mattias Engdegård <address@hidden>
-;; Version: 1.6
+;; Version: 1.7
;; Keywords: lisp, maint, regexps
;; This program is free software; you can redistribute it and/or modify
@@ -30,11 +30,24 @@
;;
;; Please refer to `rx' for more information about the notation.
;;
-;; The exported functions are:
+;; In addition to Emacs regexps, this package can also parse and
+;; troubleshoot skip set strings, which are arguments to
+;; `skip-chars-forward' and `skip-chars-backward'.
+;;
+;; The exported functions for regexps are:
+;;
+;; `xr' - returns the converted rx expression
+;; `xr-pp' - converts to rx and pretty-prints
+;; `xr-lint' - finds mistakes in a regexp string
+;;
+;; For skip sets we also have:
+;;
+;; `xr-skip-set' - return the converted rx expression
+;; `xr-skip-set-pp' - converts to rx and pretty-prints
+;; `xr-skip-set-lint' - finds mistakes in a skip set string
+;;
+;; There is finally the generally useful:
;;
-;; `xr' - returns the converted rx expression
-;; `xr-pp' - pretty-prints the converted rx expression
-;; `xr-lint' - finds deprecated syntax in a regexp string
;; `xr-pp-rx-to-str' - pretty-prints an rx expression to a string
;;
;; Suggested use is from an interactive elisp buffer.
@@ -591,6 +604,153 @@
(error "Unbalanced \\)"))
rx)))
+;; Grammar for skip-set strings:
+;;
+;; skip-set ::= `^'? item*
+;; item ::= range | single
+;; range ::= single `-' end
+;; single ::= (any char but `\')
+;; | `\' (any char)
+;; end ::= single | `\'
+;;
+;; The grammar is ambiguous, resolved left-to-right:
+;; - a leading ^ is always a negation marker
+;; - an item is always a range if possible
+;; - an end is only `\' if last in the string
+
+(defun xr--parse-skip-set-buffer (warnings)
+ (let ((negated (looking-at (rx "^")))
+ (ranges nil)
+ (classes nil))
+ (when negated
+ (forward-char 1))
+ (while (not (eobp))
+ (cond
+ ((looking-at (rx "[:" (group (*? anything)) ":]"))
+ (let ((sym (intern (match-string 1))))
+ (unless (memq sym
+ '(ascii alnum alpha blank cntrl digit graph
+ lower multibyte nonascii print punct space
+ unibyte upper word xdigit))
+ (error "No character class `%s'" (match-string 0)))
+ (when (memq sym classes)
+ (xr--report warnings (point)
+ (format "Duplicated character class `%s'"
+ (match-string 0))))
+ (push sym classes)))
+
+ ((looking-at (rx (or (seq "\\" (group anything))
+ (group (not (any "\\"))))
+ (opt "-"
+ (or (seq "\\" (group anything))
+ (group anything)))))
+ (let ((start (string-to-char (or (match-string 1)
+ (match-string 2))))
+ (end (or (and (match-beginning 3)
+ (string-to-char (match-string 3)))
+ (and (match-beginning 4)
+ (string-to-char (match-string 4))))))
+ (when (and (match-beginning 1)
+ (not (memq start '(?^ ?- ?\\))))
+ (xr--report warnings (point)
+ (xr--escape-string
+ (format "Unnecessarily escaped `%c'" start) nil)))
+ (if (and end (> start end))
+ (xr--report warnings (point)
+ (xr--escape-string
+ (format "Reversed range `%c-%c'" start end) nil))
+ (when (eq start end)
+ (xr--report warnings (point)
+ (xr--escape-string
+ (format "Single-element range `%c-%c'" start end)
+ nil))
+ (setq end nil))
+ (let ((tail ranges))
+ (while tail
+ (let ((range (car tail)))
+ (if (and (<= (car range) (or end start))
+ (<= start (cdr range)))
+ (let ((msg
+ (cond
+ ((and end (< start end)
+ (< (car range) (cdr range)))
+ (format "Ranges `%c-%c' and `%c-%c' overlap"
+ (car range) (cdr range) start end))
+ ((and end (< start end))
+ (format "Range `%c-%c' includes character `%c'"
+ start end (car range)))
+ ((< (car range) (cdr range))
+ (format
+ "Character `%c' included in range `%c-%c'"
+ start (car range) (cdr range)))
+ (t
+ (format "Duplicated character `%c'"
+ start)))))
+ (xr--report warnings (point)
+ (xr--escape-string msg nil))
+ ;; Expand previous interval to include this range.
+ (setcar range (min (car range) start))
+ (setcdr range (max (cdr range) (or end start)))
+ (setq start nil)
+ (setq tail nil))
+ (setq tail (cdr tail))))))
+ (when start
+ (push (cons start (or end start)) ranges)))))
+
+ ((looking-at (rx "\\" eos))
+ (xr--report warnings (point)
+ "Stray `\\' at end of string")))
+
+ (goto-char (match-end 0)))
+
+ (cond
+ ;; Single non-negated character, like "-": make a string.
+ ((and (not negated)
+ (null classes)
+ (= (length ranges) 1)
+ (eq (caar ranges) (cdar ranges)))
+ (regexp-quote (char-to-string (caar ranges))))
+ ;; Negated empty set, like "^": anything.
+ ((and negated
+ (null classes)
+ (null ranges))
+ 'anything)
+ ;; Single named class, like "[:nonascii:]": use the symbol.
+ ((and (= (length classes) 1)
+ (null ranges))
+ (if negated
+ (list 'not (car classes))
+ (car classes)))
+ ;; Anything else: produce (any ...)
+ (t
+ (let ((intervals nil)
+ (chars nil))
+ (mapc (lambda (range)
+ (if (eq (car range) (cdr range))
+ (push (car range) chars)
+ (push (string (car range) ?- (cdr range)) intervals)))
+ ranges)
+ ;; Put a single `-' last.
+ (when (memq ?- chars)
+ (setq chars (append (delq ?- chars) (list ?-))))
+ (let ((set (cons 'any
+ (append
+ (and intervals
+ (list (apply #'concat intervals)))
+ (and chars
+ (list (apply #'string chars)))
+ (nreverse classes)))))
+ (if negated
+ (list 'not set)
+ set)))))))
+
+(defun xr--parse-skip-set (skip-string warnings)
+ (with-temp-buffer
+ (set-buffer-multibyte t)
+ (insert skip-string)
+ (goto-char (point-min))
+ (xr--parse-skip-set-buffer warnings)))
+
;; Substitute keywords in RX using HEAD-ALIST and BODY-ALIST in the
;; head and body positions, respectively.
(defun xr--substitute-keywords (head-alist body-alist rx)
@@ -631,6 +791,14 @@
(bow . word-start)
(eow . word-end))))))
+(defun xr--in-dialect (rx dialect)
+ (let ((keywords (assq (or dialect 'medium) xr--keywords)))
+ (unless keywords
+ (error "Unknown dialect `%S'" dialect))
+ (if (cdr keywords)
+ (xr--substitute-keywords (cadr keywords) (cddr keywords) rx)
+ rx)))
+
;;;###autoload
(defun xr (re-string &optional dialect)
"Convert a regexp string to rx notation; the inverse of `rx'.
@@ -641,17 +809,22 @@ and is one of:
`brief' -- short keywords
`terse' -- very short keywords
`medium' or nil -- a compromise (the default)"
- (let ((keywords (assq (or dialect 'medium) xr--keywords)))
- (unless keywords
- (error "Unknown dialect `%S'" dialect))
- (let ((rx (xr--parse re-string nil)))
- (if (cdr keywords)
- (xr--substitute-keywords (cadr keywords) (cddr keywords) rx)
- rx))))
+ (xr--in-dialect (xr--parse re-string nil) dialect))
+
+;;;###autoload
+(defun xr-skip-set (skip-set-string &optional dialect)
+ "Convert a skip set string argument to rx notation.
+SKIP-SET-STRING is interpreted according to the syntax of
+`skip-chars-forward' and `skip-chars-backward' and converted to
+a character class on `rx' form.
+If desired, `rx' can then be used to convert the result to an
+ordinary regexp.
+See `xr' for a description of the DIALECT argument."
+ (xr--in-dialect (xr--parse-skip-set skip-set-string nil) dialect))
;;;###autoload
(defun xr-lint (re-string)
- "Detect dubious practices in RE-STRING.
+ "Detect dubious practices and possible mistakes in RE-STRING.
This includes uses of tolerated but discouraged constructs.
Outright regexp syntax violations are signalled as errors.
Return a list of (OFFSET . COMMENT) where COMMENT applies at OFFSET
@@ -660,6 +833,19 @@ in RE-STRING."
(xr--parse re-string warnings)
(sort (car warnings) #'car-less-than-car)))
+;;;###autoload
+(defun xr-skip-set-lint (skip-set-string)
+ "Detect dubious practices and possible mistakes in SKIP-SET-STRING.
+This includes uses of tolerated but discouraged constructs.
+Outright syntax violations are signalled as errors.
+The argument is interpreted according to the syntax of
+`skip-chars-forward' and `skip-chars-backward'.
+Return a list of (OFFSET . COMMENT) where COMMENT applies at OFFSET
+in SKIP-SET-STRING."
+ (let ((warnings (list nil)))
+ (xr--parse-skip-set skip-set-string warnings)
+ (sort (car warnings) #'car-less-than-car)))
+
;; Escape non-printing characters in a string for maximum readability.
;; If ESCAPE-PRINTABLE, also escape \ and ", otherwise don't.
(defun xr--escape-string (string escape-printable)
@@ -738,6 +924,15 @@ It is intended for use from an interactive elisp session.
See `xr' for a description of the DIALECT argument."
(insert (xr-pp-rx-to-str (xr re-string dialect))))
+;;;###autoload
+(defun xr-skip-set-pp (skip-set-string &optional dialect)
+ "Convert a skip set string to `rx' notation and pretty-print.
+This function uses `xr-skip-set' to translate SKIP-SET-STRING
+into DIALECT.
+It is intended for use from an interactive elisp session.
+See `xr' for a description of the DIALECT argument."
+ (insert (xr-pp-rx-to-str (xr-skip-set skip-set-string dialect))))
+
(provide 'xr)
;;; xr.el ends here