[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/lex fe3a98b 2/7: * packages/lex/lex-parse-re.el: New fi
From: |
Stefan Monnier |
Subject: |
[elpa] externals/lex fe3a98b 2/7: * packages/lex/lex-parse-re.el: New file, extracted from lex.el. |
Date: |
Tue, 1 Dec 2020 16:18:10 -0500 (EST) |
branch: externals/lex
commit fe3a98b8dc18023c6e8fb1877175ce406d9dc22e
Author: Stefan Monnier <monnier@iro.umontreal.ca>
Commit: Stefan Monnier <monnier@iro.umontreal.ca>
* packages/lex/lex-parse-re.el: New file, extracted from lex.el.
* packages/lex/lex.el: Use it instead of the self-load hack.
(lex--nfa, lex-compile): Use case-table-get-table.
---
lex-parse-re.el | 258 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
lex-pkg.el | 1 +
lex.el | 234 ++------------------------------------------------
3 files changed, 266 insertions(+), 227 deletions(-)
diff --git a/lex-parse-re.el b/lex-parse-re.el
new file mode 100644
index 0000000..e5e954a
--- /dev/null
+++ b/lex-parse-re.el
@@ -0,0 +1,258 @@
+;;; lex-parse-re.el --- Parse Emacs regexps using Lex
+
+;; Copyright (C) 2008,2013 Free Software Foundation, Inc.
+
+;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
+;; Keywords:
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This exports lex-parse-re, but it also defines lex--parse-charset which is
+;; used internally by lex-compile to handle charsets specified as a string.
+
+;;; Code:
+
+(require 'lex)
+
+;;; Regexp parsers.
+
+(defun lex--tokenizer (lex string)
+ (let ((tokens ())
+ (i 0)
+ tmp)
+ (while (and (< i (length string))
+ (setq tmp (lex-match-string lex string i)))
+ (push (cons (car tmp) (substring string i (setq i (cadr tmp)))) tokens))
+ (nreverse tokens)))
+
+(defun lex--parse-charset (string)
+ (let ((i 0)
+ (ranges ()))
+ (when (eq (aref string i) ?^)
+ (push 'not ranges)
+ (setq i (1+ i)))
+ (let ((op nil)
+ (case-fold-search nil))
+ (while (not (eq op 'stop))
+ (lex-case string i
+ ((seq "[:" (0+ (char (?a . ?z) (?A . ?Z))) ":]")
+ (push (intern (substring string (+ 2 (match-beginning 0))
+ (- (match-end 0) 2)))
+ ranges))
+ ((seq anything "-" anything)
+ (push (cons (aref string (match-beginning 0))
+ (aref string (1- (match-end 0))))
+ ranges))
+ (anything (push (aref string (1- (match-end 0))) ranges))
+ (eob (setq op 'stop))))
+
+ `(char ,@(nreverse ranges)))))
+
+(defconst lex--parse-re-lexspec
+ '(((or "*" "+" "?" "*?" "+?" "??") . suffix)
+ ((seq "[" (opt "^") (opt "]")
+ (0+ (or (seq (char not ?\]) "-" (char not ?\]))
+ (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
+ (char not ?\]))) "]") . charset)
+ ((seq "\\c" anything) . category)
+ ((seq "\\C" anything) . not-category)
+ ((seq "\\s" anything) . syntax)
+ ((seq "\\S" anything) . not-syntax)
+ ((seq "\\" (char (?1 . ?9))) . backref)
+ ("\\'" . eob)
+ ("\\`" . bob)
+ ("." . dot)
+ ("^" . bol)
+ ("$" . eol)
+ ("." . dot)
+ ("\\<" . bow)
+ ("\\>" . eow)
+ ("\\_<" . symbol-start)
+ ("\\_>" . symbol-end)
+ ("\\w" . wordchar)
+ ("\\W" . not-wordchar)
+ ("\\b" . word-boundary)
+ ("\\B" . not-word-boundary)
+ ("\\=" . point)
+ ((or (seq ?\\ anything) anything) . char)))
+
+
+(defconst lex--parse-ere-lexer
+ (let ((case-fold-search nil))
+ (lex-compile
+ (append '(("(?:" . shy-group)
+ ("|" . or)
+ ((seq "{" (0+ (char (?0 . ?9)))
+ (opt (seq "," (0+ (char (?0 . ?9))))) "}") . repeat)
+ ((or ")" eob) . stop)
+ ("(" . group))
+ lex--parse-re-lexspec))))
+
+(defconst lex--parse-bre-lexer
+ (let ((case-fold-search nil))
+ (lex-compile
+ (append '(("\\(?:" . shy-group)
+ ("\\|" . or)
+ ((seq "\\{" (0+ (char (?0 . ?9)))
+ (opt (seq "," (0+ (char (?0 . ?9))))) "\\}") . repeat)
+ ((or "\\)" eob) . stop)
+ ("\\(" . group))
+ lex--parse-re-lexspec))))
+
+(defun lex--parse-re (string i lexer)
+ (let ((stack ())
+ (op nil)
+ (res nil)
+ tmp)
+ (while (and (not (eq op 'stop))
+ (setq tmp (lex-match-string lexer string i)))
+ (pcase (car tmp)
+ (`shy-group
+ (setq tmp (lex--parse-re string (cadr tmp) lexer))
+ (unless (eq (aref string (1- (cadr tmp))) ?\))
+ (error "Unclosed shy-group"))
+ (push (car tmp) res))
+ (`group
+ (setq tmp (lex--parse-re string (cadr tmp) lexer))
+ (unless (eq (aref string (1- (cadr tmp))) ?\))
+ (error "Unclosed group"))
+ (push (list 'group (car tmp)) res))
+ (`suffix
+ (if (null res) (error "Non-prefixed suffix operator")
+ (setq res (cons (list (cdr (assoc (substring string i (cadr tmp))
+ '(("*" . 0+)
+ ("+" . 1+)
+ ("?" . opt)
+ ("*?" . *\?)
+ ("+?" . +\?)
+ ("??" . \?\?))))
+ (car res))
+ (cdr res)))))
+ (`or (push `(or (seq ,@(nreverse res))) stack)
+ (setq res nil))
+ (`charset
+ (push (lex--parse-charset (substring string (1+ i) (1- (cadr tmp))))
+ res))
+ (`repeat
+ ;; Here we would like to have sub-matches :-(
+ (let* ((min (string-to-number
+ (substring string (+ i (if (eq (aref string i) ?\\) 2 1))
+ (cadr tmp))))
+ (max (let ((comma (string-match "," string i)))
+ (if (not (and comma (< comma (cadr tmp))))
+ min
+ (if (= comma (- (cadr tmp) 2))
+ nil
+ (string-to-number (substring string (1+
comma))))))))
+ (if (null res) (error "Non-prefixed repeat operator")
+ (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
+ (`stop (setq op 'stop))
+ ((or `syntax `category `not-syntax `not-category)
+ (push (list (car tmp) (aref string (1- (cadr tmp)))) res))
+ (`backref
+ (push (list (car tmp) (- (aref string (1- (cadr tmp))) ?0)) res))
+ (`char
+ (push (aref string (1- (cadr tmp))) res))
+ (_ (push (car tmp) res)))
+ (setq i (cadr tmp)))
+ (let ((re `(seq ,@(nreverse res))))
+ (while stack (setq re (nconc (pop stack) (list re))))
+ (list re i))))
+
+;;;###autoload
+(defun lex-parse-re (string &optional lexer)
+ "Parse STRING as a regular expression.
+LEXER specifies the regexp syntax to use. It can be `ere', or `bre'
+and it defaults to `bre'."
+ (setq lexer (cond ((eq lexer 'ere) lex--parse-ere-lexer)
+ ((memq lexer '(bre re nil)) lex--parse-bre-lexer)
+ (t lexer)))
+ (let ((res (lex--parse-re string 0 lexer)))
+ (if (< (cadr res) (length string))
+ (error "Regexp parsing failed around %d: ...%s..."
+ (cadr res) (substring string (1- (cadr res)) (1+ (cadr res))))
+ (car res))))
+
+
+;; (defun lex--parse-re (string i)
+;; (let ((stack ())
+;; (op nil)
+;; (res nil))
+;; (while (and (not (eq op 'stop)))
+;; (lex-case string i
+;; ("(?:" ;shy-group.
+;; (let ((tmp (lex--parse-re string i)))
+;; (setq i (car tmp))
+;; (unless (eq (aref string (1- i)) ?\)) (error "Unclosed
shy-group"))
+;; (push (cdr tmp) res)))
+;; ((or "*?" "+?" "??")
+;; (error "Greediness control unsupported `%s'" (match-string 0
string)))
+;; ((or "*" "+" "?")
+;; (if (null res) (error "Non-prefixed suffix operator")
+;; (setq res (cons (list (cdr (assq (aref string (1- i))
+;; '((?* . 0+)
+;; (?+ . 1+)
+;; (?? . opt))))
+;; (car res))
+;; (cdr res)))))
+;; ("|" (push `(or (seq ,@(nreverse res))) stack)
+;; (setq res nil))
+;; ((seq "[" (opt "^") (opt "]")
+;; (0+ (or (seq (char not ?\]) "-" (char not ?\]))
+;; (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
+;; (char not ?\]))) "]")
+;; (push (lex--parse-charset
+;; (substring string (1+ (match-beginning 0))
+;; (1- (match-end 0))))
+;; res))
+;; ((seq "{" (0+ (char (?0 . ?9)))
+;; (opt (seq "," (0+ (char (?0 . ?9))))) "}")
+;; ;; Here we would like to have sub-matches :-(
+;; (let* ((min (string-to-number (substring string
+;; (1+ (match-beginning 0))
+;; (match-end 0))))
+;; (max (let ((comma (string-match "," string (match-beginning
0))))
+;; (if (not (and comma (< comma (match-end 0))))
+;; min
+;; (if (= comma (- (match-end 0) 2))
+;; nil
+;; (string-to-number (substring string (1+
comma))))))))
+;; (if (null res) (error "Non-prefixed repeat operator")
+;; (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
+;; ((or ")" eob) (setq op 'stop))
+;; ("\\'" (push 'eob res))
+;; ("\\`" (push 'bob res))
+;; ("^" (push 'bol res))
+;; ("$" (push 'eol res))
+;; ("." (push 'dot res))
+
+;; ((or "(" "\\<" "\\>" "\\_<" "\\_>" "\\c" "\\s" "\\C" "\\S" "\\w"
"\\W"
+;; "\\b" "\\B" "\\=" (seq "\\" (char (?1 . ?9))))
+;; (error "Unsupported construct `%s'" (match-string 0 string)))
+
+;; ((or (seq ?\\ anything) anything)
+;; (push (aref string (1- (match-end 0))) res))
+;; ("" (error "This should not be reachable"))))
+;; (let ((re `(seq ,@(nreverse res))))
+;; (while stack (setq re (nconc (pop stack) (list re))))
+;; (cons i re))))
+
+
+
+
+
+(provide 'lex-parse-re)
+;;; lex-parse-re.el ends here
diff --git a/lex-pkg.el b/lex-pkg.el
new file mode 100644
index 0000000..e35f44e
--- /dev/null
+++ b/lex-pkg.el
@@ -0,0 +1 @@
+(define-package "lex" "1.1" "Lexical analyser construction")
diff --git a/lex.el b/lex.el
index 163459c..e4b3409 100644
--- a/lex.el
+++ b/lex.el
@@ -4,7 +4,6 @@
;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
;; Keywords:
-;; Version: 1.0
;; This program is free software; you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
@@ -547,6 +546,9 @@ or (check (not (PREDICATE . ARG))).")
(push tmp chars))
(if chars (cons char chars)))))
+;; For convenience we use lex itself to tokenize charset strings, so we
+;; define it in another file.
+(autoload 'lex--parse-charset "lex-parse-re")
(defun lex--nfa (re state)
(cl-assert state) ;If `state' is nil we can't match anyway.
@@ -703,7 +705,8 @@ or (check (not (PREDICATE . ARG))).")
(lex--nfa `(seq ,@(cdr re)) state)))
(`case-fold
- (let ((lex--char-equiv-table (get-eqvcase-table (current-case-table))))
+ (let ((lex--char-equiv-table
+ (case-table-get-table (current-case-table) 'eqv)))
(lex--nfa `(seq ,@(cdr re)) state)))
((or `point
@@ -901,7 +904,8 @@ Returns a new NFA."
(lex--dfa-wrapper
(lambda ()
(let* ((lex--char-equiv-table
- (if case-fold-search (get-eqvcase-table (current-case-table))))
+ (if case-fold-search
+ (case-table-get-table (current-case-table) 'eqv)))
(newstate
`(or
,@(mapcar (lambda (x) (lex--nfa (car x) (list 'stop (cdr x))))
@@ -1235,229 +1239,5 @@ continue the match elsewhere."
;; content than what's after `stop'.
(nconc match lastlex)))
-;;; Regexp parsers.
-
-(defun lex--tokenizer (lex string)
- (let ((tokens ())
- (i 0)
- tmp)
- (while (and (< i (length string))
- (setq tmp (lex-match-string lex string i)))
- (push (cons (car tmp) (substring string i (setq i (cadr tmp)))) tokens))
- (nreverse tokens)))
-
-(eval-when-compile
- (unless (fboundp 'lex-compile) (load "lex" 'noerror 'nomessage)))
-
-(defun lex--parse-charset (string)
- (let ((i 0)
- (ranges ()))
- (when (eq (aref string i) ?^)
- (push 'not ranges)
- (setq i (1+ i)))
- (let ((op nil)
- (case-fold-search nil))
- (while (not (eq op 'stop))
- (lex-case string i
- ((seq "[:" (0+ (char (?a . ?z) (?A . ?Z))) ":]")
- (push (intern (substring string (+ 2 (match-beginning 0))
- (- (match-end 0) 2)))
- ranges))
- ((seq anything "-" anything)
- (push (cons (aref string (match-beginning 0))
- (aref string (1- (match-end 0))))
- ranges))
- (anything (push (aref string (1- (match-end 0))) ranges))
- (eob (setq op 'stop))))
-
- `(char ,@(nreverse ranges)))))
-
-(defconst lex--parse-re-lexspec
- '(((or "*" "+" "?" "*?" "+?" "??") . suffix)
- ((seq "[" (opt "^") (opt "]")
- (0+ (or (seq (char not ?\]) "-" (char not ?\]))
- (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
- (char not ?\]))) "]") . charset)
- ((seq "\\c" anything) . category)
- ((seq "\\C" anything) . not-category)
- ((seq "\\s" anything) . syntax)
- ((seq "\\S" anything) . not-syntax)
- ((seq "\\" (char (?1 . ?9))) . backref)
- ("\\'" . eob)
- ("\\`" . bob)
- ("." . dot)
- ("^" . bol)
- ("$" . eol)
- ("." . dot)
- ("\\<" . bow)
- ("\\>" . eow)
- ("\\_<" . symbol-start)
- ("\\_>" . symbol-end)
- ("\\w" . wordchar)
- ("\\W" . not-wordchar)
- ("\\b" . word-boundary)
- ("\\B" . not-word-boundary)
- ("\\=" . point)
- ((or (seq ?\\ anything) anything) . char)))
-
-
-(defconst lex--parse-ere-lexer
- (let ((case-fold-search nil))
- (lex-compile
- (append '(("(?:" . shy-group)
- ("|" . or)
- ((seq "{" (0+ (char (?0 . ?9)))
- (opt (seq "," (0+ (char (?0 . ?9))))) "}") . repeat)
- ((or ")" eob) . stop)
- ("(" . group))
- lex--parse-re-lexspec))))
-
-(defconst lex--parse-bre-lexer
- (let ((case-fold-search nil))
- (lex-compile
- (append '(("\\(?:" . shy-group)
- ("\\|" . or)
- ((seq "\\{" (0+ (char (?0 . ?9)))
- (opt (seq "," (0+ (char (?0 . ?9))))) "\\}") . repeat)
- ((or "\\)" eob) . stop)
- ("\\(" . group))
- lex--parse-re-lexspec))))
-
-(defun lex--parse-re (string i lexer)
- (let ((stack ())
- (op nil)
- (res nil)
- tmp)
- (while (and (not (eq op 'stop))
- (setq tmp (lex-match-string lexer string i)))
- (pcase (car tmp)
- (`shy-group
- (setq tmp (lex--parse-re string (cadr tmp) lexer))
- (unless (eq (aref string (1- (cadr tmp))) ?\))
- (error "Unclosed shy-group"))
- (push (car tmp) res))
- (`group
- (setq tmp (lex--parse-re string (cadr tmp) lexer))
- (unless (eq (aref string (1- (cadr tmp))) ?\))
- (error "Unclosed group"))
- (push (list 'group (car tmp)) res))
- (`suffix
- (if (null res) (error "Non-prefixed suffix operator")
- (setq res (cons (list (cdr (assoc (substring string i (cadr tmp))
- '(("*" . 0+)
- ("+" . 1+)
- ("?" . opt)
- ("*?" . *\?)
- ("+?" . +\?)
- ("??" . \?\?))))
- (car res))
- (cdr res)))))
- (`or (push `(or (seq ,@(nreverse res))) stack)
- (setq res nil))
- (`charset
- (push (lex--parse-charset (substring string (1+ i) (1- (cadr tmp))))
- res))
- (`repeat
- ;; Here we would like to have sub-matches :-(
- (let* ((min (string-to-number
- (substring string (+ i (if (eq (aref string i) ?\\) 2 1))
- (cadr tmp))))
- (max (let ((comma (string-match "," string i)))
- (if (not (and comma (< comma (cadr tmp))))
- min
- (if (= comma (- (cadr tmp) 2))
- nil
- (string-to-number (substring string (1+
comma))))))))
- (if (null res) (error "Non-prefixed repeat operator")
- (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
- (`stop (setq op 'stop))
- ((or `syntax `category `not-syntax `not-category)
- (push (list (car tmp) (aref string (1- (cadr tmp)))) res))
- (`backref
- (push (list (car tmp) (- (aref string (1- (cadr tmp))) ?0)) res))
- (`char
- (push (aref string (1- (cadr tmp))) res))
- (_ (push (car tmp) res)))
- (setq i (cadr tmp)))
- (let ((re `(seq ,@(nreverse res))))
- (while stack (setq re (nconc (pop stack) (list re))))
- (list re i))))
-
-(defun lex-parse-re (string &optional lexer)
- (setq lexer (cond ((eq lexer 'ere) lex--parse-ere-lexer)
- ((memq lexer '(bre re nil)) lex--parse-bre-lexer)
- (t lexer)))
- (let ((res (lex--parse-re string 0 lexer)))
- (if (< (cadr res) (length string))
- (error "Regexp parsing failed around %d: ...%s..."
- (cadr res) (substring string (1- (cadr res)) (1+ (cadr res))))
- (car res))))
-
-
-;; (defun lex--parse-re (string i)
-;; (let ((stack ())
-;; (op nil)
-;; (res nil))
-;; (while (and (not (eq op 'stop)))
-;; (lex-case string i
-;; ("(?:" ;shy-group.
-;; (let ((tmp (lex--parse-re string i)))
-;; (setq i (car tmp))
-;; (unless (eq (aref string (1- i)) ?\)) (error "Unclosed
shy-group"))
-;; (push (cdr tmp) res)))
-;; ((or "*?" "+?" "??")
-;; (error "Greediness control unsupported `%s'" (match-string 0
string)))
-;; ((or "*" "+" "?")
-;; (if (null res) (error "Non-prefixed suffix operator")
-;; (setq res (cons (list (cdr (assq (aref string (1- i))
-;; '((?* . 0+)
-;; (?+ . 1+)
-;; (?? . opt))))
-;; (car res))
-;; (cdr res)))))
-;; ("|" (push `(or (seq ,@(nreverse res))) stack)
-;; (setq res nil))
-;; ((seq "[" (opt "^") (opt "]")
-;; (0+ (or (seq (char not ?\]) "-" (char not ?\]))
-;; (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
-;; (char not ?\]))) "]")
-;; (push (lex--parse-charset
-;; (substring string (1+ (match-beginning 0))
-;; (1- (match-end 0))))
-;; res))
-;; ((seq "{" (0+ (char (?0 . ?9)))
-;; (opt (seq "," (0+ (char (?0 . ?9))))) "}")
-;; ;; Here we would like to have sub-matches :-(
-;; (let* ((min (string-to-number (substring string
-;; (1+ (match-beginning 0))
-;; (match-end 0))))
-;; (max (let ((comma (string-match "," string (match-beginning
0))))
-;; (if (not (and comma (< comma (match-end 0))))
-;; min
-;; (if (= comma (- (match-end 0) 2))
-;; nil
-;; (string-to-number (substring string (1+
comma))))))))
-;; (if (null res) (error "Non-prefixed repeat operator")
-;; (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
-;; ((or ")" eob) (setq op 'stop))
-;; ("\\'" (push 'eob res))
-;; ("\\`" (push 'bob res))
-;; ("^" (push 'bol res))
-;; ("$" (push 'eol res))
-;; ("." (push 'dot res))
-
-;; ((or "(" "\\<" "\\>" "\\_<" "\\_>" "\\c" "\\s" "\\C" "\\S" "\\w"
"\\W"
-;; "\\b" "\\B" "\\=" (seq "\\" (char (?1 . ?9))))
-;; (error "Unsupported construct `%s'" (match-string 0 string)))
-
-;; ((or (seq ?\\ anything) anything)
-;; (push (aref string (1- (match-end 0))) res))
-;; ("" (error "This should not be reachable"))))
-;; (let ((re `(seq ,@(nreverse res))))
-;; (while stack (setq re (nconc (pop stack) (list re))))
-;; (cons i re))))
-
-
-
(provide 'lex)
;;; lex.el ends here
- [elpa] branch externals/lex created (now a25aef6), Stefan Monnier, 2020/12/01
- [elpa] externals/lex 02a8843 1/7: New package lex.el., Stefan Monnier, 2020/12/01
- [elpa] externals/lex d5f1594 3/7: * lex.el: Compatibility fix, Stefan Monnier, 2020/12/01
- [elpa] externals/lex e9faad4 4/7: Only keep the strictly necessary *-pkg.el files, Stefan Monnier, 2020/12/01
- [elpa] externals/lex 5483ed3 5/7: * packages/lex/lex.el: Use lexical-binding., Stefan Monnier, 2020/12/01
- [elpa] externals/lex fe3a98b 2/7: * packages/lex/lex-parse-re.el: New file, extracted from lex.el.,
Stefan Monnier <=
- [elpa] externals/lex cd89324 6/7: * packages/gnome-c-style/gnome-c-tests.el: Add copyright blurb, Stefan Monnier, 2020/12/01
- [elpa] externals/lex a25aef6 7/7: * .gitignore: New file, Stefan Monnier, 2020/12/01