emacs-elpa-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[elpa] externals/lex fe3a98b 2/7: * packages/lex/lex-parse-re.el: New fi


From: Stefan Monnier
Subject: [elpa] externals/lex fe3a98b 2/7: * packages/lex/lex-parse-re.el: New file, extracted from lex.el.
Date: Tue, 1 Dec 2020 16:18:10 -0500 (EST)

branch: externals/lex
commit fe3a98b8dc18023c6e8fb1877175ce406d9dc22e
Author: Stefan Monnier <monnier@iro.umontreal.ca>
Commit: Stefan Monnier <monnier@iro.umontreal.ca>

    * packages/lex/lex-parse-re.el: New file, extracted from lex.el.
    * packages/lex/lex.el: Use it instead of the self-load hack.
    (lex--nfa, lex-compile): Use case-table-get-table.
---
 lex-parse-re.el | 258 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 lex-pkg.el      |   1 +
 lex.el          | 234 ++------------------------------------------------
 3 files changed, 266 insertions(+), 227 deletions(-)

diff --git a/lex-parse-re.el b/lex-parse-re.el
new file mode 100644
index 0000000..e5e954a
--- /dev/null
+++ b/lex-parse-re.el
@@ -0,0 +1,258 @@
+;;; lex-parse-re.el --- Parse Emacs regexps using Lex
+
+;; Copyright (C) 2008,2013  Free Software Foundation, Inc.
+
+;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
+;; Keywords:
+
+;; This program is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
+
+;; This program is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+;; GNU General Public License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+;;; Commentary:
+
+;; This exports lex-parse-re, but it also defines lex--parse-charset which is
+;; used internally by lex-compile to handle charsets specified as a string.
+
+;;; Code:
+
+(require 'lex)
+
+;;; Regexp parsers.
+
+(defun lex--tokenizer (lex string)
+  (let ((tokens ())
+        (i 0)
+        tmp)
+    (while (and (< i (length string))
+                (setq tmp (lex-match-string lex string i)))
+      (push (cons (car tmp) (substring string i (setq i (cadr tmp)))) tokens))
+    (nreverse tokens)))
+
+(defun lex--parse-charset (string)
+  (let ((i 0)
+        (ranges ()))
+    (when (eq (aref string i) ?^)
+      (push 'not ranges)
+      (setq i (1+ i)))
+    (let ((op nil)
+          (case-fold-search nil))
+      (while (not (eq op 'stop))
+        (lex-case string i
+          ((seq "[:" (0+ (char (?a . ?z) (?A . ?Z))) ":]")
+           (push (intern (substring string (+ 2 (match-beginning 0))
+                                    (- (match-end 0) 2)))
+                 ranges))
+          ((seq anything "-" anything)
+           (push (cons (aref string (match-beginning 0))
+                       (aref string (1- (match-end 0))))
+                 ranges))
+          (anything (push (aref string (1- (match-end 0))) ranges))
+          (eob (setq op 'stop))))
+
+      `(char ,@(nreverse ranges)))))
+
+(defconst lex--parse-re-lexspec
+  '(((or "*" "+" "?" "*?" "+?" "??") . suffix)
+    ((seq "[" (opt "^") (opt "]")
+          (0+ (or (seq (char not ?\]) "-" (char not ?\]))
+                  (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
+                  (char not ?\]))) "]") . charset)
+    ((seq "\\c" anything) . category)
+    ((seq "\\C" anything) . not-category)
+    ((seq "\\s" anything) . syntax)
+    ((seq "\\S" anything) . not-syntax)
+    ((seq "\\" (char (?1 . ?9))) . backref)
+    ("\\'" . eob)
+    ("\\`" . bob)
+    ("." . dot)
+    ("^" . bol)
+    ("$" . eol)
+    ("." . dot)
+    ("\\<" . bow)
+    ("\\>" . eow)
+    ("\\_<" . symbol-start)
+    ("\\_>" . symbol-end)
+    ("\\w" . wordchar)
+    ("\\W" . not-wordchar)
+    ("\\b" . word-boundary)
+    ("\\B" . not-word-boundary)
+    ("\\=" . point)
+    ((or (seq ?\\ anything) anything) . char)))
+
+
+(defconst lex--parse-ere-lexer
+  (let ((case-fold-search nil))
+    (lex-compile
+     (append '(("(?:" . shy-group)
+               ("|"  . or)
+               ((seq "{" (0+ (char (?0 . ?9)))
+                     (opt (seq "," (0+ (char (?0 . ?9))))) "}") . repeat)
+               ((or ")" eob) . stop)
+               ("(" . group))
+             lex--parse-re-lexspec))))
+
+(defconst lex--parse-bre-lexer
+  (let ((case-fold-search nil))
+    (lex-compile
+     (append '(("\\(?:" . shy-group)
+               ("\\|"  . or)
+               ((seq "\\{" (0+ (char (?0 . ?9)))
+                     (opt (seq "," (0+ (char (?0 . ?9))))) "\\}") . repeat)
+               ((or "\\)" eob) . stop)
+               ("\\(" . group))
+             lex--parse-re-lexspec))))
+
+(defun lex--parse-re (string i lexer)
+  (let ((stack ())
+        (op nil)
+        (res nil)
+        tmp)
+    (while (and (not (eq op 'stop))
+                (setq tmp (lex-match-string lexer string i)))
+      (pcase (car tmp)
+        (`shy-group
+         (setq tmp (lex--parse-re string (cadr tmp) lexer))
+         (unless (eq (aref string (1- (cadr tmp))) ?\))
+           (error "Unclosed shy-group"))
+         (push (car tmp) res))
+        (`group
+         (setq tmp (lex--parse-re string (cadr tmp) lexer))
+         (unless (eq (aref string (1- (cadr tmp))) ?\))
+           (error "Unclosed group"))
+         (push (list 'group (car tmp)) res))
+        (`suffix
+         (if (null res) (error "Non-prefixed suffix operator")
+           (setq res (cons (list (cdr (assoc (substring string i (cadr tmp))
+                                             '(("*" . 0+)
+                                               ("+" . 1+)
+                                               ("?" . opt)
+                                               ("*?" . *\?)
+                                               ("+?" . +\?)
+                                               ("??" . \?\?))))
+                                 (car res))
+                           (cdr res)))))
+        (`or (push `(or (seq ,@(nreverse res))) stack)
+             (setq res nil))
+        (`charset
+         (push (lex--parse-charset (substring string (1+ i) (1- (cadr tmp))))
+               res))
+        (`repeat
+         ;; Here we would like to have sub-matches :-(
+         (let* ((min (string-to-number
+                      (substring string (+ i (if (eq (aref string i) ?\\) 2 1))
+                                 (cadr tmp))))
+                (max (let ((comma (string-match "," string i)))
+                       (if (not (and comma (< comma (cadr tmp))))
+                           min
+                         (if (= comma (- (cadr tmp) 2))
+                             nil
+                           (string-to-number (substring string (1+ 
comma))))))))
+           (if (null res) (error "Non-prefixed repeat operator")
+             (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
+        (`stop (setq op 'stop))
+        ((or `syntax `category `not-syntax `not-category)
+         (push (list (car tmp) (aref string (1- (cadr tmp)))) res))
+        (`backref
+         (push (list (car tmp) (- (aref string (1- (cadr tmp))) ?0)) res))
+        (`char
+         (push (aref string (1- (cadr tmp))) res))
+        (_ (push (car tmp) res)))
+      (setq i (cadr tmp)))
+    (let ((re `(seq ,@(nreverse res))))
+      (while stack (setq re (nconc (pop stack) (list re))))
+      (list re i))))
+
+;;;###autoload
+(defun lex-parse-re (string &optional lexer)
+  "Parse STRING as a regular expression.
+LEXER specifies the regexp syntax to use.  It can be `ere', or `bre'
+and it defaults to `bre'."
+  (setq lexer (cond ((eq lexer 'ere) lex--parse-ere-lexer)
+                    ((memq lexer '(bre re nil)) lex--parse-bre-lexer)
+                    (t lexer)))
+  (let ((res (lex--parse-re string 0 lexer)))
+    (if (< (cadr res) (length string))
+        (error "Regexp parsing failed around %d: ...%s..."
+               (cadr res) (substring string (1- (cadr res)) (1+ (cadr res))))
+      (car res))))
+
+
+;; (defun lex--parse-re (string i)
+;;   (let ((stack ())
+;;         (op nil)
+;;         (res nil))
+;;     (while (and (not (eq op 'stop)))
+;;       (lex-case string i
+;;         ("(?:"                          ;shy-group.
+;;          (let ((tmp (lex--parse-re string i)))
+;;            (setq i (car tmp))
+;;            (unless (eq (aref string (1- i)) ?\)) (error "Unclosed 
shy-group"))
+;;            (push (cdr tmp) res)))
+;;         ((or "*?" "+?" "??")
+;;          (error "Greediness control unsupported `%s'" (match-string 0 
string)))
+;;         ((or "*" "+" "?")
+;;          (if (null res) (error "Non-prefixed suffix operator")
+;;            (setq res (cons (list (cdr (assq (aref string (1- i))
+;;                                             '((?* . 0+)
+;;                                               (?+ . 1+)
+;;                                               (?? . opt))))
+;;                                  (car res))
+;;                            (cdr res)))))
+;;         ("|" (push `(or (seq ,@(nreverse res))) stack)
+;;          (setq res nil))
+;;         ((seq "[" (opt "^") (opt "]")
+;;                    (0+ (or (seq (char not ?\]) "-" (char not ?\]))
+;;                            (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
+;;                            (char not ?\]))) "]")
+;;          (push (lex--parse-charset
+;;                 (substring string (1+ (match-beginning 0))
+;;                            (1- (match-end 0))))
+;;                res))
+;;         ((seq "{" (0+ (char (?0 . ?9)))
+;;                    (opt (seq "," (0+ (char (?0 . ?9))))) "}")
+;;          ;; Here we would like to have sub-matches :-(
+;;          (let* ((min (string-to-number (substring string
+;;                                                   (1+ (match-beginning 0))
+;;                                                   (match-end 0))))
+;;                 (max (let ((comma (string-match "," string (match-beginning 
0))))
+;;                        (if (not (and comma (< comma (match-end 0))))
+;;                            min
+;;                          (if (= comma (- (match-end 0) 2))
+;;                              nil
+;;                            (string-to-number (substring string (1+ 
comma))))))))
+;;            (if (null res) (error "Non-prefixed repeat operator")
+;;              (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
+;;         ((or ")" eob) (setq op 'stop))
+;;         ("\\'" (push 'eob res))
+;;         ("\\`" (push 'bob res))
+;;         ("^" (push 'bol res))
+;;         ("$" (push 'eol res))
+;;         ("." (push 'dot res))
+
+;;         ((or "(" "\\<" "\\>" "\\_<" "\\_>" "\\c" "\\s" "\\C" "\\S" "\\w" 
"\\W"
+;;              "\\b" "\\B" "\\=" (seq "\\" (char (?1 . ?9))))
+;;          (error "Unsupported construct `%s'" (match-string 0 string)))
+
+;;         ((or (seq ?\\ anything) anything)
+;;          (push (aref string (1- (match-end 0))) res))
+;;         ("" (error "This should not be reachable"))))
+;;     (let ((re `(seq ,@(nreverse res))))
+;;       (while stack (setq re (nconc (pop stack) (list re))))
+;;       (cons i re))))
+
+
+
+
+
+(provide 'lex-parse-re)
+;;; lex-parse-re.el ends here
diff --git a/lex-pkg.el b/lex-pkg.el
new file mode 100644
index 0000000..e35f44e
--- /dev/null
+++ b/lex-pkg.el
@@ -0,0 +1 @@
+(define-package "lex" "1.1" "Lexical analyser construction")
diff --git a/lex.el b/lex.el
index 163459c..e4b3409 100644
--- a/lex.el
+++ b/lex.el
@@ -4,7 +4,6 @@
 
 ;; Author: Stefan Monnier <monnier@iro.umontreal.ca>
 ;; Keywords:
-;; Version: 1.0
 
 ;; This program is free software; you can redistribute it and/or modify
 ;; it under the terms of the GNU General Public License as published by
@@ -547,6 +546,9 @@ or (check (not (PREDICATE . ARG))).")
         (push tmp chars))
       (if chars (cons char chars)))))
     
+;; For convenience we use lex itself to tokenize charset strings, so we
+;; define it in another file.
+(autoload 'lex--parse-charset "lex-parse-re")
 
 (defun lex--nfa (re state)
   (cl-assert state)                   ;If `state' is nil we can't match anyway.
@@ -703,7 +705,8 @@ or (check (not (PREDICATE . ARG))).")
          (lex--nfa `(seq ,@(cdr re)) state)))
 
       (`case-fold
-       (let ((lex--char-equiv-table (get-eqvcase-table (current-case-table))))
+       (let ((lex--char-equiv-table
+              (case-table-get-table (current-case-table) 'eqv)))
          (lex--nfa `(seq ,@(cdr re)) state)))
 
       ((or `point
@@ -901,7 +904,8 @@ Returns a new NFA."
   (lex--dfa-wrapper
    (lambda ()
      (let* ((lex--char-equiv-table
-             (if case-fold-search (get-eqvcase-table (current-case-table))))
+             (if case-fold-search
+                 (case-table-get-table (current-case-table) 'eqv)))
             (newstate
              `(or
                ,@(mapcar (lambda (x) (lex--nfa (car x) (list 'stop (cdr x))))
@@ -1235,229 +1239,5 @@ continue the match elsewhere."
     ;; content than what's after `stop'.
     (nconc match lastlex)))
 
-;;; Regexp parsers.
-
-(defun lex--tokenizer (lex string)
-  (let ((tokens ())
-        (i 0)
-        tmp)
-    (while (and (< i (length string))
-                (setq tmp (lex-match-string lex string i)))
-      (push (cons (car tmp) (substring string i (setq i (cadr tmp)))) tokens))
-    (nreverse tokens)))
-
-(eval-when-compile
-  (unless (fboundp 'lex-compile) (load "lex" 'noerror 'nomessage)))
-
-(defun lex--parse-charset (string)
-  (let ((i 0)
-        (ranges ()))
-    (when (eq (aref string i) ?^)
-      (push 'not ranges)
-      (setq i (1+ i)))
-    (let ((op nil)
-          (case-fold-search nil))
-      (while (not (eq op 'stop))
-        (lex-case string i
-          ((seq "[:" (0+ (char (?a . ?z) (?A . ?Z))) ":]")
-           (push (intern (substring string (+ 2 (match-beginning 0))
-                                    (- (match-end 0) 2)))
-                 ranges))
-          ((seq anything "-" anything)
-           (push (cons (aref string (match-beginning 0))
-                       (aref string (1- (match-end 0))))
-                 ranges))
-          (anything (push (aref string (1- (match-end 0))) ranges))
-          (eob (setq op 'stop))))
-      
-      `(char ,@(nreverse ranges)))))
-
-(defconst lex--parse-re-lexspec
-  '(((or "*" "+" "?" "*?" "+?" "??") . suffix)
-    ((seq "[" (opt "^") (opt "]")
-          (0+ (or (seq (char not ?\]) "-" (char not ?\]))
-                  (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
-                  (char not ?\]))) "]") . charset)
-    ((seq "\\c" anything) . category)
-    ((seq "\\C" anything) . not-category)
-    ((seq "\\s" anything) . syntax)
-    ((seq "\\S" anything) . not-syntax)
-    ((seq "\\" (char (?1 . ?9))) . backref)
-    ("\\'" . eob)
-    ("\\`" . bob)
-    ("." . dot)
-    ("^" . bol)
-    ("$" . eol)
-    ("." . dot)
-    ("\\<" . bow)
-    ("\\>" . eow)
-    ("\\_<" . symbol-start)
-    ("\\_>" . symbol-end)
-    ("\\w" . wordchar)
-    ("\\W" . not-wordchar)
-    ("\\b" . word-boundary)
-    ("\\B" . not-word-boundary)
-    ("\\=" . point)
-    ((or (seq ?\\ anything) anything) . char)))
-  
-
-(defconst lex--parse-ere-lexer
-  (let ((case-fold-search nil))
-    (lex-compile
-     (append '(("(?:" . shy-group)
-               ("|"  . or)
-               ((seq "{" (0+ (char (?0 . ?9)))
-                     (opt (seq "," (0+ (char (?0 . ?9))))) "}") . repeat)
-               ((or ")" eob) . stop)
-               ("(" . group))
-             lex--parse-re-lexspec))))
-
-(defconst lex--parse-bre-lexer
-  (let ((case-fold-search nil))
-    (lex-compile
-     (append '(("\\(?:" . shy-group)
-               ("\\|"  . or)
-               ((seq "\\{" (0+ (char (?0 . ?9)))
-                     (opt (seq "," (0+ (char (?0 . ?9))))) "\\}") . repeat)
-               ((or "\\)" eob) . stop)
-               ("\\(" . group))
-             lex--parse-re-lexspec))))
-
-(defun lex--parse-re (string i lexer)
-  (let ((stack ())
-        (op nil)
-        (res nil)
-        tmp)
-    (while (and (not (eq op 'stop))
-                (setq tmp (lex-match-string lexer string i)))
-      (pcase (car tmp)
-        (`shy-group
-         (setq tmp (lex--parse-re string (cadr tmp) lexer))
-         (unless (eq (aref string (1- (cadr tmp))) ?\))
-           (error "Unclosed shy-group"))
-         (push (car tmp) res))
-        (`group
-         (setq tmp (lex--parse-re string (cadr tmp) lexer))
-         (unless (eq (aref string (1- (cadr tmp))) ?\))
-           (error "Unclosed group"))
-         (push (list 'group (car tmp)) res))
-        (`suffix
-         (if (null res) (error "Non-prefixed suffix operator")
-           (setq res (cons (list (cdr (assoc (substring string i (cadr tmp))
-                                             '(("*" . 0+)
-                                               ("+" . 1+)
-                                               ("?" . opt)
-                                               ("*?" . *\?)
-                                               ("+?" . +\?)
-                                               ("??" . \?\?))))
-                                 (car res))
-                           (cdr res)))))
-        (`or (push `(or (seq ,@(nreverse res))) stack)
-             (setq res nil))
-        (`charset
-         (push (lex--parse-charset (substring string (1+ i) (1- (cadr tmp))))
-               res))
-        (`repeat
-         ;; Here we would like to have sub-matches :-(
-         (let* ((min (string-to-number
-                      (substring string (+ i (if (eq (aref string i) ?\\) 2 1))
-                                 (cadr tmp))))
-                (max (let ((comma (string-match "," string i)))
-                       (if (not (and comma (< comma (cadr tmp))))
-                           min
-                         (if (= comma (- (cadr tmp) 2))
-                             nil
-                           (string-to-number (substring string (1+ 
comma))))))))
-           (if (null res) (error "Non-prefixed repeat operator")
-             (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
-        (`stop (setq op 'stop))
-        ((or `syntax `category `not-syntax `not-category)
-         (push (list (car tmp) (aref string (1- (cadr tmp)))) res))
-        (`backref
-         (push (list (car tmp) (- (aref string (1- (cadr tmp))) ?0)) res))
-        (`char
-         (push (aref string (1- (cadr tmp))) res))
-        (_ (push (car tmp) res)))
-      (setq i (cadr tmp)))
-    (let ((re `(seq ,@(nreverse res))))
-      (while stack (setq re (nconc (pop stack) (list re))))
-      (list re i))))
-
-(defun lex-parse-re (string &optional lexer)
-  (setq lexer (cond ((eq lexer 'ere) lex--parse-ere-lexer)
-                    ((memq lexer '(bre re nil)) lex--parse-bre-lexer)
-                    (t lexer)))
-  (let ((res (lex--parse-re string 0 lexer)))
-    (if (< (cadr res) (length string))
-        (error "Regexp parsing failed around %d: ...%s..."
-               (cadr res) (substring string (1- (cadr res)) (1+ (cadr res))))
-      (car res))))
-
-
-;; (defun lex--parse-re (string i)
-;;   (let ((stack ())
-;;         (op nil)
-;;         (res nil))
-;;     (while (and (not (eq op 'stop)))
-;;       (lex-case string i
-;;         ("(?:"                          ;shy-group.
-;;          (let ((tmp (lex--parse-re string i)))
-;;            (setq i (car tmp))
-;;            (unless (eq (aref string (1- i)) ?\)) (error "Unclosed 
shy-group"))
-;;            (push (cdr tmp) res)))
-;;         ((or "*?" "+?" "??")
-;;          (error "Greediness control unsupported `%s'" (match-string 0 
string)))
-;;         ((or "*" "+" "?")
-;;          (if (null res) (error "Non-prefixed suffix operator")
-;;            (setq res (cons (list (cdr (assq (aref string (1- i))
-;;                                             '((?* . 0+)
-;;                                               (?+ . 1+)
-;;                                               (?? . opt))))
-;;                                  (car res))
-;;                            (cdr res)))))
-;;         ("|" (push `(or (seq ,@(nreverse res))) stack)
-;;          (setq res nil))
-;;         ((seq "[" (opt "^") (opt "]")
-;;                    (0+ (or (seq (char not ?\]) "-" (char not ?\]))
-;;                            (seq "[:" (1+ (char (?a . ?z) (?A . ?Z))) ":]")
-;;                            (char not ?\]))) "]")
-;;          (push (lex--parse-charset
-;;                 (substring string (1+ (match-beginning 0))
-;;                            (1- (match-end 0))))
-;;                res))
-;;         ((seq "{" (0+ (char (?0 . ?9)))
-;;                    (opt (seq "," (0+ (char (?0 . ?9))))) "}")
-;;          ;; Here we would like to have sub-matches :-(
-;;          (let* ((min (string-to-number (substring string
-;;                                                   (1+ (match-beginning 0))
-;;                                                   (match-end 0))))
-;;                 (max (let ((comma (string-match "," string (match-beginning 
0))))
-;;                        (if (not (and comma (< comma (match-end 0))))
-;;                            min
-;;                          (if (= comma (- (match-end 0) 2))
-;;                              nil
-;;                            (string-to-number (substring string (1+ 
comma))))))))
-;;            (if (null res) (error "Non-prefixed repeat operator")
-;;              (setq res (cons `(repeat ,min ,max ,(car res)) (cdr res))))))
-;;         ((or ")" eob) (setq op 'stop))
-;;         ("\\'" (push 'eob res))
-;;         ("\\`" (push 'bob res))
-;;         ("^" (push 'bol res))
-;;         ("$" (push 'eol res))
-;;         ("." (push 'dot res))
-
-;;         ((or "(" "\\<" "\\>" "\\_<" "\\_>" "\\c" "\\s" "\\C" "\\S" "\\w" 
"\\W"
-;;              "\\b" "\\B" "\\=" (seq "\\" (char (?1 . ?9))))
-;;          (error "Unsupported construct `%s'" (match-string 0 string)))
-
-;;         ((or (seq ?\\ anything) anything)
-;;          (push (aref string (1- (match-end 0))) res))
-;;         ("" (error "This should not be reachable"))))
-;;     (let ((re `(seq ,@(nreverse res))))
-;;       (while stack (setq re (nconc (pop stack) (list re))))
-;;       (cons i re))))
-
-
-
 (provide 'lex)
 ;;; lex.el ends here



reply via email to

[Prev in Thread] Current Thread [Next in Thread]