[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[elpa] externals/tNFA f150b88 06/23: Added support for \{...\} postfix r
From: |
Stefan Monnier |
Subject: |
[elpa] externals/tNFA f150b88 06/23: Added support for \{...\} postfix repetition operator |
Date: |
Mon, 14 Dec 2020 12:08:28 -0500 (EST) |
branch: externals/tNFA
commit f150b88cff55b1c8383234c6d08a7a319b4bb4bd
Author: Toby Cubitt <toby-predictive@dr-qubit.org>
Commit: tsc25 <toby-predictive@dr-qubit.org>
Added support for \{...\} postfix repetition operator
---
tNFA.el | 282 +++++++++++++++++++++++++++++++++++++++++++++++-----------------
1 file changed, 207 insertions(+), 75 deletions(-)
diff --git a/tNFA.el b/tNFA.el
index a5be4f1..87847b5 100644
--- a/tNFA.el
+++ b/tNFA.el
@@ -148,19 +148,20 @@
(defun tNFA--NFA-state-patch (attach state)
;; patch STATE onto ATTACH. Return value is meaningless
- (setf (tNFA--NFA-state-type attach) (tNFA--NFA-state-type state)
- (tNFA--NFA-state-label attach) (tNFA--NFA-state-label state)
- (tNFA--NFA-state-next attach) (tNFA--NFA-state-next state)
- (tNFA--NFA-state-count state) (incf (tNFA--NFA-state-in-degree state))
- ))
+ (setf
+ (tNFA--NFA-state-type attach) (tNFA--NFA-state-type state)
+ (tNFA--NFA-state-label attach) (tNFA--NFA-state-label state)
+ (tNFA--NFA-state-next attach) (tNFA--NFA-state-next state)
+ (tNFA--NFA-state-count state) (incf (tNFA--NFA-state-in-degree state))))
(defun tNFA--NFA-state-make-epsilon (state next)
;; create an epsilon transition from STATE to NEXT
- (setf (tNFA--NFA-state-type state) 'epsilon
- (tNFA--NFA-state-label state) nil
- (tNFA--NFA-state-next state) next
- (tNFA--NFA-state-count next) (incf (tNFA--NFA-state-in-degree next))))
+ (setf
+ (tNFA--NFA-state-type state) 'epsilon
+ (tNFA--NFA-state-label state) nil
+ (tNFA--NFA-state-next state) next
+ (tNFA--NFA-state-count next) (incf (tNFA--NFA-state-in-degree next))))
(defun tNFA--NFA-state-make-branch (state next)
@@ -172,6 +173,16 @@
(setf (tNFA--NFA-state-count n) (incf (tNFA--NFA-state-in-degree n)))))
+(defun tNFA--NFA-state-copy (state)
+ ;; Return a copy of STATE. The next link is *not* copied, it is `eq' to the
+ ;; original next link. Use `tNFA--fragment-copy' if you want to recursively
+ ;; copy a chain of states. Note: NFA--state-id must be bound to something
+ ;; appropriate when this function is called.
+ (let ((copy (copy-sequence state)))
+ (setf (tNFA--NFA-state-id copy) (incf NFA--state-id))
+ copy))
+
+
;;; ----------------------------------------------------------------
;;; NFA fragments
@@ -192,6 +203,47 @@
(setf (tNFA--fragment-final frag1) (tNFA--fragment-final frag2)))
+(defun tNFA--fragment-copy (fragment)
+ ;; return a copy of FRAGMENT.
+ (declare (special copied-states))
+ (let (copied-states)
+ (tNFA--fragment-create
+ (tNFA--do-fragment-copy (tNFA--fragment-initial fragment))
+ (cdr (assq (tNFA--fragment-final fragment) copied-states)))))
+
+
+(defun tNFA--do-fragment-copy (state)
+ ;; return a copy of STATE, recursively following and copying links
+ ;; (note: NFA--state-id must be bound to something appropriate when this is
+ ;; called)
+ (declare (special copied-states))
+ (let ((copy (tNFA--NFA-state-copy state)))
+ (push (cons state copy) copied-states)
+
+ ;; if STATE is a branch, copy all links
+ (cond
+ ((eq (tNFA--NFA-state-type copy) 'branch)
+ (setf (tNFA--NFA-state-next copy)
+ (mapcar (lambda (next)
+ (or (cdr (assq next copied-states))
+ (tNFA--do-fragment-copy next)))
+ (tNFA--NFA-state-next copy))))
+
+ ;; if state doesn't have a next link, return
+ ((or (eq (tNFA--NFA-state-type copy) 'match)
+ (null (tNFA--NFA-state-type copy))))
+
+ ;; otherwise, copy next link
+ ((tNFA--NFA-state-type copy)
+ ;; for a non-branch STATE, copy next link
+ (setf (tNFA--NFA-state-next copy)
+ ;; if we've already copied next state, set next link to that
+ (or (cdr (assq (tNFA--NFA-state-next copy) copied-states))
+ ;; otherwise, recursively copy next state
+ (tNFA--do-fragment-copy (tNFA--NFA-state-next copy))))))
+ copy))
+
+
;;; ----------------------------------------------------------------
;;; DFA states
@@ -371,6 +423,15 @@ individual elements of STRING are identical. The default
is `eq'."
)))
+(defmacro tNFA--regexp-postfix-p (regexp)
+ ;; return t if next token in REGEXP is a postfix operator, nil otherwise
+ `(or (eq (car ,regexp) ?*)
+ (eq (car ,regexp) ?+)
+ (eq (car ,regexp) ??)
+ (and (eq (car ,regexp) ?\\)
+ (cdr ,regexp)
+ (eq (cadr ,regexp) ?{))))
+
(defun tNFA--from-regexp (regexp num-tags min-tags max-tags
&optional top-level shy-group)
@@ -387,7 +448,7 @@ individual elements of STRING are identical. The default is
`eq'."
(let* ((new (tNFA--NFA-state-create))
(fragment-stack (list (tNFA--fragment-create new new)))
- fragment attach token type group-end-tag)
+ fragment copy attach token type group-end-tag)
(catch 'constructed
(while t
@@ -513,67 +574,92 @@ individual elements of STRING are identical. The default
is `eq'."
;; ----- attach new fragment -----
(when fragment
- (setq attach (tNFA--fragment-final (car fragment-stack)))
- (if (or (eq (car regexp) ?*)
- (eq (car regexp) ?+)
- (eq (car regexp) ??))
- (if (eq type 'alternation)
- (error "Syntax error in regexp: unexpected \"%s\""
- (char-to-string token))
-
- ;; if next token is a postfix operator, splice new fragment
- ;; into NFA as appropriate
- (setq regexp (tNFA--regexp-next-token regexp)
- type (nth 0 regexp)
- token (nth 1 regexp)
- regexp (nth 2 regexp))
- (setq new (tNFA--NFA-state-create))
-
- (cond
-
- ;; .--fragment--.
- ;; / \
- ;; \ ______/
- ;; \ /
- ;; ---attach-----new---
- ;;
- ((eq type 'postfix*)
- (tNFA--NFA-state-make-branch
- attach (list (tNFA--fragment-initial fragment) new))
- (tNFA--NFA-state-make-epsilon
- (tNFA--fragment-final fragment) attach)
- (setf (tNFA--fragment-final (car fragment-stack)) new))
-
- ;; .----.
- ;; / \
- ;; / \
- ;; \ /
- ;; ---fragment-----new---
- ;;
- ((eq type 'postfix+)
- (tNFA--NFA-state-patch
- attach (tNFA--fragment-initial fragment))
- (tNFA--NFA-state-make-branch
- (tNFA--fragment-final fragment) (list attach new))
- (setf (tNFA--fragment-final (car fragment-stack)) new))
-
- ;; .--fragment--.
- ;; / \
- ;; ---attach new---
- ;; \______________/
- ;;
- ((eq type 'postfix?)
- (tNFA--NFA-state-make-branch
- attach (list (tNFA--fragment-initial fragment) new))
- (tNFA--NFA-state-make-epsilon
- (tNFA--fragment-final fragment) new)
- (setf (tNFA--fragment-final (car fragment-stack)) new))
- ))
-
-
- ;; if next token is not a postfix operator, attach new fragment
- ;; onto end of current NFA fragment
- (tNFA--fragment-patch (car fragment-stack) fragment))
+ ;; if next token is not a postfix operator, attach new fragment onto
+ ;; end of current NFA fragment
+ (if (not (tNFA--regexp-postfix-p regexp))
+ (tNFA--fragment-patch (car fragment-stack) fragment)
+
+ ;; if next token is a postfix operator, splice new fragment into
+ ;; NFA as appropriate
+ (when (eq type 'alternation)
+ (error "Syntax error in regexp: unexpected \"%s\""
+ (char-to-string token)))
+ (setq regexp (tNFA--regexp-next-token regexp)
+ type (nth 0 regexp)
+ token (nth 1 regexp)
+ regexp (nth 2 regexp))
+
+ (while fragment
+ (setq attach (tNFA--fragment-final (car fragment-stack)))
+ (setq new (tNFA--NFA-state-create))
+ (cond
+
+ ;; * postfix = \{0,\}:
+ ;;
+ ;; .--fragment--.
+ ;; / \
+ ;; \ ______/
+ ;; \ /
+ ;; ---attach-----new---
+ ;;
+ ((and (eq (car token) 0) (null (cdr token)))
+ (tNFA--NFA-state-make-branch
+ attach (list (tNFA--fragment-initial fragment) new))
+ (tNFA--NFA-state-make-epsilon
+ (tNFA--fragment-final fragment) attach)
+ (setf (tNFA--fragment-final (car fragment-stack)) new)
+ (setq fragment nil))
+
+ ;; + postfix = \{1,\}:
+ ;;
+ ;; .----.
+ ;; / \
+ ;; / \
+ ;; \ /
+ ;; ---fragment-----new---
+ ;;
+ ((and (eq (car token) 1) (null (cdr token)))
+ (tNFA--NFA-state-patch
+ attach (tNFA--fragment-initial fragment))
+ (tNFA--NFA-state-make-branch
+ (tNFA--fragment-final fragment) (list attach new))
+ (setf (tNFA--fragment-final (car fragment-stack)) new)
+ (setq fragment nil))
+
+ ;; \{0,n\} (note: ? postfix = \{0,1\}):
+ ;;
+ ;; .--fragment--.
+ ;; / \
+ ;; ---attach new---
+ ;; \______________/
+ ;;
+ ((eq (car token) 0)
+ ;; ? postfix = \{0,1\}: after this we're done
+ (if (eq (cdr token) 1)
+ (setq copy nil)
+ (setq copy (tNFA--fragment-copy fragment)))
+ ;; attach fragment
+ (tNFA--NFA-state-make-branch
+ attach (list (tNFA--fragment-initial fragment) new))
+ (tNFA--NFA-state-make-epsilon
+ (tNFA--fragment-final fragment) new)
+ (setf (tNFA--fragment-final (car fragment-stack)) new)
+ ;; prepare for next iteration
+ (decf (cdr token))
+ (setq fragment copy))
+
+ ;; \{n,\} or \{n,m\}:
+ ;;
+ ;; ---attach----fragment----new---
+ ;;
+ (t
+ (setq copy (tNFA--fragment-copy fragment))
+ (tNFA--fragment-patch (car fragment-stack) fragment)
+ ;; prepare for next iteration
+ (decf (car token))
+ (when (cdr token) (decf (cdr token)))
+ (setq fragment copy))
+ )))
;; if ending a group, add a maximize tag to end
@@ -626,9 +712,9 @@ individual elements of STRING are identical. The default is
`eq'."
(error "Syntax error in regexp: missing \"[\""))
;; . * + ?: set appropriate type
- ((eq token ?*) (setq type 'postfix*))
- ((eq token ?+) (setq type 'postfix+))
- ((eq token ??) (setq type 'postfix?))
+ ((eq token ?*) (setq type 'postfix token (cons 0 nil)))
+ ((eq token ?+) (setq type 'postfix token (cons 1 nil)))
+ ((eq token ??) (setq type 'postfix token (cons 0 1)))
((eq token ?.) (setq type 'wildcard))
;; \: look at next character
@@ -636,15 +722,61 @@ individual elements of STRING are identical. The default
is `eq'."
(unless (setq token (pop regexp))
(error "Syntax error in regexp: missing character after \"\\\""))
(cond
+ ;; |: alternation
((eq token ?|) (setq type 'alternation))
+ ;; \(?: shy group start
((and (eq token ?\() (eq (car regexp) ??))
(setq type 'shy-group-start)
(pop regexp))
+ ;; \)?: shy group end
((and (eq token ?\)) (eq (car regexp) ??))
(setq type 'shy-group-end)
(pop regexp))
+ ;; \(: group start
((eq token ?\() (setq type 'group-start))
- ((eq token ?\)) (setq type 'group-end))))
+ ;; \): group end
+ ((eq token ?\)) (setq type 'group-end))
+
+ ;; \{: postfix repetition operator
+ ((eq token ?{)
+ (setq type 'postfix token (cons nil nil))
+ ;; extract first number from repetition operator
+ (while (if (null regexp)
+ (error "Syntax error in regexp: malformed \\{...\\}")
+ (not (or (eq (car regexp) ?,) (eq (car regexp) ?\\))))
+ (setcar token (concat (car token) (char-to-string (pop regexp)))))
+ (if (null (car token))
+ (setcar token 0)
+ (unless (string-match "[0-9]+" (car token))
+ (error "Syntax error in regexp: malformed \\{...\\}"))
+ (setcar token (string-to-number (car token))))
+ (cond
+ ;; if next character is "\", we expect "}" to follow
+ ((eq (car regexp) ?\\)
+ (pop regexp)
+ (unless (eq (car regexp) ?})
+ (error "Syntax error in regexp: expected \"}\""))
+ (pop regexp)
+ (unless (car token)
+ (error "Syntax error in regexp: malformed \\{...\\}"))
+ (setcdr token (car token)))
+ ;; if next character is ",", we expect a second number to follow
+ ((eq (car regexp) ?,)
+ (pop regexp)
+ (while (if (null regexp)
+ (error "Syntax error in regexp: malformed \\{...\\}")
+ (not (eq (car regexp) ?\\)))
+ (setcdr token
+ (concat (cdr token) (char-to-string (pop regexp)))))
+ (unless (null (cdr token))
+ (unless (string-match "[0-9]+" (cdr token))
+ (error "Syntax error in regexp: malformed \\{...\\}"))
+ (setcdr token (string-to-number (cdr token))))
+ (pop regexp)
+ (unless (eq (car regexp) ?})
+ (error "Syntax error in regexp: expected \"}\""))
+ (pop regexp))))
+ ))
)
;; return first token type, token, and remaining regexp
- [elpa] branch externals/tNFA created (now 892122c), Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 014847d 05/23: Bumped copyright year, Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA f150b88 06/23: Added support for \{...\} postfix repetition operator,
Stefan Monnier <=
- [elpa] externals/tNFA 3835750 17/23: Trivial whitespace tidying., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 664c98e 20/23: Remove ChangeLogs from library headers., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 892122c 23/23: Tidy up unnecessary macros by making them into defsubst or defun., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA c9f0989 04/23: Converted transition hash tables to alists, Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 74b68dd 16/23: Updated copyright attribution and license (GPL2 -> GPL3)., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 241dd74 03/23: Bug-fix in tNFA--from-regexp; added public tNFA-group-data function., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 9e1ca74 13/23: Added changelog entries, and bumped tNFA version number., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 83ab8b3 10/23: Re-filled to 72 chars/line, for mailing to gnu-emacs-sources list, Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA b457403 14/23: Trivial docstring and comment fixes., Stefan Monnier, 2020/12/14
- [elpa] externals/tNFA 1af1e58 22/23: Implement trie-fuzzy-match and trie-fuzzy-complete functions., Stefan Monnier, 2020/12/14