(tRER (fix-re--do-R*ER*-transform (ast-a "R*E*R+") 'car 0 0 1) (ast-a "R*\\(?:E+R+\\|R\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E*R*") 'car 0 0 1) (ast-a "R*\\(?:E+R*\\)?")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E*F*R+") 'car 0 1 2) (ast-a "R*\\(?:\\(?:E*F+\\|E+\\)R+\\|R\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E*F*R*") 'car 0 1 2) (ast-a "R*\\(?:\\(?:E*F+\\|E+\\)R*\\)?")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E\\{0\\}R+") 'car 0 0 1) (ast-a "R+")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E\\{0\\}F\\{0\\}R*") 'car 0 1 2) (ast-a "R*")) (tRER (fix-re--do-R*ER*-transform (ast-a "AR*E*R+") 'cadr 1 1 2) (ast-a "AR*\\(?:E+R+\\|R\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "AR*E*R*") 'cadr 1 1 2) (ast-a "AR*\\(?:E+R*\\)?")) (tRER (fix-re--do-R*ER*-transform (ast-a "AR*E*F*R+") 'cadr 1 2 3) (ast-a "AR*\\(?:\\(?:E*F+\\|E+\\)R+\\|R\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "AR*E*F*R*") 'cadr 1 2 3) (ast-a "AR*\\(?:\\(?:E*F+\\|E+\\)R*\\)?")) (tRER (fix-re--do-R*ER*-transform (ast-a "AR*E\\{0\\}R+") 'cadr 1 1 2) (ast-a "AR+")) (tRER (fix-re--do-R*ER*-transform (ast-a "AR*E\\{0\\}F\\{0\\}R*") 'cadr 1 2 3) (ast-a "AR*")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E*R+?") 'car 0 0 1) (ast-a "R*\\(?:E+R+?\\|R\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E*R*?") 'car 0 0 1) (ast-a "R*\\(?:E+R*?\\)??")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E*F*R+?") 'car 0 1 2) (ast-a "R*\\(?:\\(?:E*F+\\|E+\\)R+?\\|R\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E*F*R*?") 'car 0 1 2) (ast-a "R*\\(?:\\(?:E*F+\\|E+\\)R*?\\)??")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E\\{0\\}R+?") 'car 0 0 1) (ast-a "R+")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*E\\{0\\}F\\{0\\}R*?") 'car 0 1 2) (ast-a "R*")) (tRER (fix-re--do-R*ER*-transform (ast-a "R+?E*R+") 'car 0 0 1) (ast-a "\\(?:R+?E+\\|R\\)R+")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E*R*") 'car 0 0 1) (ast-a "\\(?:R*?E+\\)??R*")) (tRER (fix-re--do-R*ER*-transform (ast-a "R+?E*F*R+") 'car 0 1 2) (ast-a "\\(?:R+?\\(?:E*F+\\|E+\\)\\|R\\)R+\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E*F*R*") 'car 0 1 2) (ast-a "\\(?:R*?\\(?:E*F+\\|E+\\)\\)??R*")) (tRER (fix-re--do-R*ER*-transform (ast-a "R+?E\\{0\\}R+") 'car 0 0 1) (ast-a "RR+")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E\\{0\\}F\\{0\\}R*") 'car 0 1 2) (ast-a "R*")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E*R+?") 'car 0 0 1) (ast-a "R*?\\(?:E+R+?\\|R\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E*R*?") 'car 0 0 1) (ast-a "R*?\\(?:E+R*?\\)??")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E*F*R+?") 'car 0 1 2) (ast-a "R*?\\(?:\\(?:E*F+\\|E+\\)R+?\\|R\\)")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E*F*R*?") 'car 0 1 2) (ast-a "R*?\\(?:\\(?:E*F+\\|E+\\)R*?\\)??")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E\\{0\\}R+?") 'car 0 0 1) (ast-a "R+?")) (tRER (fix-re--do-R*ER*-transform (ast-a "R*?E\\{0\\}F\\{0\\}R*?") 'car 0 1 2) (ast-a "R*?")) (defun fix-re--R+ER*->R+\(address@hidden)\? (ptr ad) "Do R+ER* -> R+(address@hidden)? or R+ER+ -> R+(address@hidden|R) on the whole list. PTR/AD point at the first element of the sequential list. Here, E is a non-empty sequence of elements which are matched by the empty string, E@ is the \"de-emptified\" version of E." ;; We must perform the loop rightmost transformations first. To see this, ;; consider R*ER*FR* done leftmost first. The first transformation takes us ;; to R*(address@hidden)?FR*. We're now stuck, as the middle R* is no longer ;; "exposed" to the last R*, and the end expression is still ill-formed. ;; Done rightmost first, R*ER*FR* -> R*ER*(address@hidden)? -> R*(address@hidden)?(address@hidden)?, which ;; is well-formed. (let (res) (let ((ptr ptr) (ad ad)) (when (fix-re--ptr-next ptr ad) (setq res (fix-re--R+ER*->R+\(address@hidden)\? ptr ad)))) (let* ((elt-ptr ptr) (elt-ad ad) (elt (fix-re--ptr-get elt-ptr elt-ad)) R0-R empty0-ptr empty1-ptr) ; No need for ..-ad's, since ; these will always be 'cadr. (or ;; Is `elt' R+ or R*? (when (and (consp elt) (memq (car elt) '(+ *))) (setq R0-R (cdr elt)) ;; Is the next element one matching the empty string, and which ;; isn't R+ or R*? (setq elt (fix-re--ptr-next elt-ptr elt-ad)) (when (and elt (fix-re--matches-empty-p elt) (not (and (consp elt) (memq (car elt) '(+ *)) (equal (cdr elt) R0-R)))) (setq empty0-ptr elt-ptr ; Remember first empty. -ad is implicitly 'cadr empty1-ptr elt-ptr ) ; Remember last empty. ;; Read the elements which match empty, but aren't R+ or R*. (while (and (setq elt (fix-re--ptr-next elt-ptr elt-ad)) (fix-re--matches-empty-p elt) (not (and (consp elt) (memq (car elt) '(+ *)) (equal (cdr elt) R0-R)))) (setq empty1-ptr elt-ptr)) ;; Have we found the matching R+ or R*? (when (and elt (consp elt) (memq (car elt) '(+ *)) (equal (cdr elt) R0-R)) ;; Yes. We're in business. (fix-re--do-R*ER*-transform ptr ad empty0-ptr empty1-ptr elt-ptr) t))) res)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; R*(R*A|B) -> R*(A|B) (defun fix-re--\[R\]+\(\[S\]*\)-transform (ptr ad R*) "Attempt to transform an alternative which begins with [..]+. The entire construct we have looks like [R]*([S]*A|...), where CA1 is the \"outside\" char-alt, and PTR/AD points at the entire alternative. The *s on the char-alts may alternatively be +s. In the following \"[R-S]\" denotes the difference of the two character alteratives R and S. [R-S] is a new character alternative which matches any character which R does, but S doesn't. The transformation looks like: \(i) (with ^ operators) [^R]+([^S]*A|...) -> [^R]+(([R-S][^S]*)?A|...) [^R]+([^S]+A|...) -> [^R]+(([R-S][^S]*)?[^S]A|...) \(ii) (without ^ operators) [R]+([S]*A|...) -> [R]+(([S-R][S]*)?A|...) or [R]+([S]+A|...) -> [R]+(([S-R][S]*)?[S]A|...) or, if S-R is empty: [R]+([S]*A|...) -> [R]+(A|...) [R]+([S]+A|...) -> [R]+([S]A|...) FIXME!!! " (let* ((elt (fix-re--ptr-get ptr ad)) ; [S]*A,,, (S* (car elt)) ; [S]* (S (cdr S*)) ; [S] (R (cdr R*)) ; [R] S-R R-S new-S new-\( res subres ) (when (eq (cadr S) (cadr R)) ; Either both have or neither has ^ operator. (when ; Have R and S got any overlap? (if (cadr S) ; With ^ operator. (progn (setq R-S (copy-tree R)) (setcar (cdr R-S) nil) ; Remove the ^. (setq subres (fix-re--chalt-minus (cdr R-S) (caddr S) t))) ;; Transform [abc]+([cde]*R|...) to [abc]+(([de][cde]*)?R|...) (setq S-R (copy-tree S)) (setq subres (fix-re--chalt-minus (cdr S-R) (caddr R) nil))) (if (null (caddr (or S-R R-S))) (progn (if (eq (car S*) '+) (fix-re--chop-+* elt 'car) ; [abc]+([ab]+A|..) -> [abc]+([ab]A|..) (fix-re--ind-chop ptr ad 'car t)) ; [abc]+([ab]*A|..) -> [abc]+(A|..) (setq res t)) (fix-re--wrap-in-\( '\\\(\?: elt 'car) (setq new-\( (fix-re--ptr-get elt 'car)) (fix-re--insert (if (numberp subres) subres (or S-R R-S)) (cadr new-\() 'car) (fix-re--+*ify '\? elt 'car) (when (eq (car S*) '+) (setcar S* '*) (setq new-S (copy-tree S)) (fix-re--insert-after new-S elt 'car)) (setq res t)))) res)) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([cde]*G\\)") 'cadr (ast-aa "[abc]+")) 1 (ast-aa "\\(\\(?:[de][cde]*\\)?G\\)") t t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([cde]+G\\)") 'cadr (ast-aa "[abc]+")) 1 (ast-aa "\\(\\(?:[de][cde]*\\)?[cde]G\\)") t t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([ab]*G\\)") 'cadr (ast-aa "[abc]+")) 1 (ast-aa "\\(G\\)") t t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([ab]+G\\)") 'cadr (ast-aa "[abc]+")) 1 (ast-aa "\\([ab]G\\)") t t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([^cde]*G\\)") 'cadr (ast-aa "[^abc]+")) 1 (ast-aa "\\(\\(?:[ab][^cde]*\\)?G\\)") t t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([^cde]+G\\)") 'cadr (ast-aa "[^abc]+")) 1 (ast-aa "\\(\\(?:[ab][^cde]*\\)?[^cde]G\\)") t t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([^abc]*G\\)") 'cadr (ast-aa "[^ab]+")) 1 (ast-aa "\\(G\\)") t t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([^abc]+G\\)") 'cadr (ast-aa "[^ab]+")) 1 (ast-aa "\\([^abc]G\\)") t t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([cde]*G\\)") 'cadr (ast-aa "[^abc]")) 1 (ast-aa "\\([cde]*G\\)") 'nil t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([^cde]*G\\)") 'cadr (ast-aa "[abc]")) 1 (ast-aa "\\([^cde]*G\\)") 'nil t) (tp (fix-re--\[R\]+\(\[S\]*\)-transform (ast-aa "\\([def]*G\\)") 'cadr (ast-aa "[abc]")) 1 (ast-aa "\\([def]*G\\)") 'nil t) (defun fix-re--do-R+\(R*A|B\)-transform (R-rep alt) "Attempt a R+(R*A|B) -> R+(A|B) transformation. R-REP is a cons representing either R+ or R*. ALT represents a form of the form \(..\|..\|...\). " (let* ((R-R (cdr R-rep)) (ptr alt) (ad 'cadr) ; Point to the second elt. of the list, the first being '\\\( (elt (fix-re--ptr-get ptr ad)) res car-elt elt-+*) (while elt ; (R*A) (when (and (consp elt) ; This should always be true (setq car-elt (car elt)) ; This is now R*A (consp car-elt) (memq (setq elt-+* (car car-elt)) '(+ *))) (cond ((equal (cdr car-elt) R-R) (if (eq elt-+* '+) (fix-re--chop-+* elt 'car) (fix-re--ind-chop ptr ad 'car t)) ; i.e. ~ (fix-re--chop elt 'car) (setq res t)) ((and (consp R-R) (eq (car R-R) '\[) (consp (cdr car-elt)) (eq (cadr car-elt) '\[)) (if (fix-re--\[R\]+\(\[S\]*\)-transform ptr ad R-rep) (setq res t))))) (setq elt (fix-re--ptr-next ptr ad))) res)) (defun fix-re--R+\(R*A|B\)->R*\(A|B\) (ptr ad) "Do the transition on every pertinent element pairs in the sequence. PTR/AD point to the first element in the sequential list." (let ((elt (fix-re--ptr-get ptr ad)) R-rep res) (while elt (if (and (consp elt) (memq (car elt) '(+ *))) (progn (setq R-rep elt elt (fix-re--ptr-next ptr ad)) (when (fix-re--is-\( elt) (if (fix-re--do-R+\(R*A|B\)-transform R-rep elt) (setq res t)) (setq elt (fix-re--ptr-next ptr ad)))) (setq elt (fix-re--ptr-next ptr ad)))) res)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; (defun fix-re--\(R|R\)->\(R\) (ptr ad) "Remove duplicate elements from the alternatives list. PTR/AD point to the first element of the list, which will be a symbol like '\\\(." (let* ((elt0-ptr ptr) (elt0-ad ad) (elt0 (fix-re--ptr-next elt0-ptr elt0-ad)) elt1-ptr elt1-ad elt1 res) (while elt0 (setq elt1-ptr elt0-ptr elt1-ad elt0-ad elt1 (fix-re--ptr-next elt1-ptr elt1-ad)) (while elt1 (while (equal elt1 elt0) (fix-re--chop elt1-ptr elt1-ad) (setq elt1 (fix-re--ptr-get elt1-ptr elt1-ad)) (setq res t)) (setq elt1 (fix-re--ptr-next elt1-ptr elt1-ad))) (setq elt0 (fix-re--ptr-next elt0-ptr elt0-ad))) res)) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;