--- regexp-opt.el	2019-02-16 23:59:22.000000000 -0200
+++ regexp-opt3.el	2019-04-12 10:14:25.871968393 -0300
@@ -1,4 +1,4 @@
-;;; regexp-opt.el --- generate efficient regexps to match strings -*- lexical-binding: t -*-
+;;; regexp-opt2.el --- generate efficient regexps to match strings -*- lexical-binding: t -*-
 
 ;; Copyright (C) 1994-2019 Free Software Foundation, Inc.
 
@@ -37,10 +37,10 @@
 ;; For example:
 ;;
 ;; (let ((strings '("cond" "if" "when" "unless" "while"
-;; 		    "let" "let*" "progn" "prog1" "prog2"
-;; 		    "save-restriction" "save-excursion" "save-window-excursion"
-;; 		    "save-current-buffer" "save-match-data"
-;; 		    "catch" "throw" "unwind-protect" "condition-case")))
+;;                  "let" "let*" "progn" "prog1" "prog2"
+;;                  "save-restriction" "save-excursion" "save-window-excursion"
+;;                  "save-current-buffer" "save-match-data"
+;;                  "catch" "throw" "unwind-protect" "condition-case")))
 ;;   (concat "(" (regexp-opt strings t) "\\>"))
 ;;  => "(\\(c\\(atch\\|ond\\(ition-case\\)?\\)\\|if\\|let\\*?\\|prog[12n]\\|save-\\(current-buffer\\|excursion\\|match-data\\|restriction\\|window-excursion\\)\\|throw\\|un\\(less\\|wind-protect\\)\\|wh\\(en\\|ile\\)\\)\\>"
 ;;
@@ -71,18 +71,15 @@
 ;; your code for such changes to have effect in your code.
 
 ;; Originally written for font-lock.el, from an idea from Stig's hl319.el, with
-;; thanks for ideas also to Michael Ernst, Bob Glickstein, Dan Nicolaescu and
-;; Stefan Monnier.
+;; thanks for ideas also to Michael Ernst, Bob Glickstein, Dan Nicolaescu,
+;; Stefan Monnier and Miguel Frasson.
+
 ;; No doubt `regexp-opt' doesn't always produce optimal regexps, so code, ideas
 ;; or any other information to improve things are welcome.
-;;
-;; One possible improvement would be to compile '("aa" "ab" "ba" "bb")
-;; into "[ab][ab]" rather than "a[ab]\\|b[ab]".  I'm not sure it's worth
-;; it but if someone knows how to do it without going through too many
-;; contortions, I'm all ears.
 
 ;;; Code:
 
+;; original
 ;;;###autoload
 (defun regexp-opt (strings &optional paren)
   "Return a regexp to match a string in the list STRINGS.
@@ -117,12 +114,12 @@
  (defun simplified-regexp-opt (strings &optional paren)
    (let ((parens
           (cond ((stringp paren)       (cons paren \"\\\\)\"))
-                ((eq paren \\='words)    \\='(\"\\\\\\=<\\\\(\" . \"\\\\)\\\\>\"))
-                ((eq paren \\='symbols) \\='(\"\\\\_<\\\\(\" . \"\\\\)\\\\_>\"))
-                ((null paren)          \\='(\"\\\\(?:\" . \"\\\\)\"))
-                (t                       \\='(\"\\\\(\" . \"\\\\)\")))))
+                ((eq paren 'words)    '(\"\\\\\\=<\\\\(\" . \"\\\\)\\\\>\"))
+                ((eq paren 'symbols) '(\"\\\\_<\\\\(\" . \"\\\\)\\\\_>\"))
+                ((null paren)          '(\"\\\\(?:\" . \"\\\\)\"))
+                (t                       '(\"\\\\(\" . \"\\\\)\")))))
      (concat (car paren)
-             (mapconcat \\='regexp-quote strings \"\\\\|\")
+             (mapconcat 'regexp-quote strings \"\\\\|\")
              (cdr paren))))"
   (save-match-data
     ;; Recurse on the sorted list.
@@ -141,6 +138,80 @@
 	    (t re)))))
 
 ;;;###autoload
+(defun regexp-opt2 (strings &optional paren)
+  "Return a regexp to match a string in the list STRINGS.
+Each string should be unique in STRINGS and should not contain
+any regexps, quoted or not.  Optional PAREN specifies how the
+returned regexp is surrounded by grouping constructs.
+
+The optional argument PAREN can be any of the following:
+
+a string
+    the resulting regexp is preceded by PAREN and followed by
+    \\), e.g.  use \"\\\\(?1:\" to produce an explicitly numbered
+    group.
+
+`words'
+    the resulting regexp is surrounded by \\=\\<\\( and \\)\\>.
+
+`symbols'
+    the resulting regexp is surrounded by \\_<\\( and \\)\\_>.
+
+non-nil
+    the resulting regexp is surrounded by \\( and \\).
+
+nil
+    the resulting regexp is surrounded by \\(?: and \\), if it is
+    necessary to ensure that a postfix operator appended to it will
+    apply to the whole expression.
+
+The resulting regexp is equivalent to but usually more efficient
+than that of a simplified version:
+
+ (defun simplified-regexp-opt (strings &optional paren)
+   (let ((parens
+          (cond ((stringp paren)       (cons paren \"\\\\)\"))
+                ((eq paren \\='words)    \\='(\"\\\\\\=<\\\\(\" . \"\\\\)\\\\>\"))
+                ((eq paren \\='symbols) \\='(\"\\\\_<\\\\(\" . \"\\\\)\\\\_>\"))
+                ((null paren)          \\='(\"\\\\(?:\" . \"\\\\)\"))
+                (t                       \\='(\"\\\\(\" . \"\\\\)\")))))
+     (concat (car paren)
+             (mapconcat \\='regexp-quote strings \"\\\\|\")
+             (cdr paren))))"
+  (let* ((regexp-opt-branches-alist nil); storage of `regexp-opt-branch'
+					; made local
+	 (root-fa nil))
+    (dolist (s strings)
+      (setq root-fa (regexp-opt-add-string-to-node s root-fa)))
+    ;;
+    ;; Simplification
+    ;;
+    (regexp-opt-perform-simplification root-fa)
+    ;; end of simplification
+    ;;
+    ;; output regexp stored in root-fa
+    ;; return according to PAREN and type
+    ;;
+    ;;(regexp-opt-print-node root-fa 'graphviz)
+    (let* ((re-type (regexp-opt-typed-regexp root-fa))
+	   (type (cdr re-type))
+	   (re (if (or (eq type 'sequence) (eq type 'empty))
+		   (concat "\\(?:" (car re-type) "\\)")
+		 (car re-type)))
+	   ;; re-group is the re beginning with "\\(?:"
+	   (re-group (if (eq type 'single) (concat "\\(?:" re "\\)") re))
+	   ;; open is the group beginning string if necessary
+	   (open (cond ((stringp paren) paren)
+		       ((eq paren 'words) "\\<\\(")
+		       ((eq paren 'symbols) "\\_<\\(")
+		       (paren "\\(")))
+	   (close (cond ((eq paren 'words) "\\>")
+			((eq paren 'symbols) "\\_>")
+			(t ""))))
+      ; if OPEN, replace "\\(?:" by OPEN
+      (if open (concat open (substring re-group 4) close) re))))
+
+;;;###autoload
 (defun regexp-opt-depth (regexp)
   "Return the depth of REGEXP.
 This means the number of non-shy regexp grouping constructs
@@ -151,17 +222,18 @@
     ;; Count the number of open parentheses in REGEXP.
     (let ((count 0) start last)
       (while (string-match "\\\\(\\(\\?[0-9]*:\\)?" regexp start)
-	(setq start (match-end 0))	      ; Start of next search.
-	(when (and (not (match-beginning 1))
-		   (subregexp-context-p regexp (match-beginning 0) last))
-	  ;; It's not a shy group and it's not inside brackets or after
-	  ;; a backslash: it's really a group-open marker.
-	  (setq last start)	    ; Speed up next regexp-opt-re-context-p.
-	  (setq count (1+ count))))
+        (setq start (match-end 0))            ; Start of next search.
+        (when (and (not (match-beginning 1))
+                   (subregexp-context-p regexp (match-beginning 0) last))
+          ;; It's not a shy group and it's not inside brackets or after
+          ;; a backslash: it's really a group-open marker.
+          (setq last start)         ; Speed up next regexp-opt-re-context-p.
+          (setq count (1+ count))))
       count)))
 
 ;;; Workhorse functions.
 
+;; kept for the original `regexp-opt'
 (defun regexp-opt-group (strings &optional paren lax)
   "Return a regexp to match a string in the sorted list STRINGS.
 If PAREN non-nil, output regexp parentheses around returned regexp.
@@ -256,63 +328,893 @@
 			close-group))))))))))
 
 
+;; Rationale for simplification
+;; ============================
+;;
+;; If a list of strings could be arranged in a regexp splitable in
+;; units, and we arrange these strings in finite automata (oriented
+;; graph with labeled arrows and nameless nodes), from a point a set
+;; of subgraphs will reproduce same pattern (are `equal'), so we
+;; simplify by making the `equal's become `eq's, merginging nodes,
+;; what closes a biurcation, meaning a smaller alternative group in
+;; regexp.
+;;
+;; Implementation of optimization by finite automata FA
+;; ====================================================
+;;
+;; Regexp automata are a type of directed graph with labeled arrows.
+;;
+;; NODE = (ARROW1 ARROW2 ...)
+;; A node is a list of arrows pointing out from it.
+;;
+;; ARROW = (CHAR . NODE)
+;; An arrow is a cons of its label and a node pointed by it.
+;;
+;; Each node must have a unique arrow with CHAR, must be sorted
+;; because we will compare nodes with `equal'.
+;;
+;; Since the empty string "" must be included in a natural way, all
+;; strings are included in FAs like a null-terminated strings, what
+;; means "ending with epsilon arrow" in FA jargon.  The string "" is
+;; just an epsilon arrow (arrow with empty label).  The char 0
+;; represents the `epsilon' arrows of FA.
+;;
+;; A path in the graph from start node to end node (nil) represents a
+;; string with that characteres, epsilon arrows (char=0) adding no
+;; chars to string.
+;;
+;; Charsets, alternative groups or `?' constructions make a
+;; bifurcation in the path.  Example: regexp "a?\\(b\\|cd\\)e" is
+;; implemented by the FA
+;;
+;;   epsilon-|
+;;  /        v
+;; >1 --a--> 2 ---b---> 3 --e--> 5 --epsilon--> nil
+;;           |          ^
+;;           \-c-> 4 -d-/
+;;
+;; When translating strings to FA, the "unique arrow with CHAR" rule
+;; already groups similarities at the beginning of regexps.  For
+;; example, from strings ("abd" "acd"), we get the FA:
+;;
+;;  >1 --a--> 2 --b--> 3 --d--> 4 --epsilon--> nil
+;;            |                                 ^
+;;            +---c--> 5 --d--> 6 --epsilon----/
+;;
+;; This FA is internally stored as
+;;    ((?a (?b (?d (0)))
+;;         (?c (?d (0)))))
+;;
+;; Note that not all such graphs come from a regexp automaton. We will
+;; not allow a graph with arrows connecting branches of an
+;; alternative.  For example, it is not allowed an arrow from 3
+;; pointing to 5, getting the following graph:
+;;
+;;  >1 --a--> 2 --b--> 3 --c--> 4 --f--> F
+;;            |        g        ^           (! not allowed graph)
+;;            |        V        |           ( sibling branches connected)
+;;            +---d--> 5 --e--> 6
+;;
+;; Althought we could build a (complicated) regexp from it (namely
+;; "a\\(b\\(c\\|ge\\)\\|de\\)f" we could accomplish it with a graph
+;; without connections between sibling branches (detaching 6 from 5)
+;;
+;;                     +--------c--------+
+;;                     |                 v
+;;  >1 --a--> 2 --b--> 3 --g--> 5 --e--> 4 --f--> F
+;;            |                          ^
+;;            |                          |
+;;            +-----d----> 6 ------e-----+
+;;
+;; Keeping this in mind, it is only allowed merge nodes that have the
+;; same `source' and `sink' nodes, that is, are in sibling branches.
+;; In the example above, nodes 6, 3 and 4 are in sibling branches
+;; (source=2, sink=4), but not 5 (its source is 3).  If we merge 5 and
+;; 6 we return to the previous not allowed graph.
+;;
+;; *** Simplification 1: merge `equal' nodes ***
+;;
+;; Start from strings ("abd" "acd"), we get the FA:
+;;
+;;  >1 --a--> 2 --b--> 3 --d--> 4 --epsilon--> nil
+;;            |                                 ^
+;;            +---c--> 5 --d--> 6 --epsilon----/
+;;
+;; Look at nodes 3 and 5 in sibling branches: both trees starting from
+;; 3 and 5 (d -> epsilon ->), which are internally stored as (?d (0)),
+;; are `equal' but not `eq'; so we "merge" nodes 3 and 5, making arrow
+;; "c" from 2 point node 3 instead of 5, so these trees are `eq' now:
+;;
+;;  >1 --a--> 2 --b--> 3 --d--> 4 --epsilon--> nil
+;;            |        ^
+;;            +---c---/
+;;
+;; Internally: ((?a (?b #1)
+;;                  (?c #1)))    where   #1=(?d (0))
+;;
+;; No other nodes are `equal'. We finished.
+;; This FA translates to regexp "a[bc]d".
+;;
+;; *** Simplification 2: node equal to subset of its source
+;;
+;; This simplification produces `?' constructions.  Start from strings
+;; ("abcf" "adef" "af")
+;;
+;;            +---f--> 9 ----------epsilon -------------\
+;;            |                                          v
+;;  >1 --a--> 2 --b--> 3 --c--> 4 --f--> 5 --epsilon--> nil
+;;            |                                          ^
+;;            +---d--> 6 --e--> 7 --f--> 8 --epsilon----/
+;;
+;; After simplification 1 (merge 4 and 7):
+;;
+;;            +---f--> 9 ----------epsilon -------------\
+;;            |                                          v
+;;  >1 --a--> 2 --b--> 3 --c--> 4 --f--> 5 --epsilon--> nil
+;;            |                 ^
+;;            +---d--> 6 --e---/
+;;
+;; (This is the regexp "a\\(\\(bc\\|de\\)f\\|f\\)" )
+;;
+;; Now, notice that node 4 = {f -> epsilon ->}, internally ((?f (0))),
+;; is `equal' to a subset of arrows of 2, its source.  We could split
+;; node 2 with an epsilon arrow in two nodes
+;;
+;;  +--f--> 9 --epsilon--> nil     epsilon--> 10 --f--> 9 --epsilon--> nil
+;;  |                                 |
+;;  2 --b--> ...   is equivalent to   2 --b--> ...
+;;  |                                 |
+;;  +---d--> ...                      +---d--> ...
+;;
+;; This new node 10 also has source 2, like 4, and is `equal' to 4, so
+;; we merge 10 and 4 by simplification 1 (node 10 disappears again!):
+;;
+;;            +---epsilon-------+
+;;            |                 v
+;;  >1 --a--> 2 --b--> 3 --c--> 4 --f--> 5 --epsilon--> nil
+;;            |                 ^
+;;            +---d--> 6 --e--> 7
+;;
+;; This FA yields the simpler regexp "a\\(bc\\|de\\)?f"
+;;
+;; Summarizing, if a node N is similar to a subtree os its source S,
+;; we delete the corresponding arrows from S and add an epsilon arrow
+;; from S to N.
+;;
+;; Sibling branches
+;; ================
+;;
+;; Sibling branches are those that start in the same node (source
+;; node) and end in the same node (sink node).  This concept is
+;; related to build alternative groups.
+;;
+;; The source node of a node S is the one most nested where one but
+;; not all paths from it lead to N.  The sink is analogous with arrows
+;; in oposite directions.  Source and sink nodes mark beginning and
+;; end of an alternative group of the corresponding regexp.  For
+;; example, in the FA below
+;;
+;;                     +--------c--------+
+;;                     |                 v
+;;  >1 --a--> 2 --b--> 3 --g--> 5 --e--> 4 --f--> 7 --epsilon--> nil
+;;            |                          ^
+;;            |                          |
+;;            +-----d----> 6 ------e-----+
+;;
+;; for regexp "a\\(b\\(c\\|ge\\)\\|de\\)f", there are 2 sets of (non
+;; trivial) sibling branches: {2 -> 4} and {3 -> 4}, this last nested
+;; in the previous.  Here, nodes 3 and 6 are in sibling branches of
+;; {2 -> 4}.  5 does not belong to {2 -> 4} because its source is 3,
+;; not 2.  Nodes 2 and 4 belong to the trivial root branch {1 -> nil}.
+;;
+;; The initial conversion from string to nodes only creates sources
+;; (nil is the only sink).  The simplifications explained only create
+;; sinks, never sources.
+
+;; It may happen that a source has several sinks, so we need
+;; to compute all possible intersections to compute all sinks.
+
+;; Example of sink detection:
+;;       +---a---> 2 --x---\
+;;       |                  v
+;;       |  --b--> 3 --y--> 4 ---\
+;;       | /                      v
+;;      >1 ------c-----> 5 --e--> 7 --epsilon--> nil
+;;        \             ^
+;;         \-d--> 6 -f-/
+;; For source 1:
+;;   path for a = (2 4 7 nil)
+;;   path for b = (3 4 7 nil)
+;;   path for c = (5 7 nil)
+;;   path for d = (6 5 7 nil)
+;; Intersections
+;; ((7 nil) (7 nil) (7 nil) (4 7 nil) (7 nil) (7 nil) (7 nil)
+;;  (7 nil) (7 nil) (7 nil) (5 7 nil))
+;; So we detect the sinks: 4, 5, 7
+
+(defun regexp-opt-perform-simplification (fa)
+  "Perform simplifications to FA, by side effects."
+  ;; We need to substitute nodes that are `equal' to be `eq', so we
+  ;; need to know the "parents" of that node to set cdr's of arrows.
+  ;; Se we make a `parents-alist' that associates nodes with all
+  ;; nodes with an arrow to it.
+  ;;
+  ;; Simplification happens at source nodes (nodes with more than 1
+  ;; arrow from it), so we gather a list of sources too.  Sources
+  ;; will be sorted from most external to most nested.
+  (let ((sources)
+	(parents-alist)
+	;; nodes to be processed: list of (node parent depth)
+	(nodes (list (list fa t 0)))
+        processed-sources
+	node-parent-depth node depth parent
+	new-node source-without-arrows)
+    (while nodes
+      (setq node-parent-depth (pop nodes)
+	    node (car node-parent-depth)
+	    parent (cadr node-parent-depth)
+	    depth (caddr node-parent-depth))
+      (while (and node (= (length node) 1)) ; if 1 arrow, follow up
+					; to a source
+	(push parent (alist-get node parents-alist))
+	(setq parent node
+	      node (cdar node)))
+      (when node ; must be a source, push all subnodes to `nodes'
+	(push (cons node depth) sources)
+	(push parent (alist-get node parents-alist))
+	(dolist (arrow node)
+	  (push (list (cdr arrow) node (1+ depth)) nodes))))
+    ;; Sort sources by depth, from most nested to most external.
+    (setq sources (mapcar #'car (sort sources
+				      (lambda (x y) (> (cdr x) (cdr y))))))
+    ;; start of simplification
+    ;;
+    (dolist (source sources)
+      (setq simplified t)
+      ;; repeat simplification until source is fully simplified
+      (while simplified
+	;; debug
+	;;(regexp-opt-print-node fa 'graphviz "png" "/home/sme/re-temp")
+	;;(read-from-minibuffer "Press ENTER to continue.")
+	(setq simplified nil)
+	;; It's safe to process all branches completely
+	(dolist (branch (regexp-opt-branch source t))
+	  (setq nodes (cdr branch))
+	  (while nodes
+	    (setq to (pop nodes))
+	    ;; Simplification 1
+	    (dolist (from nodes)
+	      (when (equal from to)
+		(setq simplified t)
+		;; make all arrows that pointed to FROM now point
+		;; to TO and update parents-alist of TO
+		(dolist (parent (alist-get from parents-alist))
+		  (dolist (arrow parent)
+		    (when (eq (cdr arrow) from)
+		      (setcdr arrow to)))
+		  (or (memq parent (alist-get to parents-alist))
+		      (push parent (alist-get to parents-alist))))
+		;; verify if from is last sink of some previous source
+		;; if so, must recompute branch for that source
+		(catch 'from-last-sink
+		  (dolist (prev-source processed-sources)
+		    (when (eq from
+			      (caar (last (regexp-opt-branch prev-source))))
+		      (regexp-opt-branch prev-source t)
+		      (throw 'from-last-sink nil))))))
+	    ;; eliminate all simplified nodes from `nodes'
+	    (setq nodes (delete to nodes)))))
+      ;; Simplification 2
+      ;;
+      ;; need to create a epsilon arrow to subnode: if SOURCE
+      ;; has arrow 0, TO must have an arrow 0 so it will be
+      ;; removed from SOURCE in this simplification	
+      (catch 'simplification2
+	(dolist (branch (regexp-opt-branch source))
+	  (dolist (node (cdr branch))
+	    (unless (and (assq 0 source) (not (assq 0 node)))
+	      ;; sorted list of chars of NODE:
+     	      ;; CHARS = (mapcar 'car node)
+     	      ;; list of same arrows in SOURCE:
+     	      ;; NEW-NODE = (mapcar (lambda (c) (assq c source))
+	      ;;                    CHARS)
+	      (setq new-node (mapcar (lambda (char)
+				       (assq char source))
+    				     (mapcar 'car node)))
+	      (when (equal new-node node)
+		(setq simplified t)
+		;; remove arrows of source commom to node,
+		;; keeping cons of SOURCE, add arrow (0 . node)
+		(setf source-without-arrows (regexp-opt-difference
+					     source new-node #'eq)
+	      	      (car source) (cons 0 node)
+	      	      (cdr source) source-without-arrows)
+		;; add source as parrent of node
+		(push source (alist-get node parents-alist))
+		;; recompute branch because last-sink may have changed
+		(regexp-opt-branch source t)
+		;; when simplified, no need to look for another simplif.
+		(throw 'simplification2 nil))))))
+      (push source processed-sources))))
+
+;; (defun regexp-opt-perform-simplification (fa)
+;;   "Perform simplifications to FA, by side effects."
+;;   ;; We need to substitute nodes that are `equal' to be `eq', so we
+;;   ;; need to know the "parents" of that node to set cdr's of arrows.
+;;   ;; Se we make a `parents-alist' that associates nodes with all
+;;   ;; nodes with an arrow to it.
+;;   ;;
+;;   ;; Simplification happens at source nodes (nodes with more than 1
+;;   ;; arrow from it), so we gather a list of sources too.  Sources
+;;   ;; will be sorted from most external to most nested.
+;;   (let ((sources)
+;; 	(parents-alist)
+;; 	;; nodes to be processed: list of (node parent depth)
+;; 	(nodes (list (list fa t 0)))
+;; 	node-parent-depth node depth parent)
+;;     (while nodes
+;;       (setq node-parent-depth (pop nodes)
+;; 	    node (car node-parent-depth)
+;; 	    parent (cadr node-parent-depth)
+;; 	    depth (caddr node-parent-depth))
+;;       (while (and node (= (length node) 1)) ; if 1 arrow, follow up
+;; 					; to a source
+;; 	(push parent (alist-get node parents-alist))
+;; 	(setq parent node
+;; 	      node (cdar node)))
+;;       (when node ; must be a source, push all subnodes to `nodes'
+;; 	(push (cons node depth) sources)
+;; 	(push parent (alist-get node parents-alist))
+;; 	(dolist (arrow node)
+;; 	  (push (list (cdr arrow) node (1+ depth)) nodes))))
+;;     ;; Sort sources by depth, from most nested to most external.
+;;     (setq sources (mapcar #'car (sort sources
+;; 				      (lambda (x y) (> (cdr x) (cdr y))))))
+;;     ;; start of simplification
+;;     (let* ((simplify t)
+;; 	   new-node source-without-arrows)
+;;       ;; redo simplifications until no simplification is made
+;;       (while simplify
+;; 	(setq simplify nil
+;; 	      ;; force branch recomputation
+;; 	      regexp-opt-branches-alist nil)
+;; 	(dolist (source sources)
+;; 	  (setq source-simplified nil)
+;; 	  ;; It's safe to process all branches completely
+;; 	  (dolist (branch (regexp-opt-branch source))
+;; 	    (setq sink (car branch)
+;; 		  nodes (cdr branch))
+;; 	    (while nodes
+;; 	      (setq to (pop nodes))
+;; 	      ;; Simplification 1
+;; 	      (dolist (from nodes)
+;; 		(when (equal from to)
+;; 		  (setq simplify t)
+;; 		  ;; make all arrows that pointed to FROM now point
+;; 		  ;; to TO and update parents-alist of TO
+;; 		  (dolist (parent (alist-get from parents-alist))
+;; 		    (dolist (arrow parent)
+;; 		      (when (eq (cdr arrow) from)
+;; 			(setcdr arrow to)))
+;; 		    (or (memq parent (alist-get to parents-alist))
+;; 			(push parent (alist-get to parents-alist))))
+;; 		  ;; vanish FROM and nested sources from SOURCES
+;; 		  (when (> (length from) 1)
+;; 		    (setq sources (regexp-opt-remove-source from sources)))))
+;; 	      ;; eliminate all simplified nodes from `nodes'
+;; 	      (setq nodes (delete to nodes))
+;; 	      ;; Simplification 2
+;; 	      ;;
+;; 	      ;; need to create a epsilon arrow to subnode: if SOURCE
+;; 	      ;; has arrow 0, TO must have an arrow 0 so it will be
+;; 	      ;; removed from SOURCE in this simplification
+;; 	      (unless (and (assq 0 source) (not (assq 0 to)))
+;; 		;; sorted list of chars of TO:
+;;      		;; CHARS = (mapcar 'car to)
+;;      		;; list of same arrows in SOURCE:
+;;      		;; NEW-NODE = (mapcar (lambda (c) (assq c source))
+;; 		;;                    CHARS)
+;; 		(setq new-node (mapcar (lambda (char)
+;; 					 (assq char source))
+;;     				       (mapcar 'car to)))
+;; 		(when (equal new-node to)
+;; 		  (setq simplify t)
+;; 		  ;; remove arrows of source commom to node,
+;; 		  ;; keeping cons of SOURCE, add arrow (0 . to)
+;; 		  (setf source-without-arrows (regexp-opt-difference
+;; 					       source new-node #'eq)
+;; 	      		(car source) (cons 0 to)
+;; 	      		(cdr source) source-without-arrows)
+;; 		  ;; add source as parrent of node
+;; 		  (push source (alist-get to parents-alist)))))))))))
+
+(defun regexp-opt-add-string-to-node (s node &optional position)
+  "Add string S to NODE and return NODE.
+POSITION is the start position of S considered, 0 if POSITION is
+nil.  Arrows are inserted sorted by sorted by char."
+  (let* ((len (length s))
+	 (pos (if position position 0))
+	 (char (if (> len pos) (aref s pos) 0)) ; epsilon arrow if pos=len
+	 (arrow (assq char node))
+	 (child-node (when (> len pos)
+		       (regexp-opt-add-string-to-node s (cdr arrow)
+						       (1+ pos)))))
+    (cond ((and node arrow) (setcdr arrow child-node) node)
+	  (node (sort (cons (cons char child-node) node)
+		      (lambda (x y) (< (car x) (car y)))))
+	  (t (list (cons char child-node))))))
+
+(defvar regexp-opt-branches-alist nil
+  "Variable holding branch computation or other.
+Before simplification, holds source information.
+During simplification, holds info that can be reused until a
+simplification changes scenario.")
+
+(defun regexp-opt-branch (source &optional recompute)
+  "Return a list of branches associated with SOURCE.
+The return value is a list of (SINK . NODES).  SINK is a node
+that finishes a bifurcation started in SOURCE.  A SOURCE may have
+several sinks.  Both SOURCE and SINK do not belong to NODES.  The
+order of sinks in the returned value is from the most nested to
+the most external.
+
+If RECOMPUTE is nonnil, recomputes branch for SOURCE.  If
+RECOMPUTE is the symbol `recursive', recompute recursively all
+nested sources encountered."
+  (unless (> (length source) 1)
+    (error "Should be a list with at least 2 elements: %s" source))
+  (let* ((branch-info (alist-get source regexp-opt-branches-alist))
+	 (recur (when (eq recompute 'recursive) 'recursive))
+	 (last-sink (caar (last branch-info)))) ; reuse last-sink from
+					; previous computation
+					; to limit path following
+    (when (or recompute
+	      (not branch-info))
+      ;;
+      ;; compute branch-info anew
+      ;;
+      (let (path paths
+	    node nodes
+	    sink sinks
+	    intersections)
+	(dolist (arrow source) ; looping in all arrows of each source
+	  (setq node (cdr arrow)
+		path (list node))
+	  (while (and node (not (eq node last-sink)))
+	    (setq node (if (= (length node) 1)
+			   (cdar node) ; 1 arrow, follow arrow
+			 ;; a source; its most external sink is the car
+			 ;; of last branch-info
+			 (caar (last (regexp-opt-branch node recur)))))
+	    (push node path))
+	  ;; save on paths list
+	  (push (nreverse path) paths))
+	;; Compute intersections of all paths to compute sinks.
+	;;
+	;; To detect the nesting, we sort by intersection length: the
+	;; most external, shorter is the intersection.
+	(setq intersections (sort (regexp-opt-all-intersections t paths)
+				  (lambda (x y) (> (length x) (length y))))
+	      sinks (regexp-opt-uniq (mapcar 'car intersections))
+	      ;; make branch-info an alist in same order that sinks
+	      branch-info (mapcar #'list sinks))
+	;; Now select which nodes are in corresponding branches
+	(dolist (path paths)
+	  (setq nodes nil)
+	  (while path
+	    (while (and path (not (memq (car path) sinks)))
+	      (push (pop path) nodes))
+	    (when path ; means (car path) is in `sinks'
+	      (setq sink (pop path))
+	      ;; add elements of `nodes' to assq of `sink' in `branch-info'
+	      (setf (alist-get sink branch-info)
+		    (append nodes (alist-get sink branch-info)))
+	      ;; this sink is a node for next sink, if any
+	      (setq nodes (list sink)))))
+	;; eliminate duplicated nodes in branches
+	(dolist (branch branch-info)
+	  (setcdr branch (regexp-opt-uniq (cdr branch))))
+	;; end of branch-info computation
+	;;
+	;; save branch-info to `regexp-opt-branches-alist'
+	(setf (alist-get source regexp-opt-branches-alist) branch-info)))
+    branch-info))
+
+(defun regexp-opt-typed-regexp (node)
+  "Return (REGEXP . TYPE) corresponding to NODE.
+REGEXP is a string and TYPE is one of the symbols:
+empty    = empty;
+single   = single char or charset or ? construction;
+group    = \\(?: ... \\);
+sequence = sequence of singles.
+
+A regexp of type `single' or `group' can be followed by * or +
+contructions."
+  (let ((re "")
+	(type 'empty)
+	re-type-lastsink type-s)
+    (while node
+      (if (= (length node) 1)
+	  ;; 1 arrow, follow it
+	  (if (= (caar node) 0) ; epsilon arrow, skip node
+	      (setq node (cdar node))
+	    (setq re (concat re (regexp-quote (char-to-string (caar node))))
+		  type (if (eq type 'empty) 'single 'sequence)
+		  node (cdar node)))
+	;; else, a source
+	(setq re-type-lastsink (regexp-opt-typed-regexp-source node)
+	      ;; re-type-lastsink = (RE TYPE LAST-SINK) for source NODE
+	      re (concat re (car re-type-lastsink))
+	      type-s (cadr re-type-lastsink)
+	      type (cond ((eq type 'empty) type-s)
+			 ((eq type-s 'empty) type)
+			 (t 'sequence))
+	      node (caddr re-type-lastsink))))
+    (cons re type)))
+
+(defun regexp-opt-typed-regexp-source (source)
+  "Return (REGEXP TYPE LAST-SINK) corresponding to SOURCE.
+REGEXP and TYPE are as in `regexp-opt-typed-regexp'."
+  ;; Idea of implementation:
+  ;;
+  ;; A source can have nested sources.  The most nested sources
+  ;; do not depend on other sources, so starting with the most
+  ;; nested sources to the most external, process each source.
+  ;; The regexp must be the regexp for the most external source
+  ;; and sink.
+  ;;
+  ;; For each source, the most nested sinks do not depend on
+  ;; others, only on arrow paths.  So we process all arrow paths
+  ;; first and then resolve the sinks in order from the most
+  ;; nested to the most external.
+  (let* ((branch-info (regexp-opt-branch source))
+	 ;; sinks are also stored in `branch-info'
+	 (sinks (mapcar #'car branch-info))
+	 (last-sink (car (last sinks)))
+	 (sink-chars-alist nil)
+	 (sink-re-alist nil)
+	 ;; `chars-node-list' is a list of (LIST-OBJS . SUBNODE)
+	 ;; if at an arrow, LIST-OBJS is (list CHAR)
+	 ;; if a sink, LIST-OBJS is (list SINK) and we get the regexp
+	 ;; from association of SINK in sink-re-alist.
+	 ;;
+	 ;; populate `chars-node-list' in order, first arrows, then
+	 ;; sinks, from the most nested to the most external (except
+	 ;; the last, that we don't follow)
+	 (chars-node-list (append (mapcar (lambda (arrow)
+					    (cons (list (car arrow))
+						  (cdr arrow)))
+					  source)
+				  (mapcar (lambda (sink)
+					    (cons (list sink) sink))
+					  (butlast sinks))))
+	 re-type)
+    ;; Follow paths until a sink is found.
+    ;; Then save this path to sink-chars-alist for this former sink.
+    (dolist (chars-node chars-node-list)
+      (let* ((chars (car chars-node))
+	     (node (cdr chars-node))
+	     (sinkp (not (characterp (car chars))))
+	     re-type-lastsink)
+	;; allow start from an original sink
+	(while (or sinkp (not (memq node sinks)))
+	  (setq sinkp nil)
+	  ;; follow path
+	  (if (= (length node) 1)
+	      (setq chars (cons (caar node) chars)  ; save char to chars
+		    node (cdar node))  ; follow arrow
+	    ;; means it is a source -- recursion
+	    (setq re-type-lastsink
+		  (regexp-opt-typed-regexp-source node))
+	    ;; save typed-regexp to chars
+	    (push (cons (car re-type-lastsink)
+			(cadr re-type-lastsink))
+		  chars)
+	    ;; node <- last-sink
+	    (setq node (caddr re-type-lastsink))))
+	;; node is a sink, save chars with node as key
+	(push (nreverse chars) (alist-get node sink-chars-alist))))
+    ;;
+    ;; now process each sink of this source in order
+    (dolist (sink sinks)
+      (let* ((list-chars (alist-get sink sink-chars-alist))
+	     (singles nil)
+	     (compounds nil)
+	     (question (member '(0) list-chars))
+	     re type
+	     re-obj type-obj aux)
+	(when question
+	  (setq list-chars (delete '(0) list-chars)))
+	;; eliminate all epsilon arrows left from all chars
+	(setq list-chars (mapcar (lambda (chars) (delq 0 chars)) list-chars))
+	;; separate all alone chars in singles
+	(dolist (chars list-chars)
+	  (setq re "" type 'empty)
+	  ;;
+	  ;; so far, chars only contains chars, sinks or typed-regexps
+	  ;; (cdr is symbol)
+	  (if (= (length chars) 1)
+	      (cond ((characterp (car chars)) ; 1 char => singles
+		     (push (car chars) singles))
+		    ((and (cdar chars)
+			  (symbolp (cdar chars))) ; typed-regexp => compounds
+		     (push (car chars) compounds))
+		    (t
+		     ;; a sink, for sure a most nested, already processed
+		     ;; the association is a typed-regexp
+		     (push (alist-get (car chars) sink-re-alist) compounds)))
+	    ;;
+	    (dolist (obj chars)
+	      (cond ((characterp obj)
+		     (setq re-obj (regexp-quote (char-to-string obj))
+			   type-obj 'single))
+		    ;; typed-regexp
+		    ((and (cdr obj) (symbolp (cdr obj)))
+		     (setq re-obj (car obj)
+			   type-obj (cdr obj)))
+		    ;; obj=sink -> lookup in `sink-re-alist'
+		    (t (setq aux (alist-get obj sink-re-alist)
+			     re-obj (car aux)
+			     type-obj (cdr aux))))
+	      (setq re (concat re re-obj)
+		    type (cond ((eq type 'empty) type-obj)
+			       ((eq type-obj 'empty) type)
+			       (t 'sequence))))
+	    (push (cons re type) compounds)))
+	compounds
+	;; process singles into compounds
+	(cond ((> (length singles) 1)
+	       (push (cons (regexp-opt-charset singles) 'single)
+		     compounds))
+	      ((= (length singles) 1)
+	       (push (cons (regexp-quote (char-to-string (car singles)))
+			   'single)
+		     compounds)))
+	;; output according to compounds and question
+	(cond ((> (length compounds) 1)
+	       (setq re (concat "\\(?:" (mapconcat #'car compounds "\\|") "\\)")
+		     type 'group)
+	       (when question (setq re (concat re "?")
+				    type 'single)))
+	      ;;
+	      ((= (length compounds) 1)
+	       (setq re (caar compounds)
+		     type (cdar compounds))
+	       (cond ((and question (eq type 'single))
+		      (setq re (concat re "?")))
+		     (question (setq re (concat "\\(?:" re "\\)?")
+				     type 'single))))
+	      ((= (length compounds) 0) ; this may happen with node (0)
+	       (setq re ""            ; because question becomes nonnil
+		     type 'empty)))   ; and compounds is empty
+	;; save (RE . TYPE) to sink-re-alist
+	(setf (alist-get sink sink-re-alist) (cons re type))))
+    ;; return (RE TYPE LAST-SINK)
+    (setq re-type (alist-get last-sink sink-re-alist))
+    (list (car re-type) (cdr re-type) last-sink)))
+
+(defun regexp-opt-remove-source (source sources)
+  "Return SOURCES with SOURCE and all nested sources removed."
+  (dolist (branch (regexp-opt-branch source))
+    ;; each branch = (SINK . NODES)
+    (dolist (node (cdr branch))
+      (when (> (length node) 1)
+  	(setq sources (regexp-opt-remove-source node sources)))))
+  (delq source sources))
+
 (defun regexp-opt-charset (chars)
   "Return a regexp to match a character in CHARS.
 CHARS should be a list of characters."
   ;; The basic idea is to find character ranges.  Also we take care in the
   ;; position of character set meta characters in the character set regexp.
   ;;
-  (let* ((charmap (make-char-table 'regexp-opt-charset))
-	 (start -1) (end -2)
-	 (charset "")
-	 (bracket "") (dash "") (caret ""))
+  (let* ((charmap (make-char-table 'case-table))
+         (start -1) (end -2)
+         (charset "")
+         (bracket "") (dash "") (caret ""))
     ;;
     ;; Make a character map but extract character set meta characters.
     (dolist (char chars)
       (cond
        ((eq char ?\])
-	(setq bracket "]"))
+        (setq bracket "]"))
        ((eq char ?^)
-	(setq caret "^"))
+        (setq caret "^"))
        ((eq char ?-)
-	(setq dash "-"))
+        (setq dash "-"))
        (t
-	(aset charmap char t))))
+        (aset charmap char t))))
     ;;
     ;; Make a character set from the map using ranges where applicable.
     (map-char-table
      (lambda (c v)
        (when v
-	 (if (consp c)
-	     (if (= (1- (car c)) end) (setq end (cdr c))
-	       (if (> end (+ start 2))
-		   (setq charset (format "%s%c-%c" charset start end))
-		 (while (>= end start)
-		   (setq charset (format "%s%c" charset start))
-		   (setq start (1+ start))))
-	       (setq start (car c) end (cdr c)))
-	   (if (= (1- c) end) (setq end c)
-	     (if (> end (+ start 2))
-	       (setq charset (format "%s%c-%c" charset start end))
-	     (while (>= end start)
-	       (setq charset (format "%s%c" charset start))
-	       (setq start (1+ start))))
-	     (setq start c end c)))))
+         (if (consp c)
+             (if (= (1- (car c)) end) (setq end (cdr c))
+               (if (> end (+ start 2))
+                   (setq charset (format "%s%c-%c" charset start end))
+                 (while (>= end start)
+                   (setq charset (format "%s%c" charset start))
+                   (setq start (1+ start))))
+               (setq start (car c) end (cdr c)))
+           (if (= (1- c) end) (setq end c)
+             (if (> end (+ start 2))
+               (setq charset (format "%s%c-%c" charset start end))
+             (while (>= end start)
+               (setq charset (format "%s%c" charset start))
+               (setq start (1+ start))))
+             (setq start c end c)))))
      charmap)
     (when (>= end start)
       (if (> end (+ start 2))
-	  (setq charset (format "%s%c-%c" charset start end))
-	(while (>= end start)
-	  (setq charset (format "%s%c" charset start))
-	  (setq start (1+ start)))))
+          (setq charset (format "%s%c-%c" charset start end))
+        (while (>= end start)
+          (setq charset (format "%s%c" charset start))
+          (setq start (1+ start)))))
     ;;
     ;; Make sure a caret is not first and a dash is first or last.
     (if (and (string-equal charset "") (string-equal bracket ""))
-	(if (string-equal dash "")
+        (if (string-equal dash "")
             "\\^"                       ; [^] is not a valid regexp
           (concat "[" dash caret "]"))
       (concat "[" bracket charset caret dash "]"))))
 
+;; Due to a bug with nil as elements, that is fixed only in Emacs
+;; 27.2, I reimplemented `seq-intersection', `seq-uniq' and
+;; `seq-difference'.  Later, if seq is loaded by default, we could
+;; eliminate these functions, changing code accordingly (notice that
+;; TESTFN is `eq' for intersection and uniq!)
+
+(defun regexp-opt-intersection (list1 list2)
+  "Return a list of the elements that appear in both LIST1 and LIST2.
+Equality is defined by `eq'."
+  (let (intersection)
+    (dolist (x list1 (nreverse intersection))
+      (when (memq x list2) (push x intersection)))))
+
+(defun regexp-opt-uniq (list)
+  "Return a list of the elements of LIST with duplicates removed.
+Equality is defined by `eq'."
+  (let (result)
+    (dolist (el list (nreverse result))
+      (unless (memq el result) (push el result)))))
+
+(defun regexp-opt-difference (list1 list2 &optional testfn)
+  "My version of seq-difference, for use with Emacs 26.1."
+  (let ((fn (if testfn testfn #'equal))
+	diff s2)
+    (dolist (x list1 (nreverse diff))
+      (setq s2 list2)
+      (while (and s2 (not (funcall fn x (car s2))))
+	(setq s2 (cdr s2)))
+      (unless s2
+	(push x diff)))))
+
+(defun regexp-opt-all-intersections (x rest)
+  "Return the list of all possible intersections.
+Use (regexp-opt-all-intersections t REST) for the list of all
+possible intersections of combinations of elements of REST with
+at least 2 elements.
+
+For example, if A, B, C, D are lists,
+ (regexp-opt-all-intersections t (A B C D))
+  -> (A∩B A∩B∩C A∩B∩C∩D A∩B∩D A∩C A∩C∩D A∩D B∩C B∩C∩D B∩D C∩D)
+in this order, where X∩Y is computed comparing elements with `eq'.
+
+If ELT is a list, return the list of intersections of ELT with
+all combinations of elements of REST.  This feature is used for
+recursion."
+  ;; Idea for implementation:
+  ;;
+  ;; called with ELT=A, A is a list -> all intersections that have A
+  ;; (regexp-opt-all-intersections A '(B C D)) returns
+  ;;   (A∩B) + (regexp-opt-all-intersections A∩B '(C D))
+  ;;   + (A∩C) + (regexp-opt-all-intersections A∩C '(D))
+  ;;   + (A∩D)
+  ;; = (A∩B)
+  ;;   + (A∩B∩C) + (regexp-opt-all-intersections A∩B∩C '(D)) + (A∩B∩D)
+  ;;   + (A∩C) + (A∩C∩D) + (A∩D)
+  ;; = (A∩B A∩B∩C A∩B∩C∩D A∩B∩D A∩C A∩C∩D A∩D)
+  ;;
+  ;; Called with ELT not a list
+  ;; (regexp-opt-all-intersections t '(A B C D)) returns
+  ;;   (regexp-opt-all-intersections A '(B C D))
+  ;;   + (regexp-opt-all-intersections B '(C D))
+  ;;   + (regexp-opt-all-intersections C '(D))
+  ;;   = (A∩B A∩B∩C A∩B∩C∩D A∩B∩D A∩C A∩C∩D A∩D)
+  ;;     + (B∩C B∩C∩D B∩D)
+  ;;     + (C∩D)
+  ;;   = (A∩B A∩B∩C A∩B∩C∩D A∩B∩D A∩C A∩C∩D A∩D B∩C B∩C∩D B∩D C∩D)
+  ;; ... that is the set of all possible intersections of elements of
+  ;; '(A B C D) where A, B, C, D are lists
+  (if (listp x)
+      (mapcan (lambda (y)
+		(let ((x∩y (regexp-opt-intersection x y))
+		      (tail (cdr (memq y rest))))
+		  (if tail
+		      (cons x∩y
+			    (regexp-opt-all-intersections x∩y tail))
+		    (list x∩y))))
+	      rest)
+    (mapcan (lambda (l)
+	      (let ((tail (cdr (memq l rest))))
+		(when tail (regexp-opt-all-intersections l tail))))
+	    rest)))
+
+;; Debuging: regexp-opt-print-node prints NODE in human readable form.
+;; If ou have graphviz installed, get a nice graph with
+;;   (regexp-opt-print-node node 'graphviz)
+;; or just use
+;;   (regexp-opt-print-node node)
+;; for a list of arrows to numbered nodes.
+
+;; (defun regexp-opt-print-node (node &optional driver ext tempfile-no-ext)
+;;   "Print structure of NODE in human readable form.
+;; Optional second argument DRIVER should be `buffer' (default if
+;; nil) or `graphviz'. If DRIVER is `buffer', insert a list of
+;; arrows in current buffer.  If DRIVER is `graphviz', pass node
+;; structure to graphviz and display the output image in a buffer.
+;; For graphviz driver, other arguments EXT and TEMPFILE-NO-EXT
+;; shuold be strings where EXT (default to \"svg\") is the image
+;; extension to be output by graphviz and TEMPFILE-NO-EXT (default
+;; to \"re-temp\" is the filename without extension for saving and
+;; output for graphviz."
+;;   (let* ((to-be-printed (list node))
+;; 	 (graphviz (eq driver 'graphviz))
+;; 	 (buf (when graphviz (get-buffer-create "*regexp-dot*")))
+;; 	 (last-buffer (current-buffer))
+;; 	 (compilation-ask-about-save nil)
+;; 	 printed-nodes nodes
+;; 	 from to char
+;; 	 from-num to-num
+;; 	 input output
+;; 	 compilation-going-on)
+;;     (unless ext (setq ext "png"))
+;;     (unless tempfile-no-ext (setq tempfile-no-ext "re-temp"))
+;;     (when graphviz
+;;       (setq input (concat tempfile-no-ext ".dot")
+;; 	    output (concat tempfile-no-ext "." ext))
+;;       (set-buffer buf)
+;;       (delete-region (point-min) (point-max))
+;;       (insert "digraph regexpfa {\nrankdir=LR;\nnode [shape = circle];"))
+;;     (while to-be-printed
+;;       (setq from (pop to-be-printed))
+;;       (unless (memq from printed-nodes)
+;; 	(push from printed-nodes)
+;; 	(unless (memq from nodes) (push from nodes))
+;; 	(setq from-num (length (memq from nodes)))
+;; 	(dolist (arrow from)
+;; 	  (setq to (cdr arrow)
+;; 		char (if (= (car arrow) 0) ?ε (car arrow)))
+;; 	  (push to to-be-printed)
+;; 	  (unless (memq to nodes) (push to nodes))
+;; 	  (setq to-num (length (memq to nodes)))
+;; 	  (cond ((eq driver 'graphviz)
+;; 		 (insert (format "\n%3d -> %3d [ label = %S ];"
+;; 				 from-num to-num (char-to-string char))))
+;; 		((or (not driver) (eq driver 'buffer))
+;; 		 (insert (format "\n%3d -- %c --> %3d"
+;; 				 from-num char to-num)))))))
+;;     (when graphviz
+;;       (insert "\n}\n")
+;;       ;; write to temp, compile and display
+;;       (write-file (concat tempfile-no-ext ".dot"))
+;;       (set-buffer last-buffer)
+;;       (kill-buffer buf)
+;;       (when (get-buffer "*preview-re*")
+;;         (kill-buffer "*preview-re*"))
+;;       (setq compilation-going-on t)
+;;       (add-hook 'compilation-finish-functions
+;; 		(lambda (a b) (ignore a b) (setq compilation-going-on nil)))
+;;       (compile (concat "dot -T" ext " \"" input "\" \"-o" output "\""))
+;;       (while compilation-going-on (sleep-for 0.1))
+;;       (when (file-exists-p output)
+;;         (auto-image-file-mode 1)
+;;         (set-buffer (find-file-noselect output))
+;;         (rename-buffer "*preview-re*")
+;; 	(sleep-for 0.1)
+;;         (display-buffer (get-buffer "*preview-re*"))))))
+
 (provide 'regexp-opt)
 
 ;;; regexp-opt.el ends here