>From 872ec904253e2399bcf772f7995c363ca0f8a262 Mon Sep 17 00:00:00 2001 From: Paul Eggert Date: Fri, 19 Apr 2019 09:00:04 -0700 Subject: [PATCH] Fix regexp branches that subsume other branches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problems reported by Mattias Engdegård in: https://lists.gnu.org/r/emacs-devel/2019-04/msg00803.html * lisp/arc-mode.el (archive-rar-summarize): * lisp/eshell/em-hist.el (eshell-hist-word-designator): * lisp/info.el (Info-dir-remove-duplicates): * lisp/international/ja-dic-cnv.el (skkdic-convert-postfix) (skkdic-convert-prefix, skkdic-collect-okuri-nasi): * lisp/progmodes/cc-awk.el (c-awk-esc-pair-re): * lisp/xml.el (xml-att-type-re): Omit regexp branches that subsume other branches. * lisp/progmodes/cperl-mode.el (cperl-beautify-regexp-piece): $ and ^ aren’t simple-codes. --- lisp/arc-mode.el | 2 +- lisp/eshell/em-hist.el | 2 +- lisp/info.el | 2 +- lisp/international/ja-dic-cnv.el | 10 +++++----- lisp/progmodes/cc-awk.el | 2 +- lisp/progmodes/cperl-mode.el | 2 +- lisp/xml.el | 1 - 7 files changed, 10 insertions(+), 11 deletions(-) diff --git a/lisp/arc-mode.el b/lisp/arc-mode.el index 6a58d61a54..1c88f9a1a1 100644 --- a/lisp/arc-mode.el +++ b/lisp/arc-mode.el @@ -2019,7 +2019,7 @@ archive-rar-summarize (re-search-forward "^\\(\s+=+\s*\\)+\n") (while (looking-at (concat "^\s+[0-9.]+\s+D?-+\s+" ; Flags "\\([0-9-]+\\)\s+" ; Size - "\\([-0-9.%]+\\|-+\\)\s+" ; Ratio + "\\([-0-9.%]+\\)\s+" ; Ratio "\\([0-9a-zA-Z]+\\)\s+" ; Mode "\\([0-9-]+\\)\s+" ; Date "\\([0-9:]+\\)\s+" ; Time diff --git a/lisp/eshell/em-hist.el b/lisp/eshell/em-hist.el index 614faaa131..adb028002b 100644 --- a/lisp/eshell/em-hist.el +++ b/lisp/eshell/em-hist.el @@ -153,7 +153,7 @@ eshell-hist-event-designator :group 'eshell-hist) (defcustom eshell-hist-word-designator - "^:?\\([0-9]+\\|[$^%*]\\)?\\(\\*\\|-[0-9]*\\|[$^%*]\\)?" + "^:?\\([0-9]+\\|[$^%*]\\)?\\(-[0-9]*\\|[$^%*]\\)?" "The regexp used to identify history word designators." :type 'regexp :group 'eshell-hist) diff --git a/lisp/info.el b/lisp/info.el index f3b413a2f9..2e5f433dc8 100644 --- a/lisp/info.el +++ b/lisp/info.el @@ -1531,7 +1531,7 @@ Info-dir-remove-duplicates (save-restriction (narrow-to-region start (point)) (goto-char (point-min)) - (while (re-search-forward "^\\* \\([^:\n]+:\\(:\\|[^.\n]+\\).\\)" nil 'move) + (while (re-search-forward "^\\* \\([^:\n]+:[^.\n]+.\\)" nil 'move) ;; Fold case straight away; `member-ignore-case' here wasteful. (let ((x (downcase (match-string 1)))) (if (member x seen) diff --git a/lisp/international/ja-dic-cnv.el b/lisp/international/ja-dic-cnv.el index 578cd63a59..e721083189 100644 --- a/lisp/international/ja-dic-cnv.el +++ b/lisp/international/ja-dic-cnv.el @@ -124,7 +124,7 @@ skkdic-convert-postfix (setq l (cdr l))))) ;; Search postfix entries. - (while (re-search-forward "^[#<>?]\\(\\(\\cH\\|ー\\)+\\) " nil t) + (while (re-search-forward "^[#<>?]\\(\\cH+\\) " nil t) (let ((kana (match-string-no-properties 1)) str candidates) (while (looking-at "/[#0-9 ]*\\([^/\n]*\\)/") @@ -157,7 +157,7 @@ skkdic-convert-prefix (insert ";; Setting prefix entries.\n" "(skkdic-set-prefix\n")) (save-excursion - (while (re-search-forward "^\\(\\(\\cH\\|ー\\)+\\)[<>?] " nil t) + (while (re-search-forward "^\\(\\cH+\\)[<>?] " nil t) (let ((kana (match-string-no-properties 1)) str candidates) (while (looking-at "/\\([^/\n]+\\)/") @@ -275,11 +275,11 @@ skkdic-collect-okuri-nasi (let ((progress (make-progress-reporter "Collecting OKURI-NASI entries" (point) (point-max) nil 10))) - (while (re-search-forward "^\\(\\(\\cH\\|ー\\)+\\) \\(/\\cj.*\\)/$" + (while (re-search-forward "^\\(\\cH+\\) \\(/\\cj.*\\)/$" nil t) (let ((kana (match-string-no-properties 1)) - (candidates (skkdic-get-candidate-list (match-beginning 3) - (match-end 3)))) + (candidates (skkdic-get-candidate-list (match-beginning 2) + (match-end 2)))) (setq skkdic-okuri-nasi-entries (cons (cons kana candidates) skkdic-okuri-nasi-entries)) (progress-reporter-update progress (point)) diff --git a/lisp/progmodes/cc-awk.el b/lisp/progmodes/cc-awk.el index 70aa3c4b1f..1a67a95927 100644 --- a/lisp/progmodes/cc-awk.el +++ b/lisp/progmodes/cc-awk.el @@ -95,7 +95,7 @@ awk-mode-syntax-table ;; Emacs has in the past used \r to mark hidden lines in some fashion (and ;; maybe still does). -(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\r\\|\\'\\)") +(defconst c-awk-esc-pair-re "\\\\\\(.\\|\n\\|\\'\\)") ;; Matches any escaped (with \) character-pair, including an escaped newline. (defconst c-awk-non-eol-esc-pair-re "\\\\\\(.\\|\\'\\)") ;; Matches any escaped (with \) character-pair, apart from an escaped newline. diff --git a/lisp/progmodes/cperl-mode.el b/lisp/progmodes/cperl-mode.el index 73b55e29a5..ba007d67c0 100644 --- a/lisp/progmodes/cperl-mode.el +++ b/lisp/progmodes/cperl-mode.el @@ -7983,7 +7983,7 @@ cperl-beautify-regexp-piece "\\|" ; $ ^ "[$^]" "\\|" ; simple-code simple-code*? - "\\(\\\\.\\|[^][()#|*+?\n]\\)\\([*+{?]\\??\\)?" ; 4 5 + "\\(\\\\.\\|[^][()#|*+?$^\n]\\)\\([*+{?]\\??\\)?" ; 4 5 "\\|" ; Class "\\(\\[\\)" ; 6 "\\|" ; Grouping diff --git a/lisp/xml.el b/lisp/xml.el index b5b923f863..1f3c05f4d9 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -245,7 +245,6 @@ xml-enumerated-type-re ;; [54] AttType ::= StringType | TokenizedType | EnumeratedType ;; [55] StringType ::= 'CDATA' (defconst xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re - "\\|" xml-notation-type-re "\\|" xml-enumerated-type-re "\\)")) ;; [60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) -- 2.20.1