From df662cd9d2cf712b436a7b3d693ebf6bd189422f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Engdeg=C3=A5rd?= Date: Tue, 14 May 2019 11:43:49 +0200 Subject: [PATCH] Add standard unmatchable regexp Add `regexp-unmatchable' as a standard unmatchable regexp, defined as "\\`a\\`". Use it where such a regexp is needed, replacing slower expressions in several places. From a suggestion by Philippe Schnoebelen. * lisp/subr.el (regexp-unmatchable): New defconst. * etc/NEWS (Lisp Changes): Mention `regexp-unmatchable'. * doc/lispref/searching.texi (Regexp Functions): Document it. * lisp/emacs-lisp/regexp-opt.el (regexp-opt) * lisp/progmodes/cc-defs.el (cc-conditional-require-after-load) (c-make-keywords-re) * lisp/progmodes/cc-engine.el (c-beginning-of-statement-1) (c-forward-<>-arglist-recur, c-forward-decl-or-cast-1) (c-looking-at-decl-block) * lisp/progmodes/cc-fonts.el (c-doc-line-join-re) (c-doc-bright-comment-start-re) * lisp/progmodes/cc-langs.el (c-populate-syntax-table) (c-assignment-op-regexp) (c-block-comment-ender-regexp, c-font-lock-comment-end-skip) (c-block-comment-start-regexp, c-line-comment-start-regexp) (c-doc-comment-start-regexp, c-decl-start-colon-kwd-re) (c-type-decl-prefix-key, c-type-decl-operator-prefix-key) (c-pre-id-bracelist-key, c-enum-clause-introduction-re) (c-nonlabel-token-2-key) * lisp/progmodes/cc-mode.el (c-doc-fl-decl-start, c-doc-fl-decl-end) * lisp/progmodes/cc-vars.el (c-noise-macro-with-parens-name-re) (c-noise-macro-name-re, c-make-noise-macro-regexps) * lisp/progmodes/octave.el (octave-help-mode) * lisp/textmodes/ispell.el (ispell-non-empty-string) * lisp/vc/vc-bzr.el (vc-bzr-log-view-mode, vc-bzr-revision-completion-table) * lisp/vc/vc-git.el (vc-git-log-view-mode) * lisp/vc/vc-hg.el (vc-hg-log-view-mode) * lisp/vc/vc-mtn.el (vc-mtn-log-view-mode): Use `regexp-unmatchable'. 
--- doc/lispref/searching.texi | 7 +++++++ etc/NEWS | 4 ++++ lisp/emacs-lisp/regexp-opt.el | 4 ++-- lisp/net/ange-ftp.el | 2 +- lisp/progmodes/cc-defs.el | 6 +++--- lisp/progmodes/cc-engine.el | 10 ++++++---- lisp/progmodes/cc-fonts.el | 4 ++-- lisp/progmodes/cc-langs.el | 26 +++++++++++++------------- lisp/progmodes/cc-mode.el | 4 ++-- lisp/progmodes/cc-vars.el | 8 ++++---- lisp/progmodes/grep.el | 3 ++- lisp/progmodes/octave.el | 2 +- lisp/subr.el | 4 ++++ lisp/textmodes/ispell.el | 2 +- lisp/vc/vc-bzr.el | 5 +++-- lisp/vc/vc-git.el | 2 +- lisp/vc/vc-hg.el | 2 +- lisp/vc/vc-mtn.el | 2 +- 18 files changed, 58 insertions(+), 39 deletions(-) diff --git a/doc/lispref/searching.texi b/doc/lispref/searching.texi index 8775254dd0..24f30b4dac 100644 --- a/doc/lispref/searching.texi +++ b/doc/lispref/searching.texi @@ -1070,6 +1070,13 @@ Regexp Functions @c Internal functions: regexp-opt-group +@defvar regexp-unmatchable +This variable contains a regexp that is guaranteed not to match any +string at all. It is particularly useful as default value for +variables that may be set to a pattern that actually matches +something. +@end defvar + @node Regexp Search @section Regular Expression Searching @cindex regular expression searching diff --git a/etc/NEWS b/etc/NEWS index fc3ca1ea92..699a04b524 100644 --- a/etc/NEWS +++ b/etc/NEWS @@ -1990,6 +1990,10 @@ returns a regexp that never matches anything, which is an identity for this operation. Previously, the empty string was returned in this case. +** New constant 'regexp-unmatchable' contains a never-matching regexp. +It is a convenient and readable way to specify a regexp that should +not match anything, and is as fast as any such regexp can be. 
+ * Changes in Emacs 27.1 on Non-Free Operating Systems diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index d883752d71..00f72e284a 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -144,9 +144,9 @@ regexp-opt (sort (copy-sequence strings) 'string-lessp))) (re (cond - ;; No strings: return a\` which cannot match anything. + ;; No strings: return an unmatchable regexp. ((null strings) - (concat (or open "\\(?:") "a\\`\\)")) + (concat (or open "\\(?:") regexp-unmatchable "\\)")) ;; If we cannot reorder, give up all attempts at ;; optimisation. There is room for improvement (Bug#34641). ((and keep-order (regexp-opt--contains-prefix sorted-strings)) diff --git a/lisp/net/ange-ftp.el b/lisp/net/ange-ftp.el index 5af9ea75ed..b0a1e1799f 100644 --- a/lisp/net/ange-ftp.el +++ b/lisp/net/ange-ftp.el @@ -1989,7 +1989,7 @@ internal-ange-ftp-mode (make-local-variable 'comint-password-prompt-regexp) ;; This is a regexp that can't match anything. ;; ange-ftp has its own ways of handling passwords. - (setq comint-password-prompt-regexp "\\`a\\`") + (setq comint-password-prompt-regexp regexp-unmatchable) (make-local-variable 'paragraph-start) (setq paragraph-start comint-prompt-regexp)) diff --git a/lisp/progmodes/cc-defs.el b/lisp/progmodes/cc-defs.el index cd4ed6b352..d20e3ef32d 100644 --- a/lisp/progmodes/cc-defs.el +++ b/lisp/progmodes/cc-defs.el @@ -81,7 +81,7 @@ (progn (require 'font-lock) (let (font-lock-keywords) - (font-lock-compile-keywords '("a\\`")) ; doesn't match anything. + (font-lock-compile-keywords (list regexp-unmatchable)) font-lock-keywords)))) @@ -1890,8 +1890,8 @@ c-make-keywords-re ;; Produce a regexp that doesn't match anything. 
(if adorn - "\\(a\\`\\)" - "a\\`"))) + (concat "\\(" regexp-unmatchable "\\)") + regexp-unmatchable))) (put 'c-make-keywords-re 'lisp-indent-function 1) diff --git a/lisp/progmodes/cc-engine.el b/lisp/progmodes/cc-engine.el index ed8310d0e6..41bab270da 100644 --- a/lisp/progmodes/cc-engine.el +++ b/lisp/progmodes/cc-engine.el @@ -907,7 +907,7 @@ c-beginning-of-statement-1 stack ;; Regexp which matches "for", "if", etc. (cond-key (or c-opt-block-stmt-key - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) ;; Return value. (ret 'same) ;; Positions of the last three sexps or bounds we've stopped at. @@ -7646,7 +7646,9 @@ c-forward-<>-arglist-recur (progn (c-forward-syntactic-ws) (when (or (and c-record-type-identifiers all-types) - (not (equal c-inside-<>-type-key "\\(a\\`\\)"))) + (not (equal c-inside-<>-type-key + (concat + "\\(" regexp-unmatchable "\\)")))) (c-forward-syntactic-ws) (cond ((eq (char-after) ??) @@ -9253,7 +9255,7 @@ c-forward-decl-or-cast-1 ;; Skip over type decl prefix operators. (Note similar code in ;; `c-forward-declarator'.) (if (and c-recognize-typeless-decls - (equal c-type-decl-prefix-key "a\\`")) ; Regexp which doesn't match + (equal c-type-decl-prefix-key regexp-unmatchable)) (when (eq (char-after) ?\() (progn (setq paren-depth (1+ paren-depth)) @@ -10886,7 +10888,7 @@ c-looking-at-decl-block ;; legal because it's part of a "compound keyword" like ;; "enum class". Of course, if c-after-brace-list-key ;; is nil, we can skip the test. - (or (equal c-after-brace-list-key "a\\`") ; Regexp which doesn't match + (or (equal c-after-brace-list-key regexp-unmatchable) (save-match-data (save-excursion (not diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el index 5f09be60a6..b3a9dd480b 100644 --- a/lisp/progmodes/cc-fonts.el +++ b/lisp/progmodes/cc-fonts.el @@ -2580,14 +2580,14 @@ pike-font-lock-keywords ;;; Doc comments. 
-(defvar c-doc-line-join-re "a\\`") +(defvar c-doc-line-join-re regexp-unmatchable) ;; Matches a join of two lines in a doc comment. ;; This should not be changed directly, but instead set by ;; `c-setup-doc-comment-style'. This variable is used in `c-find-decl-spots' ;; in (e.g.) autodoc style comments to bridge the gap between a "@\n" at an ;; EOL and the token following "//!" on the next line. -(defvar c-doc-bright-comment-start-re "a\\`") +(defvar c-doc-bright-comment-start-re regexp-unmatchable) ;; Matches the start of a "bright" comment, one whose contents may be ;; fontified by, e.g., `c-font-lock-declarations'. diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el index 30da10a6c0..9d2f689e58 100644 --- a/lisp/progmodes/cc-langs.el +++ b/lisp/progmodes/cc-langs.el @@ -945,7 +945,7 @@ c-populate-syntax-table (c-make-keywords-re 'appendable (c-lang-const c-cpp-include-directives)) "[ \t]*") - "a\\`")) ; Doesn't match anything + regexp-unmatchable)) (c-lang-defvar c-cpp-include-key (c-lang-const c-cpp-include-key)) (c-lang-defconst c-opt-cpp-macro-define @@ -1331,7 +1331,7 @@ 'c-opt-op-identitier-prefix (c--set-difference (c-lang-const c-assignment-operators) '("=") :test 'string-equal))) - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-assignment-op-regexp (c-lang-const c-assignment-op-regexp)) @@ -1554,7 +1554,7 @@ 'c-opt-op-identitier-prefix ;; language) t (if (c-lang-const c-block-comment-ender) (regexp-quote (c-lang-const c-block-comment-ender)) - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-block-comment-ender-regexp (c-lang-const c-block-comment-ender-regexp)) @@ -1565,7 +1565,7 @@ 'c-opt-op-identitier-prefix ;; `font-lock-comment-delimiter-face'. t (if (c-lang-const c-block-comment-ender) (concat "[ \t]*" (c-lang-const c-block-comment-ender-regexp)) - "a\\`")) ; Doesn't match anything. 
+ regexp-unmatchable)) (c-lang-setvar font-lock-comment-end-skip (c-lang-const c-font-lock-comment-end-skip)) @@ -1584,7 +1584,7 @@ 'c-opt-op-identitier-prefix ;; language) t (if (c-lang-const c-block-comment-starter) (regexp-quote (c-lang-const c-block-comment-starter)) - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-block-comment-start-regexp (c-lang-const c-block-comment-start-regexp)) @@ -1593,7 +1593,7 @@ 'c-opt-op-identitier-prefix ;; language; it does in all 7 CC Mode languages). t (if (c-lang-const c-line-comment-starter) (regexp-quote (c-lang-const c-line-comment-starter)) - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-line-comment-start-regexp (c-lang-const c-line-comment-start-regexp)) @@ -1628,7 +1628,7 @@ 'c-opt-op-identitier-prefix (c-lang-defconst c-doc-comment-start-regexp "Regexp to match the start of documentation comments." - t "a\\`" ; Doesn't match anything. + t regexp-unmatchable ;; From font-lock.el: `doxygen' uses /*! while others use /**. (c c++ objc) "/\\*[*!]" java "/\\*\\*" @@ -3112,7 +3112,7 @@ 'c-opt-op-identitier-prefix "Regexp matching a keyword that is followed by a colon, where the whole construct can precede a declaration. E.g. \"public:\" in C++." - t "a\\`" ; Doesn't match anything. + t regexp-unmatchable c++ (c-make-keywords-re t (c-lang-const c-protection-kwds))) (c-lang-defvar c-decl-start-colon-kwd-re (c-lang-const c-decl-start-colon-kwd-re)) @@ -3309,7 +3309,7 @@ 'c-opt-op-identitier-prefix t (if (c-lang-const c-type-modifier-kwds) (concat (regexp-opt (c-lang-const c-type-modifier-kwds) t) "\\>") ;; Default to a regexp that never matches. - "a\\`") + regexp-unmatchable) ;; Check that there's no "=" afterwards to avoid matching tokens ;; like "*=". (c objc) (concat "\\(" @@ -3347,7 +3347,7 @@ 'c-opt-op-identitier-prefix as the end of the operator. Identifier syntax is in effect when this is matched \(see `c-identifier-syntax-table')." 
t ;; Default to a regexp that never matches. - "a\\`" + regexp-unmatchable ;; Check that there's no "=" afterwards to avoid matching tokens ;; like "*=". (c objc) (concat "\\(\\*\\)" @@ -3506,7 +3506,7 @@ 'c-opt-op-identitier-prefix (c-lang-defconst c-pre-id-bracelist-key "A regexp matching tokens which, preceding an identifier, signify a bracelist. " - t "a\\`" ; Doesn't match anything. + t regexp-unmatchable c++ "new\\([^[:alnum:]_$]\\|$\\)\\|&&?\\(\\S.\\|$\\)") (c-lang-defvar c-pre-id-bracelist-key (c-lang-const c-pre-id-bracelist-key)) @@ -3562,7 +3562,7 @@ 'c-opt-op-identitier-prefix ;; before the '{' of the enum list, to avoid searching too far. "[^][{};/#=]*" "{") - "a\\`")) ; Doesn't match anything. + regexp-unmatchable)) (c-lang-defvar c-enum-clause-introduction-re (c-lang-const c-enum-clause-introduction-re)) @@ -3703,7 +3703,7 @@ 'c-opt-op-identitier-prefix "Regexp matching things that can't occur two symbols before a colon in a label construct. This catches C++'s inheritance construct \"class foo : bar\". Only used if `c-recognize-colon-labels' is set." - t "a\\`" ; Doesn't match anything. + t regexp-unmatchable c++ (c-make-keywords-re t '("class"))) (c-lang-defvar c-nonlabel-token-2-key (c-lang-const c-nonlabel-token-2-key)) diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el index bd62fc754a..e4ff9f019d 100644 --- a/lisp/progmodes/cc-mode.el +++ b/lisp/progmodes/cc-mode.el @@ -1825,7 +1825,7 @@ c-doc-fl-decl-start ;; by `c-doc-line-join-re'), return the position of the first line of the ;; sequence. Otherwise, return nil. Point has no significance at entry to ;; and exit from this function. - (when (not (equal c-doc-line-join-re "a\\`")) + (when (not (equal c-doc-line-join-re regexp-unmatchable)) (goto-char pos) (back-to-indentation) (and (or (looking-at c-comment-start-regexp) @@ -1842,7 +1842,7 @@ c-doc-fl-decl-end ;; marker (as defined by `c-doc-line-join-re), return the position of ;; the BOL at the end of the sequence. 
Otherwise, return nil. Point has no ;; significance at entry to and exit from this function. - (when (not (equal c-doc-line-join-re "a\\`")) + (when (not (equal c-doc-line-join-re regexp-unmatchable)) (goto-char pos) (back-to-indentation) (let ((here (point))) diff --git a/lisp/progmodes/cc-vars.el b/lisp/progmodes/cc-vars.el index 6e8acd4c0d..b818bced73 100644 --- a/lisp/progmodes/cc-vars.el +++ b/lisp/progmodes/cc-vars.el @@ -1648,9 +1648,9 @@ c-asymmetry-fontification-flag :group 'c) ;; Initialize the next two to a regexp which never matches. -(defvar c-noise-macro-with-parens-name-re "a\\`") +(defvar c-noise-macro-with-parens-name-re regexp-unmatchable) (make-variable-buffer-local 'c-noise-macro-with-parens-name-re) -(defvar c-noise-macro-name-re "a\\`") +(defvar c-noise-macro-name-re regexp-unmatchable) (make-variable-buffer-local 'c-noise-macro-name-re) (defcustom c-noise-macro-names nil @@ -1682,7 +1682,7 @@ c-make-noise-macro-regexps ;; Convert `c-noise-macro-names' and `c-noise-macro-with-parens-names' into ;; `c-noise-macro-name-re' and `c-noise-macro-with-parens-name-re'. (setq c-noise-macro-with-parens-name-re - (cond ((null c-noise-macro-with-parens-names) "a\\`") ; Never matches. + (cond ((null c-noise-macro-with-parens-names) regexp-unmatchable) ((consp c-noise-macro-with-parens-names) (concat (regexp-opt c-noise-macro-with-parens-names t) "\\([^[:alnum:]_$]\\|$\\)")) @@ -1691,7 +1691,7 @@ c-make-noise-macro-regexps (t (error "c-make-noise-macro-regexps: \ c-noise-macro-with-parens-names is invalid: %s" c-noise-macro-with-parens-names)))) (setq c-noise-macro-name-re - (cond ((null c-noise-macro-names) "a\\`") ; Never matches anything. 
+ (cond ((null c-noise-macro-names) regexp-unmatchable) ((consp c-noise-macro-names) (concat (regexp-opt c-noise-macro-names t) "\\([^[:alnum:]_$]\\|$\\)")) diff --git a/lisp/progmodes/grep.el b/lisp/progmodes/grep.el index 85f9078d46..79178c4346 100644 --- a/lisp/progmodes/grep.el +++ b/lisp/progmodes/grep.el @@ -837,7 +837,8 @@ grep-mode grep-mode-line-matches) ;; compilation-directory-matcher can't be nil, so we set it to a regexp that ;; can never match. - (set (make-local-variable 'compilation-directory-matcher) '("\\`a\\`")) + (set (make-local-variable 'compilation-directory-matcher) + (list regexp-unmatchable)) (set (make-local-variable 'compilation-process-setup-function) 'grep-process-setup) (set (make-local-variable 'compilation-disable-input) t) diff --git a/lisp/progmodes/octave.el b/lisp/progmodes/octave.el index 52e5fd477f..8a7e24e5ad 100644 --- a/lisp/progmodes/octave.el +++ b/lisp/progmodes/octave.el @@ -1691,7 +1691,7 @@ octave-help-mode (eval-and-compile (require 'help-mode)) ;; Don't highlight `EXAMPLE' as elisp symbols by using a regexp that ;; can never match. - (setq-local help-xref-symbol-regexp "x\\`")) + (setq-local help-xref-symbol-regexp regexp-unmatchable)) (defun octave-help (fn) "Display the documentation of FN." diff --git a/lisp/subr.el b/lisp/subr.el index be21dc67a0..05fb9fea68 100644 --- a/lisp/subr.el +++ b/lisp/subr.el @@ -5544,4 +5544,8 @@ flatten-tree ;; for discoverability: (defalias 'flatten-list 'flatten-tree) +;; The initial anchoring is for better performance in searching matches. 
+(defconst regexp-unmatchable "\\`a\\`" + "Standard regexp guaranteed not to match any string at all.") + ;;; subr.el ends here diff --git a/lisp/textmodes/ispell.el b/lisp/textmodes/ispell.el index 6553a2799b..0c5e6a183b 100644 --- a/lisp/textmodes/ispell.el +++ b/lisp/textmodes/ispell.el @@ -4016,7 +4016,7 @@ ispell-message (defun ispell-non-empty-string (string) (if (or (not string) (string-equal string "")) - "\\'\\`" ; An unmatchable string if string is null. + regexp-unmatchable (regexp-quote string))) diff --git a/lisp/vc/vc-bzr.el b/lisp/vc/vc-bzr.el index ab5a449cd3..ee1646cae5 100644 --- a/lisp/vc/vc-bzr.el +++ b/lisp/vc/vc-bzr.el @@ -702,7 +702,7 @@ vc-bzr-log-view-mode (remove-hook 'log-view-mode-hook 'vc-bzr-log-view-mode) ;Deactivate the hack. (require 'add-log) (set (make-local-variable 'log-view-per-file-logs) nil) - (set (make-local-variable 'log-view-file-re) "\\`a\\`") + (set (make-local-variable 'log-view-file-re) regexp-unmatchable) (set (make-local-variable 'log-view-message-re) (if (eq vc-log-view-type 'short) "^ *\\([0-9.]+\\): \\(.*?\\)[ \t]+\\([0-9]\\{4\\}-[0-9]\\{2\\}-[0-9]\\{2\\}\\)\\( \\[merge\\]\\)?" @@ -1319,7 +1319,8 @@ vc-bzr-revision-completion-table ((string-match "\\`annotate:" string) (completion-table-with-context (substring string 0 (match-end 0)) - (apply-partially #'completion-table-with-terminator '(":" . "\\`a\\`") + (apply-partially #'completion-table-with-terminator + (cons ":" regexp-unmatchable) #'completion-file-name-table) (substring string (match-end 0)) pred action)) diff --git a/lisp/vc/vc-git.el b/lisp/vc/vc-git.el index 192e6cf68f..61c13026cc 100644 --- a/lisp/vc/vc-git.el +++ b/lisp/vc/vc-git.el @@ -1087,7 +1087,7 @@ log-view-expanded-log-entry-function (define-derived-mode vc-git-log-view-mode log-view-mode "Git-Log-View" (require 'add-log) ;; We need the faces add-log. ;; Don't have file markers, so use impossible regexp. 
- (set (make-local-variable 'log-view-file-re) "\\`a\\`") + (set (make-local-variable 'log-view-file-re) regexp-unmatchable) (set (make-local-variable 'log-view-per-file-logs) nil) (set (make-local-variable 'log-view-message-re) (if (not (eq vc-log-view-type 'long)) diff --git a/lisp/vc/vc-hg.el b/lisp/vc/vc-hg.el index d3f132dae7..876d824cea 100644 --- a/lisp/vc/vc-hg.el +++ b/lisp/vc/vc-hg.el @@ -444,7 +444,7 @@ log-view-expanded-log-entry-function (define-derived-mode vc-hg-log-view-mode log-view-mode "Hg-Log-View" (require 'add-log) ;; we need the add-log faces - (set (make-local-variable 'log-view-file-re) "\\`a\\`") + (set (make-local-variable 'log-view-file-re) regexp-unmatchable) (set (make-local-variable 'log-view-per-file-logs) nil) (set (make-local-variable 'log-view-message-re) (if (eq vc-log-view-type 'short) diff --git a/lisp/vc/vc-mtn.el b/lisp/vc/vc-mtn.el index f0b12489c1..91cc28021c 100644 --- a/lisp/vc/vc-mtn.el +++ b/lisp/vc/vc-mtn.el @@ -240,7 +240,7 @@ log-view-per-file-logs (define-derived-mode vc-mtn-log-view-mode log-view-mode "Mtn-Log-View" ;; Don't match anything. - (set (make-local-variable 'log-view-file-re) "\\`a\\`") + (set (make-local-variable 'log-view-file-re) regexp-unmatchable) (set (make-local-variable 'log-view-per-file-logs) nil) ;; TODO: Use a more precise regexp than "[ |/]+" to avoid false positives ;; in the ChangeLog text. -- 2.20.1