>From 2ffdab0e86161396e3d2606949d1fcf93c58b592 Mon Sep 17 00:00:00 2001 From: Noam Postavsky Date: Sun, 26 May 2019 11:07:14 -0400 Subject: [PATCH 1/2] Fix some SGML syntax edge cases (Bug#33887) * lisp/textmodes/sgml-mode.el (sgml-syntax-propertize-rules): Handle single and double quotes symmetrically. Don't skip quoted comment enders. * test/lisp/textmodes/sgml-mode-tests.el (sgml-tests--quotes-syntax): Add more test cases. (sgml-mode-quote-in-long-text): New test. --- lisp/textmodes/sgml-mode.el | 5 +++- test/lisp/textmodes/sgml-mode-tests.el | 45 ++++++++++++++++++++++++++++------ 2 files changed, 42 insertions(+), 8 deletions(-) diff --git a/lisp/textmodes/sgml-mode.el b/lisp/textmodes/sgml-mode.el index 75f20722b0..1df7e78afc 100644 --- a/lisp/textmodes/sgml-mode.el +++ b/lisp/textmodes/sgml-mode.el @@ -363,9 +363,12 @@ (eval-and-compile ;; the resulting number of calls to syntax-ppss made it too slow ;; (bug#33887), so we're now careful to leave alone any pair ;; of quotes that doesn't hold a < or > char, which is the vast majority. - ("\\(?:\\(?1:\"\\)[^\"<>]*\\|\\(?1:'\\)[^'\"<>]*\\)" + ("\\([\"']\\)[^\"'<>]*" (1 (if (eq (char-after) (char-after (match-beginning 0))) (forward-char 1) + ;; Avoid skipping comment ender. + (when (eq (char-after) ?>) + (skip-chars-backward "-")) ;; Be careful to call `syntax-ppss' on a position before the one ;; we're going to change, so as not to need to flush the data we ;; just computed. diff --git a/test/lisp/textmodes/sgml-mode-tests.el b/test/lisp/textmodes/sgml-mode-tests.el index 1b8965e344..34d26480a4 100644 --- a/test/lisp/textmodes/sgml-mode-tests.el +++ b/test/lisp/textmodes/sgml-mode-tests.el @@ -161,15 +161,46 @@ (ert-deftest sgml-quote-works () (should (string= "&&" (buffer-string)))))) (ert-deftest sgml-tests--quotes-syntax () + (dolist (str '("a\"b c'd" + "a'b c\"d" + "\"a'" + "'a\"" + "\"a'\"" + "'a\"'" + "a\"b c'd" + "c>'d" + "" + "" + )) + (with-temp-buffer + (sgml-mode) + (insert str) + (ert-info ((format "%S" str) :prefix "Test case: ") + ;; Check that last tag is parsed as a tag. + (should (= 1 (car (syntax-ppss (1- (point-max)))))) + (should (= 0 (car (syntax-ppss (point-max))))))))) + +(ert-deftest sgml-mode-quote-in-long-text () (with-temp-buffer (sgml-mode) - (insert "a\"b c'd") - (should (= 1 (car (syntax-ppss (1- (point-max)))))) - (should (= 0 (car (syntax-ppss (point-max))))) - (erase-buffer) - (insert "c>d") - (should (= 1 (car (syntax-ppss (1- (point-max)))))) - (should (= 0 (car (syntax-ppss (point-max))))))) + (insert "" + ;; `syntax-propertize-wholelines' extends chunk size based + ;; on line length, so newlines are significant! + (make-string syntax-propertize-chunk-size ?a) "\n" + "'" + (make-string syntax-propertize-chunk-size ?a) "\n" + "") + ;; If we just check (syntax-ppss (point-max)) immediately, then + ;; we'll end up propertizing the whole buffer in one chunk (so the + ;; test is useless). Simulate something more like what happens + ;; when the buffer is viewed normally. + (cl-loop for pos from (point-min) to (point-max) + by syntax-propertize-chunk-size + do (syntax-ppss pos)) + (syntax-ppss (point-max)) + ;; Check that last tag is parsed as a tag. + (should (= 1 (- (car (syntax-ppss (1- (point-max)))) + (car (syntax-ppss (point-max)))))))) (provide 'sgml-mode-tests) ;;; sgml-mode-tests.el ends here -- 2.11.0