emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] master bb591f1: Enhance CC Mode's fontification, etc., of


From: Alan Mackenzie
Subject: [Emacs-diffs] master bb591f1: Enhance CC Mode's fontification, etc., of unterminated strings.
Date: Sun, 20 May 2018 09:43:20 -0400 (EDT)

branch: master
commit bb591f139f0602af292c772f974dcc14dabb1deb
Author: Alan Mackenzie <address@hidden>
Commit: Alan Mackenzie <address@hidden>

    Enhance CC Mode's fontification, etc., of unterminated strings.
    
    String delimiters, including escaped new lines, of correctly terminated
    strings are left in font-lock-string-face.  All others get
    font-lock-warning-face.  The latter get syntax-table text properties on the
    opening string delim and the "terminating EOL".
    
    Correct two miscellaneous bugs: the handling of text properties on Java 
Mode's
    generic delimiters; the handling of c-just-done-before-change.
    
    * lisp/progmodes/cc-defs.el (c-point): New position 'eoll "end of logical 
line".
    (c-characterp): New macro.
    
    * lisp/progmodes/cc-fonts.el (c-font-lock-invalid-string): Removed.
    (c-basic-matchers-before): Use a simple matcher in place of the form around
    c-font-lock-invalid-string.
    
    * lisp/progmodes/cc-langs.el (c-get-state-before-change-functions): Add
    c-before-change-check-unbalanced-strings to the value for all modes except 
AWK
    Mode.  Also add c-before-change-check-<>-operators to Java Mode, correcting 
an
    error in that mode's handling of generic delimiters.
    (c-before-font-lock-functions): Add 
c-after-change-re-mark-unbalanced-strings
    to the value for all modes except AWK Mode.
    (c-single-quotes-quote-strings, c-string-delims): New lang variables for
    future enhancements.
    (c-string-innards-re-alist): New lang variable.
    
    * lisp/progmodes/cc-mode.el (c-just-done-before-change): Do not set this
    variable when a change is the alteration of text properties.
    (c-basic-common-init): Set parse-sexp-lookup-properties (and the XEmacs
    equivalent) also for Pike Mode.
    (c-neutralize-CPP-line): No longer neutralize unbalanced quotes here.
    (c-unescaped-nls-in-string-p, c-multiline-string-start-is-being-detached)
    (c-pps-to-string-delim, c-before-change-check-unbalanced-strings)
    (c-after-change-re-mark-unbalanced-strings): New functions.
    (c-after-change): Fix a bug with the handling of c-just-done-before-change.
---
 lisp/progmodes/cc-defs.el  |  18 ++++
 lisp/progmodes/cc-fonts.el |  35 +------
 lisp/progmodes/cc-langs.el |  48 +++++++++-
 lisp/progmodes/cc-mode.el  | 225 ++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 279 insertions(+), 47 deletions(-)

diff --git a/lisp/progmodes/cc-defs.el b/lisp/progmodes/cc-defs.el
index 7ec57e0..53d6654 100644
--- a/lisp/progmodes/cc-defs.el
+++ b/lisp/progmodes/cc-defs.el
@@ -219,6 +219,7 @@ one of the following symbols:
 
 `bol'   -- beginning of line
 `eol'   -- end of line
+`eoll'  -- end of logical line (i.e. without escaped NL)
 `bod'   -- beginning of defun
 `eod'   -- end of defun
 `boi'   -- beginning of indentation
@@ -254,6 +255,16 @@ to it is returned.  This function does not modify the 
point or the mark."
               (end-of-line)
               (point))))
 
+        ((eq position 'eoll)
+         `(save-excursion
+            ,@(if point `((goto-char ,point)))
+            (while (progn
+                     (end-of-line)
+                     (prog1 (eq (logand 1 (skip-chars-backward "\\\\")) 1)))
+              (beginning-of-line 2))
+            (end-of-line)
+            (point)))
+
         ((eq position 'boi)
          `(save-excursion
             ,@(if point `((goto-char ,point)))
@@ -453,6 +464,13 @@ to it is returned.  This function does not modify the 
point or the mark."
       `(int-to-char ,integer)
     integer))
 
+(defmacro c-characterp (arg)
+  ;; Return t when ARG is a character (XEmacs) or integer (Emacs), otherwise
+  ;; return nil.
+  (if (integerp ?c)
+      `(integerp ,arg)
+    `(characterp ,arg)))
+
 (defmacro c-last-command-char ()
   ;; The last character just typed.  Note that `last-command-event' exists in
   ;; both Emacs and XEmacs, but with confusingly different meanings.
diff --git a/lisp/progmodes/cc-fonts.el b/lisp/progmodes/cc-fonts.el
index 7cac55e..9d2517f 100644
--- a/lisp/progmodes/cc-fonts.el
+++ b/lisp/progmodes/cc-fonts.el
@@ -682,33 +682,6 @@ stuff.  Used on level 1 and higher."
                   ''c-nonbreakable-space-face)))
       ))
 
-(defun c-font-lock-invalid-string ()
-  ;; Assuming the point is after the opening character of a string,
-  ;; fontify that char with `font-lock-warning-face' if the string
-  ;; decidedly isn't terminated properly.
-  ;;
-  ;; This function does hidden buffer changes.
-  (let ((start (1- (point))))
-    (save-excursion
-      (and (eq (elt (parse-partial-sexp start (c-point 'eol)) 8) start)
-          (if (if (eval-when-compile (integerp ?c))
-                  ;; Emacs
-                  (integerp c-multiline-string-start-char)
-                ;; XEmacs
-                (characterp c-multiline-string-start-char))
-              ;; There's no multiline string start char before the
-              ;; string, so newlines aren't allowed.
-              (not (eq (char-before start) c-multiline-string-start-char))
-            ;; Multiline strings are allowed anywhere if
-            ;; c-multiline-string-start-char is t.
-            (not c-multiline-string-start-char))
-          (if c-string-escaped-newlines
-              ;; There's no \ before the newline.
-              (not (eq (char-before (point)) ?\\))
-            ;; Escaped newlines aren't supported.
-            t)
-          (c-put-font-lock-face start (1+ start) 'font-lock-warning-face)))))
-
 (defun c-font-lock-invalid-single-quotes (limit)
   ;; This function will be called from font-lock for a region bounded by POINT
   ;; and LIMIT, as though it were to identify a keyword for
@@ -749,16 +722,12 @@ casts and declarations are fontified.  Used on level 2 
and higher."
   ;; `c-recognize-<>-arglists' is set.
 
   t `(;; Put a warning face on the opener of unclosed strings that
-      ;; can't span lines.  Later font
+      ;; can't span lines and on the "terminating" newlines.  Later font
       ;; lock packages have a `font-lock-syntactic-face-function' for
       ;; this, but it doesn't give the control we want since any
       ;; fontification done inside the function will be
       ;; unconditionally overridden.
-      ,(c-make-font-lock-search-function
-       ;; Match a char before the string starter to make
-       ;; `c-skip-comments-and-strings' work correctly.
-       (concat ".\\(" c-string-limit-regexp "\\)")
-       '((c-font-lock-invalid-string)))
+      ("\\s|" 0 font-lock-warning-face t nil)
 
       ;; Invalid single quotes.
       c-font-lock-invalid-single-quotes
diff --git a/lisp/progmodes/cc-langs.el b/lisp/progmodes/cc-langs.el
index 4a7c79a..53cb7f7 100644
--- a/lisp/progmodes/cc-langs.el
+++ b/lisp/progmodes/cc-langs.el
@@ -472,21 +472,24 @@ so that all identifiers are recognized as words.")
 (c-lang-defconst c-get-state-before-change-functions
   ;; For documentation see the following c-lang-defvar of the same name.
   ;; The value here may be a list of functions or a single function.
-  t nil
+  t 'c-before-change-check-unbalanced-strings
   c++ '(c-extend-region-for-CPP
        c-before-change-check-raw-strings
        c-before-change-check-<>-operators
        c-depropertize-CPP
        c-invalidate-macro-cache
        c-truncate-bs-cache
+       c-before-change-check-unbalanced-strings
        c-parse-quotes-before-change)
   (c objc) '(c-extend-region-for-CPP
             c-depropertize-CPP
             c-invalidate-macro-cache
             c-truncate-bs-cache
+            c-before-change-check-unbalanced-strings
             c-parse-quotes-before-change)
-  java 'c-parse-quotes-before-change
-       ;; 'c-before-change-check-<>-operators
+  java '(c-parse-quotes-before-change
+        c-before-change-check-unbalanced-strings
+        c-before-change-check-<>-operators)
   awk 'c-awk-record-region-clear-NL)
 (c-lang-defvar c-get-state-before-change-functions
               (let ((fs (c-lang-const c-get-state-before-change-functions)))
@@ -514,14 +517,17 @@ parameters \(point-min) and \(point-max).")
   ;; For documentation see the following c-lang-defvar of the same name.
   ;; The value here may be a list of functions or a single function.
   t '(c-depropertize-new-text
+      c-after-change-re-mark-unbalanced-strings
       c-change-expand-fl-region)
   (c objc) '(c-depropertize-new-text
             c-parse-quotes-after-change
+            c-after-change-re-mark-unbalanced-strings
             c-extend-font-lock-region-for-macros
             c-neutralize-syntax-in-CPP
             c-change-expand-fl-region)
   c++ '(c-depropertize-new-text
        c-parse-quotes-after-change
+       c-after-change-re-mark-unbalanced-strings
        c-extend-font-lock-region-for-macros
        c-after-change-re-mark-raw-strings
        c-neutralize-syntax-in-CPP
@@ -529,6 +535,7 @@ parameters \(point-min) and \(point-max).")
        c-change-expand-fl-region)
   java '(c-depropertize-new-text
         c-parse-quotes-after-change
+        c-after-change-re-mark-unbalanced-strings
         c-restore-<>-properties
         c-change-expand-fl-region)
   awk '(c-depropertize-new-text
@@ -611,6 +618,19 @@ EOL terminated statements."
   (c c++ objc) t)
 (c-lang-defvar c-has-bitfields (c-lang-const c-has-bitfields))
 
+(c-lang-defconst c-single-quotes-quote-strings
+  "Whether the language uses single quotes for multi-char strings."
+  t nil)
+(c-lang-defvar c-single-quotes-quote-strings
+              (c-lang-const c-single-quotes-quote-strings))
+
+(c-lang-defconst c-string-delims
+  "A list of characters which can delimit arbitrary length strings"
+  t (if (c-lang-const c-single-quotes-quote-strings)
+       '(?\" ?\')
+      '(?\")))
+(c-lang-defvar c-string-delims (c-lang-const c-string-delims))
+
 (c-lang-defconst c-has-quoted-numbers
   "Whether the language has numbers quoted like 4'294'967'295."
   t nil
@@ -856,6 +876,28 @@ literal are multiline."
 (c-lang-defvar c-multiline-string-start-char
   (c-lang-const c-multiline-string-start-char))
 
+(c-lang-defconst c-string-innards-re-alist
+  ;; An alist of regexps matching the innards of a string, the key being the
+  ;; string's delimiter.
+  ;;
+  ;; The regexps' matches extend up to, but not including, the closing string
+  ;; delimiter or an unescaped NL.  An EOL is part of the string only if it is
+  ;; escaped.
+  t (mapcar (lambda (delim)
+             (cons
+              delim
+              (concat "\\(\\\\\\(.\\|\n\\|\r\\)\\|[^\\\n\r"
+                      (string delim)
+                      "]\\)*")))
+           (and
+            (or (null (c-lang-const c-multiline-string-start-char))
+                (c-characterp (c-lang-const c-multiline-string-start-char)))
+            (if (c-lang-const c-single-quotes-quote-strings)
+                '(?\" ?\')
+              '(?\")))))
+(c-lang-defvar c-string-innards-re-alist
+  (c-lang-const c-string-innards-re-alist))
+
 (c-lang-defconst c-opt-cpp-symbol
   "The symbol which starts preprocessor constructs when in the margin."
   t "#"
diff --git a/lisp/progmodes/cc-mode.el b/lisp/progmodes/cc-mode.el
index f425109..2427191 100644
--- a/lisp/progmodes/cc-mode.el
+++ b/lisp/progmodes/cc-mode.el
@@ -499,9 +499,10 @@ preferably use the `c-mode-menu' language constant 
directly."
 ;; `basic-save-buffer' does (insert ?\n) when `require-final-newline' is
 ;; non-nil; (ii) to detect when Emacs fails to invoke
 ;; `before-change-functions'.  This can happen when reverting a buffer - see
-;; bug #24094.  It seems these failures happen only in GNU Emacs; XEmacs
-;; seems to maintain the strict alternation of calls to
-;; `before-change-functions' and `after-change-functions'.
+;; bug #24094.  It seems these failures happen only in GNU Emacs; XEmacs seems
+;; to maintain the strict alternation of calls to `before-change-functions'
+;; and `after-change-functions'.  Note that this variable is not set when
+;; `c-before-change' is invoked by a change to text properties.
 
 (defun c-basic-common-init (mode default-style)
   "Do the necessary initialization for the syntax handling routines
@@ -563,7 +564,7 @@ that requires a literal mode spec at compile time."
 
   (when (or c-recognize-<>-arglists
            (c-major-mode-is 'awk-mode)
-           (c-major-mode-is '(java-mode c-mode c++-mode objc-mode)))
+           (c-major-mode-is '(java-mode c-mode c++-mode objc-mode pike-mode)))
     ;; We'll use the syntax-table text property to change the syntax
     ;; of some chars for this language, so do the necessary setup for
     ;; that.
@@ -996,9 +997,9 @@ Note that the style variables are always made local to the 
buffer."
   ;; characters, ones which would interact syntactically with stuff outside
   ;; this region.
   ;;
-  ;; These are unmatched string delimiters, or unmatched
-  ;; parens/brackets/braces.  An unclosed comment is regarded as valid, NOT
-  ;; obtrusive.
+  ;; These are unmatched parens/brackets/braces.  An unclosed comment is
+  ;; regarded as valid, NOT obtrusive.  Unbalanced strings are handled
+  ;; elsewhere.
   (save-excursion
     (let (s)
       (while
@@ -1008,9 +1009,11 @@ Note that the style variables are always made local to 
the buffer."
             ((< (nth 0 s) 0)           ; found an unmated ),},]
              (c-put-char-property (1- (point)) 'syntax-table '(1))
              t)
-            ((nth 3 s)                 ; In a string
-             (c-put-char-property (nth 8 s) 'syntax-table '(1))
-             t)
+            ;; Unbalanced strings are now handled by
+            ;; `c-before-change-check-unbalanced-strings', etc.
+            ;; ((nth 3 s)                      ; In a string
+            ;;  (c-put-char-property (nth 8 s) 'syntax-table '(1))
+            ;;  t)
             ((> (nth 0 s) 0)           ; In a (,{,[
              (c-put-char-property (nth 1 s) 'syntax-table '(1))
              t)
@@ -1070,6 +1073,205 @@ Note that the style variables are always made local to 
the buffer."
            (forward-line))           ; no infinite loop with, e.g., "#//"
          )))))
 
+(defun c-unescaped-nls-in-string-p (&optional quote-pos)
+  ;; Return whether unescaped newlines can be inside strings.
+  ;;
+  ;; QUOTE-POS, if present, is the position of the opening quote of a string.
+  ;; Depending on the language, there might be a special character before it
+  ;; signifying the validity of such NLs.
+  (cond
+   ((null c-multiline-string-start-char) nil)
+   ((c-characterp c-multiline-string-start-char)
+    (and quote-pos
+        (eq (char-before quote-pos) c-multiline-string-start-char)))
+   (t t)))
+
+(defun c-multiline-string-start-is-being-detached (end)
+  ;; If (e.g.), the # character in Pike is being detached from the string
+  ;; opener it applies to, return t.  Else return nil.  END is the argument
+  ;; supplied to every before-change function.
+  (and (memq (char-after end) c-string-delims)
+       (c-characterp c-multiline-string-start-char)
+       (eq (char-before end) c-multiline-string-start-char)))
+
+(defun c-pps-to-string-delim (end)
+  ;; parse-partial-sexp forward to the next string quote, which is deemed to
+  ;; be a closing quote.  Return nil.
+  ;;
+  ;; We remove string-fence syntax-table text properties from characters we
+  ;; pass over.
+  (let* ((start (point))
+        (no-st-s `(0 nil nil ?\" nil nil 0 nil ,start nil nil))
+        (st-s `(0 nil nil t nil nil 0 nil ,start nil nil))
+        no-st-pos st-pos
+        )
+    (parse-partial-sexp start end nil nil no-st-s 'syntax-table)
+    (setq no-st-pos (point))
+    (goto-char start)
+    (while (progn
+            (parse-partial-sexp (point) end nil nil st-s 'syntax-table)
+            (c-clear-char-property (1- (point)) 'syntax-table)
+            (setq st-pos (point))
+            (and (< (point) end)
+                 (not (eq (char-before) ?\")))))
+    (goto-char (min no-st-pos st-pos))
+    nil))
+
+(defun c-before-change-check-unbalanced-strings (beg end)
+  ;; If BEG or END is inside an unbalanced string, remove the syntax-table
+  ;; text property from respectively the start or end of the string.  Also
+  ;; extend the region (c-new-BEG c-new-END) as necessary to cope with the
+  ;; change being the insertion of an odd number of quotes.
+  ;;
+  ;; POINT is undefined both at entry to and exit from this function, the
+  ;; buffer will have been widened, and match data will have been saved.
+  ;;
+  ;; This function is called exclusively as a before-change function via
+  ;; `c-get-state-before-change-functions'.
+  (c-save-buffer-state
+      ((end-limits
+       (progn
+         (goto-char (if (c-multiline-string-start-is-being-detached end)
+                        (1+ end)
+                      end))
+         (c-literal-limits)))
+       (end-literal-type (and end-limits
+                             (c-literal-type end-limits)))
+       (beg-limits
+       (progn
+         (goto-char beg)
+         (c-literal-limits)))
+       (beg-literal-type (and beg-limits
+                             (c-literal-type beg-limits))))
+
+    (when (eq end-literal-type 'string)
+      (setq c-new-END (max c-new-END (cdr end-limits))))
+    ;; It is possible the buffer change will include inserting a string quote.
+    ;; This could have the effect of flipping the meaning of any following
+    ;; quotes up until the next unescaped EOL.  Also guard against the change
+    ;; being the insertion of \ before an EOL, escaping it.
+    (cond
+     ((c-characterp c-multiline-string-start-char)
+      ;; The text about to be inserted might contain a multiline string
+      ;; opener.  Set c-new-END after anything which might be affected.
+      ;; Go to the end of the putative multiline string.
+      (goto-char end)
+      (c-pps-to-string-delim (point-max))
+      (when (< (point) (point-max))
+       (while
+           (and
+            (progn
+              (while
+                  (and
+                   (c-syntactic-re-search-forward
+                    "\"\\|\\s|" (point-max) t t)
+                   (progn
+                     (c-clear-char-property (1- (point)) 'syntax-table)
+                     (not (eq (char-before) ?\")))))
+              (eq (char-before) ?\"))
+            (if (eq (char-before (1- (point)))
+                    c-multiline-string-start-char)
+                (progn
+                  (c-pps-to-string-delim (point-max))
+                  (< (point) (point-max)))
+              (c-pps-to-string-delim (c-point 'eoll))
+              (< (point) (c-point 'eoll))))))
+      (setq c-new-END (max (point) c-new-END)))
+
+     ((< c-new-END (point-max))
+      (goto-char (1+ c-new-END))       ; might be a newline.
+      ;; In the following regexp, the initial \n caters for a newline getting
+      ;; joined to a preceding \ by the removal of what comes between.
+      (re-search-forward "\n?\\(\\\\\\(.\\|\n\\|\r\\)\\|[^\\\n\r]\\)*" nil t)
+      ;; We're at an EOLL or point-max.
+      (setq c-new-END (min (1+ (point)) (point-max)))
+      ;; FIXME!!!  Write a clever comment here.
+      (goto-char c-new-END)
+      (when (equal (c-get-char-property (1- (point)) 'syntax-table) '(15))
+       (backward-sexp)
+       (c-clear-char-property (1- c-new-END) 'syntax-table)
+       (c-clear-char-property (point) 'syntax-table)))
+
+     (t (if (memq (char-before c-new-END) c-string-delims)
+           (c-clear-char-property (1- c-new-END) 'syntax-table))))
+
+    (when (eq end-literal-type 'string)
+      (c-clear-char-property (1- (cdr end-limits)) 'syntax-table))
+
+    (when (eq beg-literal-type 'string)
+      (setq c-new-BEG (min c-new-BEG (car beg-limits)))
+      (c-clear-char-property (car beg-limits) 'syntax-table))))
+
+(defun c-after-change-re-mark-unbalanced-strings (beg _end _old-len)
+  ;; Mark any unbalanced strings in the region (c-new-BEG c-new-END) with
+  ;; string fence syntax-table text properties.
+  ;;
+  ;; POINT is undefined both at entry to and exit from this function, the
+  ;; buffer will have been widened, and match data will have been saved.
+  ;;
+  ;; This function is called exclusively as an after-change function via
+  ;; `c-before-font-lock-functions'.
+  (c-save-buffer-state
+      ((cll (progn (goto-char c-new-BEG)
+                  (c-literal-limits)))
+       (beg-literal-type (and cll (c-literal-type cll)))
+       (beg-limits
+       (cond
+        ((and (eq beg-literal-type 'string)
+              (c-unescaped-nls-in-string-p (car cll)))
+         (cons
+          (car cll)
+          (progn
+            (goto-char (1+ (car cll)))
+            (search-forward-regexp
+             (cdr (assq (char-after (car cll)) c-string-innards-re-alist))
+             nil t)
+            (min (1+ (point)) (point-max)))))
+        ((and (null beg-literal-type)
+              (goto-char beg)
+              (eq (char-before) c-multiline-string-start-char)
+              (memq (char-after) c-string-delims))
+         (cons (point)
+               (progn
+                 (forward-char)
+                 (search-forward-regexp
+                  (cdr (assq (char-before) c-string-innards-re-alist)) nil t)
+                 (1+ (point)))))
+        (cll)))
+       s)
+    (goto-char
+     (cond ((null beg-literal-type)
+           c-new-BEG)
+          ((eq beg-literal-type 'string)
+           (car beg-limits))
+          (t                           ; comment
+           (cdr beg-limits))))
+    (while
+       (and
+        (< (point) c-new-END)
+        (progn
+          ;; Skip over any comments before the next string.
+          (while (progn
+                   (setq s (parse-partial-sexp (point) c-new-END nil
+                                               nil s 'syntax-table))
+                   (and (not (nth 3 s))
+                        (< (point) c-new-END)
+                        (not (memq (char-before) c-string-delims)))))
+          ;; We're at the start of a string.
+          (memq (char-before) c-string-delims)))
+      (if (c-unescaped-nls-in-string-p (1- (point)))
+         (looking-at "[^\"]*")
+       (looking-at (cdr (assq (char-before) c-string-innards-re-alist))))
+      (cond
+       ((memq (char-after (match-end 0)) '(?\n ?\r))
+       (c-put-char-property (1- (point)) 'syntax-table '(15))
+       (c-put-char-property (match-end 0) 'syntax-table '(15)))
+       ((or (eq (match-end 0) (point-max))
+           (eq (char-after (match-end 0)) ?\\)) ; \ at EOB
+       (c-put-char-property (1- (point)) 'syntax-table '(15))))
+      (goto-char (min (1+ (match-end 0)) (point-max)))
+      (setq s nil))))
+
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; Parsing of quotes.
 ;;
@@ -1418,7 +1620,8 @@ Note that this is a strict tail, so won't match, e.g. 
\"0x....\".")
   ;; without an intervening call to `before-change-functions' when reverting
   ;; the buffer (see bug #24094).  Whatever the cause, assume that the entire
   ;; buffer has changed.
-  (when (not c-just-done-before-change)
+  (when (and (not c-just-done-before-change)
+            (not (c-called-from-text-property-change-p)))
     (save-restriction
       (widen)
       (c-before-change (point-min) (point-max))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]