emacs-pretest-bug
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: bad syntax highlighting in shell-script mode


From: Stefan Monnier
Subject: Re: bad syntax highlighting in shell-script mode
Date: Tue, 18 Jul 2006 16:44:13 -0400
User-agent: Gnus/5.11 (Gnus v5.11) Emacs/22.0.50 (gnu/linux)

>>> Since $(...$(...)...) can be nested, does it mean that there's no way of
>>> highlighting it correctly using regular expressions?
>> 
>> It's not highlighted with regular expressions (only the initial $( is
>> matched with a regexp, the rest is handled with elisp code doing "manual
>> parsing").

> I'll see about extending the "manual parser" to cope with multiline-ness
> if you think it's worth it.

For what it's worth, I think I've fixed the other problem (the case of
parens inside quotes) with the patch below.

Reliably handling multiline elements is difficult.


        Stefan


Index: lisp/progmodes/sh-script.el
===================================================================
RCS file: /sources/emacs/emacs/lisp/progmodes/sh-script.el,v
retrieving revision 1.182
diff -u -r1.182 sh-script.el
--- lisp/progmodes/sh-script.el 17 Jul 2006 21:07:23 -0000      1.182
+++ lisp/progmodes/sh-script.el 18 Jul 2006 20:40:12 -0000
@@ -980,54 +980,53 @@
   (re-search-forward sh-here-doc-re limit t))
 
 (defun sh-quoted-subshell (limit)
-  "Search for a subshell embedded in a string. Find all the unescaped
-\" characters within said subshell, remembering that subshells can nest."
+  "Search for a subshell embedded in a string.
+Find all the unescaped
+\" characters within said subshell, remembering that subshells can nest.
+Argument LIMIT asdf."
   ;; FIXME: This can (and often does) match multiple lines, yet it makes no
   ;; effort to handle multiline cases correctly, so it ends up being
   ;; rather flakey.
-  (if (re-search-forward "\"\\(?:\\(?:.\\|\n\\)*?[^\\]\\(\\\\\\\\\\)*\\)?\\(\\$(\\|`\\)" limit t)
-      ;; bingo we have a $( or a ` inside a ""
-      (let ((char (char-after (point)))
-            (continue t)
-            (pos (point))
-            (data nil)    ;; value to put into match-data (and return)
-            (last nil)    ;; last char seen
-            (bq  (equal (match-string 1) "`")) ;; ` state flip-flop
-            (seen nil)    ;; list of important positions
-            (nest 1))     ;; subshell nesting level
-        (while (and continue char (<= pos limit))
-          ;; unescaped " inside a $( ... ) construct.
-          ;; state machine time...
-          ;; \ => ignore next char;
-          ;; ` => increase or decrease nesting level based on bq flag
-          ;; ) [where nesting > 0] => decrease nesting
-          ;; ( [where nesting > 0] => increase nesting
-          ;; ( [preceeded by $ ]   => increase nesting
-          ;; " [nesting <= 0 ]     => terminate, we're done.
-          ;; " [nesting >  0 ]     => remember this, it's not a proper "
-          ;; FIXME: don't count parens that appear within quotes.
-          (cond
-           ((eq ?\\ last) nil)
-           ((eq ?\` char) (setq nest (+ nest (if bq -1 1)) bq (not bq)))
-           ((and (> nest 0) (eq ?\) char))   (setq nest (1- nest)))
-           ((and (eq ?$ last) (eq ?\( char)) (setq nest (1+ nest)))
-           ((and (> nest 0) (eq ?\( char))   (setq nest (1+ nest)))
-           ((eq char ?\")
-            (if (>= 0 nest) (setq continue nil) (push pos seen))))
-          ;;(message "POS: %d [%d]" pos nest)
-          (setq last char
-                pos  (1+ pos)
-                char (char-after pos)) )
-        ;; FIXME: why construct a costly match data to pass to
-        ;; sh-apply-quoted-subshell rather than apply the highlight
-        ;; directly here?  -- Stef
-        (when seen
-          ;;(message "SEEN: %S" seen)
-          (setq data (list (current-buffer)))
-          (dolist(P seen)
-            (setq data (cons P (cons (1+ P) data))))
-          (store-match-data data))
-        data) ))
+  (when (re-search-forward "\"\\(?:\\(?:.\\|\n\\)*?[^\\]\\(\\\\\\\\\\)*\\)??\\(\\$(\\|`\\)" limit t)
+    ;; bingo we have a $( or a ` inside a ""
+    (let ((char (char-after (point)))
+          ;; `state' can be: double-quote, backquote, single-quote, code.
+          (state (if (eq (char-before) ?`) 'backquote 'code))
+          ;; Stacked states in the context.
+          (states nil))
+      (while (and state (< (point) limit))
+        ;; unescape " inside a $( ... ) construct.
+        (case (char-after)
+          (?\\ (if (not (eq state 'single-quote)) (forward-char 1)))
+          (?\" (case state
+                 (double-quote (setq state (pop states)))
+                 (single-quote nil)
+                 (t (push state states) (setq state 'double-quote)))
+               (if state (put-text-property (point) (1+ (point))
+                                            'syntax-table '(1))))
+          (?\' (case state
+                 (single-quote (setq state (pop states)))
+                 (double-quote nil)
+                 (t (push state states) (setq state 'single-quote))))
+          (?\` (case state
+                 (backquote (setq state (pop states)))
+                 (single-quote nil)
+                 (t (push state states) (setq state 'backquote))))
+          (?\$ (if (not (eq (char-after (1+ (point))) ?\())
+                   nil
+                 (case state
+                   (single-quote nil)
+                   (t (push state states) (setq state 'code)))))
+          (?\( (case state
+                 ((double-quote single-quote) nil)
+                 (t (push state states) (setq state 'code))))
+          (?\) (case state
+                 ((double-quote single-quote) nil)
+                 (t (setq state (pop states))))))
+           
+        (forward-char 1)))
+    t))
+            
 
 (defun sh-is-quoted-p (pos)
   (and (eq (char-before pos) ?\\)
@@ -1059,17 +1058,6 @@
     (when (save-excursion (backward-char 2) (looking-at ";;\\|in"))
       sh-st-punc)))
 
-(defun sh-apply-quoted-subshell ()
-  "Apply the `sh-st-punc' syntax to all the matches in `match-data'.
-This is used to flag quote characters in subshell constructs inside strings
-\(which should therefore not be treated as normal quote characters\)"
-  (let ((m (match-data)) a b)
-    (while m
-      (setq a (car  m)
-            b (cadr m)
-            m (cddr m))
-      (put-text-property a b 'syntax-table sh-st-punc))) sh-st-punc)
-
 (defconst sh-font-lock-syntactic-keywords
   ;; A `#' begins a comment when it is unquoted and at the beginning of a
   ;; word.  In the shell, words are separated by metacharacters.




reply via email to

[Prev in Thread] Current Thread [Next in Thread]