[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: bad syntax highlighting in shell-script mode
From: Stefan Monnier
Subject: Re: bad syntax highlighting in shell-script mode
Date: Tue, 18 Jul 2006 16:44:13 -0400
User-agent: Gnus/5.11 (Gnus v5.11) Emacs/22.0.50 (gnu/linux)
>>> Since $(...$(...)...) can be nested, does it mean that there's no way of
>>> highlighting it correctly using regular expressions?
>>
>> It's not highlighted with regular expressions (only the initial $( is
>> matched with a regexp, the rest is handled with elisp code doing "manual
>> parsing").
> I'll see about extending the "manual parser" to cope with multiline-ness
> if you think it's worth it.
For what it's worth, I think I've fixed the other problem (the case of
parens inside quotes) with the patch below.
Reliably handling multiline elements is difficult.
Stefan
Index: lisp/progmodes/sh-script.el
===================================================================
RCS file: /sources/emacs/emacs/lisp/progmodes/sh-script.el,v
retrieving revision 1.182
diff -u -r1.182 sh-script.el
--- lisp/progmodes/sh-script.el 17 Jul 2006 21:07:23 -0000 1.182
+++ lisp/progmodes/sh-script.el 18 Jul 2006 20:40:12 -0000
@@ -980,54 +980,53 @@
(re-search-forward sh-here-doc-re limit t))
(defun sh-quoted-subshell (limit)
- "Search for a subshell embedded in a string. Find all the unescaped
-\" characters within said subshell, remembering that subshells can nest."
+ "Search for a subshell embedded in a string.
+Find all the unescaped
+\" characters within said subshell, remembering that subshells can nest.
+Argument LIMIT asdf."
;; FIXME: This can (and often does) match multiple lines, yet it makes no
;; effort to handle multiline cases correctly, so it ends up being
;; rather flakey.
- (if (re-search-forward
"\"\\(?:\\(?:.\\|\n\\)*?[^\\]\\(\\\\\\\\\\)*\\)?\\(\\$(\\|`\\)" limit t)
- ;; bingo we have a $( or a ` inside a ""
- (let ((char (char-after (point)))
- (continue t)
- (pos (point))
- (data nil) ;; value to put into match-data (and return)
- (last nil) ;; last char seen
- (bq (equal (match-string 1) "`")) ;; ` state flip-flop
- (seen nil) ;; list of important positions
- (nest 1)) ;; subshell nesting level
- (while (and continue char (<= pos limit))
- ;; unescaped " inside a $( ... ) construct.
- ;; state machine time...
- ;; \ => ignore next char;
- ;; ` => increase or decrease nesting level based on bq flag
- ;; ) [where nesting > 0] => decrease nesting
- ;; ( [where nesting > 0] => increase nesting
- ;; ( [preceeded by $ ] => increase nesting
- ;; " [nesting <= 0 ] => terminate, we're done.
- ;; " [nesting > 0 ] => remember this, it's not a proper "
- ;; FIXME: don't count parens that appear within quotes.
- (cond
- ((eq ?\\ last) nil)
- ((eq ?\` char) (setq nest (+ nest (if bq -1 1)) bq (not bq)))
- ((and (> nest 0) (eq ?\) char)) (setq nest (1- nest)))
- ((and (eq ?$ last) (eq ?\( char)) (setq nest (1+ nest)))
- ((and (> nest 0) (eq ?\( char)) (setq nest (1+ nest)))
- ((eq char ?\")
- (if (>= 0 nest) (setq continue nil) (push pos seen))))
- ;;(message "POS: %d [%d]" pos nest)
- (setq last char
- pos (1+ pos)
- char (char-after pos)) )
- ;; FIXME: why construct a costly match data to pass to
- ;; sh-apply-quoted-subshell rather than apply the highlight
- ;; directly here? -- Stef
- (when seen
- ;;(message "SEEN: %S" seen)
- (setq data (list (current-buffer)))
- (dolist(P seen)
- (setq data (cons P (cons (1+ P) data))))
- (store-match-data data))
- data) ))
+ (when (re-search-forward
"\"\\(?:\\(?:.\\|\n\\)*?[^\\]\\(\\\\\\\\\\)*\\)??\\(\\$(\\|`\\)" limit t)
+ ;; bingo we have a $( or a ` inside a ""
+ (let ((char (char-after (point)))
+ ;; `state' can be: double-quote, backquote, single-quote, code.
+ (state (if (eq (char-before) ?`) 'backquote 'code))
+ ;; Stacked states in the context.
+ (states nil))
+ (while (and state (< (point) limit))
+ ;; unescape " inside a $( ... ) construct.
+ (case (char-after)
+ (?\\ (if (not (eq state 'single-quote)) (forward-char 1)))
+ (?\" (case state
+ (double-quote (setq state (pop states)))
+ (single-quote nil)
+ (t (push state states) (setq state 'double-quote)))
+ (if state (put-text-property (point) (1+ (point))
+ 'syntax-table '(1))))
+ (?\' (case state
+ (single-quote (setq state (pop states)))
+ (double-quote nil)
+ (t (push state states) (setq state 'single-quote))))
+ (?\` (case state
+ (backquote (setq state (pop states)))
+ (single-quote nil)
+ (t (push state states) (setq state 'backquote))))
+ (?\$ (if (not (eq (char-after (1+ (point))) ?\())
+ nil
+ (case state
+ (single-quote nil)
+ (t (push state states) (setq state 'code)))))
+ (?\( (case state
+ ((double-quote single-quote) nil)
+ (t (push state states) (setq state 'code))))
+ (?\) (case state
+ ((double-quote single-quote) nil)
+ (t (setq state (pop states))))))
+
+ (forward-char 1)))
+ t))
+
(defun sh-is-quoted-p (pos)
(and (eq (char-before pos) ?\\)
@@ -1059,17 +1058,6 @@
(when (save-excursion (backward-char 2) (looking-at ";;\\|in"))
sh-st-punc)))
-(defun sh-apply-quoted-subshell ()
- "Apply the `sh-st-punc' syntax to all the matches in `match-data'.
-This is used to flag quote characters in subshell constructs inside strings
-\(which should therefore not be treated as normal quote characters\)"
- (let ((m (match-data)) a b)
- (while m
- (setq a (car m)
- b (cadr m)
- m (cddr m))
- (put-text-property a b 'syntax-table sh-st-punc))) sh-st-punc)
-
(defconst sh-font-lock-syntactic-keywords
;; A `#' begins a comment when it is unquoted and at the beginning of a
;; word. In the shell, words are separated by metacharacters.