emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master d8abff398b: Fontify python escape sequences in literals


From: Lars Ingebrigtsen
Subject: master d8abff398b: Fontify python escape sequences in literals
Date: Sat, 6 Aug 2022 09:02:46 -0400 (EDT)

branch: master
commit d8abff398bc45a791783c5c463838ba6fa3f030e
Author: lWarne <laurencewarne@gmail.com>
Commit: Lars Ingebrigtsen <larsi@gnus.org>

    Fontify python escape sequences in literals
    
    * lisp/progmodes/python.el (python-rx): Add regular expressions
    matching escape codes in string and byte literals
    (python--string-bytes-literal-matcher): new function
    (python--not-raw-bytes-literal-start-regexp): new constant
    (python--not-raw-string-literal-start-regexp): new constant
    * test/lisp/progmodes/python-tests.el: Add tests for new
    fontification (bug#57004).
---
 lisp/progmodes/python.el            | 56 ++++++++++++++++++++++++++-
 test/lisp/progmodes/python-tests.el | 77 +++++++++++++++++++++++++++++++++++++
 2 files changed, 131 insertions(+), 2 deletions(-)

diff --git a/lisp/progmodes/python.el b/lisp/progmodes/python.el
index b8fc7d4c54..27bdbae311 100644
--- a/lisp/progmodes/python.el
+++ b/lisp/progmodes/python.el
@@ -427,7 +427,19 @@ This variant of `rx' supports common Python named REGEXPS."
                                  (: "vim:" (* space) "set" (+ space)
                                     "fileencoding" (* space) ?= (* space)
                                     (group-n 1 (+ (or word ?-)))
-                                    (* space) ":")))))
+                                    (* space) ":"))))
+            (bytes-escape-sequence
+             (seq (not "\\")
+                  (group (or "\\\\" "\\'" "\\a" "\\b" "\\f"
+                             "\\n" "\\r" "\\t" "\\v"
+                             (seq "\\" (= 3 (in "0-7")))
+                             (seq "\\x" hex hex)))))
+            (string-escape-sequence
+             (or bytes-escape-sequence
+                 (seq (not "\\")
+                      (or (group-n 1 "\\u" (= 4 hex))
+                          (group-n 1 "\\U" (= 8 hex))
+                          (group-n 1 "\\N{" (*? anychar) "}"))))))
      (rx ,@regexps)))
 
 
@@ -539,6 +551,29 @@ the {...} holes that appear within f-strings."
         (goto-char (min limit (1+ send)))
         (setq ppss (syntax-ppss))))))
 
+;; Shape of the pattern, read off the `rx' form below: beginning of
+;; string or one non-alphanumeric character, then "b" or "B", then one
+;; quote delimiter, then end of string.  Because of the `eos' anchor it
+;; only matches text that ends at the quote.
+(defconst python--not-raw-bytes-literal-start-regexp
+  (rx (or bos (not alnum)) (or "b" "B") (or "\"" "\"\"\"" "'" "'''") eos)
+  "A regular expression matching the start of a not-raw bytes literal.")
+
+;; Shape of the pattern, read off the `rx' form below: beginning of
+;; string or one non-alphanumeric character, an optional "u", "U", "F"
+;; or "f" prefix, one quote delimiter, then end of string.  Because of
+;; the `eos' anchor it only matches text that ends at the quote.
+(defconst python--not-raw-string-literal-start-regexp
+  (rx (or bos (not alnum)) (? (or "u" "U" "F" "f")) (or "\"" "\"\"\"" "'" "'''") eos)
+  "A regular expression matching the start of a not-raw string literal.")
+
+(defun python--string-bytes-literal-matcher (regexp start-regexp)
+  "Return a matcher for REGEXP inside literals starting with START-REGEXP.
+The returned function takes one argument, LIMIT, and searches forward
+from point for REGEXP up to LIMIT.  A match is accepted only when
+`syntax-ppss' reports an enclosing string or comment and the text
+ending one character past its start position matches START-REGEXP;
+any other match is skipped and the search continues.  The function
+returns the end position of the accepted match, or nil if there is
+none before LIMIT."
+  (lambda (limit)
+    (cl-loop for result = (re-search-forward regexp limit t)
+             for result-valid = (and
+                                 result
+                                 ;; Element 8 of the parser state is nil
+                                 ;; when the match lies outside any
+                                 ;; string or comment; reject the match
+                                 ;; rather than do arithmetic on nil.
+                                 (let ((pos (nth 8 (syntax-ppss))))
+                                   (and pos
+                                        (string-match-p
+                                         start-regexp
+                                         ;; Up to five characters before
+                                         ;; POS plus the character at
+                                         ;; POS, clamped to the buffer.
+                                         (buffer-substring-no-properties
+                                          (max (- pos 5) (point-min))
+                                          (min (+ pos 1) (point-max)))))))
+             until (or (not result) result-valid)
+             finally return (and result-valid result))))
+
 (defvar python-font-lock-keywords-level-1
   `((,(python-rx symbol-start "def" (1+ space) (group symbol-name))
      (1 font-lock-function-name-face))
@@ -716,7 +751,24 @@ sign in chained assignment."
                   grouped-assignment-target (* space)
                   (or ")" "]") (* space)
                   assignment-operator))
-     (1 font-lock-variable-name-face)))
+     (1 font-lock-variable-name-face))
+    ;; escape sequences within bytes literals
+    ;;   "\\" "\'" "\a" "\b" "\f" "\n" "\r" "\t" "\v"
+    ;;   "\ooo" character with octal value ooo
+    ;;   "\xhh" character with hex value hh
+    (,(python--string-bytes-literal-matcher
+       (python-rx bytes-escape-sequence)
+       python--not-raw-bytes-literal-start-regexp)
+     (1 font-lock-constant-face t))
+    ;; escape sequences within string literals, the same as appear in bytes
+    ;; literals in addition to:
+    ;;   "\uxxxx" Character with 16-bit hex value xxxx
+    ;;   "\Uxxxxxxxx" Character with 32-bit hex value xxxxxxxx
+    ;;   "\N{name}" Character named name in the Unicode database
+    (,(python--string-bytes-literal-matcher
+       (python-rx string-escape-sequence)
+       python--not-raw-string-literal-start-regexp)
+     (1 'font-lock-constant-face t)))
   "Font lock keywords to use in `python-mode' for maximum decoration.
 
 This decoration level includes everything in
diff --git a/test/lisp/progmodes/python-tests.el b/test/lisp/progmodes/python-tests.el
index 6f2ad87f81..07f2c4f09a 100644
--- a/test/lisp/progmodes/python-tests.el
+++ b/test/lisp/progmodes/python-tests.el
@@ -380,6 +380,83 @@ def f(x: CustomInt) -> CustomInt:
      (128 . font-lock-builtin-face) (131)
      (144 . font-lock-keyword-face) (150))))
 
+;; The buffer holds six one-line literals, each containing the
+;; two-character sequence backslash-n: unprefixed, `f'-prefixed and
+;; `u'-prefixed, with single and with double quotes.
+;; Reading each entry as (POSITION . FACE) -- presumably the format
+;; `python-tests-assert-faces' expects; that helper is defined outside
+;; this hunk -- the backslash of every literal is expected in
+;; `font-lock-constant-face'.  The quotes are expected in
+;; `font-lock-doc-face' for the two unprefixed literals and in
+;; `font-lock-string-face' for the prefixed ones.
+(ert-deftest python-font-lock-escape-sequence-string-newline ()
+  (python-tests-assert-faces
+   "'\\n'
+\"\\n\"
+f'\\n'
+f\"\\n\"
+u'\\n'
+u\"\\n\""
+   '((1 . font-lock-doc-face)
+     (2 . font-lock-constant-face)
+     (4 . font-lock-doc-face) (5)
+     (6 . font-lock-doc-face)
+     (7 . font-lock-constant-face)
+     (9 . font-lock-doc-face) (10)
+     (12 . font-lock-string-face)
+     (13 . font-lock-constant-face)
+     (15 . font-lock-string-face) (16)
+     (18 . font-lock-string-face)
+     (19 . font-lock-constant-face)
+     (21 . font-lock-string-face) (22)
+     (24 . font-lock-string-face)
+     (25 . font-lock-constant-face)
+     (27 . font-lock-string-face) (28)
+     (30 . font-lock-string-face)
+     (31 . font-lock-constant-face)
+     (33 . font-lock-string-face))))
+
+;; Two `b'-prefixed literals (single and double quoted), each holding
+;; backslash-n.  The expected list gives no face at position 1 (the
+;; first `b'), `font-lock-doc-face' on the quotes and
+;; `font-lock-constant-face' starting at each backslash.
+(ert-deftest python-font-lock-escape-sequence-bytes-newline ()
+  (python-tests-assert-faces
+   "b'\\n'
+b\"\\n\""
+   '((1)
+     (2 . font-lock-doc-face)
+     (3 . font-lock-constant-face)
+     (5 . font-lock-doc-face) (6)
+     (8 . font-lock-doc-face)
+     (9 . font-lock-constant-face)
+     (11 . font-lock-doc-face))))
+
+;; A bytes literal and a string literal, each containing the hex escape
+;; \x12 and the octal escape \777 separated by a space.  In both
+;; literals the expected list puts `font-lock-constant-face' at the
+;; start of each escape and `font-lock-doc-face' on the space between
+;; them and on the quotes.
+(ert-deftest python-font-lock-escape-sequence-hex-octal ()
+  (python-tests-assert-faces
+   "b'\\x12 \\777'
+'\\x12 \\777'"
+   '((1)
+     (2 . font-lock-doc-face)
+     (3 . font-lock-constant-face)
+     (7 . font-lock-doc-face)
+     (8 . font-lock-constant-face)
+     (12 . font-lock-doc-face) (13)
+     (14 . font-lock-doc-face)
+     (15 . font-lock-constant-face)
+     (19 . font-lock-doc-face)
+     (20 . font-lock-constant-face)
+     (24 . font-lock-doc-face))))
+
+;; The same three sequences (\u1234, \U00010348 and \N{Plus-Minus Sign})
+;; appear in a bytes literal and in a string literal.  For the bytes
+;; literal the expected list has a single `font-lock-doc-face' entry
+;; from position 2 up to the newline at 41, i.e. no constant-face
+;; entries.  For the string literal each of the three sequences starts
+;; a `font-lock-constant-face' entry.
+(ert-deftest python-font-lock-escape-sequence-unicode ()
+  (python-tests-assert-faces
+   "b'\\u1234 \\U00010348 \\N{Plus-Minus Sign}'
+'\\u1234 \\U00010348 \\N{Plus-Minus Sign}'"
+   '((1)
+     (2 . font-lock-doc-face) (41)
+     (42 . font-lock-doc-face)
+     (43 . font-lock-constant-face)
+     (49 . font-lock-doc-face)
+     (50 . font-lock-constant-face)
+     (60 . font-lock-doc-face)
+     (61 . font-lock-constant-face)
+     (80 . font-lock-doc-face))))
+
+;; An `rb'-prefixed and an `r'-prefixed literal containing
+;; backslash sequences.  The expected list contains only
+;; `font-lock-doc-face' entries, i.e. no constant-face entries for
+;; either raw literal.
+;; NOTE(review): unlike the other sequences, `\123' below is written
+;; with a single backslash, so the Lisp reader turns it into the one
+;; character `S' and the buffer holds no octal escape.  The expected
+;; positions (14, 16) are consistent with that.  Confirm whether
+;; `\\123' was intended; the positions would then need adjusting.
+(ert-deftest python-font-lock-raw-escape-sequence ()
+  (python-tests-assert-faces
+   "rb'\\x12 \123 \\n'
+r'\\x12 \123 \\n \\u1234 \\U00010348 \\N{Plus-Minus Sign}'"
+   '((1)
+     (3 . font-lock-doc-face) (14)
+     (16 . font-lock-doc-face))))
+
 
 ;;; Indentation
 



reply via email to

[Prev in Thread] Current Thread [Next in Thread]