emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[PATCH] Interpret #r"..." as a raw string


From: Naoya Yamashita
Subject: [PATCH] Interpret #r"..." as a raw string
Date: Sat, 27 Feb 2021 03:18:57 +0900 (JST)

Hi, all.

I wrote a patch that allows the Emacs reader to interpret raw strings.

As you know, we already have some special markers that use `#` to make
the Emacs reader work in a special way.  For example, we have `#[` to
indicate a byte-compiled object and `#s(` to indicate a hash table.

I introduce raw strings using this mechanism: if users put `#r`
before a string, the Emacs reader interprets it as a raw string.

Many programming languages have a raw string feature[^1], so I want to
be able to use raw strings in Emacs Lisp as well.

For more concrete examples, please see the test cases in the attached patch.


^1: 
https://en.wikipedia.org/wiki/Comparison_of_programming_languages_(strings)#Quoted_raw


Regards,
Naoya
>From 649c6f9c8aa994b992f3353d2ad373461ed24d15 Mon Sep 17 00:00:00 2001
From: Naoya Yamashita <conao3@gmail.com>
Date: Sat, 27 Feb 2021 02:55:19 +0900
Subject: [PATCH] Interpret #r"..." as a raw string

* src/lread.c (read1): Add new reader symbol, #r", indicates raw string
* test/src/lread-tests.el (lread-raw-string-1, lread-raw-string-2,
lread-raw-string-usage-1, lread-raw-string-usage-2): Add testcases
---
 src/lread.c             | 67 +++++++++++++++++++++++++++++++++++++++++
 test/src/lread-tests.el | 36 ++++++++++++++++++++++
 2 files changed, 103 insertions(+)

diff --git a/src/lread.c b/src/lread.c
index dea1b232ff..d2d7eee407 100644
--- a/src/lread.c
+++ b/src/lread.c
@@ -2835,6 +2835,73 @@ read1 (Lisp_Object readcharfun, int *pch, bool first_in_list)
 
     case '#':
       c = READCHAR;
+      if (c == 'r')
+       {
+         c = READCHAR;
+         if (c == '"')
+           {
+             ptrdiff_t count = SPECPDL_INDEX ();
+             char *read_buffer = stackbuf;
+             ptrdiff_t read_buffer_size = sizeof stackbuf;
+             char *heapbuf = NULL;
+             char *p = read_buffer;
+             char *end = read_buffer + read_buffer_size;
+             int ch;
+             /* True if we saw an escape sequence specifying
+                a multibyte character.  */
+             bool force_multibyte = false;
+             /* True if we saw an escape sequence specifying
+                a single-byte character.  */
+             bool force_singlebyte = false;
+             bool cancel = false;
+             ptrdiff_t nchars = 0;
+
+             while ((ch = READCHAR) >= 0
+                    && ch != '\"')
+               {
+                 if (end - p < MAX_MULTIBYTE_LENGTH)
+                   {
+                     ptrdiff_t offset = p - read_buffer;
+                     read_buffer = grow_read_buffer (read_buffer, offset,
+                                                     &heapbuf, &read_buffer_size,
+                                                     count);
+                     p = read_buffer + offset;
+                     end = read_buffer + read_buffer_size;
+                   }
+
+                 p += CHAR_STRING (ch, (unsigned char *) p);
+                 if (CHAR_BYTE8_P (ch))
+                   force_singlebyte = true;
+                 else if (! ASCII_CHAR_P (ch))
+                   force_multibyte = true;
+                 nchars++;
+               }
+
+             if (ch < 0)
+               end_of_file_error ();
+
+             /* If purifying, and string starts with \ newline,
+                return zero instead.  This is for doc strings
+                that we are really going to find in etc/DOC.nn.nn.  */
+             if (!NILP (Vpurify_flag) && NILP (Vdoc_file_name) && cancel)
+               return unbind_to (count, make_fixnum (0));
+
+             if (! force_multibyte && force_singlebyte)
+               {
+                 /* READ_BUFFER contains raw 8-bit bytes and no multibyte
+                    forms.  Convert it to unibyte.  */
+                 nchars = str_as_unibyte ((unsigned char *) read_buffer,
+                                          p - read_buffer);
+                 p = read_buffer + nchars;
+               }
+
+             Lisp_Object result
+               = make_specified_string (read_buffer, nchars, p - read_buffer,
+                                        (force_multibyte
+                                         || (p - read_buffer != nchars)));
+             return unbind_to (count, result);
+           }
+       }
       if (c == 's')
        {
          c = READCHAR;
diff --git a/test/src/lread-tests.el b/test/src/lread-tests.el
index f2a60bcf32..4357c27ee0 100644
--- a/test/src/lread-tests.el
+++ b/test/src/lread-tests.el
@@ -28,6 +28,42 @@
 (require 'ert)
 (require 'ert-x)
 
+(ert-deftest lread-raw-string-1 ()
+  (should (string-equal
+           (read "#r\"\\(?:def\\(?:macro\\|un\\)\\)\"")
+           "\\(?:def\\(?:macro\\|un\\)\\)")))
+
+(ert-deftest lread-raw-string-2 ()
+  (should (string-equal
+           (read "#r\"\\n\"")
+           "\\n")))
+
+(ert-deftest lread-raw-string-usage-1 ()
+  (should (equal
+           (let ((str "(defmacro leaf () nil)"))
+             (string-match "(\\(def\\(?:macro\\|un\\)\\) \\([^ ]+\\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("defmacro" "leaf")))
+
+  (should (equal
+           (let ((str "(defmacro leaf () nil)"))
+             (string-match #r"(\(def\(?:macro\|un\)\) \([^ ]+\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("defmacro" "leaf"))))
+
+(ert-deftest lread-raw-string-usage-2 ()
+  (should (equal
+           (let ((str "(def\\macro leaf () nil)"))
+             (string-match "(\\(def\\\\macro\\) \\([^ ]+\\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("def\\macro" "leaf")))
+
+  (should (equal
+           (let ((str "(def\\macro leaf () nil)"))
+             (string-match #r"(\(def\macro\) \([^ ]+\)" str)
+             (list (match-string 1 str) (match-string 2 str)))
+           '("def\\macro" "leaf"))))
+
 (ert-deftest lread-char-number ()
   (should (equal (read "?\\N{U+A817}") #xA817)))
 
-- 
2.30.1


reply via email to

[Prev in Thread] Current Thread [Next in Thread]