guile-commits
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Guile-commits] 02/03: PEG: string-peg: Better support for escaping.


From: Ludovic Courtès
Subject: [Guile-commits] 02/03: PEG: string-peg: Better support for escaping.
Date: Sun, 22 Dec 2024 16:19:00 -0500 (EST)

civodul pushed a commit to branch main
in repository guile.

commit 38ad264979b06535200ed59dcc80b30cb02777fc
Author: Ekaitz Zarraga <ekaitz@elenq.tech>
AuthorDate: Sun Dec 22 21:01:07 2024 +0100

    PEG: string-peg: Better support for escaping.
    
    * module/ice-9/peg/string-peg.scm (peg-as-peg): Augment with rules for
    hexadecimal digits, “\uXXXX” for characters, “\t” for tabs, etc.
    
    Signed-off-by: Ludovic Courtès <ludo@gnu.org>
---
 module/ice-9/peg/string-peg.scm | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/module/ice-9/peg/string-peg.scm b/module/ice-9/peg/string-peg.scm
index 745d8e8e7..9891f2ae5 100644
--- a/module/ice-9/peg/string-peg.scm
+++ b/module/ice-9/peg/string-peg.scm
@@ -67,9 +67,10 @@ Literal <-- SQUOTE (!SQUOTE Char)* SQUOTE Spacing
 NotInClass <-- OPENBRACKET NOTIN  (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing
 Class <-- OPENBRACKET !NOTIN  (!CLOSEBRACKET Range)* CLOSEBRACKET Spacing
 Range <-- Char DASH Char / Char
-Char <-- '\\\\' [nrt'\"\\[\\]\\\\]
+Char <-- '\\\\' [nrtf'\"\\[\\]\\\\]
        / '\\\\' [0-7][0-7][0-7]
        / '\\\\' [0-7][0-7]?
+       / '\\\\' 'u' HEX HEX HEX HEX
        / !'\\\\' .
 
 # NOTE: `<--` and `<` are extensions
@@ -79,6 +80,7 @@ DQUOTE < [\"]
 DASH < '-'
 OPENBRACKET < '['
 CLOSEBRACKET < ']'
+HEX <- [0-9a-fA-F]
 NOTIN < '^'
 SLASH < '/' Spacing
 AND <-- '&' Spacing
@@ -92,7 +94,7 @@ DOT <-- '.' Spacing
 
 Spacing < (Space / Comment)*
 Comment < '#' (!EndOfLine .)* EndOfLine
-Space < ' ' / '\t' / EndOfLine
+Space < ' ' / '\\t' / EndOfLine
 EndOfLine < '\\r\\n' / '\\n' / '\\r'
 EndOfFile < !.
 ")
@@ -144,12 +146,15 @@ EndOfFile < !.
 (define-sexp-parser Range all
   (or (and Char DASH Char) Char))
 (define-sexp-parser Char all
-  (or (and "\\" (or "n" "r" "t" "'" "\"" "[" "]" "\\"))
+  (or (and "\\" (or "n" "r" "t" "f" "'" "\"" "[" "]" "\\"))
       (and "\\" (range #\0 #\7) (range #\0 #\7) (range #\0 #\7))
       (and "\\" (range #\0 #\7) (? (range #\0 #\7)))
+      (and "\\" "u" HEX HEX HEX HEX)
       (and (not-followed-by "\\") peg-any)))
 (define-sexp-parser LEFTARROW body
   (and (or "<--" "<-" "<") Spacing)) ; NOTE: <-- and < are extensions
+(define-sexp-parser HEX body
+  (or (range #\0 #\9) (range #\a #\f) (range #\A #\F)))
 (define-sexp-parser NOTIN none
   (and "^"))
 (define-sexp-parser SLASH none
@@ -372,12 +377,27 @@ EndOfFile < !.
                      (* (- (char->integer x) (char->integer #\0)) y))
                    (reverse (string->list charstr 1))
                    '(1 8 64)))))
+      ((char=? #\u (string-ref charstr 1))
+       (integer->char
+         (reduce + 0
+                 (map
+                   (lambda (x y)
+                     (* (cond
+                          ((char-numeric? x)
+                           (- (char->integer x) (char->integer #\0)))
+                          ((char-alphabetic? x)
+                           (+ 10 (- (char->integer x) (char->integer #\a)))))
+                        y))
+                   (reverse (string->list (string-downcase charstr) 2))
+                   '(1 16 256 4096)))))
       (else
         (case (string-ref charstr 1)
           ((#\n) #\newline)
           ((#\r) #\return)
           ((#\t) #\tab)
+          ((#\f) #\page)
           ((#\') #\')
+          ((#\") #\")
           ((#\]) #\])
           ((#\\) #\\)
           ((#\[) #\[))))))



reply via email to

[Prev in Thread] Current Thread [Next in Thread]