[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[nongnu] elpa/parseclj 91c2ad82af 147/185: Add enough feature to be able
From: |
ELPA Syncer |
Subject: |
[nongnu] elpa/parseclj 91c2ad82af 147/185: Add enough feature to be able to parse clojure.core |
Date: |
Tue, 28 Dec 2021 14:05:30 -0500 (EST) |
branch: elpa/parseclj
commit 91c2ad82afa0c9d5c9a5ec7c44bb13b6047da6f1
Author: Arne Brasseur <arne@arnebrasseur.net>
Commit: Arne Brasseur <arne@arnebrasseur.net>
Add enough feature to be able to parse clojure.core
- metadata ^
- deref @...
- shorthand lambdas #(..)
- vars #'
- hex numbers
- regexes
---
parseclj-ast.el | 4 ++
parseclj-lex.el | 122 ++++++++++++++++++++++++++++++++++++++++------
parseclj-parser.el | 32 ++++++++++--
test/parseclj-lex-test.el | 32 +++++++++---
test/parseclj-test.el | 67 +++++++++++++++++++++++++
5 files changed, 234 insertions(+), 23 deletions(-)
diff --git a/parseclj-ast.el b/parseclj-ast.el
index 5719659068..dd3cc86475 100644
--- a/parseclj-ast.el
+++ b/parseclj-ast.el
@@ -133,6 +133,10 @@ on available options."
:tag (intern (substring (a-get
opening-token :form) 1))
:children children)
stack))
+ (:metadata (cons (parseclj-ast-node :with-meta
+ pos
+ :children children)
+ stack))
(t (cons
(parseclj-ast-node type pos :children children)
stack)))))
diff --git a/parseclj-lex.el b/parseclj-lex.el
index 2f5f8f57b4..6a53f55b05 100644
--- a/parseclj-lex.el
+++ b/parseclj-lex.el
@@ -36,6 +36,7 @@
:symbol
:keyword
:string
+ :regex
:character)
"Types of tokens that represent leaf nodes in the AST.")
@@ -44,6 +45,21 @@
:rbrace)
"Types of tokens that mark the end of a non-atomic form.")
+(defvar parseclj-lex--prefix-tokens '(:quote
+ :backquote
+ :unquote
+ :unquote-splice
+ :discard
+ :tag
+ :reader-conditional
+ :reader-conditional-splice
+ :var
+ :deref)
+ "Tokens that modify the form that follows.")
+
+(defvar parseclj-lex--prefix-2-tokens '(:metadata)
+ "Tokens that modify the two forms that follow.")
+
;; Token interface
(defun parseclj-lex-token (type form pos &rest attributes)
@@ -81,6 +97,11 @@ A token is an association list with :token-type as its first
key."
(and (consp token)
(cdr (assq :token-type token))))
+(defun parseclj-lex-token-form (token)
+ "Get the form of TOKEN."
+ (and (consp token)
+ (cdr (assq :form token))))
+
(defun parseclj-lex-leaf-token-p (token)
"Return t if the given AST TOKEN is a leaf node."
(member (parseclj-lex-token-type token) parseclj-lex--leaf-tokens))
@@ -89,6 +110,9 @@ A token is an association list with :token-type as its first
key."
"Return t if the given ast TOKEN is a closing token."
(member (parseclj-lex-token-type token) parseclj-lex--closing-tokens))
+(defun parseclj-lex-error-p (token)
+ "Return t if the TOKEN represents a lexing error token."
+ (eq (parseclj-lex-token-type token) :lex-error))
;; Elisp values from tokens
@@ -177,18 +201,32 @@ S goes through three transformations:
(<= (char-after (point)) ?9))
(right-char)))
+(defun parseclj-lex-skip-hex ()
+ "Skip all consecutive hex digits after point."
+ (while (and (char-after (point))
+ (or (<= ?0 (char-after (point)) ?9)
+ (<= ?a (char-after (point)) ?f)
+ (<= ?A (char-after (point)) ?F)))
+ (right-char)))
+
(defun parseclj-lex-skip-number ()
"Skip a number at point."
;; [\+\-]?\d+\.\d+
- (when (member (char-after (point)) '(?+ ?-))
- (right-char))
+ (if (and (eq ?0 (char-after (point)))
+ (eq ?x (char-after (1+ (point)))))
+ (progn
+ (right-char 2)
+ (parseclj-lex-skip-hex))
+ (progn
+ (when (member (char-after (point)) '(?+ ?-))
+ (right-char))
- (parseclj-lex-skip-digits)
+ (parseclj-lex-skip-digits)
- (when (eq (char-after (point)) ?.)
- (right-char))
+ (when (eq (char-after (point)) ?.)
+ (right-char))
- (parseclj-lex-skip-digits))
+ (parseclj-lex-skip-digits))))
(defun parseclj-lex-number ()
"Consume a number and return a `:number' token representing it."
@@ -270,22 +308,39 @@ are returned as their own lex tokens."
((equal sym "false") (parseclj-lex-token :false "false" pos))
(t (parseclj-lex-token :symbol sym pos))))))
-(defun parseclj-lex-string ()
- "Return a lex token representing a string.
-If EOF is reached without finding a closing double quote, a :lex-error
-token is returned."
+(defun parseclj-lex-string* ()
+ "Helper for string/regex lexing.
+Returns either the string, or an error token"
(let ((pos (point)))
(right-char)
(while (not (or (equal (char-after (point)) ?\") (parseclj-lex-at-eof-p)))
(if (equal (char-after (point)) ?\\)
(right-char 2)
(right-char)))
- (if (equal (char-after (point)) ?\")
- (progn
- (right-char)
- (parseclj-lex-token :string (buffer-substring-no-properties pos
(point)) pos))
+ (when (equal (char-after (point)) ?\")
+ (right-char)
+ (buffer-substring-no-properties pos (point)))))
+
+(defun parseclj-lex-string ()
+ "Return a lex token representing a string.
+If EOF is reached without finding a closing double quote, a :lex-error
+token is returned."
+ (let ((pos (point))
+ (str (parseclj-lex-string*)))
+ (if str
+ (parseclj-lex-token :string str pos)
(parseclj-lex-error-token pos :invalid-string))))
+(defun parseclj-lex-regex ()
+ "Return a lex token representing a regular expression.
+If EOF is reached without finding a closing double quote, a :lex-error
+token is returned."
+ (let ((pos (1- (point)))
+ (str (parseclj-lex-string*)))
+ (if str
+ (parseclj-lex-token :regex (concat "#" str) pos)
+ (parseclj-lex-error-token pos :invalid-regex))))
+
(defun parseclj-lex-lookahead (n)
"Return a lookahead string of N characters after point."
(buffer-substring-no-properties (point) (min (+ (point) n) (point-max))))
@@ -387,6 +442,22 @@ See `parseclj-lex-token'."
(right-char)
(parseclj-lex-token :rbrace "}" pos))
+ ((equal char ?')
+ (right-char)
+ (parseclj-lex-token :quote "'" pos))
+
+ ((equal char ?`)
+ (right-char)
+ (parseclj-lex-token :backquote "'" pos))
+
+ ((equal char ?~)
+ (right-char)
+ (if (eq ?@ (char-after (point)))
+ (progn
+ (right-char)
+ (parseclj-lex-token :unquote-splice "~@" pos))
+ (parseclj-lex-token :unquote "~" pos)))
+
((parseclj-lex-at-number-p)
(parseclj-lex-number))
@@ -405,6 +476,14 @@ See `parseclj-lex-token'."
((equal char ?\;)
(parseclj-lex-comment))
+ ((equal char ?^)
+ (right-char)
+ (parseclj-lex-token :metadata "^" pos))
+
+ ((equal char ?@)
+ (right-char)
+ (parseclj-lex-token :deref "@" pos))
+
((equal char ?#)
(right-char)
(let ((char (char-after (point))))
@@ -415,6 +494,21 @@ See `parseclj-lex-token'."
((equal char ?_)
(right-char)
(parseclj-lex-token :discard "#_" pos))
+ ((equal char ?\()
+ (right-char)
+ (parseclj-lex-token :lambda "#(" pos))
+ ((equal char ?')
+ (right-char)
+ (parseclj-lex-token :var "#'" pos))
+ ((equal char ?\")
+ (parseclj-lex-regex))
+ ((equal char ?\?)
+ (right-char)
+ (if (eq ?@ (char-after (point)))
+ (progn
+ (right-char)
+ (parseclj-lex-token :reader-conditional-splice "#?@" pos))
+ (parseclj-lex-token :reader-conditional "#?" pos)))
((parseclj-lex-symbol-start-p char t)
(right-char)
(parseclj-lex-token :tag (concat "#"
(parseclj-lex-get-symbol-at-point (1+ pos))) pos))
diff --git a/parseclj-parser.el b/parseclj-parser.el
index 34ed9d4694..d16f504a8c 100644
--- a/parseclj-parser.el
+++ b/parseclj-parser.el
@@ -47,7 +47,11 @@ can be handled with `condition-case'."
(defun parseclj--find-opening-token (stack closing-token)
"Scan STACK for an opening-token matching CLOSING-TOKEN."
(cl-case (parseclj-lex-token-type closing-token)
- (:rparen :lparen)
+ (:rparen (parseclj-lex-token-type
+ (seq-find (lambda (token)
+ (member (parseclj-lex-token-type token)
+ '(:lparen :lambda)))
+ stack)))
(:rbracket :lbracket)
(:rbrace (parseclj-lex-token-type
(seq-find (lambda (token)
@@ -192,6 +196,11 @@ functions. Additionally the following options are
recognized
;; (message "STACK: %S" stack)
;; (message "TOKEN: %S\n" token)
+ (when (and fail-fast (parseclj-lex-error-p token))
+ (parseclj--error "Invalid token at %s: %S"
+ (a-get token :pos)
+ (parseclj-lex-token-form token)))
+
;; Reduce based on the top item on the stack (collections)
(cond
((parseclj-lex-leaf-token-p token)
@@ -204,7 +213,7 @@ functions. Additionally the following options are recognized
;; Reduce based on top two items on the stack (special prefixed elements)
(let* ((top-value (parseclj--take-value stack value-p))
- (opening-token (parseclj--take-token (nthcdr (length top-value)
stack) value-p '(:discard :tag)))
+ (opening-token (parseclj--take-token (nthcdr (length top-value)
stack) value-p parseclj-lex--prefix-tokens))
new-stack)
(while (and top-value opening-token)
;; (message "Reducing...")
@@ -214,8 +223,25 @@ functions. Additionally the following options are
recognized
(setq new-stack (nthcdr (+ (length top-value) (length
opening-token)) stack))
(setq stack (funcall reduce-branch new-stack (car opening-token)
(append (cdr opening-token) top-value) options))
+ ;; recur
(setq top-value (parseclj--take-value stack value-p))
- (setq opening-token (parseclj--take-token (nthcdr (length top-value)
stack) value-p '(:discard :tag)))))
+ (setq opening-token (parseclj--take-token (nthcdr (length top-value)
stack) value-p parseclj-lex--prefix-tokens))))
+
+ ;; Reduce based on top three items on the stack (metadata, namespaced
maps)
+ (let* ((top-value-1 (parseclj--take-value stack value-p))
+ (top-value-2 (parseclj--take-value (nthcdr (length top-value-1)
stack) value-p))
+ (opening-token (parseclj--take-token (nthcdr (+ (length
top-value-1)
+ (length
top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))
+ new-stack)
+ (while (and top-value-1 top-value-2 opening-token)
+ (setq new-stack (nthcdr (apply #'+ (mapcar #'length (list
top-value-1 top-value-2 opening-token))) stack))
+ (setq stack (funcall reduce-branch new-stack (car opening-token)
(append (cdr opening-token) top-value-1 top-value-2) options))
+
+ ;; recur
+ (setq top-value-1 (parseclj--take-value stack value-p))
+ (setq top-value-2 (parseclj--take-value (nthcdr (length top-value-1)
stack) value-p))
+ (setq opening-token (parseclj--take-token (nthcdr (+ (length
top-value-1)
+ (length
top-value-2)) stack) value-p parseclj-lex--prefix-2-tokens))))
(setq token (parseclj-lex-next)))
diff --git a/test/parseclj-lex-test.el b/test/parseclj-lex-test.el
index 183c6caa42..10cdac81d3 100644
--- a/test/parseclj-lex-test.el
+++ b/test/parseclj-lex-test.el
@@ -52,6 +52,25 @@
(:form . "123e34M")
(:pos . 1)))))
+ (with-temp-buffer
+ (insert "0xff00AA")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next) '((:token-type . :number)
+ (:form . "0xff00AA")
+ (:pos . 1)))))
+
+ (with-temp-buffer
+ (insert "#?(:clj 1 :cljs 2)")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next)
+ '((:token-type . :reader-conditional) (:form . "#?") (:pos
. 1)))))
+
+ (with-temp-buffer
+ (insert "#?@(:clj [1] :cljs [2])")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next)
+ '((:token-type . :reader-conditional-splice) (:form .
"#?@") (:pos . 1)))))
+
(with-temp-buffer
(insert "123x")
(goto-char 1)
@@ -203,12 +222,7 @@
(should (equal (parseclj-lex-next) (parseclj-lex-token :number "13" 18)))
(should (equal (parseclj-lex-next) (parseclj-lex-token :whitespace " "
20)))
(should (equal (parseclj-lex-next) (parseclj-lex-token :number "14" 21)))
- (should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23))))
-
- (with-temp-buffer
- (insert "~")
- (goto-char 1)
- (should (equal (parseclj-lex-next) (parseclj-lex-token :lex-error "~"
1)))))
+ (should (equal (parseclj-lex-next) (parseclj-lex-token :rparen ")" 23)))))
(ert-deftest parseclj-lex-test-at-number-p ()
(dolist (str '("123" ".9" "+1" "0" "-456"))
@@ -295,6 +309,12 @@
(goto-char 1)
(should (equal (parseclj-lex-string) (parseclj-lex-token :string
"\"abc\\\"\"" 1)))))
+(ert-deftest parseclj-lex-test-regex ()
+ (with-temp-buffer
+ (insert "#\"abc\"")
+ (goto-char 1)
+ (should (equal (parseclj-lex-next) (parseclj-lex-token :regex "#\"abc\""
1)))))
+
(ert-deftest parseclj-lex-test-tag ()
(with-temp-buffer
(insert "#inst")
diff --git a/test/parseclj-test.el b/test/parseclj-test.el
index 190922abb1..1b8e795d52 100644
--- a/test/parseclj-test.el
+++ b/test/parseclj-test.el
@@ -185,6 +185,73 @@
((:node-type . :whitespace) (:position .
6) (:form . " "))
((:node-type . :number) (:position . 7)
(:form . "5") (:value . 5)))))))))
+(ert-deftest parseclj--parse-reader-conditionals-test ()
+ (should (equal
+ (parseclj-parse-clojure "#?(:clj 1 :cljs 2)")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:node-type . :reader-conditional)
+ (:position . 1)
+ (:children ((:node-type . :list)
+ (:position . 3)
+ (:children ((:node-type . :keyword)
+ (:position . 4)
+ (:form . ":clj")
+ (:value . :clj))
+ ((:node-type . :number)
+ (:position . 9)
+ (:form . "1")
+ (:value . 1))
+ ((:node-type . :keyword)
+ (:position . 11)
+ (:form . ":cljs")
+ (:value . :cljs))
+ ((:node-type . :number)
+ (:position . 17)
+ (:form . "2")
+ (:value . 2)))))))))))
+
+(ert-deftest parseclj--parse-metadata-test ()
+ (should (equal
+ (parseclj-parse-clojure "^{} [123]")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:node-type . :with-meta)
+ (:position . 1)
+ (:children ((:node-type . :vector)
+ (:position . 5)
+ (:children ((:node-type . :number)
+ (:position . 6)
+ (:form . "123")
+ (:value . 123))))
+ ((:node-type . :map)
+ (:position . 2)
+ (:children)))))))))
+
+(ert-deftest parseclj--parse-var-test ()
+ (should (equal
+ (parseclj-parse-clojure "#'foo")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:node-type . :var)
+ (:position . 1)
+ (:children ((:node-type . :symbol)
+ (:position . 3)
+ (:form . "foo")
+ (:value . foo)))))))))
+
+(ert-deftest parseclj--parse-lambda-test ()
+ (should (equal
+ (parseclj-parse-clojure "#(foo)")
+ '((:node-type . :root)
+ (:position . 1)
+ (:children ((:node-type . :lambda)
+ (:position . 1)
+ (:children ((:node-type . :symbol)
+ (:position . 3)
+ (:form . "foo")
+ (:value . foo)))))))))
+
(ert-deftest parseclj--take-token-test ()
(should (equal
(parseclj--take-token
- [nongnu] elpa/parseclj d6525d30c6 111/185: Merge pull request #10 from lambdaisland/fix-ast-reduce-tag, (continued)
- [nongnu] elpa/parseclj d6525d30c6 111/185: Merge pull request #10 from lambdaisland/fix-ast-reduce-tag, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 0ef32ad912 120/185: Add support for having single quotes in symbols/keywords, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 0afb8c5f09 123/185: Add `parseclj-lex-error-token` helper, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj b2588ce0fb 126/185: Use EVM to setup Travis CI, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 46cfcd3120 129/185: Merge pull request #16 from lambdaisland/parseclj-lex-error-token, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 61577603f2 136/185: Update README.md with installation and usage information, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj a82f229014 139/185: Ignore *.elc files, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 7fba1624e0 142/185: Update DESIGN.md to name difference between parseclj and parseedn, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 3693dd148f 145/185: Fix Cask file, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj af6102c4a8 146/185: Minor fix to README file, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 91c2ad82af 147/185: Add enough feature to be able to parse clojure.core,
ELPA Syncer <=
- [nongnu] elpa/parseclj b40670a561 149/185: Add namespaced maps, fix backquote implementation, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj ff0443c27c 151/185: Add :read-one option, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 40e36c41eb 006/185: alist-get was only introduced in 25.1? :sadpanda:, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 6f36bbf6b4 009/185: Try to set up a build matrix, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj d4286f9071 010/185: Give README the right extension, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj ce7ad0e427 025/185: implement strings, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj a08b85ffa8 032/185: Implement parsing maps, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 60fd8eb965 053/185: Copy tests from edn.el, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 212e0dc42b 063/185: silly typo, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 96b8180987 087/185: Unparse ASTs that have lexical preservation., ELPA Syncer, 2021/12/28