[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[nongnu] elpa/parseclj 0f16fcf2fa 084/185: Implement parsing with lexica
From: |
ELPA Syncer |
Subject: |
[nongnu] elpa/parseclj 0f16fcf2fa 084/185: Implement parsing with lexical preservation (keep whitespace, comments) |
Date: |
Tue, 28 Dec 2021 14:05:20 -0500 (EST) |
branch: elpa/parseclj
commit 0f16fcf2fafdf2ba741f59e7fc561d605be92a4b
Author: Arne Brasseur <arne@arnebrasseur.net>
Commit: Arne Brasseur <arne@arnebrasseur.net>
Implement parsing with lexical preservation (keep whitespace, comments)
---
parseclj-ast.el | 44 +++++++++++++++++++++++++++++++++-----------
parseclj.el | 14 ++++++++++----
test/parseclj-test.el | 35 +++++++++++++++++++++++++++++++++--
3 files changed, 76 insertions(+), 17 deletions(-)
diff --git a/parseclj-ast.el b/parseclj-ast.el
index 0632fa35e8..29a0207d5e 100644
--- a/parseclj-ast.el
+++ b/parseclj-ast.el
@@ -30,18 +30,33 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;; Parser
-(defun parseclj--make-node (type position &rest kvs)
+(defun parseclj-ast--node (type position &rest kvs)
(apply 'a-list ':node-type type ':position position kvs))
(defun parseclj-ast--reduce-leaf (stack token)
- (if (eq (parseclj-lex-token-type token) :whitespace)
+ (if (member (parseclj-lex-token-type token) '(:whitespace :comment))
stack
(cons
- (parseclj--make-node (parseclj-lex-token-type token) (a-get token 'pos)
- ':form (a-get token 'form)
- ':value (parseclj--leaf-token-value token))
+ (parseclj-ast--node (parseclj-lex-token-type token)
+ (a-get token 'pos)
+ ':form (a-get token 'form)
+ ':value (parseclj--leaf-token-value token))
stack)))
+(defun parseclj-ast--reduce-leaf-with-lexical-preservation (stack token)
+ (let ((token-type (parseclj-lex-token-type token))
+ (top (car stack)))
+ (if (member token-type '(:whitespace :comment))
+ ;; merge consecutive whitespace or comment tokens
+ (if (eq token-type (a-get top :node-type))
+ (cons (a-update top :form #'concat (a-get token 'form))
+ (cdr stack))
+ (cons (parseclj-ast--node (parseclj-lex-token-type token)
+ (a-get token 'pos)
+ ':form (a-get token 'form))
+ stack))
+ (parseclj-ast--reduce-leaf stack token))))
+
(defun parseclj-ast--reduce-branch (stack opener-token children)
(let* ((pos (a-get opener-token 'pos))
(type (parseclj-lex-token-type opener-token))
@@ -51,16 +66,23 @@
(:lbrace :map)
(t type))))
(cl-case type
- (:root (parseclj--make-node :root 0 :children children))
+ (:root (parseclj-ast--node :root 0 :children children))
(:discard stack)
- (:tag (list (parseclj--make-node :tag
- pos
- :tag (intern (substring (a-get
opener-token 'form) 1))
- :children children)))
+ (:tag (list (parseclj-ast--node :tag
+ pos
+ :tag (intern (substring (a-get
opener-token 'form) 1))
+ :children children)))
(t (cons
- (parseclj--make-node type pos :children children)
+ (parseclj-ast--node type pos :children children)
stack)))))
+(defun parseclj-ast--reduce-branch-with-lexical-preservation (&rest args)
+ (let ((node (apply #'parseclj-ast--reduce-branch args)))
+ (cl-list*
+ (car node) ;; make sure :node-type remains the first element in the list
+ '(:lexical-preservation . t)
+ (cdr node))))
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Unparser
diff --git a/parseclj.el b/parseclj.el
index d4cf1fefa0..9791b11eaa 100644
--- a/parseclj.el
+++ b/parseclj.el
@@ -179,7 +179,7 @@ functions.
(setf stack (funcall reduce-branch (cddr stack) lookup (list
top))))))
;; reduce root
- (setf stack (funcall reduce-branch stack '((type . :root) (pos . 1))
stack))
+ (setf stack (funcall reduce-branch stack '((type . :root) (pos . 1))
(reverse stack)))
;; (message "RESULT: %S" stack)
stack))
@@ -204,9 +204,15 @@ key-value pairs to specify parsing options.
(insert (car string-and-options))
(goto-char 1)
(apply 'parseclj-parse-clojure (cdr string-and-options)))
- (parseclj-parse #'parseclj-ast--reduce-leaf
- #'parseclj-ast--reduce-branch
- (apply 'a-list string-and-options))))
+ (let* ((options (apply 'a-list string-and-options))
+ (lexical? (a-get options :lexical-preservation)))
+ (parseclj-parse (if lexical?
+ #'parseclj-ast--reduce-leaf-with-lexical-preservation
+ #'parseclj-ast--reduce-leaf)
+ (if lexical?
+
#'parseclj-ast--reduce-branch-with-lexical-preservation
+ #'parseclj-ast--reduce-branch)
+ options))))
(provide 'parseclj)
diff --git a/test/parseclj-test.el b/test/parseclj-test.el
index a20695a803..ae623a4c88 100644
--- a/test/parseclj-test.el
+++ b/test/parseclj-test.el
@@ -30,8 +30,39 @@
(require 'ert)
(require 'parseclj)
-;; needs testing of individual functions. all testing now is at the top level
-;; through parse/unparse
+(ert-deftest parseclj-parse-clojure-with-lexical-preservation-test ()
+ (should (equal
+ (parseclj-parse-clojure ";; foo\nbar")
+ '((:node-type . :root)
+ (:position . 0)
+ (:children ((:node-type . :symbol)
+ (:position . 8)
+ (:form . "bar")
+ (:value . bar))))))
+ (should (equal
+ (parseclj-parse-clojure ";; foo\nbar" :lexical-preservation t)
+ '((:node-type . :root)
+ (:lexical-preservation . t)
+ (:position . 0)
+ (:children ((:node-type . :comment)
+ (:position . 1)
+ (:form . ";; foo\n"))
+ ((:node-type . :symbol)
+ (:position . 8)
+ (:form . "bar")
+ (:value . bar))))))
+ (should (equal
+ (parseclj-parse-clojure ";; foo\n;;baz\nbar" :lexical-preservation
t)
+ '((:node-type . :root)
+ (:lexical-preservation . t)
+ (:position . 0)
+ (:children ((:node-type . :comment)
+ (:position . 1)
+ (:form . ";; foo\n;;baz\n"))
+ ((:node-type . :symbol)
+ (:position . 14)
+ (:form . "bar")
+ (:value . bar)))))))
(provide 'parseclj-test)
- [nongnu] elpa/parseclj a424f87c1b 054/185: :#/# is a valid keyword. :::hello is not, (continued)
- [nongnu] elpa/parseclj a424f87c1b 054/185: :#/# is a valid keyword. :::hello is not, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj f34b8460a8 055/185: Correctly parse numbers in scientific notation, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 6fe4ce6095 058/185: Add script to compare speed of edn.el and clj-parse.el, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 7d70ee4c38 059/185: Add support for tagged literals, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 4367db07aa 061/185: Move tests to tests/, bench to benchmark/, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 46570ce843 060/185: A # can be part of a symbol, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj fcd1a086f6 081/185: Rename clj-ast to parseclj-ast, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 987bd16a57 089/185: Position starts at 1, not at 0 (just like (point)), ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 313fc4e630 077/185: Rename to parseclj, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 7f8bcd405e 095/185: Make checkdoc happy with parseclj.el, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 0f16fcf2fa 084/185: Implement parsing with lexical preservation (keep whitespace, comments),
ELPA Syncer <=
- [nongnu] elpa/parseclj ca854455f6 099/185: Add missing requirement to `parseedn`, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj adbc0cabf5 100/185: Move `parseclj--{leaf, closing}-tokens` to `parseclj-lex` module, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 7794d9ee59 101/185: Add docstrings for `parseclj-lex` module, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 296a093132 102/185: Remove `parseclj-unparse`, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj d6525d30c6 111/185: Merge pull request #10 from lambdaisland/fix-ast-reduce-tag, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 0ef32ad912 120/185: Add support for having single quotes in symbols/keywords, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 0afb8c5f09 123/185: Add `parseclj-lex-error-token` helper, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj b2588ce0fb 126/185: Use EVM to setup Travis CI, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 46cfcd3120 129/185: Merge pull request #16 from lambdaisland/parseclj-lex-error-token, ELPA Syncer, 2021/12/28
- [nongnu] elpa/parseclj 61577603f2 136/185: Update README.md with installation and usage information, ELPA Syncer, 2021/12/28