[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
master 4339e70a942 1/3: Make sure treesit-parse-string gc its temp buffer (bug#71012)
From: |
Yuan Fu |
Subject: |
master 4339e70a942 1/3: Make sure treesit-parse-string gc its temp buffer (bug#71012) |
Date: |
Sat, 24 Aug 2024 18:27:31 -0400 (EDT) |
branch: master
commit 4339e70a942770ce4e17d16a9708e4bdf5630514
Author: Yuan Fu <casouri@gmail.com>
Commit: Yuan Fu <casouri@gmail.com>
Make sure treesit-parse-string gc its temp buffer (bug#71012)
* doc/lispref/parsing.texi (Using Parser): Add notice.
* lisp/treesit.el (treesit-parse-string): Remove function.
* src/treesit.c (make_treesit_parser): Init the new field.
(treesit_delete_parser): Collect the temp buffer.
(Ftreesit_parse_string): New function.
* src/treesit.h (Lisp_TS_Parser): New field.
---
doc/lispref/parsing.texi | 6 ++++--
lisp/treesit.el | 11 -----------
src/treesit.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
src/treesit.h | 4 ++++
4 files changed, 55 insertions(+), 14 deletions(-)
diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi
index ddf02d9283b..20b1085b46c 100644
--- a/doc/lispref/parsing.texi
+++ b/doc/lispref/parsing.texi
@@ -486,8 +486,10 @@ string. Unlike a buffer, parsing a string is a one-off operation, and
there is no way to update the result.
@defun treesit-parse-string string language
-This function parses @var{string} using @var{language}, and returns
-the root node of the generated syntax tree.
+This function parses @var{string} using @var{language}, and returns the
+root node of the generated syntax tree. @emph{Do not} use this function
+in a loop: this is a convenience function intended for one-off use, and
+it isn't optimized; for heavy workload, use a temporary buffer instead.
@end defun
@heading Be notified by changes to the parse tree
diff --git a/lisp/treesit.el b/lisp/treesit.el
index c91864725da..86dc4733d37 100644
--- a/lisp/treesit.el
+++ b/lisp/treesit.el
@@ -123,17 +123,6 @@ of max unsigned 32-bit value for byte offsets into buffer text."
;;; Parser API supplement
-(defun treesit-parse-string (string language)
- "Parse STRING using a parser for LANGUAGE.
-Return the root node of the syntax tree."
- ;; We can't use `with-temp-buffer' because it kills the buffer when
- ;; returning from the form.
- (let ((buf (generate-new-buffer " *treesit-parse-string*")))
- (with-current-buffer buf
- (insert string)
- (treesit-parser-root-node
- (treesit-parser-create language)))))
-
(defvar-local treesit-language-at-point-function nil
"A function that returns the language at point.
This is used by `treesit-language-at', which is used by various
diff --git a/src/treesit.c b/src/treesit.c
index 27779692923..a41892b1cac 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -1181,6 +1181,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
lisp_parser->timestamp = 0;
lisp_parser->deleted = false;
+ lisp_parser->need_to_gc_buffer = false;
eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
}
@@ -1220,6 +1221,8 @@ make_treesit_query (Lisp_Object query, Lisp_Object language)
void
treesit_delete_parser (struct Lisp_TS_Parser *lisp_parser)
{
+ if (lisp_parser->need_to_gc_buffer)
+ Fkill_buffer (lisp_parser->buffer);
ts_tree_delete (lisp_parser->tree);
ts_parser_delete (lisp_parser->parser);
}
@@ -1859,6 +1862,49 @@ positions. PARSER is the parser issuing the notification. */)
return Qnil;
}
+// Why don't we use ts_parse_string? I tried, but it requires too much
+// change throughout treesit.c: we either return a root node that has no
+// associated parser, or one that has a parser but the parser doesn't
+// have an associated buffer. Both routes require us to add checks and
+// branches every time we use the parser of a node or the buffer of a
+// parser. I tried route 1, and found that on top of needing to add a
+// bunch of branches to handle the no-parser case, many functions
+// require a parser alongside the node (getting the tree, or language
+// symbol, etc), and I would need to rewrite those as well. Overall
+// it's just not worth it--this is just a convenience function. --yuan
+DEFUN ("treesit-parse-string",
+ Ftreesit_parse_string, Streesit_parse_string,
+ 2, 2, 0,
+ doc: /* Parse STRING using a parser for LANGUAGE.
+
+Return the root node of the result parse tree. DO NOT use this function
+in a loop: this function is intended for one-off use and isn't
+optimized; for heavy workload, use a temporary buffer instead. */)
+ (Lisp_Object string, Lisp_Object language)
+{
+ CHECK_SYMBOL (language);
+ CHECK_STRING (string);
+
+ Lisp_Object name_str = build_string (" *treesit-parse-string*");
+ Lisp_Object buffer_name = Fgenerate_new_buffer_name (name_str, Qnil);
+ Lisp_Object buffer = Fget_buffer_create (buffer_name, Qnil);
+
+ struct buffer *old_buffer = current_buffer;
+ set_buffer_internal (XBUFFER (buffer));
+ insert1 (string);
+ set_buffer_internal (old_buffer);
+
+ Lisp_Object parser = Ftreesit_parser_create (language, buffer, Qt, Qnil);
+ XTS_PARSER (parser)->need_to_gc_buffer = true;
+
+ /* Make sure the temp buffer doesn't reference the parser, otherwise
+ the buffer and parser cross-reference each other and the parser is
+ never garbage-collected. */
+ BVAR (XBUFFER (buffer), ts_parser_list) = Qnil;
+
+ return Ftreesit_parser_root_node (parser);
+}
+
/*** Node API */
@@ -4245,7 +4291,7 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B instead. */);
defsubr (&Streesit_parser_tag);
defsubr (&Streesit_parser_root_node);
- /* defsubr (&Streesit_parse_string); */
+ defsubr (&Streesit_parse_string);
defsubr (&Streesit_parser_set_included_ranges);
defsubr (&Streesit_parser_included_ranges);
diff --git a/src/treesit.h b/src/treesit.h
index 3da4cc155ea..cd84fa358c5 100644
--- a/src/treesit.h
+++ b/src/treesit.h
@@ -82,6 +82,10 @@ struct Lisp_TS_Parser
/* If this field is true, parser functions raises
treesit-parser-deleted signal. */
bool deleted;
+ /* If this field is true, deleting the parser should also delete the
+ associated buffer. This is for parsers created by
+ treesit-parse-string, which uses a hidden temp buffer. */
+ bool need_to_gc_buffer;
};
/* A wrapper around a tree-sitter node. */