emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

master 4339e70a942 1/3: Make sure treesit-parse-string gc its temp buffer (bug#71012)


From: Yuan Fu
Subject: master 4339e70a942 1/3: Make sure treesit-parse-string gc its temp buffer (bug#71012)
Date: Sat, 24 Aug 2024 18:27:31 -0400 (EDT)

branch: master
commit 4339e70a942770ce4e17d16a9708e4bdf5630514
Author: Yuan Fu <casouri@gmail.com>
Commit: Yuan Fu <casouri@gmail.com>

    Make sure treesit-parse-string gc its temp buffer (bug#71012)
    
    * doc/lispref/parsing.texi (Using Parser): Add notice.
    * lisp/treesit.el (treesit-parse-string): Remove function.
    * src/treesit.c (make_treesit_parser): Init the new field.
    (treesit_delete_parser): Collect the temp buffer.
    (Ftreesit_parse_string): New function.
    * src/treesit.h (Lisp_TS_Parser): New field.
---
 doc/lispref/parsing.texi |  6 ++++--
 lisp/treesit.el          | 11 -----------
 src/treesit.c            | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 src/treesit.h            |  4 ++++
 4 files changed, 55 insertions(+), 14 deletions(-)

diff --git a/doc/lispref/parsing.texi b/doc/lispref/parsing.texi
index ddf02d9283b..20b1085b46c 100644
--- a/doc/lispref/parsing.texi
+++ b/doc/lispref/parsing.texi
@@ -486,8 +486,10 @@ string.  Unlike a buffer, parsing a string is a one-off 
operation, and
 there is no way to update the result.
 
 @defun treesit-parse-string string language
-This function parses @var{string} using @var{language}, and returns
-the root node of the generated syntax tree.
+This function parses @var{string} using @var{language}, and returns the
+root node of the generated syntax tree.  @emph{Do not} use this function
+in a loop: this is a convenience function intended for one-off use, and
+it isn't optimized; for heavy workload, use a temporary buffer instead.
 @end defun
 
 @heading Be notified by changes to the parse tree
diff --git a/lisp/treesit.el b/lisp/treesit.el
index c91864725da..86dc4733d37 100644
--- a/lisp/treesit.el
+++ b/lisp/treesit.el
@@ -123,17 +123,6 @@ of max unsigned 32-bit value for byte offsets into buffer 
text."
 
 ;;; Parser API supplement
 
-(defun treesit-parse-string (string language)
-  "Parse STRING using a parser for LANGUAGE.
-Return the root node of the syntax tree."
-  ;; We can't use `with-temp-buffer' because it kills the buffer when
-  ;; returning from the form.
-  (let ((buf (generate-new-buffer " *treesit-parse-string*")))
-    (with-current-buffer buf
-      (insert string)
-      (treesit-parser-root-node
-       (treesit-parser-create language)))))
-
 (defvar-local treesit-language-at-point-function nil
   "A function that returns the language at point.
 This is used by `treesit-language-at', which is used by various
diff --git a/src/treesit.c b/src/treesit.c
index 27779692923..a41892b1cac 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -1181,6 +1181,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
   lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
   lisp_parser->timestamp = 0;
   lisp_parser->deleted = false;
+  lisp_parser->need_to_gc_buffer = false;
   eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
   return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
 }
@@ -1220,6 +1221,8 @@ make_treesit_query (Lisp_Object query, Lisp_Object 
language)
 void
 treesit_delete_parser (struct Lisp_TS_Parser *lisp_parser)
 {
+  if (lisp_parser->need_to_gc_buffer)
+    Fkill_buffer (lisp_parser->buffer);
   ts_tree_delete (lisp_parser->tree);
   ts_parser_delete (lisp_parser->parser);
 }
@@ -1859,6 +1862,49 @@ positions.  PARSER is the parser issuing the 
notification.   */)
   return Qnil;
 }
 
+// Why don't we use ts_parser_parse_string?  I tried, but it requires too much
+// change throughout treesit.c: we either return a root node that has no
+// associated parser, or one that has a parser but the parser doesn't
+// have an associated buffer.  Both routes require us to add checks and
+// branches every time we use the parser of a node or the buffer of a
+// parser.  I tried route 1, and found that on top of needing to add a
+// bunch of branches to handle the no-parser case, many functions
+// require a parser alongside the node (getting the tree, or language
+// symbol, etc), and I would need to rewrite those as well.  Overall
+// it's just not worth it--this is just a convenience function. --yuan
+DEFUN ("treesit-parse-string",
+       Ftreesit_parse_string, Streesit_parse_string,
+       2, 2, 0,
+       doc: /* Parse STRING using a parser for LANGUAGE.
+
+Return the root node of the result parse tree.  DO NOT use this function
+in a loop: this function is intended for one-off use and isn't
+optimized; for heavy workload, use a temporary buffer instead.  */)
+  (Lisp_Object string, Lisp_Object language)
+{
+  CHECK_SYMBOL (language);
+  CHECK_STRING (string);
+
+  Lisp_Object name_str = build_string (" *treesit-parse-string*");
+  Lisp_Object buffer_name = Fgenerate_new_buffer_name (name_str, Qnil);
+  Lisp_Object buffer = Fget_buffer_create (buffer_name, Qnil);
+
+  struct buffer *old_buffer = current_buffer;
+  set_buffer_internal (XBUFFER (buffer));
+  insert1 (string);
+  set_buffer_internal (old_buffer);
+
+  Lisp_Object parser = Ftreesit_parser_create (language, buffer, Qt, Qnil);
+  XTS_PARSER (parser)->need_to_gc_buffer = true;
+
+  /* Make sure the temp buffer doesn't reference the parser, otherwise
+     the buffer and parser cross-reference each other and the parser is
+     never garbage-collected.  */
+  BVAR (XBUFFER (buffer), ts_parser_list) = Qnil;
+
+  return Ftreesit_parser_root_node (parser);
+}
+
 
 /*** Node API  */
 
@@ -4245,7 +4291,7 @@ applies to LANGUAGE-A will be redirected to LANGUAGE-B 
instead.  */);
   defsubr (&Streesit_parser_tag);
 
   defsubr (&Streesit_parser_root_node);
-  /* defsubr (&Streesit_parse_string); */
+  defsubr (&Streesit_parse_string);
 
   defsubr (&Streesit_parser_set_included_ranges);
   defsubr (&Streesit_parser_included_ranges);
diff --git a/src/treesit.h b/src/treesit.h
index 3da4cc155ea..cd84fa358c5 100644
--- a/src/treesit.h
+++ b/src/treesit.h
@@ -82,6 +82,10 @@ struct Lisp_TS_Parser
   /* If this field is true, parser functions raises
      treesit-parser-deleted signal.  */
   bool deleted;
+  /* If this field is true, deleting the parser should also delete the
+     associated buffer.  This is for parsers created by
+     treesit-parse-string, which uses a hidden temp buffer.  */
+  bool need_to_gc_buffer;
 };
 
 /* A wrapper around a tree-sitter node.  */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]