emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

emacs-30 3435464452b 3/4: Fix the range handling in treesit.c


From: Yuan Fu
Subject: emacs-30 3435464452b 3/4: Fix the range handling in treesit.c
Date: Sun, 8 Sep 2024 23:52:53 -0400 (EDT)

branch: emacs-30
commit 3435464452b4947098b8ccbe93e5c0afdd2ed06e
Author: Yuan Fu <casouri@gmail.com>
Commit: Yuan Fu <casouri@gmail.com>

    Fix the range handling in treesit.c
    
    1. In treesit_sync_visible_region, reduce the ranges for a parser so it
    doesn't go beyond the visible range.
    
    2. To avoid possible infinite recursion, add a within_reparse field to
    parsers.  Previously we were using the need_reparse field to avoid
    infinite recursion, but lisp programs in a parser's after change hook
    might make some buffer edit which turns need_reparse to true. To avoid
    that, we now use an explicit field.  If a parser's after change function
    makes a buffer edit, lisp program ends up with a desynced parse tree,
    but that's better than possible infinite recursion.  Also after change
    function shouldn't edit the buffer.
    
    3. In treesit_make_ranges, use parser's visible_beg instead of buffer's
    BEGV.  I mean technically whenever we make ranges, buffer's BEGV should
    be equal to parser's visible_beg, but better not take that uncertainty,
    also makes the code more readable.
    
    4. In Ftreesit_parser_included_ranges, move visible region sync code
    before the body of the function.
    
    * src/treesit.c (treesit_sync_visible_region): Minimally fix ranges so
    it doesn't exceed parser's visible range.
    (treesit_call_after_change_functions): Update calling sigature to
    treesit_make_ranges.
    (treesit_ensure_parsed, make_treesit_parser): Use the new field
    within_reparse.
    (treesit_make_ranges): Use parser's visible_beg instead of buffer's
    BEGV.
    (Ftreesit_parser_included_ranges): Move visible region check before
    function body.
    * src/treesit.h (Lisp_TS_Parser): Add new field within_reparse.
---
 src/treesit.c | 92 +++++++++++++++++++++++++++++++++++++++++++----------------
 src/treesit.h | 13 +++++++--
 2 files changed, 77 insertions(+), 28 deletions(-)

diff --git a/src/treesit.c b/src/treesit.c
index 351bd65819a..970754f3c1b 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -1045,6 +1045,48 @@ treesit_sync_visible_region (Lisp_Object parser)
 
   XTS_PARSER (parser)->visible_beg = visible_beg;
   XTS_PARSER (parser)->visible_end = visible_end;
+
+  /* Fix ranges so that the ranges stays with in visible_end.  Here we
+     try to do minimal work so that the ranges is minimally correct such
+     that there's no OOB error.  Usually treesit-update-ranges should
+     update the parser with actually correct ranges.  */
+  if (NILP (XTS_PARSER (parser)->last_set_ranges)) return;
+  uint32_t len;
+  const TSRange *ranges
+    = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
+  /* We might need to discard some ranges that exceeds visible_end, in
+     that case, new_len is the length of the new ranges array (which
+     will be shorter than len).  */
+  uint32_t new_len = 0;
+  uint32_t new_end = 0;
+  for (int idx = 0; idx < len; idx++)
+    {
+      TSRange range = ranges[idx];
+      /* If this range starts after visible_end, we don't include this
+         range and the ranges after it in the new ranges.  */
+      if (range.start_byte + visible_beg >= visible_end)
+       break;
+      /* If this range's end is after visible_end, we don't include any
+         ranges after it, and changes the end of this range to
+         visible_end.  */
+      if (range.end_byte + visible_beg > visible_end)
+       {
+         new_end = visible_end - visible_beg;
+         new_len++;
+         break;
+       }
+      new_len++;
+    }
+  if (new_len != len || new_end != 0)
+    {
+      TSRange *new_ranges = xmalloc (sizeof (TSRange) * new_len);
+      memcpy (new_ranges, ranges, sizeof (TSRange) * new_len);
+      new_ranges[new_len - 1].end_byte = new_end;
+      /* TODO: What should we do if this fails?  */
+      ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
+                                    new_ranges, new_len);
+      xfree (new_ranges);
+    }
 }
 
 static void
@@ -1057,7 +1099,8 @@ treesit_check_buffer_size (struct buffer *buffer)
              make_fixnum (buffer_size_bytes));
 }
 
-static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct 
buffer *);
+static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t,
+                                       Lisp_Object, struct buffer *);
 
 static void
 treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
@@ -1071,7 +1114,7 @@ treesit_call_after_change_functions (TSTree *old_tree, 
TSTree *new_tree,
     {
       uint32_t len;
       TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
-      lisp_ranges = treesit_make_ranges (ranges, len, buf);
+      lisp_ranges = treesit_make_ranges (ranges, len, parser, buf);
       xfree (ranges);
     }
   else
@@ -1098,6 +1141,9 @@ treesit_call_after_change_functions (TSTree *old_tree, 
TSTree *new_tree,
 static void
 treesit_ensure_parsed (Lisp_Object parser)
 {
+  if (XTS_PARSER (parser)->within_reparse) return;
+  XTS_PARSER (parser)->within_reparse = true;
+
   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
 
   /* Before we parse, catch up with the narrowing situation.  */
@@ -1106,10 +1152,11 @@ treesit_ensure_parsed (Lisp_Object parser)
      because it might set the flag to true.  */
   treesit_sync_visible_region (parser);
 
-  /* Make sure this comes before everything else, see comment
-     (ref:notifier-inside-ensure-parsed) for more detail.  */
   if (!XTS_PARSER (parser)->need_reparse)
-    return;
+    {
+      XTS_PARSER (parser)->within_reparse = false;
+      return;
+    }
 
   TSParser *treesit_parser = XTS_PARSER (parser)->parser;
   TSTree *tree = XTS_PARSER (parser)->tree;
@@ -1134,14 +1181,10 @@ treesit_ensure_parsed (Lisp_Object parser)
   XTS_PARSER (parser)->need_reparse = false;
   XTS_PARSER (parser)->timestamp++;
 
-  /* After-change functions should run at the very end, most crucially
-     after need_reparse is set to false, this way if the function
-     calls some tree-sitter function which invokes
-     treesit_ensure_parsed again, it returns early and do not
-     recursively call the after change functions again.
-     (ref:notifier-inside-ensure-parsed)  */
   treesit_call_after_change_functions (tree, new_tree, parser);
   ts_tree_delete (tree);
+
+  XTS_PARSER (parser)->within_reparse = false;
 }
 
 /* This is the read function provided to tree-sitter to read from a
@@ -1224,6 +1267,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
   lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
   lisp_parser->timestamp = 0;
   lisp_parser->deleted = false;
+  lisp_parser->within_reparse = false;
   eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
   return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
 }
@@ -1672,14 +1716,14 @@ treesit_check_range_argument (Lisp_Object ranges)
    convert between tree-sitter buffer offset and buffer position.  */
 static Lisp_Object
 treesit_make_ranges (const TSRange *ranges, uint32_t len,
-                    struct buffer *buffer)
+                    Lisp_Object parser, struct buffer *buffer)
 {
   Lisp_Object list = Qnil;
   for (int idx = 0; idx < len; idx++)
     {
       TSRange range = ranges[idx];
-      uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
-      uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
+      uint32_t beg_byte = range.start_byte + XTS_PARSER (parser)->visible_beg;
+      uint32_t end_byte = range.end_byte + XTS_PARSER (parser)->visible_beg;
       eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
       eassert (beg_byte <= end_byte);
       eassert (end_byte <= BUF_ZV_BYTE (buffer));
@@ -1725,11 +1769,9 @@ buffer.  */)
   if (NILP (ranges))
     {
       /* If RANGES is nil, make parser to parse the whole document.
-        To do that we give tree-sitter a 0 length, the range is a
-        dummy.  */
-      TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0};
+        To do that we give tree-sitter a 0 length.  */
       success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
-                                              &treesit_range , 0);
+                                              NULL , 0);
     }
   else
     {
@@ -1742,7 +1784,6 @@ buffer.  */)
 
       /* We can use XFIXNUM, XCAR, XCDR freely because we have checked
         the input by treesit_check_range_argument.  */
-
       for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges))
        {
          Lisp_Object range = XCAR (ranges);
@@ -1787,6 +1828,10 @@ See also `treesit-parser-set-included-ranges'.  */)
   treesit_check_parser (parser);
   treesit_initialize ();
 
+  /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
+     etc), so we need to sync up.  */
+  treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
+  treesit_sync_visible_region (parser);
   /* When the parser doesn't have a range set and we call
      ts_parser_included_ranges on it, it doesn't return an empty list,
      but rather return DEFAULT_RANGE.  (A single range where start_byte
@@ -1799,13 +1844,10 @@ See also `treesit-parser-set-included-ranges'.  */)
   const TSRange *ranges
     = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
 
-  /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
-     etc), so we need to sync up.  */
-  treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
-  treesit_sync_visible_region (parser);
-
   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
-  return treesit_make_ranges (ranges, len, buffer);
+
+
+  return treesit_make_ranges (ranges, len, parser, buffer);
 }
 
 DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers,
diff --git a/src/treesit.h b/src/treesit.h
index d3c6aa4c250..f8227a64b0a 100644
--- a/src/treesit.h
+++ b/src/treesit.h
@@ -45,9 +45,12 @@ struct Lisp_TS_Parser
      same tag.  A tag is primarily used to differentiate between
      parsers for the same language.  */
   Lisp_Object tag;
-  /* The Lisp ranges last set.  This is use to compare to the new
-     ranges the users wants to set, and avoid reparse if the new
-     ranges is the same as the last set one.  */
+  /* The Lisp ranges last set.  This is use to compare to the new ranges
+     the users wants to set, and avoid reparse if the new ranges is the
+     same as the last set one.  This might go out of sync with the
+     ranges we return from Ftreesit_parser_included_ranges, if we did a
+     ranges fix in treesit_sync_visible_region, but I don't think
+     that'll cause any harm.  */
   Lisp_Object last_set_ranges;
   /* The buffer associated with this parser.  */
   Lisp_Object buffer;
@@ -82,6 +85,10 @@ struct Lisp_TS_Parser
   /* If this field is true, parser functions raises
      treesit-parser-deleted signal.  */
   bool deleted;
+  /* This field is set to true when treesit_ensure_parsed runs, to
+     prevent infinite recursion due to calling after change
+     functions.  */
+  bool within_reparse;
 };
 
 /* A wrapper around a tree-sitter node.  */



reply via email to

[Prev in Thread] Current Thread [Next in Thread]