[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
emacs-30 3435464452b 3/4: Fix the range handling in treesit.c
From: |
Yuan Fu |
Subject: |
emacs-30 3435464452b 3/4: Fix the range handling in treesit.c |
Date: |
Sun, 8 Sep 2024 23:52:53 -0400 (EDT) |
branch: emacs-30
commit 3435464452b4947098b8ccbe93e5c0afdd2ed06e
Author: Yuan Fu <casouri@gmail.com>
Commit: Yuan Fu <casouri@gmail.com>
Fix the range handling in treesit.c
1. In treesit_sync_visible_region, reduce the ranges for a parser so it
doesn't go beyond the visible range.
2. To avoid possible infinite recursion, add a within_reparse field to
parsers. Previously we were using the need_reparse field to avoid
infinite recursion, but lisp programs in a parser's after change hook
might make some buffer edit which turns need_reparse to true. To avoid
that, we now use an explicit field. If a parser's after change function
makes a buffer edit, lisp program ends up with a desynced parse tree,
but that's better than possible infinite recursion. Also after change
function shouldn't edit the buffer.
3. In treesit_make_ranges, use parser's visible_beg instead of buffer's
BEGV. I mean technically whenever we make ranges, buffer's BEGV should
be equal to parser's visible_beg, but better not take that uncertainty,
also makes the code more readable.
4. In Ftreesit_parser_included_ranges, move visible region sync code
before the body of the function.
* src/treesit.c (treesit_sync_visible_region): Minimally fix ranges so
it doesn't exceed parser's visible range.
(treesit_call_after_change_functions): Update calling sigature to
treesit_make_ranges.
(treesit_ensure_parsed, make_treesit_parser): Use the new field
within_reparse.
(treesit_make_ranges): Use parser's visible_beg instead of buffer's
BEGV.
(Ftreesit_parser_included_ranges): Move visible region check before
function body.
* src/treesit.h (Lisp_TS_Parser): Add new field within_reparse.
---
src/treesit.c | 92 +++++++++++++++++++++++++++++++++++++++++++----------------
src/treesit.h | 13 +++++++--
2 files changed, 77 insertions(+), 28 deletions(-)
diff --git a/src/treesit.c b/src/treesit.c
index 351bd65819a..970754f3c1b 100644
--- a/src/treesit.c
+++ b/src/treesit.c
@@ -1045,6 +1045,48 @@ treesit_sync_visible_region (Lisp_Object parser)
XTS_PARSER (parser)->visible_beg = visible_beg;
XTS_PARSER (parser)->visible_end = visible_end;
+
+ /* Fix ranges so that the ranges stays with in visible_end. Here we
+ try to do minimal work so that the ranges is minimally correct such
+ that there's no OOB error. Usually treesit-update-ranges should
+ update the parser with actually correct ranges. */
+ if (NILP (XTS_PARSER (parser)->last_set_ranges)) return;
+ uint32_t len;
+ const TSRange *ranges
+ = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
+ /* We might need to discard some ranges that exceeds visible_end, in
+ that case, new_len is the length of the new ranges array (which
+ will be shorter than len). */
+ uint32_t new_len = 0;
+ uint32_t new_end = 0;
+ for (int idx = 0; idx < len; idx++)
+ {
+ TSRange range = ranges[idx];
+ /* If this range starts after visible_end, we don't include this
+ range and the ranges after it in the new ranges. */
+ if (range.start_byte + visible_beg >= visible_end)
+ break;
+ /* If this range's end is after visible_end, we don't include any
+ ranges after it, and changes the end of this range to
+ visible_end. */
+ if (range.end_byte + visible_beg > visible_end)
+ {
+ new_end = visible_end - visible_beg;
+ new_len++;
+ break;
+ }
+ new_len++;
+ }
+ if (new_len != len || new_end != 0)
+ {
+ TSRange *new_ranges = xmalloc (sizeof (TSRange) * new_len);
+ memcpy (new_ranges, ranges, sizeof (TSRange) * new_len);
+ new_ranges[new_len - 1].end_byte = new_end;
+ /* TODO: What should we do if this fails? */
+ ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
+ new_ranges, new_len);
+ xfree (new_ranges);
+ }
}
static void
@@ -1057,7 +1099,8 @@ treesit_check_buffer_size (struct buffer *buffer)
make_fixnum (buffer_size_bytes));
}
-static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct
buffer *);
+static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t,
+ Lisp_Object, struct buffer *);
static void
treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
@@ -1071,7 +1114,7 @@ treesit_call_after_change_functions (TSTree *old_tree,
TSTree *new_tree,
{
uint32_t len;
TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
- lisp_ranges = treesit_make_ranges (ranges, len, buf);
+ lisp_ranges = treesit_make_ranges (ranges, len, parser, buf);
xfree (ranges);
}
else
@@ -1098,6 +1141,9 @@ treesit_call_after_change_functions (TSTree *old_tree,
TSTree *new_tree,
static void
treesit_ensure_parsed (Lisp_Object parser)
{
+ if (XTS_PARSER (parser)->within_reparse) return;
+ XTS_PARSER (parser)->within_reparse = true;
+
struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
/* Before we parse, catch up with the narrowing situation. */
@@ -1106,10 +1152,11 @@ treesit_ensure_parsed (Lisp_Object parser)
because it might set the flag to true. */
treesit_sync_visible_region (parser);
- /* Make sure this comes before everything else, see comment
- (ref:notifier-inside-ensure-parsed) for more detail. */
if (!XTS_PARSER (parser)->need_reparse)
- return;
+ {
+ XTS_PARSER (parser)->within_reparse = false;
+ return;
+ }
TSParser *treesit_parser = XTS_PARSER (parser)->parser;
TSTree *tree = XTS_PARSER (parser)->tree;
@@ -1134,14 +1181,10 @@ treesit_ensure_parsed (Lisp_Object parser)
XTS_PARSER (parser)->need_reparse = false;
XTS_PARSER (parser)->timestamp++;
- /* After-change functions should run at the very end, most crucially
- after need_reparse is set to false, this way if the function
- calls some tree-sitter function which invokes
- treesit_ensure_parsed again, it returns early and do not
- recursively call the after change functions again.
- (ref:notifier-inside-ensure-parsed) */
treesit_call_after_change_functions (tree, new_tree, parser);
ts_tree_delete (tree);
+
+ XTS_PARSER (parser)->within_reparse = false;
}
/* This is the read function provided to tree-sitter to read from a
@@ -1224,6 +1267,7 @@ make_treesit_parser (Lisp_Object buffer, TSParser *parser,
lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
lisp_parser->timestamp = 0;
lisp_parser->deleted = false;
+ lisp_parser->within_reparse = false;
eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
}
@@ -1672,14 +1716,14 @@ treesit_check_range_argument (Lisp_Object ranges)
convert between tree-sitter buffer offset and buffer position. */
static Lisp_Object
treesit_make_ranges (const TSRange *ranges, uint32_t len,
- struct buffer *buffer)
+ Lisp_Object parser, struct buffer *buffer)
{
Lisp_Object list = Qnil;
for (int idx = 0; idx < len; idx++)
{
TSRange range = ranges[idx];
- uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
- uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
+ uint32_t beg_byte = range.start_byte + XTS_PARSER (parser)->visible_beg;
+ uint32_t end_byte = range.end_byte + XTS_PARSER (parser)->visible_beg;
eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
eassert (beg_byte <= end_byte);
eassert (end_byte <= BUF_ZV_BYTE (buffer));
@@ -1725,11 +1769,9 @@ buffer. */)
if (NILP (ranges))
{
/* If RANGES is nil, make parser to parse the whole document.
- To do that we give tree-sitter a 0 length, the range is a
- dummy. */
- TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0};
+ To do that we give tree-sitter a 0 length. */
success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
- &treesit_range , 0);
+ NULL , 0);
}
else
{
@@ -1742,7 +1784,6 @@ buffer. */)
/* We can use XFIXNUM, XCAR, XCDR freely because we have checked
the input by treesit_check_range_argument. */
-
for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges))
{
Lisp_Object range = XCAR (ranges);
@@ -1787,6 +1828,10 @@ See also `treesit-parser-set-included-ranges'. */)
treesit_check_parser (parser);
treesit_initialize ();
+ /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
+ etc), so we need to sync up. */
+ treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
+ treesit_sync_visible_region (parser);
/* When the parser doesn't have a range set and we call
ts_parser_included_ranges on it, it doesn't return an empty list,
but rather return DEFAULT_RANGE. (A single range where start_byte
@@ -1799,13 +1844,10 @@ See also `treesit-parser-set-included-ranges'. */)
const TSRange *ranges
= ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
- /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
- etc), so we need to sync up. */
- treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
- treesit_sync_visible_region (parser);
-
struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
- return treesit_make_ranges (ranges, len, buffer);
+
+
+ return treesit_make_ranges (ranges, len, parser, buffer);
}
DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers,
diff --git a/src/treesit.h b/src/treesit.h
index d3c6aa4c250..f8227a64b0a 100644
--- a/src/treesit.h
+++ b/src/treesit.h
@@ -45,9 +45,12 @@ struct Lisp_TS_Parser
same tag. A tag is primarily used to differentiate between
parsers for the same language. */
Lisp_Object tag;
- /* The Lisp ranges last set. This is use to compare to the new
- ranges the users wants to set, and avoid reparse if the new
- ranges is the same as the last set one. */
+ /* The Lisp ranges last set. This is use to compare to the new ranges
+ the users wants to set, and avoid reparse if the new ranges is the
+ same as the last set one. This might go out of sync with the
+ ranges we return from Ftreesit_parser_included_ranges, if we did a
+ ranges fix in treesit_sync_visible_region, but I don't think
+ that'll cause any harm. */
Lisp_Object last_set_ranges;
/* The buffer associated with this parser. */
Lisp_Object buffer;
@@ -82,6 +85,10 @@ struct Lisp_TS_Parser
/* If this field is true, parser functions raises
treesit-parser-deleted signal. */
bool deleted;
+ /* This field is set to true when treesit_ensure_parsed runs, to
+ prevent infinite recursion due to calling after change
+ functions. */
+ bool within_reparse;
};
/* A wrapper around a tree-sitter node. */