From d16b54bab67bece43b9bd84d46f5f2a203f3eaf9 Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Wed, 9 Oct 2024 10:48:58 +0300 Subject: [PATCH 1/3] feat!: drop Python 3.9 support and musl linux aarch64 wheels --- .github/workflows/ci.yml | 2 +- .github/workflows/pypi.yml | 2 +- pyproject.toml | 4 +-- tree_sitter/binding/module.c | 64 ++++++++++++++---------------------- tree_sitter/binding/types.h | 5 --- 5 files changed, 28 insertions(+), 49 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 98d6511..82b9f85 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -21,7 +21,7 @@ jobs: strategy: fail-fast: false matrix: - python: ["3.9", "3.10", "3.11", "3.12", "3.13"] + python: ["3.10", "3.11", "3.12", "3.13"] os: [ubuntu-latest, macos-13, windows-latest] runs-on: ${{matrix.os}} steps: diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index 37e4cf0..e68273f 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -52,7 +52,7 @@ jobs: uses: pypa/cibuildwheel@v2.21 env: CIBW_ARCHS: ${{matrix.cibw_arch}} - CIBW_SKIP: pp* + CIBW_SKIP: "pp* *-musllinux_aarch64" CIBW_TEST_SKIP: "*-*linux_aarch64 *-macosx_x86_64 *-win_arm64" - name: Upload wheels uses: actions/upload-artifact@v4 diff --git a/pyproject.toml b/pyproject.toml index 3c7e6cd..cec6871 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,7 +17,7 @@ classifiers = [ "Topic :: Text Processing :: Linguistic", "Typing :: Typed", ] -requires-python = ">=3.9" +requires-python = ">=3.10" readme = "README.md" [project.urls] @@ -40,7 +40,7 @@ tests = [ ] [tool.ruff] -target-version = "py39" +target-version = "py310" line-length = 100 indent-width = 4 extend-exclude = [ diff --git a/tree_sitter/binding/module.c b/tree_sitter/binding/module.c index 8f7026f..291d53d 100644 --- a/tree_sitter/binding/module.c +++ b/tree_sitter/binding/module.c @@ -15,23 +15,6 @@ extern PyType_Spec range_type_spec; extern PyType_Spec tree_cursor_type_spec; extern PyType_Spec tree_type_spec; -// TODO(0.24): drop Python 3.9 support -#if PY_MINOR_VERSION > 9 -#define AddObjectRef PyModule_AddObjectRef -#else -static int AddObjectRef(PyObject *module, const char *name, PyObject *value) { - if (value == NULL) { - PyErr_Format(PyExc_SystemError, "PyModule_AddObjectRef() %s == NULL", name); - return -1; - } - int ret = PyModule_AddObject(module, name, value); - if (ret == 0) { - Py_INCREF(value); - } - return ret; -} -#endif - static inline PyObject *import_attribute(const char *mod, const char *attr) { PyObject *module = PyImport_ImportModule(mod); if (module == NULL) { @@ -106,27 +89,27 @@ PyMODINIT_FUNC PyInit__binding(void) { (PyTypeObject *)PyType_FromModuleAndSpec(module, &tree_cursor_type_spec, NULL); state->tree_type = (PyTypeObject *)PyType_FromModuleAndSpec(module, &tree_type_spec, NULL); - if ((AddObjectRef(module, "Language", (PyObject *)state->language_type) < 0) || - (AddObjectRef(module, "LookaheadIterator", (PyObject *)state->lookahead_iterator_type) < - 0) || - (AddObjectRef(module, "LookaheadNamesIterator", - (PyObject *)state->lookahead_names_iterator_type) < 0) || - (AddObjectRef(module, "Node", (PyObject *)state->node_type) < 0) || - (AddObjectRef(module, "Parser", (PyObject *)state->parser_type) < 0) || - (AddObjectRef(module, "Query", (PyObject *)state->query_type) < 0) || - (AddObjectRef(module, "QueryPredicateAnyof", - (PyObject *)state->query_predicate_anyof_type) < 0) || - (AddObjectRef(module, "QueryPredicateEqCapture", - (PyObject *)state->query_predicate_eq_capture_type) < 0) || - (AddObjectRef(module, "QueryPredicateEqString", - (PyObject *)state->query_predicate_eq_string_type) < 0) || - (AddObjectRef(module, "QueryPredicateGeneric", - (PyObject *)state->query_predicate_generic_type) < 0) || - (AddObjectRef(module, "QueryPredicateMatch", - (PyObject *)state->query_predicate_match_type) < 0) || - (AddObjectRef(module, "Range", (PyObject *)state->range_type) < 0) || - (AddObjectRef(module, "Tree", (PyObject *)state->tree_type) < 0) || - (AddObjectRef(module, "TreeCursor", (PyObject *)state->tree_cursor_type) < 0)) { + if ((PyModule_AddObjectRef(module, "Language", (PyObject *)state->language_type) < 0) || + (PyModule_AddObjectRef(module, "LookaheadIterator", + (PyObject *)state->lookahead_iterator_type) < 0) || + (PyModule_AddObjectRef(module, "LookaheadNamesIterator", + (PyObject *)state->lookahead_names_iterator_type) < 0) || + (PyModule_AddObjectRef(module, "Node", (PyObject *)state->node_type) < 0) || + (PyModule_AddObjectRef(module, "Parser", (PyObject *)state->parser_type) < 0) || + (PyModule_AddObjectRef(module, "Query", (PyObject *)state->query_type) < 0) || + (PyModule_AddObjectRef(module, "QueryPredicateAnyof", + (PyObject *)state->query_predicate_anyof_type) < 0) || + (PyModule_AddObjectRef(module, "QueryPredicateEqCapture", + (PyObject *)state->query_predicate_eq_capture_type) < 0) || + (PyModule_AddObjectRef(module, "QueryPredicateEqString", + (PyObject *)state->query_predicate_eq_string_type) < 0) || + (PyModule_AddObjectRef(module, "QueryPredicateGeneric", + (PyObject *)state->query_predicate_generic_type) < 0) || + (PyModule_AddObjectRef(module, "QueryPredicateMatch", + (PyObject *)state->query_predicate_match_type) < 0) || + (PyModule_AddObjectRef(module, "Range", (PyObject *)state->range_type) < 0) || + (PyModule_AddObjectRef(module, "Tree", (PyObject *)state->tree_type) < 0) || + (PyModule_AddObjectRef(module, "TreeCursor", (PyObject *)state->tree_cursor_type) < 0)) { goto cleanup; } @@ -134,7 +117,8 @@ PyMODINIT_FUNC PyInit__binding(void) { "tree_sitter.QueryError", PyDoc_STR("An error that occurred while attempting to create a :class:`Query`."), PyExc_ValueError, NULL); - if (state->query_error == NULL || AddObjectRef(module, "QueryError", state->query_error) < 0) { + if (state->query_error == NULL || + PyModule_AddObjectRef(module, "QueryError", state->query_error) < 0) { goto cleanup; } @@ -155,7 +139,7 @@ PyMODINIT_FUNC PyInit__binding(void) { Py_DECREF(point_kwargs); Py_DECREF(namedtuple); if (state->point_type == NULL || - AddObjectRef(module, "Point", (PyObject *)state->point_type) < 0) { + PyModule_AddObjectRef(module, "Point", (PyObject *)state->point_type) < 0) { goto cleanup; } diff --git a/tree_sitter/binding/types.h b/tree_sitter/binding/types.h index 4d73ead..35b5a69 100644 --- a/tree_sitter/binding/types.h +++ b/tree_sitter/binding/types.h @@ -6,11 +6,6 @@ #define HAS_LANGUAGE_NAMES (TREE_SITTER_LANGUAGE_VERSION >= 15) -#if PY_MINOR_VERSION < 10 -#define Py_TPFLAGS_DISALLOW_INSTANTIATION 0 -#define Py_TPFLAGS_IMMUTABLETYPE 0 -#endif - // Types typedef struct { From 0c05091d66535fbef40b0bc763d243d71721231d Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Thu, 17 Oct 2024 15:28:51 +0300 Subject: [PATCH 2/3] feat: implement missing methods - Language.copy - Language.node_kind_is_supertype - Node.child_with_descendant - Node.field_name_for_named_child - Parser.logger - Parser.print_dot_graphs - Tree.copy - Tree.print_dot_graph --- docs/classes/tree_sitter.Language.rst | 13 +- docs/classes/tree_sitter.LogType.rst | 13 ++ docs/classes/tree_sitter.Node.rst | 9 +- docs/classes/tree_sitter.Parser.rst | 8 +- docs/classes/tree_sitter.Query.rst | 48 ------ docs/classes/tree_sitter.QueryError.rst | 2 - docs/classes/tree_sitter.QueryPredicate.rst | 2 - docs/classes/tree_sitter.Tree.rst | 15 +- docs/classes/tree_sitter.TreeCursor.rst | 8 - docs/conf.py | 14 +- docs/index.rst | 1 + pyproject.toml | 2 +- tests/test_parser.py | 148 +++++++++++------- tree_sitter/__init__.py | 5 + tree_sitter/__init__.pyi | 47 ++++-- tree_sitter/binding/language.c | 47 +++++- tree_sitter/binding/lookahead_iterator.c | 6 +- .../binding/lookahead_names_iterator.c | 3 +- tree_sitter/binding/module.c | 13 ++ tree_sitter/binding/node.c | 76 +++++++-- tree_sitter/binding/parser.c | 100 +++++++++++- tree_sitter/binding/query.c | 27 ++-- tree_sitter/binding/tree.c | 47 +++++- tree_sitter/binding/tree_cursor.c | 10 +- tree_sitter/binding/types.h | 2 + 25 files changed, 470 insertions(+), 196 deletions(-) create mode 100644 docs/classes/tree_sitter.LogType.rst diff --git a/docs/classes/tree_sitter.Language.rst b/docs/classes/tree_sitter.Language.rst index f65423e..bccfa79 100644 --- a/docs/classes/tree_sitter.Language.rst +++ b/docs/classes/tree_sitter.Language.rst @@ -3,13 +3,12 @@ Language .. autoclass:: tree_sitter.Language - .. versionchanged:: 0.23.0 - - The argument can now be a `capsule `_. - Methods ------- + .. automethod:: copy + + .. versionadded:: 0.24.0 .. automethod:: field_id_for_name .. automethod:: field_name_for_id .. automethod:: id_for_node_kind @@ -17,12 +16,18 @@ Language .. automethod:: next_state .. automethod:: node_kind_for_id .. automethod:: node_kind_is_named + .. automethod:: node_kind_is_supertype + + .. versionadded:: 0.24.0 .. automethod:: node_kind_is_visible .. automethod:: query Special Methods --------------- + .. automethod:: __copy__ + + .. versionadded:: 0.24.0 .. automethod:: __eq__ .. automethod:: __hash__ diff --git a/docs/classes/tree_sitter.LogType.rst b/docs/classes/tree_sitter.LogType.rst new file mode 100644 index 0000000..b5f78c2 --- /dev/null +++ b/docs/classes/tree_sitter.LogType.rst @@ -0,0 +1,13 @@ +LogType +======= + +.. autoclass:: tree_sitter.LogType + :show-inheritance: + + .. versionadded:: 0.24.0 + + Members + ------- + + .. autoattribute:: PARSE + .. autoattribute:: LEX diff --git a/docs/classes/tree_sitter.Node.rst b/docs/classes/tree_sitter.Node.rst index 45284b6..9cebb2b 100644 --- a/docs/classes/tree_sitter.Node.rst +++ b/docs/classes/tree_sitter.Node.rst @@ -11,13 +11,20 @@ Node .. automethod:: child_by_field_name .. automethod:: child_containing_descendant - .. versionadded:: 0.23.0 + .. deprecated:: 0.24.0 + Use :meth:`child_with_descendant` instead + .. automethod:: child_with_descendant + + .. versionadded:: 0.24.0 .. automethod:: children_by_field_id .. automethod:: children_by_field_name .. automethod:: descendant_for_byte_range .. automethod:: descendant_for_point_range .. automethod:: edit .. automethod:: field_name_for_child + .. automethod:: field_name_for_named_child + + .. versionadded:: 0.24.0 .. automethod:: named_child .. automethod:: named_descendant_for_byte_range .. automethod:: named_descendant_for_point_range diff --git a/docs/classes/tree_sitter.Parser.rst b/docs/classes/tree_sitter.Parser.rst index bdbf642..c60da93 100644 --- a/docs/classes/tree_sitter.Parser.rst +++ b/docs/classes/tree_sitter.Parser.rst @@ -7,10 +7,9 @@ Parser ------- .. automethod:: parse + .. automethod:: print_dot_graphs - .. versionchanged:: 0.23.0 - - No longer accepts a ``keep_text`` parameter. + .. versionadded:: 0.24.0 .. automethod:: reset Attributes @@ -18,4 +17,7 @@ Parser .. autoattribute:: included_ranges .. autoattribute:: language + .. autoattribute:: logger + + .. versionadded:: 0.24.0 .. autoattribute:: timeout_micros diff --git a/docs/classes/tree_sitter.Query.rst b/docs/classes/tree_sitter.Query.rst index ae031dd..2944a4f 100644 --- a/docs/classes/tree_sitter.Query.rst +++ b/docs/classes/tree_sitter.Query.rst @@ -25,79 +25,31 @@ Query .. important:: Predicates cannot be used if the tree was parsed from a callback. - - .. versionchanged:: 0.23.0 - - Range arguments removed, :class:`predicate ` argument added, - return type changed to ``dict[str, list[Node]]``. .. automethod:: disable_capture - - .. versionadded:: 0.23.0 .. automethod:: disable_pattern - - .. versionadded:: 0.23.0 .. automethod:: end_byte_for_pattern - - .. versionadded:: 0.23.0 .. automethod:: is_pattern_guaranteed_at_step - - .. versionadded:: 0.23.0 .. automethod:: is_pattern_non_local - - .. versionadded:: 0.23.0 .. automethod:: is_pattern_rooted - - .. versionadded:: 0.23.0 .. automethod:: matches .. important:: Predicates cannot be used if the tree was parsed from a callback. - - .. versionchanged:: 0.23.0 - - Range arguments removed, :class:`predicate ` argument added, - return type changed to ``list[tuple[int, dict[str, list[Node]]]]``. .. automethod:: pattern_assertions - - .. versionadded:: 0.23.0 .. automethod:: pattern_settings - - .. versionadded:: 0.23.0 .. automethod:: set_byte_range - - .. versionadded:: 0.23.0 .. automethod:: set_point_range - - .. versionadded:: 0.23.0 .. automethod:: start_byte_for_pattern - - .. versionadded:: 0.23.0 .. automethod:: set_match_limit - - .. versionadded:: 0.23.0 .. automethod:: set_max_start_depth - - .. versionadded:: 0.23.0 .. automethod:: set_timeout_micros - .. versionadded:: 0.23.1 - Attributes ---------- .. autoattribute:: capture_count - - .. versionadded:: 0.23.0 .. autoattribute:: did_exceed_match_limit - - .. versionadded:: 0.23.0 .. autoattribute:: match_limit - - .. versionadded:: 0.23.0 .. autoattribute:: pattern_count - - .. versionadded:: 0.23.0 .. autoattribute:: timeout_micros - - .. versionadded:: 0.23.1 diff --git a/docs/classes/tree_sitter.QueryError.rst b/docs/classes/tree_sitter.QueryError.rst index 6ef1793..309eaa1 100644 --- a/docs/classes/tree_sitter.QueryError.rst +++ b/docs/classes/tree_sitter.QueryError.rst @@ -3,5 +3,3 @@ QueryError .. autoclass:: tree_sitter.QueryError :show-inheritance: - - .. versionadded:: 0.23.0 diff --git a/docs/classes/tree_sitter.QueryPredicate.rst b/docs/classes/tree_sitter.QueryPredicate.rst index 265130b..1efa5d5 100644 --- a/docs/classes/tree_sitter.QueryPredicate.rst +++ b/docs/classes/tree_sitter.QueryPredicate.rst @@ -4,8 +4,6 @@ QueryPredicate .. autoclass:: tree_sitter.QueryPredicate :show-inheritance: - .. versionadded:: 0.23.0 - Special Methods --------------- diff --git a/docs/classes/tree_sitter.Tree.rst b/docs/classes/tree_sitter.Tree.rst index aab0505..3d081d5 100644 --- a/docs/classes/tree_sitter.Tree.rst +++ b/docs/classes/tree_sitter.Tree.rst @@ -7,15 +7,26 @@ Tree ------- .. automethod:: changed_ranges + .. automethod:: copy + + .. versionadded:: 0.24.0 .. automethod:: edit + .. automethod:: print_dot_graph + + .. versionadded:: 0.24.0 .. automethod:: root_node_with_offset .. automethod:: walk + Special Methods + --------------- + + .. automethod:: __copy__ + + .. versionadded:: 0.24.0 + Attributes ---------- .. autoattribute:: included_ranges .. autoattribute:: language - - .. versionadded:: 0.23.0 .. autoattribute:: root_node diff --git a/docs/classes/tree_sitter.TreeCursor.rst b/docs/classes/tree_sitter.TreeCursor.rst index be25710..b219ff5 100644 --- a/docs/classes/tree_sitter.TreeCursor.rst +++ b/docs/classes/tree_sitter.TreeCursor.rst @@ -10,15 +10,7 @@ TreeCursor .. automethod:: goto_descendant .. automethod:: goto_first_child .. automethod:: goto_first_child_for_byte - - .. versionchanged:: 0.23.0 - - Returns the child index instead of a `bool`. .. automethod:: goto_first_child_for_point - - .. versionchanged:: 0.23.0 - - Returns the child index instead of a `bool`. .. automethod:: goto_last_child .. automethod:: goto_next_sibling .. automethod:: goto_parent diff --git a/docs/conf.py b/docs/conf.py index 654c1e5..61fb55d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -17,14 +17,16 @@ "sphinx.ext.intersphinx", "sphinx.ext.githubpages", ] -source_suffix = ".rst" +source_suffix = { + ".rst": "restructuredtext" +} master_doc = "index" language = "en" -needs_sphinx = "7.3" +needs_sphinx = "7.4" templates_path = ["_templates"] intersphinx_mapping = { - "python": ("https://docs.python.org/3.9/", None), + "python": ("https://docs.python.org/3.10/", None), } autoclass_content = "class" @@ -54,7 +56,7 @@ html_favicon = "_static/favicon.png" -special_doc = regex("\S*self[^.]+") +special_doc = regex(r"\S*self[^.]+") def process_signature(_app, _what, name, _obj, _options, _signature, return_annotation): @@ -68,6 +70,8 @@ def process_signature(_app, _what, name, _obj, _options, _signature, return_anno return "(start_point, end_point, start_byte, end_byte)", return_annotation if name == "tree_sitter.QueryPredicate": return None, return_annotation + if name == "tree_sitter.LogType": + return None, return_annotation def process_docstring(_app, what, name, _obj, _options, lines): @@ -83,6 +87,8 @@ def process_docstring(_app, what, name, _obj, _options, lines): def process_bases(_app, name, _obj, _options, bases): if name == "tree_sitter.Point": bases[-1] = ":class:`~typing.NamedTuple`" + if name == "tree_sitter.LogType": + bases[-1] = ":class:`~enum.IntEnum`" if name == "tree_sitter.LookaheadIterator": bases[-1] = ":class:`~collections.abc.Iterator`" diff --git a/docs/index.rst b/docs/index.rst index 62dc309..9929e31 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -30,6 +30,7 @@ Classes :nosignatures: tree_sitter.Language + tree_sitter.LogType tree_sitter.LookaheadIterator tree_sitter.Node tree_sitter.Parser diff --git a/pyproject.toml b/pyproject.toml index cec6871..e933e58 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,7 +30,7 @@ name = "Max Brunsfeld" email = "maxbrunsfeld@gmail.com" [project.optional-dependencies] -docs = ["sphinx~=7.3", "sphinx-book-theme"] +docs = ["sphinx~=7.4", "sphinx-book-theme"] tests = [ "tree-sitter-html>=0.23.0", "tree-sitter-javascript>=0.23.0", diff --git a/tests/test_parser.py b/tests/test_parser.py index 9a228a1..fd4f102 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,6 +1,6 @@ from unittest import TestCase -from tree_sitter import Language, Parser, Range, Tree +from tree_sitter import Language, LogType, Parser, Range, Tree import tree_sitter_html import tree_sitter_javascript @@ -84,6 +84,13 @@ def test_setters(self): parser.timeout_micros = self.timeout self.assertEqual(parser.timeout_micros, self.timeout) + with self.subTest(setter="logger"): + def logger(log_type, message): + print(log_type.name, message) + + parser.logger = logger + self.assertEqual(parser.logger, logger) + def test_deleters(self): parser = Parser() @@ -95,10 +102,14 @@ def test_deleters(self): del parser.included_ranges self.assertListEqual(parser.included_ranges, [self.max_range]) - with self.subTest(setter="timeout_micros"): + with self.subTest(deleter="timeout_micros"): del parser.timeout_micros self.assertEqual(parser.timeout_micros, 0) + with self.subTest(deleter="logger"): + del parser.logger + self.assertEqual(parser.logger, None) + def test_parse_buffer(self): parser = Parser(self.javascript) with self.subTest(type="bytes"): @@ -136,24 +147,22 @@ def test_parse_utf16_encoding(self): parser = Parser(self.javascript) def read(byte_position, _): - return source_code[byte_position: byte_position + 2] + return source_code[byte_position : byte_position + 2] - tree = parser.parse(read, encoding="utf-16") + tree = parser.parse(read, encoding="utf16") root_node = tree.root_node snake_node = root_node.children[0].children[0].children[2] - snake = source_code[snake_node.start_byte + 2:snake_node.end_byte - 2] + snake = source_code[snake_node.start_byte + 2 : snake_node.end_byte - 2] self.assertEqual(snake_node.type, "string") self.assertEqual(snake.decode("utf16"), "🐍") self.assertIs(tree.language, self.javascript) - def test_parse_invalid_encoding(self): parser = Parser(self.python) with self.assertRaises(ValueError): parser.parse(b"foo", encoding="ascii") - def test_parse_with_one_included_range(self): source_code = b"hi" parser = Parser(self.html) @@ -226,7 +235,7 @@ def test_parse_with_multiple_included_ranges(self): + " (text)" + " (element (start_tag (tag_name)) (end_tag (tag_name)))" + " (text)" - + " (end_tag (tag_name))))" + + " (end_tag (tag_name))))", ) self.assertEqual(html_tree.included_ranges, html_ranges) @@ -269,7 +278,7 @@ def test_parse_with_included_range_containing_mismatched_positions(self): self.assertEqual( str(html_tree.root_node), - "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))" + "(document (element (start_tag (tag_name)) (text) (end_tag (tag_name))))", ) def test_parse_with_included_range_boundaries(self): @@ -279,20 +288,23 @@ def test_parse_with_included_range_boundaries(self): range2_start_byte = source_code.index(b" d() ") range2_end_byte = range2_start_byte + len(b" d() ") - parser = Parser(self.javascript, included_ranges=[ - Range( - start_byte=range1_start_byte, - end_byte=range1_end_byte, - start_point=(0, range1_start_byte), - end_point=(0, range1_end_byte), - ), - Range( - start_byte=range2_start_byte, - end_byte=range2_end_byte, - start_point=(0, range2_start_byte), - end_point=(0, range2_end_byte), - ) - ]) + parser = Parser( + self.javascript, + included_ranges=[ + Range( + start_byte=range1_start_byte, + end_byte=range1_end_byte, + start_point=(0, range1_start_byte), + end_point=(0, range1_end_byte), + ), + Range( + start_byte=range2_start_byte, + end_byte=range2_end_byte, + start_point=(0, range2_start_byte), + end_point=(0, range2_end_byte), + ), + ], + ) tree = parser.parse(source_code) root = tree.root_node @@ -303,11 +315,11 @@ def test_parse_with_included_range_boundaries(self): self.assertEqual( str(root), - "(program" + - " (expression_statement (call_expression" + - " function: (identifier) arguments: (arguments)))" + - " (expression_statement (call_expression" + - " function: (identifier) arguments: (arguments))))" + "(program" + + " (expression_statement (call_expression" + + " function: (identifier) arguments: (arguments)))" + + " (expression_statement (call_expression" + + " function: (identifier) arguments: (arguments))))", ) self.assertEqual(statement1.start_byte, source_code.index(b"b()")) @@ -357,10 +369,10 @@ def test_parse_with_a_newly_excluded_range(self): self.assertEqual( str(tree.root_node), - "(document (text) (element" + - " (start_tag (tag_name))" + - " (element (start_tag (tag_name)) (end_tag (tag_name)))" + - " (end_tag (tag_name))))" + "(document (text) (element" + + " (start_tag (tag_name))" + + " (element (start_tag (tag_name)) (end_tag (tag_name)))" + + " (end_tag (tag_name))))", ) self.assertEqual( @@ -381,8 +393,8 @@ def test_parse_with_a_newly_excluded_range(self): end_byte=directive_end, start_point=(0, directive_start), end_point=(0, directive_end), - ) - ] + ), + ], ) def test_parsing_with_a_newly_included_range(self): @@ -400,9 +412,9 @@ def test_parsing_with_a_newly_included_range(self): tree = parser.parse(source_code) self.assertEqual( str(tree.root_node), - "(program" + - " (expression_statement (call_expression" + - " function: (identifier) arguments: (arguments))))" + "(program" + + " (expression_statement (call_expression" + + " function: (identifier) arguments: (arguments))))", ) # Parse both the first and third code directives as JavaScript, using the old tree as a @@ -414,16 +426,13 @@ def test_parsing_with_a_newly_included_range(self): tree2 = parser.parse(source_code) self.assertEqual( str(tree2.root_node), - "(program" + - " (expression_statement (call_expression" + - " function: (identifier) arguments: (arguments)))" + - " (expression_statement (call_expression" + - " function: (identifier) arguments: (arguments))))" - ) - self.assertEqual( - tree2.changed_ranges(tree), - [simple_range(range1_end, range3_end)] + "(program" + + " (expression_statement (call_expression" + + " function: (identifier) arguments: (arguments)))" + + " (expression_statement (call_expression" + + " function: (identifier) arguments: (arguments))))", ) + self.assertEqual(tree2.changed_ranges(tree), [simple_range(range1_end, range3_end)]) # Parse all three code directives as JavaScript, using the old tree as a # reference. @@ -435,15 +444,48 @@ def test_parsing_with_a_newly_included_range(self): tree3 = parser.parse(source_code) self.assertEqual( str(tree3.root_node), - "(program" + - " (expression_statement (call_expression" + - " function: (identifier) arguments: (arguments)))" + - " (expression_statement (call_expression" + - " function: (identifier) arguments: (arguments)))" - + " (expression_statement (call_expression" + - " function: (identifier) arguments: (arguments))))" + "(program" + + " (expression_statement (call_expression" + + " function: (identifier) arguments: (arguments)))" + + " (expression_statement (call_expression" + + " function: (identifier) arguments: (arguments)))" + + " (expression_statement (call_expression" + + " function: (identifier) arguments: (arguments))))", ) self.assertEqual( tree3.changed_ranges(tree2), [simple_range(range2_start + 1, range2_end - 1)], ) + + def test_logging(self): + from logging import getLogger + + def logger(log_type: LogType, message: str): + match log_type: + case LogType.PARSE: + parse_logger.info(message) + case LogType.LEX: + lex_logger.info(message) + + parse_logger = getLogger("tree_sitter.PARSE") + lex_logger = getLogger("tree_sitter.LEX") + parser = Parser(self.python, logger=logger) + with self.assertLogs("tree_sitter") as logs: + parser.parse(b"foo") + + self.assertEqual(logs.records[0].name, "tree_sitter.PARSE") + self.assertEqual(logs.records[0].message, "new_parse") + self.assertEqual(logs.records[3].name, "tree_sitter.LEX") + self.assertEqual(logs.records[3].message, "consume character:'f'") + + def test_dot_graphs(self): + from tempfile import TemporaryFile + + new_parse = ["graph {\n", 'label="new_parse"\n', "}\n"] + parser = Parser(self.python) + with TemporaryFile("w+") as f: + parser.print_dot_graphs(f) + parser.parse(b"foo") + f.seek(0) + lines = [f.readline(), f.readline(), f.readline()] + self.assertListEqual(lines, new_parse) diff --git a/tree_sitter/__init__.py b/tree_sitter/__init__.py index 6f148d4..6ed0a89 100644 --- a/tree_sitter/__init__.py +++ b/tree_sitter/__init__.py @@ -4,6 +4,7 @@ from ._binding import ( Language, + LogType, LookaheadIterator, Node, Parser, @@ -17,10 +18,13 @@ MIN_COMPATIBLE_LANGUAGE_VERSION, ) +LogType.__doc__ = "The type of a log message." + Point.__doc__ = "A position in a multi-line text document, in terms of rows and columns." Point.row.__doc__ = "The zero-based row of the document." Point.column.__doc__ = "The zero-based column of the document." + class QueryPredicate(_Protocol): """A custom query predicate that runs on a pattern.""" def __call__(self, predicate, args, pattern_index, captures): @@ -49,6 +53,7 @@ def __call__(self, predicate, args, pattern_index, captures): __all__ = [ "Language", + "LogType", "LookaheadIterator", "Node", "Parser", diff --git a/tree_sitter/__init__.pyi b/tree_sitter/__init__.pyi index c42dc58..3e3ac10 100644 --- a/tree_sitter/__init__.pyi +++ b/tree_sitter/__init__.pyi @@ -1,15 +1,24 @@ +from enum import IntEnum from collections.abc import ByteString, Callable, Iterator, Sequence from typing import Annotated, Any, Final, Literal, NamedTuple, Protocol, Self, final, overload +class _SupportsFileno(Protocol): + def fileno(self) -> int: ... + class Point(NamedTuple): row: int column: int +class LogType(IntEnum): + PARSE: int + LEX: int + @final class Language: + # TODO(0.25): ptr: Callable[[], CapsuleType] | CapsuleType def __init__(self, ptr: Annotated[int | object, "TSLanguage *"], /) -> None: ... - # TODO(0.24): implement name + # TODO(0.25): implement name # @property # def name(self) -> str | None: ... @@ -25,15 +34,18 @@ class Language: def id_for_node_kind(self, kind: str, named: bool, /) -> int | None: ... def node_kind_is_named(self, id: int, /) -> bool: ... def node_kind_is_visible(self, id: int, /) -> bool: ... + def node_kind_is_supertype(self, id: int, /) -> bool: ... def field_name_for_id(self, field_id: int, /) -> str | None: ... def field_id_for_name(self, name: str, /) -> int | None: ... def next_state(self, state: int, id: int, /) -> int: ... def lookahead_iterator(self, state: int, /) -> LookaheadIterator | None: ... def query(self, source: str, /) -> Query: ... + def copy(self) -> Language: ... def __repr__(self) -> str: ... def __eq__(self, other: Any, /) -> bool: ... def __ne__(self, other: Any, /) -> bool: ... def __hash__(self) -> int: ... + def __copy__(self) -> Language: ... @final class Node: @@ -111,10 +123,13 @@ class Node: def named_child(self, index: int, /) -> Node | None: ... def child_by_field_id(self, id: int, /) -> Node | None: ... def child_by_field_name(self, name: str, /) -> Node | None: ... + @deprecated("Use child_with_descendant instead") def child_containing_descendant(self, descendant: Node, /) -> Node | None: ... + def child_with_descendant(self, descendant: Node, /) -> Node | None: ... def children_by_field_id(self, id: int, /) -> list[Node]: ... def children_by_field_name(self, name: str, /) -> list[Node]: ... def field_name_for_child(self, child_index: int, /) -> str | None: ... + def field_name_for_named_child(self, child_index: int, /) -> str | None: ... def descendant_for_byte_range( self, start_byte: int, @@ -159,6 +174,7 @@ class Tree: offset_extent: Point | tuple[int, int], /, ) -> Node | None: ... + def copy(self) -> Tree: ... def edit( self, start_byte: int, @@ -170,8 +186,8 @@ class Tree: ) -> None: ... def walk(self) -> TreeCursor: ... def changed_ranges(self, new_tree: Tree) -> list[Range]: ... - # TODO(0.24): add print_dot_graph - # TODO(0.24): add copy methods + def print_dot_graph(self, file: _SupportsFileno) -> None: ... + def __copy__(self) -> Tree: ... @final class TreeCursor: @@ -206,6 +222,7 @@ class Parser: *, included_ranges: Sequence[Range] | None = None, timeout_micros: int | None = None, + logger: Callable[[LogType, str], None] | None = None, ) -> None: ... @property def language(self) -> Language | None: ... @@ -225,6 +242,12 @@ class Parser: def timeout_micros(self, timeout: int) -> None: ... @timeout_micros.deleter def timeout_micros(self) -> None: ... + @property + def logger(self) -> Callable[[LogType, str], None] | None: ... + @logger.setter + def logger(self, logger: Callable[[LogType, str], None]) -> None: ... + @logger.deleter + def logger(self) -> None: ... @overload def parse( self, @@ -242,8 +265,7 @@ class Parser: encoding: Literal["utf8", "utf16"] = "utf8", ) -> Tree: ... def reset(self) -> None: ... - # TODO(0.24): add set_logger - # TODO(0.24): add print_dot_graphs + def print_dot_graphs(self, file: _SupportsFileno | None) -> None: ... class QueryError(ValueError): ... @@ -253,7 +275,7 @@ class QueryPredicate(Protocol): predicate: str, args: list[tuple[str, Literal["capture", "string"]]], pattern_index: int, - captures: dict[str, list[Node]] + captures: dict[str, list[Node]], ) -> bool: ... @final @@ -275,7 +297,7 @@ class Query: def set_byte_range(self, byte_range: tuple[int, int]) -> Self: ... def set_point_range( self, - point_range: tuple[Point | tuple[int, int], Point | tuple[int, int]] + point_range: tuple[Point | tuple[int, int], Point | tuple[int, int]], ) -> Self: ... def disable_pattern(self, index: int) -> Self: ... def disable_capture(self, capture: str) -> Self: ... @@ -283,13 +305,13 @@ class Query: self, node: Node, /, - predicate: QueryPredicate | None = None + predicate: QueryPredicate | None = None, ) -> dict[str, list[Node]]: ... def matches( self, node: Node, /, - predicate: QueryPredicate | None = None + predicate: QueryPredicate | None = None, ) -> list[tuple[int, dict[str, list[Node]]]]: ... def pattern_settings(self, index: int) -> dict[str, str | None]: ... def pattern_assertions(self, index: int) -> dict[str, tuple[str | None, bool]]: ... @@ -307,14 +329,14 @@ class LookaheadIterator(Iterator[int]): @property def current_symbol_name(self) -> str: ... - # TODO(0.24): rename to reset + # TODO(0.25): rename to reset def reset_state(self, state: int, language: Language | None = None) -> bool: ... def iter_names(self) -> Iterator[str]: ... - # TODO(0.24): implement iter_symbols + # TODO(0.25): implement iter_symbols # def iter_symbols(self) -> Iterator[int]: ... - # TODO(0.24): return tuple[int, str] + # TODO(0.25): return tuple[int, str] def __next__(self) -> int: ... @final @@ -338,6 +360,7 @@ class Range: def __ne__(self, other: Any, /) -> bool: ... def __repr__(self) -> str: ... def __hash__(self) -> int: ... + # TODO(0.25): __replace__ LANGUAGE_VERSION: Final[int] diff --git a/tree_sitter/binding/language.c b/tree_sitter/binding/language.c index e942dbd..b8cb79e 100644 --- a/tree_sitter/binding/language.c +++ b/tree_sitter/binding/language.c @@ -126,6 +126,15 @@ PyObject *language_node_kind_is_visible(Language *self, PyObject *args) { return PyBool_FromLong(symbol_type <= TSSymbolTypeAnonymous); } +PyObject *language_node_kind_is_supertype(Language *self, PyObject *args) { + TSSymbol symbol; + if (!PyArg_ParseTuple(args, "H:node_kind_is_supertype", &symbol)) { + return NULL; + } + TSSymbolType symbol_type = ts_language_symbol_type(self->language, symbol); + return PyBool_FromLong(symbol_type <= TSSymbolTypeSupertype); +} + PyObject *language_field_name_for_id(Language *self, PyObject *args) { uint16_t field_id; if (!PyArg_ParseTuple(args, "H:field_name_for_id", &field_id)) { @@ -174,8 +183,7 @@ PyObject *language_lookahead_iterator(Language *self, PyObject *args) { if (iter == NULL) { return NULL; } - Py_INCREF(self); - iter->language = (PyObject *)self; + iter->language = Py_NewRef(self); iter->lookahead_iterator = lookahead_iterator; return PyObject_Init((PyObject *)iter, state->lookahead_iterator_type); } @@ -190,6 +198,17 @@ PyObject *language_query(Language *self, PyObject *args) { return PyObject_CallFunction((PyObject *)state->query_type, "Os#", self, source, length); } +PyObject *language_copy(Language *self, PyObject *Py_UNUSED(args)) { + ModuleState *state = GET_MODULE_STATE(self); + Language *copied = PyObject_New(Language, state->language_type); + if (copied == NULL) { + return NULL; + } + + copied->language = (TSLanguage *)ts_language_copy(self->language); + return PyObject_Init((PyObject *)copied, state->language_type); +} + PyDoc_STRVAR(language_node_kind_for_id_doc, "node_kind_for_id(self, id, /)\n--\n\n" "Get the name of the node kind for the given numerical id."); @@ -202,6 +221,10 @@ PyDoc_STRVAR(language_node_kind_is_visible_doc, "node_kind_is_visible(self, id, /)\n--\n\n" "Check if the node type for the given numerical id " "is visible (as opposed to an auxiliary node type)."); +PyDoc_STRVAR(language_node_kind_is_supertype_doc, + "node_kind_is_supertype(self, id, /)\n--\n\n" + "Check if the node type for the given numerical id is a supertype.\n\nSupertype " + "nodes represent abstract categories of syntax nodes (e.g. \"expression\")."); PyDoc_STRVAR(language_field_name_for_id_doc, "field_name_for_id(self, field_id, /)\n--\n\n" "Get the field name for the given numerical id."); PyDoc_STRVAR(language_field_id_for_name_doc, "field_id_for_name(self, name, /)\n--\n\n" @@ -218,6 +241,10 @@ PyDoc_STRVAR( language_query_doc, "query(self, source, /)\n--\n\n" "Create a new :class:`Query` from a string containing one or more S-expression patterns."); +PyDoc_STRVAR(language_copy_doc, "copy(self, /)\n--\n\n" + "Create a copy of the language."); +PyDoc_STRVAR(language_copy2_doc, "__copy__(self, /)\n--\n\n" + "Use :func:`copy.copy` to create a copy of the language."); static PyMethodDef language_methods[] = { { @@ -244,6 +271,12 @@ static PyMethodDef language_methods[] = { .ml_flags = METH_VARARGS, .ml_doc = language_node_kind_is_visible_doc, }, + { + .ml_name = "node_kind_is_supertype", + .ml_meth = (PyCFunction)language_node_kind_is_supertype, + .ml_flags = METH_VARARGS, + .ml_doc = language_node_kind_is_supertype_doc, + }, { .ml_name = "field_name_for_id", .ml_meth = (PyCFunction)language_field_name_for_id, @@ -274,6 +307,16 @@ static PyMethodDef language_methods[] = { .ml_flags = METH_VARARGS, .ml_doc = language_query_doc, }, + { + .ml_name = "copy", + .ml_meth = (PyCFunction)language_copy, + .ml_flags = METH_NOARGS, + .ml_doc = language_copy_doc, + }, + {.ml_name = "__copy__", + .ml_meth = (PyCFunction)language_copy, + .ml_flags = METH_NOARGS, + .ml_doc = language_copy2_doc}, {NULL}, }; diff --git a/tree_sitter/binding/lookahead_iterator.c b/tree_sitter/binding/lookahead_iterator.c index 49fc58f..ed1fccc 100644 --- a/tree_sitter/binding/lookahead_iterator.c +++ b/tree_sitter/binding/lookahead_iterator.c @@ -25,8 +25,7 @@ PyObject *lookahead_iterator_get_language(LookaheadIterator *self, void *Py_UNUS language->version = ts_language_version(language->language); self->language = PyObject_Init((PyObject *)language, state->language_type); } - Py_INCREF(self->language); - return self->language; + return Py_NewRef(self->language); } PyObject *lookahead_iterator_get_current_symbol(LookaheadIterator *self, void *Py_UNUSED(payload)) { @@ -62,8 +61,7 @@ PyObject *lookahead_iterator_reset_state(LookaheadIterator *self, PyObject *args } PyObject *lookahead_iterator_iter(LookaheadIterator *self) { - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *lookahead_iterator_next(LookaheadIterator *self) { diff --git a/tree_sitter/binding/lookahead_names_iterator.c b/tree_sitter/binding/lookahead_names_iterator.c index 5bced94..fb3132f 100644 --- a/tree_sitter/binding/lookahead_names_iterator.c +++ b/tree_sitter/binding/lookahead_names_iterator.c @@ -9,8 +9,7 @@ void lookahead_names_iterator_dealloc(LookaheadNamesIterator *self) { } PyObject *lookahead_names_iterator_iter(LookaheadNamesIterator *self) { - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *lookahead_names_iterator_next(LookaheadNamesIterator *self) { diff --git a/tree_sitter/binding/module.c b/tree_sitter/binding/module.c index 291d53d..ff45ff1 100644 --- a/tree_sitter/binding/module.c +++ b/tree_sitter/binding/module.c @@ -29,6 +29,7 @@ static void module_free(void *self) { ModuleState *state = PyModule_GetState((PyObject *)self); ts_tree_cursor_delete(&state->default_cursor); Py_XDECREF(state->language_type); + Py_XDECREF(state->log_type_type); Py_XDECREF(state->lookahead_iterator_type); Py_XDECREF(state->lookahead_names_iterator_type); Py_XDECREF(state->node_type); @@ -143,6 +144,18 @@ PyMODINIT_FUNC PyInit__binding(void) { goto cleanup; } + PyObject *int_enum = import_attribute("enum", "IntEnum"); + if (int_enum == NULL) { + goto cleanup; + } + state->log_type_type = (PyTypeObject *)PyObject_CallFunction( + int_enum, "s{sisi}", "LogType", "PARSE", TSLogTypeParse, "LEX", TSLogTypeLex); + if (state->log_type_type == NULL || + PyModule_AddObjectRef(module, "LogType", (PyObject *)state->log_type_type) < 0) { + goto cleanup; + }; + Py_DECREF(int_enum); + PyModule_AddIntConstant(module, "LANGUAGE_VERSION", TREE_SITTER_LANGUAGE_VERSION); PyModule_AddIntConstant(module, "MIN_COMPATIBLE_LANGUAGE_VERSION", TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION); diff --git a/tree_sitter/binding/node.c b/tree_sitter/binding/node.c index 4717e7b..bc6f71d 100644 --- a/tree_sitter/binding/node.c +++ b/tree_sitter/binding/node.c @@ -6,8 +6,7 @@ PyObject *node_new_internal(ModuleState *state, TSNode node, PyObject *tree) { return NULL; } self->node = node; - Py_INCREF(tree); - self->tree = tree; + self->tree = Py_NewRef(tree); self->children = NULL; return PyObject_Init((PyObject *)self, state->node_type); } @@ -53,8 +52,7 @@ PyObject *node_walk(Node *self, PyObject *Py_UNUSED(args)) { return NULL; } - Py_INCREF(self->tree); - tree_cursor->tree = self->tree; + tree_cursor->tree = Py_NewRef(self->tree); tree_cursor->node = NULL; tree_cursor->cursor = ts_tree_cursor_new(self->node); return PyObject_Init((PyObject *)tree_cursor, state->tree_cursor_type); @@ -223,6 +221,27 @@ PyObject *node_field_name_for_child(Node *self, PyObject *args) { return PyUnicode_FromString(field_name); } +PyObject *node_field_name_for_named_child(Node *self, PyObject *args) { + long index; + if (!PyArg_ParseTuple(args, "l:field_name_for_named_child", &index)) { + return NULL; + } + if (index < 0) { + PyErr_SetString(PyExc_ValueError, "child index must be positive"); + return NULL; + } + if ((uint32_t)index >= ts_node_child_count(self->node)) { + PyErr_SetString(PyExc_IndexError, "child index out of range"); + return NULL; + } + + const char *field_name = ts_node_field_name_for_named_child(self->node, index); + if (field_name == NULL) { + Py_RETURN_NONE; + } + return PyUnicode_FromString(field_name); +} + PyObject *node_descendant_for_byte_range(Node *self, PyObject *args) { ModuleState *state = GET_MODULE_STATE(self); uint32_t start_byte, end_byte; @@ -285,6 +304,9 @@ PyObject *node_child_containing_descendant(Node *self, PyObject *args) { if (!PyArg_ParseTuple(args, "O!:child_containing_descendant", &descendant, state->node_type)) { return NULL; } + if (REPLACE("child_containing_descendant", "child_with_descendant") < 0) { + return NULL; + } TSNode child = ts_node_child_containing_descendant(self->node, descendant); if (ts_node_is_null(child)) { @@ -293,6 +315,20 @@ PyObject *node_child_containing_descendant(Node *self, PyObject *args) { return node_new_internal(state, child, self->tree); } +PyObject *node_child_with_descendant(Node *self, PyObject *args) { + ModuleState *state = GET_MODULE_STATE(self); + TSNode descendant; + if (!PyArg_ParseTuple(args, "O!:child_with_descendant", &descendant, state->node_type)) { + return NULL; + } + + TSNode child = ts_node_child_with_descendant(self->node, descendant); + if (ts_node_is_null(child)) { + Py_RETURN_NONE; + } + return node_new_internal(state, child, self->tree); +} + PyObject *node_get_id(Node *self, void *Py_UNUSED(payload)) { return PyLong_FromVoidPtr((void *)self->node.id); } @@ -399,8 +435,7 @@ PyObject *node_get_end_point(Node *self, void *Py_UNUSED(payload)) { PyObject *node_get_children(Node *self, void *Py_UNUSED(payload)) { ModuleState *state = GET_MODULE_STATE(self); if (self->children) { - Py_INCREF(self->children); - return self->children; + return Py_NewRef(self->children); } uint32_t length = ts_node_child_count(self->node); @@ -421,9 +456,8 @@ PyObject *node_get_children(Node *self, void *Py_UNUSED(payload)) { } } while (ts_tree_cursor_goto_next_sibling(&state->default_cursor)); } - Py_INCREF(result); - self->children = result; - return result; + self->children = Py_NewRef(result); + return self->children; } PyObject *node_get_named_children(Node *self, void *payload) { @@ -444,8 +478,7 @@ PyObject *node_get_named_children(Node *self, void *payload) { for (uint32_t i = 0, j = 0; i < length; ++i) { PyObject *child = PyList_GetItem(self->children, i); if (ts_node_is_named(((Node *)child)->node)) { - Py_INCREF(child); - if (PyList_SetItem(result, j++, child)) { + if (PyList_SetItem(result, j++, Py_NewRef(child))) { Py_DECREF(result); return NULL; } @@ -600,6 +633,9 @@ PyDoc_STRVAR(node_children_by_field_name_doc, "children_by_field_name(self, name PyDoc_STRVAR(node_field_name_for_child_doc, "field_name_for_child(self, child_index, /)\n--\n\n" "Get the field name of this node's child at the given index."); +PyDoc_STRVAR(node_field_name_for_named_child_doc, + "field_name_for_child(self, child_index, /)\n--\n\n" + "Get the field name of this node's *named* child at the given index."); PyDoc_STRVAR(node_descendant_for_byte_range_doc, "descendant_for_byte_range(self, start_byte, end_byte, /)\n--\n\n" "Get the smallest node within this node that spans the given byte range."); @@ -614,7 +650,11 @@ PyDoc_STRVAR(node_named_descendant_for_point_range_doc, "Get the smallest *named* node within this node that spans the given point range."); PyDoc_STRVAR(node_child_containing_descendant_doc, "child_containing_descendant(self, descendant, /)\n--\n\n" - "Get the child of the node that contains the given descendant."); + "Get the child of the node that contains the given descendant." DOC_ATTENTION + "This will not return the descendant if it is a direct child of this node."); +PyDoc_STRVAR(node_child_with_descendant_doc, + "child_with_descendant(self, descendant, /)\n--\n\n" + "Get the node that contains the given descendant."); static PyMethodDef node_methods[] = { { @@ -671,6 +711,12 @@ static PyMethodDef node_methods[] = { .ml_flags = METH_VARARGS, .ml_doc = node_field_name_for_child_doc, }, + { + .ml_name = "field_name_for_named_child", + .ml_meth = (PyCFunction)node_field_name_for_named_child, + .ml_flags = METH_VARARGS, + .ml_doc = node_field_name_for_named_child_doc, + }, { .ml_name = "descendant_for_byte_range", .ml_meth = (PyCFunction)node_descendant_for_byte_range, @@ -701,6 +747,12 @@ static PyMethodDef node_methods[] = { .ml_flags = METH_VARARGS, .ml_doc = node_child_containing_descendant_doc, }, + { + .ml_name = "child_with_descendant", + .ml_meth = (PyCFunction)node_child_with_descendant, + .ml_flags = METH_VARARGS, + .ml_doc = node_child_with_descendant_doc, + }, {NULL}, }; diff --git a/tree_sitter/binding/parser.c b/tree_sitter/binding/parser.c index 5cf4239..73d1c57 100644 --- a/tree_sitter/binding/parser.c +++ b/tree_sitter/binding/parser.c @@ -9,18 +9,33 @@ typedef struct { ModuleState *state; } ReadWrapperPayload; +typedef struct { + PyObject *callback; + PyTypeObject *log_type_type; +} LoggerPayload; + +static void free_logger(const TSParser *parser) { + TSLogger logger = ts_parser_logger(parser); + if (logger.payload != NULL) { + PyMem_Free(logger.payload); + } +} + PyObject *parser_new(PyTypeObject *cls, PyObject *Py_UNUSED(args), PyObject *Py_UNUSED(kwargs)) { Parser *self = (Parser *)cls->tp_alloc(cls, 0); if (self != NULL) { self->parser = ts_parser_new(); self->language = NULL; + self->logger = NULL; } return (PyObject *)self; } void parser_dealloc(Parser *self) { + free_logger(self->parser); ts_parser_delete(self->parser); Py_XDECREF(self->language); + Py_XDECREF(self->logger); Py_TYPE(self)->tp_free(self); } @@ -190,6 +205,19 @@ PyObject *parser_reset(Parser *self, void *Py_UNUSED(payload)) { Py_RETURN_NONE; } +PyObject *parser_print_dot_graphs(Parser *self, PyObject *arg) { + if (arg == Py_None) { + ts_parser_print_dot_graphs(self->parser, -1); + } else { + int fd = PyObject_AsFileDescriptor(arg); + if (fd < 0) { + return NULL; + } + ts_parser_print_dot_graphs(self->parser, fd); + } + Py_RETURN_NONE; +} + PyObject *parser_get_timeout_micros(Parser *self, void *Py_UNUSED(payload)) { return PyLong_FromUnsignedLong(ts_parser_timeout_micros(self->parser)); } @@ -273,8 +301,49 @@ PyObject *parser_get_language(Parser *self, void *Py_UNUSED(payload)) { if (!self->language) { Py_RETURN_NONE; } - Py_INCREF(self->language); - return self->language; + return Py_NewRef(self->language); +} + +PyObject *parser_get_logger(Parser *self, void *Py_UNUSED(payload)) { + if (!self->logger) { + Py_RETURN_NONE; + } + return Py_NewRef(self->logger); +} + +static void log_callback(void *payload, TSLogType log_type, const char *buffer) { + LoggerPayload *logger_payload = (LoggerPayload *)payload; + PyObject *log_type_enum = + PyObject_CallFunction((PyObject *)logger_payload->log_type_type, "i", log_type); + PyObject_CallFunction(logger_payload->callback, "Os", log_type_enum, buffer); +} + +int parser_set_logger(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) { + free_logger(self->parser); + + if (arg == NULL || arg == Py_None) { + Py_XDECREF(self->logger); + self->logger = NULL; + TSLogger logger = {NULL, NULL}; + ts_parser_set_logger(self->parser, logger); + return 0; + } + if (!PyCallable_Check(arg)) { + PyErr_Format(PyExc_TypeError, "logger must be assigned a Callable object, not %s", + arg->ob_type->tp_name); + return -1; + } + + Py_XSETREF(self->logger, Py_NewRef(arg)); + + ModuleState *state = GET_MODULE_STATE(self); + LoggerPayload *payload = PyMem_Malloc(sizeof(LoggerPayload)); + payload->callback = self->logger; + payload->log_type_type = state->log_type_type; + TSLogger logger = {payload, log_callback}; + ts_parser_set_logger(self->parser, logger); + + return 0; } int parser_set_language(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) { @@ -304,18 +373,17 @@ int parser_set_language(Parser *self, PyObject *arg, void *Py_UNUSED(payload)) { return -1; } - Py_INCREF(language); - Py_XSETREF(self->language, (PyObject *)language); + Py_XSETREF(self->language, Py_NewRef(language)); return 0; } int parser_init(Parser *self, PyObject *args, PyObject *kwargs) { ModuleState *state = GET_MODULE_STATE(self); - PyObject *language = NULL, *included_ranges = NULL, *timeout_micros = NULL; - char *keywords[] = {"language", "included_ranges", "timeout_micros", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!$OO:__init__", keywords, + PyObject *language = NULL, *included_ranges = NULL, *timeout_micros = NULL, *logger = NULL; + char *keywords[] = {"language", "included_ranges", "timeout_micros", "logger", NULL}; + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|O!$OOO:__init__", keywords, state->language_type, &language, &included_ranges, - &timeout_micros)) { + &timeout_micros, &logger)) { return -1; } @@ -328,6 +396,9 @@ int parser_init(Parser *self, PyObject *args, PyObject *kwargs) { if (SET_ATTRIBUTE_ERROR(timeout_micros)) { return -1; } + if (SET_ATTRIBUTE_ERROR(logger)) { + return -1; + } return 0; } @@ -347,6 +418,11 @@ PyDoc_STRVAR( "If the parser previously failed because of a timeout, then by default, it will resume where " "it left off on the next call to :meth:`parse`.\nIf you don't want to resume, and instead " "intend to use this parser to parse some other document, you must call :meth:`reset` first."); +PyDoc_STRVAR(parser_print_dot_graphs_doc, + "print_dot_graphs(self, /, file)\n--\n\n" + "Set the file descriptor to which the parser should write debugging " + "graphs during parsing. The graphs are formatted in the DOT language. " + "You can turn off this logging by passing ``None``."); static PyMethodDef parser_methods[] = { { @@ -361,6 +437,12 @@ static PyMethodDef parser_methods[] = { .ml_flags = METH_NOARGS, .ml_doc = parser_reset_doc, }, + { + .ml_name = "print_dot_graphs", + .ml_meth = (PyCFunction)parser_print_dot_graphs, + .ml_flags = METH_O, + .ml_doc = parser_print_dot_graphs_doc, + }, {NULL}, }; @@ -371,6 +453,8 @@ static PyGetSetDef parser_accessors[] = { PyDoc_STR("The ranges of text that the parser will include when parsing."), NULL}, {"timeout_micros", (getter)parser_get_timeout_micros, (setter)parser_set_timeout_micros, PyDoc_STR("The duration in microseconds that parsing is allowed to take."), NULL}, + {"logger", (getter)parser_get_logger, (setter)parser_set_logger, + PyDoc_STR("The logger that the parser should use during parsing."), NULL}, {NULL}, }; diff --git a/tree_sitter/binding/query.c b/tree_sitter/binding/query.c index c6593aa..0c071f4 100644 --- a/tree_sitter/binding/query.c +++ b/tree_sitter/binding/query.c @@ -592,8 +592,7 @@ PyObject *query_pattern_settings(Query *self, PyObject *args) { return NULL; } PyObject *item = PyList_GetItem(self->settings, pattern_index); - Py_INCREF(item); - return item; + return Py_NewRef(item); } PyObject *query_pattern_assertions(Query *self, PyObject *args) { @@ -607,8 +606,7 @@ PyObject *query_pattern_assertions(Query *self, PyObject *args) { return NULL; } PyObject *item = PyList_GetItem(self->assertions, pattern_index); - Py_INCREF(item); - return item; + return Py_NewRef(item); } PyObject *query_set_timeout_micros(Query *self, PyObject *args) { @@ -617,8 +615,7 @@ PyObject *query_set_timeout_micros(Query *self, PyObject *args) { return NULL; } ts_query_cursor_set_timeout_micros(self->cursor, timeout_micros); - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *query_set_match_limit(Query *self, PyObject *args) { @@ -631,8 +628,7 @@ PyObject *query_set_match_limit(Query *self, PyObject *args) { return NULL; } ts_query_cursor_set_match_limit(self->cursor, match_limit); - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *query_set_max_start_depth(Query *self, PyObject *args) { @@ -641,8 +637,7 @@ PyObject *query_set_max_start_depth(Query *self, PyObject *args) { return NULL; } ts_query_cursor_set_max_start_depth(self->cursor, max_start_depth); - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *query_set_byte_range(Query *self, PyObject *args) { @@ -651,8 +646,7 @@ PyObject *query_set_byte_range(Query *self, PyObject *args) { return NULL; } ts_query_cursor_set_byte_range(self->cursor, start_byte, end_byte); - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *query_set_point_range(Query *self, PyObject *args) { @@ -662,8 +656,7 @@ PyObject *query_set_point_range(Query *self, PyObject *args) { return NULL; } ts_query_cursor_set_point_range(self->cursor, start_point, end_point); - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *query_disable_pattern(Query *self, PyObject *args) { @@ -672,8 +665,7 @@ PyObject *query_disable_pattern(Query *self, PyObject *args) { return NULL; } ts_query_disable_pattern(self->query, pattern_index); - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *query_disable_capture(Query *self, PyObject *args) { @@ -683,8 +675,7 @@ PyObject *query_disable_capture(Query *self, PyObject *args) { return NULL; } ts_query_disable_capture(self->query, capture_name, length); - Py_INCREF(self); - return (PyObject *)self; + return Py_NewRef(self); } PyObject *query_start_byte_for_pattern(Query *self, PyObject *args) { diff --git a/tree_sitter/binding/tree.c b/tree_sitter/binding/tree.c index 9c0ab22..8eb426b 100644 --- a/tree_sitter/binding/tree.c +++ b/tree_sitter/binding/tree.c @@ -37,8 +37,7 @@ PyObject *tree_walk(Tree *self, PyObject *Py_UNUSED(args)) { return NULL; } - Py_INCREF(self); - tree_cursor->tree = (PyObject *)self; + tree_cursor->tree = Py_NewRef(self); tree_cursor->node = NULL; tree_cursor->cursor = ts_tree_cursor_new(ts_tree_root_node(self->tree)); return PyObject_Init((PyObject *)tree_cursor, state->tree_cursor_type); @@ -75,6 +74,24 @@ PyObject *tree_edit(Tree *self, PyObject *args, PyObject *kwargs) { Py_RETURN_NONE; } +PyObject *tree_copy(Tree *self, PyObject *Py_UNUSED(args)) { + ModuleState *state = GET_MODULE_STATE(self); + Tree *copied = PyObject_New(Tree, state->tree_type); + if (copied == NULL) { + return NULL; + } + + copied->tree = ts_tree_copy(self->tree); + return PyObject_Init((PyObject *)copied, state->tree_type); +} + +PyObject *tree_print_dot_graph(Tree *self, PyObject *arg) { + int fd = PyObject_AsFileDescriptor(arg); + if (fd < 0) return NULL; + ts_tree_print_dot_graph(self->tree, fd); + Py_RETURN_NONE; +} + PyObject *tree_changed_ranges(Tree *self, PyObject *args, PyObject *kwargs) { ModuleState *state = GET_MODULE_STATE(self); PyObject *new_tree; @@ -128,8 +145,7 @@ PyObject *tree_get_included_ranges(Tree *self, PyObject *Py_UNUSED(args)) { } PyObject *tree_get_language(Tree *self, PyObject *Py_UNUSED(args)) { - Py_INCREF(self->language); - return self->language; + return Py_NewRef(self->language); } PyDoc_STRVAR(tree_root_node_with_offset_doc, @@ -152,6 +168,13 @@ PyDoc_STRVAR( "ranges match up to the new tree.\n\nGenerally, you'll want to call this method " "right after calling the :meth:`Parser.parse` method. Call it on the old tree that " "was passed to the method, and pass the new tree that was returned from it."); +PyDoc_STRVAR(tree_print_dot_graph_doc, + "print_dot_graph(self, /, file)\n--\n\n" + "Write a DOT graph describing the syntax tree to the given file."); +PyDoc_STRVAR(tree_copy_doc, "copy(self, /)\n--\n\n" + "Create a shallow copy of the tree."); +PyDoc_STRVAR(tree_copy2_doc, "__copy__(self, /)\n--\n\n" + "Use :func:`copy.copy` to create a copy of the tree."); static PyMethodDef tree_methods[] = { { @@ -178,6 +201,22 @@ static PyMethodDef tree_methods[] = { .ml_flags = METH_KEYWORDS | METH_VARARGS, .ml_doc = tree_changed_ranges_doc, }, + { + .ml_name = "print_dot_graph", + .ml_meth = (PyCFunction)tree_print_dot_graph, + .ml_flags = METH_O, + .ml_doc = tree_print_dot_graph_doc, + }, + { + .ml_name = "copy", + .ml_meth = (PyCFunction)tree_copy, + .ml_flags = METH_NOARGS, + .ml_doc = tree_copy_doc, + }, + {.ml_name = "__copy__", + .ml_meth = (PyCFunction)tree_copy, + .ml_flags = METH_NOARGS, + .ml_doc = tree_copy2_doc}, {NULL}, }; diff --git a/tree_sitter/binding/tree_cursor.c b/tree_sitter/binding/tree_cursor.c index 48822e9..113eddd 100644 --- a/tree_sitter/binding/tree_cursor.c +++ b/tree_sitter/binding/tree_cursor.c @@ -18,8 +18,7 @@ PyObject *tree_cursor_get_node(TreeCursor *self, void *Py_UNUSED(payload)) { ModuleState *state = GET_MODULE_STATE(self); self->node = node_new_internal(state, current_node, self->tree); } - Py_INCREF(self->node); - return self->node; + return Py_NewRef(self->node); } PyObject *tree_cursor_get_field_id(TreeCursor *self, void *Py_UNUSED(payload)) { @@ -169,8 +168,7 @@ PyObject *tree_cursor_copy(TreeCursor *self, PyObject *Py_UNUSED(args)) { return NULL; } - Py_INCREF(self->tree); - copied->tree = self->tree; + copied->tree = Py_NewRef(self->tree); copied->cursor = ts_tree_cursor_copy(&self->cursor); return PyObject_Init((PyObject *)copied, state->tree_cursor_type); } @@ -183,7 +181,7 @@ PyDoc_STRVAR( tree_cursor_goto_last_child_doc, "goto_last_child(self, /)\n--\n\n" "Move this cursor to the last child of its current node." DOC_RETURNS "``True`` " - "if the cursor successfully moved, or ``False`` if there were no children." DOC_ATTENTION + "if the cursor successfully moved, or ``False`` if there were no children." DOC_CAUTION "This method may be slower than :meth:`goto_first_child` because it needs " "to iterate through all the children to compute the child's position."); PyDoc_STRVAR(tree_cursor_goto_parent_doc, @@ -199,7 +197,7 @@ PyDoc_STRVAR(tree_cursor_goto_previous_sibling_doc, "goto_previous_sibling(self, /)\n--\n\n" "Move this cursor to the previous sibling of its current node." DOC_RETURNS "``True`` if the cursor successfully moved, or ``False`` if there was no previous " - "sibling." DOC_ATTENTION + "sibling." DOC_CAUTION "This method may be slower than :meth:`goto_next_sibling` due to how node positions " "are stored.\nIn the worst case, this will need to iterate through all the children " "up to the previous sibling node to recalculate its position."); diff --git a/tree_sitter/binding/types.h b/tree_sitter/binding/types.h index 35b5a69..b41890a 100644 --- a/tree_sitter/binding/types.h +++ b/tree_sitter/binding/types.h @@ -35,6 +35,7 @@ typedef struct { PyObject_HEAD TSParser *parser; PyObject *language; + PyObject *logger; } Parser; typedef struct { @@ -109,6 +110,7 @@ typedef struct { PyObject *re_compile; PyObject *query_error; PyTypeObject *language_type; + PyTypeObject *log_type_type; PyTypeObject *lookahead_iterator_type; PyTypeObject *lookahead_names_iterator_type; PyTypeObject *node_type; From 1e4896605da53c9fd49c9b794ab1613565c0ac76 Mon Sep 17 00:00:00 2001 From: ObserverOfTime Date: Thu, 17 Oct 2024 15:38:42 +0300 Subject: [PATCH 3/3] feat(language): deprecate int constructor --- tree_sitter/__init__.pyi | 8 ++++++-- tree_sitter/binding/language.c | 3 +++ 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/tree_sitter/__init__.pyi b/tree_sitter/__init__.pyi index 3e3ac10..b9f9fea 100644 --- a/tree_sitter/__init__.pyi +++ b/tree_sitter/__init__.pyi @@ -1,6 +1,7 @@ from enum import IntEnum from collections.abc import ByteString, Callable, Iterator, Sequence from typing import Annotated, Any, Final, Literal, NamedTuple, Protocol, Self, final, overload +from typing_extensions import deprecated class _SupportsFileno(Protocol): def fileno(self) -> int: ... @@ -15,8 +16,11 @@ class LogType(IntEnum): @final class Language: - # TODO(0.25): ptr: Callable[[], CapsuleType] | CapsuleType - def __init__(self, ptr: Annotated[int | object, "TSLanguage *"], /) -> None: ... + @overload + @deprecated("int argument support is deprecated") + def __init__(self, ptr: Annotated[int, "TSLanguage *"], /) -> None: ... + @overload + def __init__(self, ptr: Annotated[object, "TSLanguage *"], /) -> None: ... # TODO(0.25): implement name # @property diff --git a/tree_sitter/binding/language.c b/tree_sitter/binding/language.c index b8cb79e..858ddb8 100644 --- a/tree_sitter/binding/language.c +++ b/tree_sitter/binding/language.c @@ -16,6 +16,9 @@ int language_init(Language *self, PyObject *args, PyObject *Py_UNUSED(kwargs)) { } return -1; } + if (DEPRECATE("int argument support is deprecated") < 0) { + return -1; + } self->language = PyLong_AsVoidPtr(language); }