diff --git a/README.rst b/README.rst index 796f2b5..a0ccfca 100644 --- a/README.rst +++ b/README.rst @@ -32,7 +32,6 @@ The API Reference is here: http://asttokens.readthedocs.io/en/latest/api-index.h Usage ----- -ASTTokens works with both Python2 and Python3. ASTTokens can annotate both trees built by `ast `_, AND those built by `astroid `_. diff --git a/asttokens/asttokens.py b/asttokens/asttokens.py index 9f8c5a1..b537786 100644 --- a/asttokens/asttokens.py +++ b/asttokens/asttokens.py @@ -104,9 +104,6 @@ class ASTTokens(ASTTextBase): def __init__(self, source_text, parse=False, tree=None, filename='', tokens=None): # type: (Any, bool, Optional[Module], str, Iterable[TokenInfo]) -> None - # FIXME: Strictly, the type of source_text is one of the six string types, but hard to specify with mypy given - # https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases - super(ASTTokens, self).__init__(source_text, filename) self._tree = ast.parse(source_text, filename) if parse else tree @@ -292,9 +289,6 @@ class ASTText(ASTTextBase): """ def __init__(self, source_text, tree=None, filename=''): # type: (Any, Optional[Module], str) -> None - # FIXME: Strictly, the type of source_text is one of the six string types, but hard to specify with mypy given - # https://mypy.readthedocs.io/en/stable/common_issues.html#variables-vs-type-aliases - super(ASTText, self).__init__(source_text, filename) self._tree = tree @@ -327,10 +321,6 @@ def _get_text_positions_tokenless(self, node, padded): """ Version of ``get_text_positions()`` that doesn't use tokens. """ - if sys.version_info[:2] < (3, 8): # pragma: no cover - # This is just for mpypy - raise AssertionError("This method should only be called internally after checking supports_tokenless()") - if is_module(node): # Modules don't have position info, so just return the range of the whole text. # The token-using method does something different, but its behavior seems weird and inconsistent. @@ -413,16 +403,14 @@ def get_text_positions(self, node, padded): return self.asttokens.get_text_positions(node, padded) -# Node types that _get_text_positions_tokenless doesn't support. Only relevant for Python 3.8+. -_unsupported_tokenless_types = () # type: Tuple[str, ...] -if sys.version_info[:2] >= (3, 8): - # no lineno - _unsupported_tokenless_types += ("arguments", "Arguments", "withitem") - if sys.version_info[:2] == (3, 8): - # _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8. - _unsupported_tokenless_types += ("arg", "Starred") - # no lineno in 3.8 - _unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword") +# Node types that _get_text_positions_tokenless doesn't support. +# These initial values are missing lineno. +_unsupported_tokenless_types = ("arguments", "Arguments", "withitem") # type: Tuple[str, ...] +if sys.version_info[:2] == (3, 8): + # _get_text_positions_tokenless works incorrectly for these types due to bugs in Python 3.8. + _unsupported_tokenless_types += ("arg", "Starred") + # no lineno in 3.8 + _unsupported_tokenless_types += ("Slice", "ExtSlice", "Index", "keyword") def supports_tokenless(node=None): @@ -434,7 +422,6 @@ def supports_tokenless(node=None): The following cases are not supported: - - Python 3.7 and earlier - PyPy - ``ast.arguments`` / ``astroid.Arguments`` - ``ast.withitem`` @@ -459,6 +446,5 @@ def supports_tokenless(node=None): ) ) ) - and sys.version_info[:2] >= (3, 8) and 'pypy' not in sys.version.lower() ) diff --git a/asttokens/mark_tokens.py b/asttokens/mark_tokens.py index c8a7811..f866b1c 100644 --- a/asttokens/mark_tokens.py +++ b/asttokens/mark_tokens.py @@ -21,7 +21,6 @@ from . import util from .asttokens import ASTTokens -from .util import AstConstant from .astroid_compat import astroid_node_classes as nc, BaseContainer as AstroidBaseContainer if TYPE_CHECKING: @@ -177,13 +176,6 @@ def handle_comp(self, open_brace, node, first_token, last_token): util.expect_token(before, token.OP, open_brace) return (before, last_token) - # Python 3.8 fixed the starting position of list comprehensions: - # https://bugs.python.org/issue31241 - if sys.version_info < (3, 8): - def visit_listcomp(self, node, first_token, last_token): - # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] - return self.handle_comp('[', node, first_token, last_token) - def visit_comprehension(self, node, # type: AstNode first_token, # type: util.Token @@ -296,26 +288,19 @@ def handle_bare_tuple(self, node, first_token, last_token): last_token = maybe_comma return (first_token, last_token) - if sys.version_info >= (3, 8): - # In Python3.8 parsed tuples include parentheses when present. - def handle_tuple_nonempty(self, node, first_token, last_token): - # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] - assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer) - # It's a bare tuple if the first token belongs to the first child. The first child may - # include extraneous parentheses (which don't create new nodes), so account for those too. - child = node.elts[0] - if TYPE_CHECKING: - child = cast(AstNode, child) - child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True) - if first_token == child_first: - return self.handle_bare_tuple(node, first_token, last_token) - return (first_token, last_token) - else: - # Before python 3.8, parsed tuples do not include parens. - def handle_tuple_nonempty(self, node, first_token, last_token): - # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] - (first_token, last_token) = self.handle_bare_tuple(node, first_token, last_token) - return self._gobble_parens(first_token, last_token, False) + # In Python3.8 parsed tuples include parentheses when present. + def handle_tuple_nonempty(self, node, first_token, last_token): + # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] + assert isinstance(node, ast.Tuple) or isinstance(node, AstroidBaseContainer) + # It's a bare tuple if the first token belongs to the first child. The first child may + # include extraneous parentheses (which don't create new nodes), so account for those too. + child = node.elts[0] + if TYPE_CHECKING: + child = cast(AstNode, child) + child_first, child_last = self._gobble_parens(child.first_token, child.last_token, True) + if first_token == child_first: + return self.handle_bare_tuple(node, first_token, last_token) + return (first_token, last_token) def visit_tuple(self, node, first_token, last_token): # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] @@ -417,19 +402,15 @@ def visit_num(self, node, first_token, last_token): # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] return self.handle_num(node, cast(ast.Num, node).n, first_token, last_token) - # In Astroid, the Num and Str nodes are replaced by Const. def visit_const(self, node, first_token, last_token): # type: (AstNode, util.Token, util.Token) -> Tuple[util.Token, util.Token] - assert isinstance(node, AstConstant) or isinstance(node, nc.Const) + assert isinstance(node, ast.Constant) or isinstance(node, nc.Const) if isinstance(node.value, numbers.Number): return self.handle_num(node, node.value, first_token, last_token) elif isinstance(node.value, (str, bytes)): return self.visit_str(node, first_token, last_token) return (first_token, last_token) - # In Python >= 3.6, there is a similar class 'Constant' for literals - # In 3.8 it became the type produced by ast.parse - # https://bugs.python.org/issue32892 visit_constant = visit_const def visit_keyword(self, node, first_token, last_token): diff --git a/asttokens/util.py b/asttokens/util.py index 58856b4..df3e729 100644 --- a/asttokens/util.py +++ b/asttokens/util.py @@ -20,10 +20,21 @@ import tokenize from abc import ABCMeta from ast import Module, expr, AST -from typing import Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union, cast, Any, TYPE_CHECKING - -import astroid - +from functools import lru_cache +from typing import ( + Callable, + Dict, + Iterable, + Iterator, + List, + Optional, + Tuple, + Union, + cast, + Any, + TYPE_CHECKING, + Type, +) if TYPE_CHECKING: # pragma: no cover from .astroid_compat import NodeNG @@ -67,13 +78,6 @@ def __str__(self): return token_repr(self.type, self.string) -if sys.version_info >= (3, 6): - AstConstant = ast.Constant -else: - class AstConstant: - value = object() - - def match_token(token, tok_type, tok_str=None): # type: (Token, int, Optional[str]) -> bool """Returns true if token is of the given type and, if a string is given, has that string.""" @@ -91,22 +95,13 @@ def expect_token(token, tok_type, tok_str=None): token_repr(tok_type, tok_str), str(token), token.start[0], token.start[1] + 1)) -# These were previously defined in tokenize.py and distinguishable by being greater than -# token.N_TOKEN. As of python3.7, they are in token.py, and we check for them explicitly. -if sys.version_info >= (3, 7): - def is_non_coding_token(token_type): - # type: (int) -> bool - """ - These are considered non-coding tokens, as they don't affect the syntax tree. - """ - return token_type in (token.NL, token.COMMENT, token.ENCODING) -else: - def is_non_coding_token(token_type): - # type: (int) -> bool - """ - These are considered non-coding tokens, as they don't affect the syntax tree. - """ - return token_type >= token.N_TOKENS + +def is_non_coding_token(token_type): + # type: (int) -> bool + """ + These are considered non-coding tokens, as they don't affect the syntax tree. + """ + return token_type in (token.NL, token.COMMENT, token.ENCODING) def generate_tokens(text): @@ -201,10 +196,19 @@ def is_expr_stmt(node): return node.__class__.__name__ == 'Expr' + +CONSTANT_CLASSES: Tuple[Type, ...] = (ast.Constant,) +try: + from astroid import Const + CONSTANT_CLASSES += (Const,) +except ImportError: # pragma: no cover + # astroid is not available + pass + def is_constant(node): # type: (AstNode) -> bool """Returns whether node is a Constant node.""" - return isinstance(node, (ast.Constant, astroid.Const)) + return isinstance(node, CONSTANT_CLASSES) def is_ellipsis(node): @@ -421,72 +425,61 @@ def last_stmt(node): return node -if sys.version_info[:2] >= (3, 8): - from functools import lru_cache - @lru_cache(maxsize=None) - def fstring_positions_work(): - # type: () -> bool - """ - The positions attached to nodes inside f-string FormattedValues have some bugs - that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729. - This checks for those bugs more concretely without relying on the Python version. - Specifically this checks: - - Values with a format spec or conversion - - Repeated (i.e. identical-looking) expressions - - f-strings implicitly concatenated over multiple lines. - - Multiline, triple-quoted f-strings. - """ - source = """( - f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}" - f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}" - f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}" - f''' - {s} {t} - {u} {v} - ''' - )""" - tree = ast.parse(source) - name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)] - name_positions = [(node.lineno, node.col_offset) for node in name_nodes] - positions_are_unique = len(set(name_positions)) == len(name_positions) - correct_source_segments = all( - ast.get_source_segment(source, node) == node.id - for node in name_nodes - ) - return positions_are_unique and correct_source_segments +@lru_cache(maxsize=None) +def fstring_positions_work(): + # type: () -> bool + """ + The positions attached to nodes inside f-string FormattedValues have some bugs + that were fixed in Python 3.9.7 in https://github.com/python/cpython/pull/27729. + This checks for those bugs more concretely without relying on the Python version. + Specifically this checks: + - Values with a format spec or conversion + - Repeated (i.e. identical-looking) expressions + - f-strings implicitly concatenated over multiple lines. + - Multiline, triple-quoted f-strings. + """ + source = """( + f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}" + f"a {b}{b} c {d!r} e {f:g} h {i:{j}} k {l:{m:n}}" + f"{x + y + z} {x} {y} {z} {z} {z!a} {z:z}" + f''' + {s} {t} + {u} {v} + ''' + )""" + tree = ast.parse(source) + name_nodes = [node for node in ast.walk(tree) if isinstance(node, ast.Name)] + name_positions = [(node.lineno, node.col_offset) for node in name_nodes] + positions_are_unique = len(set(name_positions)) == len(name_positions) + correct_source_segments = all( + ast.get_source_segment(source, node) == node.id + for node in name_nodes + ) + return positions_are_unique and correct_source_segments - def annotate_fstring_nodes(tree): - # type: (ast.AST) -> None - """ - Add a special attribute `_broken_positions` to nodes inside f-strings - if the lineno/col_offset cannot be trusted. - """ - if sys.version_info >= (3, 12): - # f-strings were weirdly implemented until https://peps.python.org/pep-0701/ - # In Python 3.12, inner nodes have sensible positions. - return - for joinedstr in walk(tree, include_joined_str=True): - if not isinstance(joinedstr, ast.JoinedStr): - continue - for part in joinedstr.values: - # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird. - setattr(part, '_broken_positions', True) # use setattr for mypy - - if isinstance(part, ast.FormattedValue): - if not fstring_positions_work(): - for child in walk(part.value): - setattr(child, '_broken_positions', True) - - if part.format_spec: # this is another JoinedStr - # Again, the standard positions span the full f-string. - setattr(part.format_spec, '_broken_positions', True) - -else: - def fstring_positions_work(): - # type: () -> bool - return False - - def annotate_fstring_nodes(_tree): - # type: (ast.AST) -> None - pass +def annotate_fstring_nodes(tree): + # type: (ast.AST) -> None + """ + Add a special attribute `_broken_positions` to nodes inside f-strings + if the lineno/col_offset cannot be trusted. + """ + if sys.version_info >= (3, 12): + # f-strings were weirdly implemented until https://peps.python.org/pep-0701/ + # In Python 3.12, inner nodes have sensible positions. + return + for joinedstr in walk(tree, include_joined_str=True): + if not isinstance(joinedstr, ast.JoinedStr): + continue + for part in joinedstr.values: + # The ast positions of the FormattedValues/Constant nodes span the full f-string, which is weird. + setattr(part, '_broken_positions', True) # use setattr for mypy + + if isinstance(part, ast.FormattedValue): + if not fstring_positions_work(): + for child in walk(part.value): + setattr(child, '_broken_positions', True) + + if part.format_spec: # this is another JoinedStr + # Again, the standard positions span the full f-string. + setattr(part.format_spec, '_broken_positions', True)