From a34e52ba873c7db07bdf88853720cb877676d14f Mon Sep 17 00:00:00 2001 From: James Owen Date: Fri, 31 May 2024 16:34:11 +0100 Subject: [PATCH] Pass through source position information in spans Prior to this change, we were tracking source position information in the stream, but were not storing it in the resulting spans. This change adds start_position and end_position SourcePosition attributes to Span nodes. To make this work we end up complicating the constructor for Span nodes, so that it can either take a location or an index/position combo (for JSON deserialization). --- fluent.syntax/fluent/syntax/ast.py | 39 +++++++++++++++++++--- fluent.syntax/fluent/syntax/parser.py | 2 +- fluent.syntax/fluent/syntax/stream.py | 6 ++-- fluent.syntax/tests/syntax/test_entry.py | 32 ++++++++++++++++-- fluent.syntax/tests/syntax/test_visitor.py | 1 + 5 files changed, 70 insertions(+), 10 deletions(-) diff --git a/fluent.syntax/fluent/syntax/ast.py b/fluent.syntax/fluent/syntax/ast.py index aa193b09..7ccdd4f0 100644 --- a/fluent.syntax/fluent/syntax/ast.py +++ b/fluent.syntax/fluent/syntax/ast.py @@ -388,18 +388,47 @@ def __init__(self, row_index: int, column_index: int, **kwargs: Any): self.column_index = column_index class Span(BaseNode): - def __init__(self, start: Location, end: Location, **kwargs: Any): + def __init__( + self, + start: Location | int, + end: Location | int, + start_position: SourcePosition | None = None, + end_position: SourcePosition | None = None, + **kwargs: Any, + ): super().__init__(**kwargs) - self.start = start - self.end = end + + # We support two forms of arguments to the constructor. This is to allow the parser to use + # Location tuples for convenience, but to allow position objects to be passed during json + # deserialization + start_index, start_position = self._coerce_location_and_position(start, start_position) + end_index, end_position = self._coerce_location_and_position(end, end_position) + + self.start = start_index + self.end = end_index + self.start_position = start_position + self.end_position = end_position + + def _coerce_location_and_position( + self, + location_or_index: Location | int, + position: SourcePosition | None, + ) -> tuple[int, SourcePosition]: + if isinstance(location_or_index, int): + assert position is not None, "position must be passed if location is not passed" + return location_or_index, position + else: + assert position is None, "position must not be passed if location is passed" + index, row, column = location_or_index + return index, SourcePosition(row, column) @property def start_location(self) -> Location: - return self.start + return self.start, self.start_position.row_index, self.start_position.column_index @property def end_location(self) -> Location: - return self.end + return self.end, self.end_position.row_index, self.end_position.column_index class Annotation(SyntaxNode): diff --git a/fluent.syntax/fluent/syntax/parser.py b/fluent.syntax/fluent/syntax/parser.py index e69d0056..665b3d92 100644 --- a/fluent.syntax/fluent/syntax/parser.py +++ b/fluent.syntax/fluent/syntax/parser.py @@ -85,7 +85,7 @@ def parse(self, source: str) -> ast.Resource: res = ast.Resource(entries) if self.with_spans: - res.add_span(0, ps.current_location) + res.add_span((0, 0, 0), ps.current_location) return res diff --git a/fluent.syntax/fluent/syntax/stream.py b/fluent.syntax/fluent/syntax/stream.py index ae9e958d..819ac239 100644 --- a/fluent.syntax/fluent/syntax/stream.py +++ b/fluent.syntax/fluent/syntax/stream.py @@ -7,7 +7,9 @@ # Represents a location in the parser stream (for convenience) # - Index -Location: TypeAlias = int +# - Row index (in source) +# - Column index (in source) +Location: TypeAlias = tuple[int, int, int] class ParserStream: @@ -38,7 +40,7 @@ def char_at(self, offset: int) -> Union[str, None]: @property def current_location(self) -> Location: - return self.index + return self.index, self.row_index, self.column_index @property def current_char(self) -> Union[str, None]: diff --git a/fluent.syntax/tests/syntax/test_entry.py b/fluent.syntax/tests/syntax/test_entry.py index 060c5240..2da9b6f3 100644 --- a/fluent.syntax/tests/syntax/test_entry.py +++ b/fluent.syntax/tests/syntax/test_entry.py @@ -65,7 +65,21 @@ def test_return_junk(self): "arguments": ["="], "code": "E0003", "message": 'Expected token: "="', - "span": {"end": 23, "start": 23, "type": "Span"}, + "span": { + "end": 23, + "start": 23, + "end_position": { + "column_index": 4, + "row_index": 1, + "type": "SourcePosition", + }, + "start_position": { + "column_index": 4, + "row_index": 1, + "type": "SourcePosition", + }, + "type": "Span" + }, "type": "Annotation", } ], @@ -111,7 +125,21 @@ def test_do_not_ignore_invalid_comments(self): "arguments": [" "], "code": "E0003", "message": 'Expected token: " "', - "span": {"end": 21, "start": 21, "type": "Span"}, + "span": { + "end": 21, + "start": 21, + "end_position": { + "column_index": 2, + "row_index": 1, + "type": "SourcePosition", + }, + "start_position": { + "column_index": 2, + "row_index": 1, + "type": "SourcePosition", + }, + "type": "Span", + }, "type": "Annotation", } ], diff --git a/fluent.syntax/tests/syntax/test_visitor.py b/fluent.syntax/tests/syntax/test_visitor.py index 1b3f4f16..4954e225 100644 --- a/fluent.syntax/tests/syntax/test_visitor.py +++ b/fluent.syntax/tests/syntax/test_visitor.py @@ -44,6 +44,7 @@ def test_resource(self): "Identifier": 4, "Attribute": 1, "Span": 10, + "SourcePosition": 20, }, )