Add source position information to span nodes #202

Open · wants to merge 7 commits into base: main
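In consumer terms, this change means a span exposes zero-indexed row/column positions in addition to the existing flat indexes. A minimal usage sketch against this branch, assuming the default FluentParser(with_spans=True); the sample message and any printed offsets are illustrative, while the attribute names come from the diff below:

from fluent.syntax import FluentParser

parser = FluentParser()  # with_spans defaults to True
resource = parser.parse("hello = Hello, world!\n")

message = resource.body[0]
span = message.span

# Flat indexes into the source string, unchanged by this PR.
print(span.start, span.end)

# New zero-indexed row/column positions added by this PR.
print(span.start_position.row_index, span.start_position.column_index)
print(span.end_position.row_index, span.end_position.column_index)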
55 changes: 51 additions & 4 deletions fluent.syntax/fluent/syntax/ast.py
@@ -3,6 +3,8 @@
import sys
from typing import Any, Callable, Dict, List, TypeVar, Union, cast

from .stream import Location

Node = TypeVar("Node", bound="BaseNode")
ToJsonFn = Callable[[Dict[str, Any]], Any]

@@ -121,7 +123,7 @@ def __init__(self, span: Union["Span", None] = None, **kwargs: Any):
super().__init__(**kwargs)
self.span = span

def add_span(self, start: int, end: int) -> None:
def add_span(self, start: Location, end: Location) -> None:
self.span = Span(start, end)


@@ -376,12 +378,57 @@ def __init__(
def add_annotation(self, annot: "Annotation") -> None:
self.annotations.append(annot)

class SourcePosition(BaseNode):
"""
Represents a zero-indexed row/column position in the source string.
"""
def __init__(self, row_index: int, column_index: int, **kwargs: Any):
super().__init__(**kwargs)
self.row_index = row_index
self.column_index = column_index

class Span(BaseNode):
def __init__(self, start: int, end: int, **kwargs: Any):
def __init__(
self,
start: Location | int,
end: Location | int,
start_position: SourcePosition | None = None,
end_position: SourcePosition | None = None,
**kwargs: Any,
):
super().__init__(**kwargs)
self.start = start
self.end = end

# The constructor supports two argument forms: the parser passes Location tuples
# for convenience, while JSON deserialization passes plain indexes together with
# SourcePosition objects.
start_index, start_position = self._coerce_location_and_position(start, start_position)
end_index, end_position = self._coerce_location_and_position(end, end_position)

self.start = start_index
self.end = end_index
self.start_position = start_position
self.end_position = end_position

def _coerce_location_and_position(
self,
location_or_index: Location | int,
position: SourcePosition | None,
) -> tuple[int, SourcePosition]:
if isinstance(location_or_index, int):
assert position is not None, "position must be passed if location is not passed"
return location_or_index, position
else:
assert position is None, "position must not be passed if location is passed"
index, row, column = location_or_index
return index, SourcePosition(row, column)

@property
def start_location(self) -> Location:
return self.start, self.start_position.row_index, self.start_position.column_index

@property
def end_location(self) -> Location:
return self.end, self.end_position.row_index, self.end_position.column_index


class Annotation(SyntaxNode):
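To make the dual constructor forms above concrete, here is a sketch of both call shapes the new Span.__init__ accepts; the literal numbers are arbitrary and only show that the two forms produce equivalent nodes:

from fluent.syntax.ast import SourcePosition, Span

# Form used by the parser: Location tuples of (index, row_index, column_index).
from_parser = Span((0, 0, 0), (18, 1, 0))
print(from_parser.start, from_parser.end)   # 0 18
print(from_parser.end_position.row_index)   # 1

# Form used during JSON deserialization: plain indexes plus SourcePosition nodes.
from_json = Span(0, 18, start_position=SourcePosition(row_index=0, column_index=0), end_position=SourcePosition(row_index=1, column_index=0))
print(from_json.start_location)             # (0, 0, 0)
print(from_json.end_location)               # (18, 1, 0)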
45 changes: 24 additions & 21 deletions fluent.syntax/fluent/syntax/parser.py
@@ -3,7 +3,7 @@

from . import ast
from .errors import ParseError
from .stream import EOL, FluentParserStream
from .stream import EOL, FluentParserStream, Location

R = TypeVar("R", bound=ast.SyntaxNode)

@@ -15,15 +15,15 @@ def decorated(
if not self.with_spans:
return fn(self, ps, *args, **kwargs)

start = ps.index
start = ps.current_location
node = fn(self, ps, *args, **kwargs)

# Don't re-add the span if the node already has it. This may happen
# when one decorated function calls another decorated function.
if node.span is not None:
return node

end = ps.index
end = ps.current_location
node.add_span(start, end)
return node

@@ -85,7 +85,7 @@ def parse(self, source: str) -> ast.Resource:
res = ast.Resource(entries)

if self.with_spans:
res.add_span(0, ps.index)
res.add_span((0, 0, 0), ps.current_location)

return res

@@ -111,29 +111,32 @@ def parse_entry(self, source: str) -> ast.EntryType:
return self.get_entry_or_junk(ps)

def get_entry_or_junk(self, ps: FluentParserStream) -> ast.EntryType:
entry_start_pos = ps.index
entry_start_index = ps.index
entry_start_location = ps.current_location

try:
entry = self.get_entry(ps)
ps.expect_line_end()
return entry
except ParseError as err:
error_index = ps.index
ps.skip_to_next_entry_start(entry_start_pos)
next_entry_start = ps.index
if next_entry_start < error_index:
error_location = ps.current_location

ps.skip_to_next_entry_start(entry_start_index)
next_entry_start_index = ps.index
if next_entry_start_index < error_index:
# The position of the error must be inside the Junk's span.
error_index = next_entry_start_index
error_location = ps.current_location

# Create a Junk instance
slice = ps.string[entry_start_pos:next_entry_start]
slice = ps.string[entry_start_index:next_entry_start_index]
junk = ast.Junk(slice)
if self.with_spans:
junk.add_span(entry_start_pos, next_entry_start)
junk.add_span(entry_start_location, ps.current_location)
annot = ast.Annotation(
err.code, list(err.args) if err.args else None, err.message
)
annot.add_span(error_index, error_index)
annot.add_span(error_location, error_location)
junk.add_annotation(annot)
return junk

@@ -379,24 +382,24 @@ def get_pattern(self, ps: FluentParserStream, is_block: bool) -> ast.Pattern:
if is_block:
# A block pattern is a pattern which starts on a new line. Measure
# the indent of this first line for the dedentation logic.
blank_start = ps.index
blank_start = ps.current_location
first_indent = ps.skip_blank_inline()
elements.append(self.Indent(first_indent, blank_start, ps.index))
elements.append(self.Indent(first_indent, blank_start, ps.current_location))
common_indent_length = len(first_indent)
else:
# Should get fixed by the subsequent min() operation
common_indent_length = cast(int, float("infinity"))

while ps.current_char:
if ps.current_char == EOL:
blank_start = ps.index
blank_start = ps.current_location
blank_lines = ps.peek_blank_block()
if ps.is_value_continuation():
ps.skip_to_peek()
indent = ps.skip_blank_inline()
common_indent_length = min(common_indent_length, len(indent))
elements.append(
self.Indent(blank_lines + indent, blank_start, ps.index)
self.Indent(blank_lines + indent, blank_start, ps.current_location)
)
continue

@@ -420,7 +423,7 @@ def get_pattern(self, ps: FluentParserStream, is_block: bool) -> ast.Pattern:
return ast.Pattern(dedented)

class Indent(ast.SyntaxNode):
def __init__(self, value: str, start: int, end: int):
def __init__(self, value: str, start: Location, end: Location):
super(FluentParser.Indent, self).__init__()
self.value = value
self.add_span(start, end)
@@ -453,8 +456,8 @@ def dedent(
sum = ast.TextElement(prev.value + element.value)
if self.with_spans:
sum.add_span(
cast(ast.Span, prev.span).start,
cast(ast.Span, element.span).end,
cast(ast.Span, prev.span).start_location,
cast(ast.Span, element.span).end_location,
)
trimmed[-1] = sum
continue
@@ -465,8 +468,8 @@
text_element = ast.TextElement(element.value)
if self.with_spans:
text_element.add_span(
cast(ast.Span, element.span).start,
cast(ast.Span, element.span).end,
cast(ast.Span, element.span).start_location,
cast(ast.Span, element.span).end_location,
)
element = text_element

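Because the parser records the flat index and the row/column position from the same stream state, the two views of a span should describe the same source region. A quick consistency check one might write against this branch; the message ids and expected values are illustrative, not taken from a test run:

from fluent.syntax import FluentParser

source = "first = One\nsecond = Two\n"
resource = FluentParser().parse(source)

second = resource.body[1]
span = second.span

# The flat slice still works as before...
print(source[span.start:span.end])       # expected: "second = Two"

# ...and the new positions point at the same place in row/column terms.
print(span.start_position.row_index,     # expected: 1
      span.start_position.column_index)  # expected: 0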
44 changes: 43 additions & 1 deletion fluent.syntax/fluent/syntax/stream.py
@@ -1,15 +1,26 @@
from typing import Callable, Union
from typing import Callable, TypeAlias, Union

from typing_extensions import Literal

from .errors import ParseError


# Represents a location in the parser stream (for convenience)
# - Index
# - Row index (in source)
# - Column index (in source)
Location: TypeAlias = tuple[int, int, int]


class ParserStream:
def __init__(self, string: str):
self.string = string
self.index = 0
self.row_index = 0
self.column_index = 0
self.peek_offset = 0
self._peek_row_offset = 0
self._peek_column_offset = 0

def get(self, offset: int) -> Union[str, None]:
try:
@@ -27,6 +38,10 @@ def char_at(self, offset: int) -> Union[str, None]:

return self.get(offset)

@property
def current_location(self) -> Location:
return self.index, self.row_index, self.column_index

@property
def current_char(self) -> Union[str, None]:
return self.char_at(self.index)
@@ -37,9 +52,19 @@ def current_peek(self) -> Union[str, None]:

def next(self) -> Union[str, None]:
self.peek_offset = 0
self._peek_row_offset = 0
self._peek_column_offset = 0

# Skip over CRLF as if it was a single character.
if self.get(self.index) == "\r" and self.get(self.index + 1) == "\n":
self.index += 1
# If we have reached a newline, advance the row and reset the column
if self.get(self.index) == "\n":
self.row_index += 1
self.column_index = 0
else:
self.column_index += 1

self.index += 1
return self.get(self.index)

@@ -50,6 +75,13 @@ def peek(self) -> Union[str, None]:
and self.get(self.index + self.peek_offset + 1) == "\n"
):
self.peek_offset += 1

if self.get(self.index + self.peek_offset) == "\n":
self._peek_row_offset += 1
self._peek_column_offset = 0
else:
self._peek_column_offset += 1

self.peek_offset += 1
return self.get(self.index + self.peek_offset)

@@ -58,7 +90,17 @@ def reset_peek(self, offset: int = 0) -> None:

def skip_to_peek(self) -> None:
self.index += self.peek_offset
self.row_index += self._peek_row_offset
if self._peek_row_offset:
# There have been newlines during the peek, so the column offset is the column index
# since the last newline
self.column_index = self._peek_column_offset
else:
self.column_index += self._peek_column_offset

self.peek_offset = 0
self._peek_row_offset = 0
self._peek_column_offset = 0


EOL = "\n"
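The stream-level bookkeeping can be exercised directly. A small sketch against the new ParserStream fields, with the expected (index, row_index, column_index) tuples in the comments derived from the logic above rather than from a test run:

from fluent.syntax.stream import ParserStream

ps = ParserStream("ab\ncd")
print(ps.current_location)   # (0, 0, 0)

ps.next()                    # past "a"
ps.next()                    # past "b"; the stream now sits on "\n"
print(ps.current_location)   # (2, 0, 2)

ps.next()                    # past "\n": row advances, column resets
print(ps.current_location)   # (3, 1, 0)

ps.peek()                    # look ahead without moving the primary position
print(ps.current_location)   # still (3, 1, 0)

ps.skip_to_peek()            # commit the peeked offset, rows and columns included
print(ps.current_location)   # (4, 1, 1)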
@@ -14,15 +14,35 @@
"span": {
"type": "Span",
"start": 13,
"end": 13
"end": 13,
"start_position": {
"type": "SourcePosition",
"row_index": 0,
"column_index": 13
},
"end_position": {
"type": "SourcePosition",
"row_index": 0,
"column_index": 13
}
}
}
],
"content": "err1 = { foo.23 }\n",
"span": {
"type": "Span",
"start": 0,
"end": 18
"end": 18,
"start_position": {
"type": "SourcePosition",
"row_index": 0,
"column_index": 0
},
"end_position": {
"type": "SourcePosition",
"row_index": 1,
"column_index": 0
}
}
},
{
@@ -38,21 +58,51 @@
"span": {
"type": "Span",
"start": 31,
"end": 31
"end": 31,
"start_position": {
"type": "SourcePosition",
"row_index": 1,
"column_index": 13
},
"end_position": {
"type": "SourcePosition",
"row_index": 1,
"column_index": 13
}
}
}
],
"content": "err2 = { foo. }\n",
"span": {
"type": "Span",
"start": 18,
"end": 34
"end": 34,
"start_position": {
"type": "SourcePosition",
"row_index": 1,
"column_index": 0
},
"end_position": {
"type": "SourcePosition",
"row_index": 2,
"column_index": 0
}
}
}
],
"span": {
"type": "Span",
"start": 0,
"end": 34
"end": 34,
"start_position": {
"type": "SourcePosition",
"row_index": 0,
"column_index": 0
},
"end_position": {
"type": "SourcePosition",
"row_index": 2,
"column_index": 1
}
}
}
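Assuming this fixture corresponds to the two-line source shown in its content fields ("err1 = { foo.23 }\nerr2 = { foo. }\n"), the new position fields can be checked against the parser directly; for example, the second annotation's flat offset 31 should map to row 1, column 13 as serialized above:

from fluent.syntax import FluentParser

source = "err1 = { foo.23 }\nerr2 = { foo. }\n"
resource = FluentParser().parse(source)

annot = resource.body[1].annotations[0]        # annotation on the second Junk entry
print(annot.span.start)                        # 31
print(annot.span.start_position.row_index,     # 1
      annot.span.start_position.column_index)  # 13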