Add source position information to span nodes #202

Open · wants to merge 7 commits into base: main
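In consumer terms, this change means a span exposes zero-indexed row/column positions in addition to the existing flat indexes. A minimal usage sketch against this branch, assuming the default FluentParser(with_spans=True); the sample message and any printed offsets are illustrative, while the attribute names come from the diff below:

from fluent.syntax import FluentParser

parser = FluentParser()  # with_spans defaults to True
resource = parser.parse("hello = Hello, world!\n")

message = resource.body[0]
span = message.span

# Flat indexes into the source string, unchanged by this PR.
print(span.start, span.end)

# New zero-indexed row/column positions added by this PR.
print(span.start_position.row_index, span.start_position.column_index)
print(span.end_position.row_index, span.end_position.column_index)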
55 changes: 51 additions & 4 deletions fluent.syntax/fluent/syntax/ast.py
@@ -3,6 +3,8 @@
import sys
from typing import Any, Callable, Dict, List, TypeVar, Union, cast

from .stream import Location

Node = TypeVar("Node", bound="BaseNode")
ToJsonFn = Callable[[Dict[str, Any]], Any]

@@ -121,7 +123,7 @@ def __init__(self, span: Union["Span", None] = None, **kwargs: Any):
super().__init__(**kwargs)
self.span = span

def add_span(self, start: int, end: int) -> None:
def add_span(self, start: Location, end: Location) -> None:
self.span = Span(start, end)


@@ -376,12 +378,57 @@ def __init__(
def add_annotation(self, annot: "Annotation") -> None:
self.annotations.append(annot)

class SourcePosition(BaseNode):
"""
Represents a zero-indexed row/column position in the source string.
"""
def __init__(self, row_index: int, column_index: int, **kwargs: Any):
super().__init__(**kwargs)
self.row_index = row_index
self.column_index = column_index

class Span(BaseNode):
def __init__(self, start: int, end: int, **kwargs: Any):
def __init__(
self,
start: Location | int,
end: Location | int,
start_position: SourcePosition | None = None,
end_position: SourcePosition | None = None,
**kwargs: Any,
):
super().__init__(**kwargs)
self.start = start
self.end = end

# The constructor supports two argument forms: the parser passes Location tuples
# for convenience, while JSON deserialization passes plain indexes together with
# SourcePosition objects.
start_index, start_position = self._coerce_location_and_position(start, start_position)
end_index, end_position = self._coerce_location_and_position(end, end_position)

self.start = start_index
self.end = end_index
self.start_position = start_position
self.end_position = end_position

def _coerce_location_and_position(
self,
location_or_index: Location | int,
position: SourcePosition | None,
) -> tuple[int, SourcePosition]:
if isinstance(location_or_index, int):
assert position is not None, "position must be passed if location is not passed"
return location_or_index, position
else:
assert position is None, "position must not be passed if location is passed"
index, row, column = location_or_index
return index, SourcePosition(row, column)

@property
def start_location(self) -> Location:
return self.start, self.start_position.row_index, self.start_position.column_index

@property
def end_location(self) -> Location:
return self.end, self.end_position.row_index, self.end_position.column_index


class Annotation(SyntaxNode):
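To make the dual constructor forms above concrete, here is a sketch of both call shapes the new Span.__init__ accepts; the literal numbers are arbitrary and only show that the two forms produce equivalent nodes:

from fluent.syntax.ast import SourcePosition, Span

# Form used by the parser: Location tuples of (index, row_index, column_index).
from_parser = Span((0, 0, 0), (18, 1, 0))
print(from_parser.start, from_parser.end)   # 0 18
print(from_parser.end_position.row_index)   # 1

# Form used during JSON deserialization: plain indexes plus SourcePosition nodes.
from_json = Span(0, 18, start_position=SourcePosition(row_index=0, column_index=0), end_position=SourcePosition(row_index=1, column_index=0))
print(from_json.start_location)             # (0, 0, 0)
print(from_json.end_location)               # (18, 1, 0)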
45 changes: 24 additions & 21 deletions fluent.syntax/fluent/syntax/parser.py
@@ -3,7 +3,7 @@

from . import ast
from .errors import ParseError
from .stream import EOL, FluentParserStream
from .stream import EOL, FluentParserStream, Location

R = TypeVar("R", bound=ast.SyntaxNode)

@@ -15,15 +15,15 @@ def decorated(
if not self.with_spans:
return fn(self, ps, *args, **kwargs)

start = ps.index
start = ps.current_location
node = fn(self, ps, *args, **kwargs)

# Don't re-add the span if the node already has it. This may happen
# when one decorated function calls another decorated function.
if node.span is not None:
return node

end = ps.index
end = ps.current_location
node.add_span(start, end)
return node

@@ -85,7 +85,7 @@ def parse(self, source: str) -> ast.Resource:
res = ast.Resource(entries)

if self.with_spans:
res.add_span(0, ps.index)
res.add_span((0, 0, 0), ps.current_location)

return res

@@ -111,29 +111,32 @@ def parse_entry(self, source: str) -> ast.EntryType:
return self.get_entry_or_junk(ps)

def get_entry_or_junk(self, ps: FluentParserStream) -> ast.EntryType:
entry_start_pos = ps.index
entry_start_index = ps.index
entry_start_location = ps.current_location

try:
entry = self.get_entry(ps)
ps.expect_line_end()
return entry
except ParseError as err:
error_index = ps.index
ps.skip_to_next_entry_start(entry_start_pos)
next_entry_start = ps.index
if next_entry_start < error_index:
error_location = ps.current_location

ps.skip_to_next_entry_start(entry_start_index)
next_entry_start_index = ps.index
if next_entry_start_index < error_index:
# The position of the error must be inside the Junk's span.
error_index = next_entry_start_index
error_location = ps.current_location

# Create a Junk instance
slice = ps.string[entry_start_pos:next_entry_start]
slice = ps.string[entry_start_index:next_entry_start_index]
junk = ast.Junk(slice)
if self.with_spans:
junk.add_span(entry_start_pos, next_entry_start)
junk.add_span(entry_start_location, ps.current_location)
annot = ast.Annotation(
err.code, list(err.args) if err.args else None, err.message
)
annot.add_span(error_index, error_index)
annot.add_span(error_location, error_location)
junk.add_annotation(annot)
return junk

@@ -379,24 +382,24 @@ def get_pattern(self, ps: FluentParserStream, is_block: bool) -> ast.Pattern:
if is_block:
# A block pattern is a pattern which starts on a new line. Measure
# the indent of this first line for the dedentation logic.
blank_start = ps.index
blank_start = ps.current_location
first_indent = ps.skip_blank_inline()
elements.append(self.Indent(first_indent, blank_start, ps.index))
elements.append(self.Indent(first_indent, blank_start, ps.current_location))
common_indent_length = len(first_indent)
else:
# Should get fixed by the subsequent min() operation
common_indent_length = cast(int, float("infinity"))

while ps.current_char:
if ps.current_char == EOL:
blank_start = ps.index
blank_start = ps.current_location
blank_lines = ps.peek_blank_block()
if ps.is_value_continuation():
ps.skip_to_peek()
indent = ps.skip_blank_inline()
common_indent_length = min(common_indent_length, len(indent))
elements.append(
self.Indent(blank_lines + indent, blank_start, ps.index)
self.Indent(blank_lines + indent, blank_start, ps.current_location)
)
continue

@@ -420,7 +423,7 @@ def get_pattern(self, ps: FluentParserStream, is_block: bool) -> ast.Pattern:
return ast.Pattern(dedented)

class Indent(ast.SyntaxNode):
def __init__(self, value: str, start: int, end: int):
def __init__(self, value: str, start: Location, end: Location):
super(FluentParser.Indent, self).__init__()
self.value = value
self.add_span(start, end)
@@ -453,8 +456,8 @@ def dedent(
sum = ast.TextElement(prev.value + element.value)
if self.with_spans:
sum.add_span(
cast(ast.Span, prev.span).start,
cast(ast.Span, element.span).end,
cast(ast.Span, prev.span).start_location,
cast(ast.Span, element.span).end_location,
)
trimmed[-1] = sum
continue
@@ -465,8 +468,8 @@
text_element = ast.TextElement(element.value)
if self.with_spans:
text_element.add_span(
cast(ast.Span, element.span).start,
cast(ast.Span, element.span).end,
cast(ast.Span, element.span).start_location,
cast(ast.Span, element.span).end_location,
)
element = text_element

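Because the parser records the flat index and the row/column position from the same stream state, the two views of a span should describe the same source region. A quick consistency check one might write against this branch; the message ids and expected values are illustrative, not taken from a test run:

from fluent.syntax import FluentParser

source = "first = One\nsecond = Two\n"
resource = FluentParser().parse(source)

second = resource.body[1]
span = second.span

# The flat slice still works as before...
print(source[span.start:span.end])       # expected: "second = Two"

# ...and the new positions point at the same place in row/column terms.
print(span.start_position.row_index,     # expected: 1
      span.start_position.column_index)  # expected: 0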
44 changes: 43 additions & 1 deletion fluent.syntax/fluent/syntax/stream.py
@@ -1,15 +1,26 @@
from typing import Callable, Union
from typing import Callable, TypeAlias, Union

from typing_extensions import Literal

from .errors import ParseError


# Represents a location in the parser stream (for convenience)
# - Index
# - Row index (in source)
# - Column index (in source)
Location: TypeAlias = tuple[int, int, int]


class ParserStream:
def __init__(self, string: str):
self.string = string
self.index = 0
self.row_index = 0
self.column_index = 0
self.peek_offset = 0
self._peek_row_offset = 0
self._peek_column_offset = 0

def get(self, offset: int) -> Union[str, None]:
try:
@@ -27,6 +38,10 @@ def char_at(self, offset: int) -> Union[str, None]:

return self.get(offset)

@property
def current_location(self) -> Location:
return self.index, self.row_index, self.column_index

@property
def current_char(self) -> Union[str, None]:
return self.char_at(self.index)
@@ -37,9 +52,19 @@ def current_peek(self) -> Union[str, None]:

def next(self) -> Union[str, None]:
self.peek_offset = 0
self._peek_row_offset = 0
self._peek_column_offset = 0

# Skip over CRLF as if it was a single character.
if self.get(self.index) == "\r" and self.get(self.index + 1) == "\n":
self.index += 1
# If we have reached a newline, advance the row and reset the column
if self.get(self.index) == "\n":
self.row_index += 1
self.column_index = 0
else:
self.column_index += 1

self.index += 1
return self.get(self.index)

@@ -50,6 +75,13 @@ def peek(self) -> Union[str, None]:
and self.get(self.index + self.peek_offset + 1) == "\n"
):
self.peek_offset += 1

if self.get(self.index + self.peek_offset) == "\n":
self._peek_row_offset += 1
self._peek_column_offset = 0
else:
self._peek_column_offset += 1

self.peek_offset += 1
return self.get(self.index + self.peek_offset)

@@ -58,7 +90,17 @@ def reset_peek(self, offset: int = 0) -> None:

def skip_to_peek(self) -> None:
self.index += self.peek_offset
self.row_index += self._peek_row_offset
if self._peek_row_offset:
# There have been newlines during the peek, so the column offset is the column index
# since the last newline
self.column_index = self._peek_column_offset
else:
self.column_index += self._peek_column_offset

self.peek_offset = 0
self._peek_row_offset = 0
self._peek_column_offset = 0


EOL = "\n"
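The stream-level bookkeeping can be exercised directly. A small sketch against the new ParserStream fields, with the expected (index, row_index, column_index) tuples in the comments derived from the logic above rather than from a test run:

from fluent.syntax.stream import ParserStream

ps = ParserStream("ab\ncd")
print(ps.current_location)   # (0, 0, 0)

ps.next()                    # past "a"
ps.next()                    # past "b"; the stream now sits on "\n"
print(ps.current_location)   # (2, 0, 2)

ps.next()                    # past "\n": row advances, column resets
print(ps.current_location)   # (3, 1, 0)

ps.peek()                    # look ahead without moving the primary position
print(ps.current_location)   # still (3, 1, 0)

ps.skip_to_peek()            # commit the peeked offset, rows and columns included
print(ps.current_location)   # (4, 1, 1)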
@@ -14,15 +14,35 @@
"span": {
"type": "Span",
"start": 13,
"end": 13
"end": 13,
"start_position": {
"type": "SourcePosition",
"row_index": 0,
"column_index": 13
},
"end_position": {
"type": "SourcePosition",
"row_index": 0,
"column_index": 13
}
}
}
],
"content": "err1 = { foo.23 }\n",
"span": {
"type": "Span",
"start": 0,
"end": 18
"end": 18,
"start_position": {
"type": "SourcePosition",
"row_index": 0,
"column_index": 0
},
"end_position": {
"type": "SourcePosition",
"row_index": 1,
"column_index": 0
}
}
},
{
@@ -38,21 +58,51 @@
"span": {
"type": "Span",
"start": 31,
"end": 31
"end": 31,
"start_position": {
"type": "SourcePosition",
"row_index": 1,
"column_index": 13
},
"end_position": {
"type": "SourcePosition",
"row_index": 1,
"column_index": 13
}
}
}
],
"content": "err2 = { foo. }\n",
"span": {
"type": "Span",
"start": 18,
"end": 34
"end": 34,
"start_position": {
"type": "SourcePosition",
"row_index": 1,
"column_index": 0
},
"end_position": {
"type": "SourcePosition",
"row_index": 2,
"column_index": 0
}
}
}
],
"span": {
"type": "Span",
"start": 0,
"end": 34
"end": 34,
"start_position": {
"type": "SourcePosition",
"row_index": 0,
"column_index": 0
},
"end_position": {
"type": "SourcePosition",
"row_index": 2,
"column_index": 1
}
}
}
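Assuming this fixture corresponds to the two-line source shown in its content fields ("err1 = { foo.23 }\nerr2 = { foo. }\n"), the new position fields can be checked against the parser directly; for example, the second annotation's flat offset 31 should map to row 1, column 13 as serialized above:

from fluent.syntax import FluentParser

source = "err1 = { foo.23 }\nerr2 = { foo. }\n"
resource = FluentParser().parse(source)

annot = resource.body[1].annotations[0]        # annotation on the second Junk entry
print(annot.span.start)                        # 31
print(annot.span.start_position.row_index,     # 1
      annot.span.start_position.column_index)  # 13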