Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
229 changes: 227 additions & 2 deletions tests/python/unit/utilities/test_utilities_parsing.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,42 @@
"""Test COMPASS Ordinance parsing utilities"""

import json
from pathlib import Path
from unittest.mock import MagicMock

import numpy as np
import pandas as pd
import pytest

from compass.utilities.parsing import (
clean_backticks_from_llm_response,
extract_ord_year_from_doc_attrs,
llm_response_as_json,
load_config,
merge_overlapping_texts,
num_ordinances_dataframe,
num_ordinances_in_doc,
ordinances_bool_index,
)
from compass.exceptions import COMPASSValueError


@pytest.mark.parametrize(
    "in_str,expected",
    [
        ("plain text", "plain text"),
        ("```code```", "code"),
        ("```\ncode\n```", "code\n"),
        (" ```json\ncode``` ", "json\ncode"),
        ("\n```\ncode\n```\n", "code\n"),
        ("\r\n```\r\ncode\r\n```\r\n", "\r\ncode\r\n"),
        ("```", ""),
    ],
)
def test_clean_backticks_from_llm_response(in_str, expected):
    """Verify `clean_backticks_from_llm_response` for each input

    Every case pairs a raw string (`in_str`) with the exact string the
    function is expected to return for it (`expected`).
    """
    cleaned = clean_backticks_from_llm_response(in_str)
    assert cleaned == expected


@pytest.mark.parametrize(
Expand All @@ -16,9 +45,11 @@
(' {"a": 1} ', {"a": 1}),
('```json\n{"a": True, "b": False}```', {"a": True, "b": False}),
('{"a": True', {}),
('json\n{"key": "value"}', {"key": "value"}),
('{"a": True, "b": False}', {"a": True, "b": False}),
],
)
def test_llm_response_as_json(in_str, expected):
    """Test the `llm_response_as_json` function

    Each parametrized case supplies a raw response string (`in_str`)
    and the value `llm_response_as_json` is expected to return for it
    (`expected`). The visible cases include fenced input, Python-style
    ``True``/``False`` literals, and a truncated object that is
    expected to come back as an empty dict.
    """

    assert llm_response_as_json(in_str) == expected
Expand All @@ -38,7 +69,10 @@ def test_sync_retry(in_str, expected):
"Some text. Some overlap. More text. More text that "
"shouldn't be touched. Some overlap. More text.\nSome "
"non-overlapping text.",
)
),
([], 300, ""),
(["single chunk"], 300, "single chunk"),
(["", None, "valid text"], 300, "valid text"),
],
)
def test_merge_overlapping_texts(text_chunks, n, expected):
Expand All @@ -47,5 +81,196 @@ def test_merge_overlapping_texts(text_chunks, n, expected):
assert merge_overlapping_texts(text_chunks, n) == expected


@pytest.mark.parametrize(
    "doc_attrs,expected",
    [
        ({"date": (2023, 5, 15)}, 2023),
        ({"date": (2020, 1, 1)}, 2020),
        ({"date": (None, 5, 15)}, None),
        ({"date": (0, 5, 15)}, None),
        ({"date": (-1, 5, 15)}, None),
        ({}, None),
        ({"other_key": "value"}, None),
    ],
)
def test_extract_ord_year_from_doc_attrs(doc_attrs, expected):
    """Verify the year `extract_ord_year_from_doc_attrs` reports

    Cases cover a usable ``date`` tuple, a ``None``/zero/negative year
    entry, and attribute dicts that carry no ``date`` key at all.
    """
    year = extract_ord_year_from_doc_attrs(doc_attrs)
    assert year == expected


def test_num_ordinances_in_doc_none():
    """Passing None as the document should give a count of zero"""
    count = num_ordinances_in_doc(None)
    assert count == 0


def test_num_ordinances_in_doc_no_ordinance_values():
    """A doc whose attrs hold no "ordinance_values" counts as zero"""
    # Keyword arguments to MagicMock are set as attributes on the mock
    mock_doc = MagicMock(attrs={})
    count = num_ordinances_in_doc(mock_doc)
    assert count == 0


def test_num_ordinances_in_doc_with_ordinances():
    """Count ordinances for a doc holding three populated rows

    Every row carries a non-null "value", a non-null "summary", or
    both, and all three are expected to be counted.
    """
    ordinance_values = pd.DataFrame(
        {
            "feature": ["setback", "height", "noise"],
            "value": [100, 200, None],
            "summary": ["test", None, "test"],
        }
    )
    mock_doc = MagicMock(attrs={"ordinance_values": ordinance_values})
    assert num_ordinances_in_doc(mock_doc) == 3


def test_num_ordinances_in_doc_with_exclude_features():
    """Excluding "noise" should leave two of three rows counted"""
    ordinance_values = pd.DataFrame(
        {
            "feature": ["setback", "height", "noise"],
            "value": [100, 200, 300],
            "summary": ["test", "test", "test"],
        }
    )
    mock_doc = MagicMock(attrs={"ordinance_values": ordinance_values})
    count = num_ordinances_in_doc(mock_doc, exclude_features=["noise"])
    assert count == 2


def test_num_ordinances_dataframe_empty():
    """An empty DataFrame should report zero ordinances"""
    assert num_ordinances_dataframe(pd.DataFrame()) == 0


def test_num_ordinances_dataframe_with_values():
    """Three rows, each with a value and/or summary, should count as 3"""
    columns = {
        "feature": ["setback", "height", "noise"],
        "value": [100, 200, None],
        "summary": ["test", None, "test"],
    }
    frame = pd.DataFrame(columns)
    assert num_ordinances_dataframe(frame) == 3


def test_num_ordinances_dataframe_with_exclude():
    """Excluding "height" should drop the row whose feature is "HEIGHT"

    The feature is stored upper-case while the exclusion is given
    lower-case, and the expected count is still two.
    """
    columns = {
        "feature": ["setback", "HEIGHT", "noise"],
        "value": [100, 200, 300],
        "summary": ["test", "test", "test"],
    }
    frame = pd.DataFrame(columns)
    count = num_ordinances_dataframe(frame, exclude_features=["height"])
    assert count == 2


def test_ordinances_bool_index_none():
    """None input should give back an empty boolean ndarray"""
    mask = ordinances_bool_index(None)
    assert isinstance(mask, np.ndarray)
    assert len(mask) == 0
    assert mask.dtype == bool


def test_ordinances_bool_index_empty():
    """An empty DataFrame should give back an empty ndarray"""
    mask = ordinances_bool_index(pd.DataFrame())
    assert isinstance(mask, np.ndarray)
    assert len(mask) == 0


def test_ordinances_bool_index_no_check_cols():
    """A frame with only a "feature" column should give an empty ndarray"""
    frame = pd.DataFrame({"feature": ["setback", "height"]})
    mask = ordinances_bool_index(frame)
    assert isinstance(mask, np.ndarray)
    assert len(mask) == 0


def test_ordinances_bool_index_with_values():
    """Rows with a value or a summary should be flagged True

    Only the "noise" row has neither a "value" nor a "summary", so it
    is the single row expected to be False.
    """
    columns = {
        "feature": ["setback", "height", "noise", "extra"],
        "value": [100, None, None, 400],
        "summary": [None, "test", None, "test"],
    }
    mask = ordinances_bool_index(pd.DataFrame(columns))
    np.testing.assert_array_equal(
        mask, np.array([True, True, False, True])
    )


def test_ordinances_bool_index_value_only():
    """With no "summary" column, only non-null values are flagged True"""
    columns = {
        "feature": ["setback", "height", "noise"],
        "value": [100, None, 300],
    }
    mask = ordinances_bool_index(pd.DataFrame(columns))
    np.testing.assert_array_equal(mask, np.array([True, False, True]))


def test_load_config_json(tmp_path):
    """Write a dict to a ``.json`` file and read it back via `load_config`"""
    expected = {"key": "value", "number": 42}
    json_path = tmp_path / "test_config.json"
    json_path.write_text(json.dumps(expected), encoding="utf-8")

    loaded = load_config(json_path)
    assert loaded == expected


def test_load_config_json5(tmp_path):
    """Load a ``.json5`` file that has a comment and a trailing comma"""
    # Assembled line by line; both JSON5-only features (the `//` comment
    # and the comma after the last entry) must survive into the file.
    config_content = (
        "{\n"
        "// This is a comment\n"
        '"key": "value",\n'
        '"number": 42,\n'
        "}"
    )
    json5_path = tmp_path / "test_config.json5"
    json5_path.write_text(config_content, encoding="utf-8")

    loaded = load_config(json5_path)
    assert loaded == {"key": "value", "number": 42}


def test_load_config_invalid_extension(tmp_path):
    """A ``.txt`` config file should make `load_config` raise

    The error is expected to be a `COMPASSValueError` whose message
    names the offending extension.
    """
    bad_path = tmp_path / "test_config.txt"
    bad_path.touch()

    expected_msg = r"Got unknown config file extension: \.txt"
    with pytest.raises(COMPASSValueError, match=expected_msg):
        load_config(bad_path)


if __name__ == "__main__":
    # Run only this module's tests when the file is executed directly
    pytest_args = ["-q", "--show-capture=all", Path(__file__), "-rapP"]
    pytest.main(pytest_args)