From beda29c619fea9e51e93a7bf884011eccf32f81a Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 8 Nov 2025 13:55:32 -0700 Subject: [PATCH 1/3] Add utilities tests --- .../unit/utilities/test_utilities_parsing.py | 228 +++++++++++++++++- 1 file changed, 226 insertions(+), 2 deletions(-) diff --git a/tests/python/unit/utilities/test_utilities_parsing.py b/tests/python/unit/utilities/test_utilities_parsing.py index 499be87b0..56bb34400 100644 --- a/tests/python/unit/utilities/test_utilities_parsing.py +++ b/tests/python/unit/utilities/test_utilities_parsing.py @@ -1,13 +1,41 @@ """Test COMPASS Ordinance parsing utilities""" +import json from pathlib import Path +from unittest.mock import MagicMock +import numpy as np +import pandas as pd import pytest from compass.utilities.parsing import ( + clean_backticks_from_llm_response, + extract_ord_year_from_doc_attrs, llm_response_as_json, + load_config, merge_overlapping_texts, + num_ordinances_dataframe, + num_ordinances_in_doc, + ordinances_bool_index, ) +from compass.exceptions import COMPASSValueError + + +@pytest.mark.parametrize( + "in_str,expected", + [ + ("plain text", "plain text"), + ("```code```", "code"), + ("```\ncode\n```", "code\n"), + (" ```json\ncode``` ", "json\ncode"), + ("\n```\ncode\n```\n", "code\n"), + ("```", ""), + ], +) +def test_clean_backticks_from_llm_response(in_str, expected): + """Test the `clean_backticks_from_llm_response` function""" + + assert clean_backticks_from_llm_response(in_str) == expected @pytest.mark.parametrize( @@ -16,9 +44,11 @@ (' {"a": 1} ', {"a": 1}), ('```json\n{"a": True, "b": False}```', {"a": True, "b": False}), ('{"a": True', {}), + ('json\n{"key": "value"}', {"key": "value"}), + ('{"a": True, "b": False}', {"a": True, "b": False}), ], ) -def test_sync_retry(in_str, expected): +def test_llm_response_as_json(in_str, expected): """Test the `llm_response_as_json` function""" assert llm_response_as_json(in_str) == expected @@ -38,7 +68,10 @@ def test_sync_retry(in_str, expected): "Some text. Some overlap. More text. More text that " "shouldn't be touched. Some overlap. More text.\nSome " "non-overlapping text.", - ) + ), + ([], 300, ""), + (["single chunk"], 300, "single chunk"), + (["", None, "valid text"], 300, "valid text"), ], ) def test_merge_overlapping_texts(text_chunks, n, expected): @@ -47,5 +80,196 @@ def test_merge_overlapping_texts(text_chunks, n, expected): assert merge_overlapping_texts(text_chunks, n) == expected +@pytest.mark.parametrize( + "doc_attrs,expected", + [ + ({"date": (2023, 5, 15)}, 2023), + ({"date": (2020, 1, 1)}, 2020), + ({"date": (None, 5, 15)}, None), + ({"date": (0, 5, 15)}, None), + ({"date": (-1, 5, 15)}, None), + ({}, None), + ({"other_key": "value"}, None), + ], +) +def test_extract_ord_year_from_doc_attrs(doc_attrs, expected): + """Test the `extract_ord_year_from_doc_attrs` function""" + + assert extract_ord_year_from_doc_attrs(doc_attrs) == expected + + +def test_num_ordinances_in_doc_none(): + """Test `num_ordinances_in_doc` with None document""" + + assert num_ordinances_in_doc(None) == 0 + + +def test_num_ordinances_in_doc_no_ordinance_values(): + """Test `num_ordinances_in_doc` with document missing ordinance_values""" + + doc = MagicMock() + doc.attrs = {} + assert num_ordinances_in_doc(doc) == 0 + + +def test_num_ordinances_in_doc_with_ordinances(): + """Test `num_ordinances_in_doc` with valid ordinances""" + + doc = MagicMock() + doc.attrs = { + "ordinance_values": pd.DataFrame( + { + "feature": ["setback", "height", "noise"], + "value": [100, 200, None], + "summary": ["test", None, "test"], + } + ) + } + assert num_ordinances_in_doc(doc) == 3 + + +def test_num_ordinances_in_doc_with_exclude_features(): + """Test `num_ordinances_in_doc` with excluded features""" + + doc = MagicMock() + doc.attrs = { + "ordinance_values": pd.DataFrame( + { + "feature": ["setback", "height", "noise"], + "value": [100, 200, 300], + "summary": ["test", "test", "test"], + } + ) + } + assert num_ordinances_in_doc(doc, exclude_features=["noise"]) == 2 + + +def test_num_ordinances_dataframe_empty(): + """Test `num_ordinances_dataframe` with empty DataFrame""" + + data = pd.DataFrame() + assert num_ordinances_dataframe(data) == 0 + + +def test_num_ordinances_dataframe_with_values(): + """Test `num_ordinances_dataframe` with valid data""" + + data = pd.DataFrame( + { + "feature": ["setback", "height", "noise"], + "value": [100, 200, None], + "summary": ["test", None, "test"], + } + ) + assert num_ordinances_dataframe(data) == 3 + + +def test_num_ordinances_dataframe_with_exclude(): + """Test `num_ordinances_dataframe` with excluded features""" + + data = pd.DataFrame( + { + "feature": ["setback", "HEIGHT", "noise"], + "value": [100, 200, 300], + "summary": ["test", "test", "test"], + } + ) + assert num_ordinances_dataframe(data, exclude_features=["height"]) == 2 + + +def test_ordinances_bool_index_none(): + """Test `ordinances_bool_index` with None data""" + + result = ordinances_bool_index(None) + assert isinstance(result, np.ndarray) + assert len(result) == 0 + assert result.dtype == bool + + +def test_ordinances_bool_index_empty(): + """Test `ordinances_bool_index` with empty DataFrame""" + + data = pd.DataFrame() + result = ordinances_bool_index(data) + assert isinstance(result, np.ndarray) + assert len(result) == 0 + + +def test_ordinances_bool_index_no_check_cols(): + """Test `ordinances_bool_index` with DataFrame missing required columns""" + + data = pd.DataFrame({"feature": ["setback", "height"]}) + result = ordinances_bool_index(data) + assert isinstance(result, np.ndarray) + assert len(result) == 0 + + +def test_ordinances_bool_index_with_values(): + """Test `ordinances_bool_index` with valid data""" + + data = pd.DataFrame( + { + "feature": ["setback", "height", "noise", "extra"], + "value": [100, None, None, 400], + "summary": [None, "test", None, "test"], + } + ) + result = ordinances_bool_index(data) + expected = np.array([True, True, False, True]) + np.testing.assert_array_equal(result, expected) + + +def test_ordinances_bool_index_value_only(): + """Test `ordinances_bool_index` with only value column""" + + data = pd.DataFrame( + {"feature": ["setback", "height", "noise"], "value": [100, None, 300]} + ) + result = ordinances_bool_index(data) + expected = np.array([True, False, True]) + np.testing.assert_array_equal(result, expected) + + +def test_load_config_json(tmp_path): + """Test `load_config` with JSON file""" + + config_data = {"key": "value", "number": 42} + config_file = tmp_path / "test_config.json" + with config_file.open("w", encoding="utf-8") as f: + json.dump(config_data, f) + + result = load_config(config_file) + assert result == config_data + + +def test_load_config_json5(tmp_path): + """Test `load_config` with JSON5 file""" + + config_content = """{ + // This is a comment + "key": "value", + "number": 42, + }""" + config_file = tmp_path / "test_config.json5" + with config_file.open("w", encoding="utf-8") as f: + f.write(config_content) + + result = load_config(config_file) + assert result == {"key": "value", "number": 42} + + +def test_load_config_invalid_extension(tmp_path): + """Test `load_config` with invalid file extension""" + + config_file = tmp_path / "test_config.txt" + config_file.touch() + + with pytest.raises( + COMPASSValueError, + match=r"Got unknown config file extension: \.txt", + ): + load_config(config_file) + + if __name__ == "__main__": pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) From 1dd862a860bf734ddcc929849d710eea631e6862 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 8 Nov 2025 15:31:12 -0700 Subject: [PATCH 2/3] Add tests folder to workflow file --- .github/workflows/ci-python.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/ci-python.yml b/.github/workflows/ci-python.yml index 34f8c64bb..3b3af0c0d 100644 --- a/.github/workflows/ci-python.yml +++ b/.github/workflows/ci-python.yml @@ -5,12 +5,14 @@ on: branches: [main] paths: - compass/** + - tests/** - .github/workflows/ci-python.yml - pyproject.toml - pixi.lock pull_request: paths: - compass/** + - tests/** - .github/workflows/ci-python.yml - pyproject.toml - pixi.lock From 41af9db936c34b5d42171402e4f7dd2baff0d060 Mon Sep 17 00:00:00 2001 From: ppinchuk Date: Sat, 15 Nov 2025 14:46:10 -0700 Subject: [PATCH 3/3] Add extra test --- tests/python/unit/utilities/test_utilities_parsing.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/python/unit/utilities/test_utilities_parsing.py b/tests/python/unit/utilities/test_utilities_parsing.py index 56bb34400..e505515f3 100644 --- a/tests/python/unit/utilities/test_utilities_parsing.py +++ b/tests/python/unit/utilities/test_utilities_parsing.py @@ -29,6 +29,7 @@ ("```\ncode\n```", "code\n"), (" ```json\ncode``` ", "json\ncode"), ("\n```\ncode\n```\n", "code\n"), + ("\r\n```\r\ncode\r\n```\r\n", "\r\ncode\r\n"), ("```", ""), ], )