NatLabRockies · ppinchuk · Nov 15, 2025 · Nov 8, 2025 · Nov 8, 2025 · Nov 8, 2025
@@ -1,13 +1,42 @@
 """Test COMPASS Ordinance parsing utilities"""
 
+import json
 from pathlib import Path
+from unittest.mock import MagicMock
 
+import numpy as np
+import pandas as pd
 import pytest
 
 from compass.utilities.parsing import (
+    clean_backticks_from_llm_response,
+    extract_ord_year_from_doc_attrs,
     llm_response_as_json,
+    load_config,
     merge_overlapping_texts,
+    num_ordinances_dataframe,
+    num_ordinances_in_doc,
+    ordinances_bool_index,
 )
+from compass.exceptions import COMPASSValueError
+
+
+@pytest.mark.parametrize(
+    ("in_str", "expected"),
+    [
+        ("plain text", "plain text"),  # no fence at all
+        ("```code```", "code"),  # fence on a single line
+        ("```\ncode\n```", "code\n"),  # fence markers on their own lines
+        ("  ```json\ncode```  ", "json\ncode"),  # space-padded, "json" tag
+        ("\n```\ncode\n```\n", "code\n"),  # wrapped in newlines
+        ("\r\n```\r\ncode\r\n```\r\n", "\r\ncode\r\n"),  # CRLF line endings
+        ("```", ""),  # three backticks only
+    ],
+)
+def test_clean_backticks_from_llm_response(in_str, expected):
+    """Verify backtick fences are stripped from each sample LLM response"""
+    cleaned = clean_backticks_from_llm_response(in_str)
+    assert cleaned == expected
 
 
 @pytest.mark.parametrize(
@@ -16,9 +45,11 @@
         (' {"a": 1} ', {"a": 1}),
         ('```json\n{"a": True, "b": False}```', {"a": True, "b": False}),
         ('{"a": True', {}),
+        ('json\n{"key": "value"}', {"key": "value"}),
+        ('{"a": True, "b": False}', {"a": True, "b": False}),
     ],
 )
-def test_sync_retry(in_str, expected):
+def test_llm_response_as_json(in_str, expected):
     """Test the `llm_response_as_json` function"""
 
     assert llm_response_as_json(in_str) == expected
@@ -38,7 +69,10 @@ def test_sync_retry(in_str, expected):
             "Some text. Some overlap. More text. More text that "
             "shouldn't be touched. Some overlap. More text.\nSome "
             "non-overlapping text.",
-        )
+        ),
+        ([], 300, ""),
+        (["single chunk"], 300, "single chunk"),
+        (["", None, "valid text"], 300, "valid text"),
     ],
 )
 def test_merge_overlapping_texts(text_chunks, n, expected):
@@ -47,5 +81,196 @@ def test_merge_overlapping_texts(text_chunks, n, expected):
     assert merge_overlapping_texts(text_chunks, n) == expected
 
 
+@pytest.mark.parametrize(
+    ("doc_attrs", "expected"),
+    [
+        ({"date": (2023, 5, 15)}, 2023),
+        ({"date": (2020, 1, 1)}, 2020),
+        ({"date": (None, 5, 15)}, None),  # year missing
+        ({"date": (0, 5, 15)}, None),  # zero year
+        ({"date": (-1, 5, 15)}, None),  # negative year
+        ({}, None),  # empty attrs
+        ({"other_key": "value"}, None),  # no "date" key
+    ],
+)
+def test_extract_ord_year_from_doc_attrs(doc_attrs, expected):
+    """Verify the year pulled from each sample attribute dictionary"""
+    ord_year = extract_ord_year_from_doc_attrs(doc_attrs)
+    assert ord_year == expected
+
+
+def test_num_ordinances_in_doc_none():
+    """Check that a missing (None) document counts as zero ordinances"""
+    n_ordinances = num_ordinances_in_doc(None)
+    assert n_ordinances == 0
+
+
+def test_num_ordinances_in_doc_no_ordinance_values():
+    """Check the count is zero when `attrs` has no "ordinance_values" key"""
+    mock_doc = MagicMock()
+    mock_doc.attrs = {}
+    n_ordinances = num_ordinances_in_doc(mock_doc)
+    assert n_ordinances == 0
+
+
+def test_num_ordinances_in_doc_with_ordinances():
+    """Check all three rows count when each has a value or a summary"""
+    ordinance_values = pd.DataFrame(
+        {
+            "feature": ["setback", "height", "noise"],
+            "value": [100, 200, None],
+            "summary": ["test", None, "test"],
+        }
+    )
+    mock_doc = MagicMock()
+    mock_doc.attrs = {"ordinance_values": ordinance_values}
+    # No row has both "value" and "summary" missing
+    n_ordinances = num_ordinances_in_doc(mock_doc)
+    assert n_ordinances == 3
+
+
+def test_num_ordinances_in_doc_with_exclude_features():
+    """Check that features listed in `exclude_features` are not counted"""
+    ordinance_values = pd.DataFrame(
+        {
+            "feature": ["setback", "height", "noise"],
+            "value": [100, 200, 300],
+            "summary": ["test", "test", "test"],
+        }
+    )
+    mock_doc = MagicMock()
+    mock_doc.attrs = {"ordinance_values": ordinance_values}
+    excluded = ["noise"]
+    n_ordinances = num_ordinances_in_doc(mock_doc, exclude_features=excluded)
+    assert n_ordinances == 2
+
+
+def test_num_ordinances_dataframe_empty():
+    """Check that a DataFrame with no rows or columns yields a zero count"""
+    empty_frame = pd.DataFrame()
+    n_ordinances = num_ordinances_dataframe(empty_frame)
+    assert n_ordinances == 0
+
+
+def test_num_ordinances_dataframe_with_values():
+    """Check the count when every row has a value or a summary"""
+    columns = {
+        "feature": ["setback", "height", "noise"],
+        "value": [100, 200, None],
+        "summary": ["test", None, "test"],
+    }
+    frame = pd.DataFrame(columns)
+    # No row has both "value" and "summary" missing
+    n_ordinances = num_ordinances_dataframe(frame)
+    assert n_ordinances == 3
+
+
+def test_num_ordinances_dataframe_with_exclude():
+    """Check exclusion when the feature's case differs ("HEIGHT"/"height")"""
+    columns = {
+        "feature": ["setback", "HEIGHT", "noise"],
+        "value": [100, 200, 300],
+        "summary": ["test", "test", "test"],
+    }
+    frame = pd.DataFrame(columns)
+    excluded = ["height"]
+    n_ordinances = num_ordinances_dataframe(frame, exclude_features=excluded)
+    assert n_ordinances == 2
+
+
+def test_ordinances_bool_index_none():
+    """Check that None input gives back an empty boolean NumPy array"""
+    mask = ordinances_bool_index(None)
+    # An empty mask must still be a boolean ndarray
+    assert isinstance(mask, np.ndarray)
+    assert len(mask) == 0
+    assert mask.dtype == bool
+
+
+def test_ordinances_bool_index_empty():
+    """Check that an empty DataFrame gives back an empty NumPy array"""
+    empty_frame = pd.DataFrame()
+    mask = ordinances_bool_index(empty_frame)
+    # Only the container type and the length are checked here
+    assert isinstance(mask, np.ndarray)
+    assert len(mask) == 0
+
+
+def test_ordinances_bool_index_no_check_cols():
+    """Check that a frame with only a "feature" column gives an empty array"""
+    feature_only = pd.DataFrame({"feature": ["setback", "height"]})
+    mask = ordinances_bool_index(feature_only)
+    # The result is expected to be empty although the frame has two rows
+    assert isinstance(mask, np.ndarray)
+    assert len(mask) == 0
+
+
+def test_ordinances_bool_index_with_values():
+    """Check the mask is False only where value and summary are both None"""
+    columns = {
+        "feature": ["setback", "height", "noise", "extra"],
+        "value": [100, None, None, 400],
+        "summary": [None, "test", None, "test"],
+    }
+    frame = pd.DataFrame(columns)
+    # Only the "noise" row has neither a value nor a summary
+    want = np.array([True, True, False, True])
+    got = ordinances_bool_index(frame)
+    # Compare element-wise against the hand-built mask
+    np.testing.assert_array_equal(got, want)
+
+
+def test_ordinances_bool_index_value_only():
+    """Check the mask when the frame has no "summary" column"""
+    columns = {
+        "feature": ["setback", "height", "noise"],
+        "value": [100, None, 300],
+    }
+    frame = pd.DataFrame(columns)
+    want = np.array([True, False, True])
+    np.testing.assert_array_equal(ordinances_bool_index(frame), want)
+
+
+def test_load_config_json(tmp_path):
+    """Check that a config written as a `.json` file loads back unchanged"""
+    want = {"key": "value", "number": 42}
+    json_path = tmp_path / "test_config.json"
+    # Serialise with the stdlib so the file content is plain JSON
+    json_path.write_text(json.dumps(want), encoding="utf-8")
+
+    loaded = load_config(json_path)
+    # The loaded mapping should equal the dictionary that was written
+    assert loaded == want
+
+
+def test_load_config_json5(tmp_path):
+    """Check that a `.json5` file with a comment and trailing comma loads"""
+    json5_text = """{
+        // This is a comment
+        "key": "value",
+        "number": 42,
+    }"""
+    json5_path = tmp_path / "test_config.json5"
+    json5_path.write_text(json5_text, encoding="utf-8")
+
+    want = {"key": "value", "number": 42}
+    loaded = load_config(json5_path)
+    # The comment and the trailing comma must not affect the parsed result
+    assert loaded == want
+
+
+def test_load_config_invalid_extension(tmp_path):
+    """Check that a `.txt` config path raises `COMPASSValueError`"""
+    txt_path = tmp_path / "test_config.txt"
+    # Create the (empty) file on disk before trying to load it
+    txt_path.touch()
+
+    # `match` is a regular expression, hence the escaped dot
+    error_pattern = r"Got unknown config file extension: \.txt"
+    expected_error = pytest.raises(COMPASSValueError, match=error_pattern)
+    with expected_error:
+        load_config(txt_path)
+
+
 if __name__ == "__main__":
     pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"])