Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
2a2cf74
Rename class
ppinchuk Feb 9, 2026
a6cd95d
Add new LLM task and usage category
ppinchuk Feb 10, 2026
0d7ddb0
Update base plugin abstract methods
ppinchuk Feb 10, 2026
15de4f1
Update plugin interface to implement new abstract methods
ppinchuk Feb 10, 2026
6005b3e
Base text collector has new base class
ppinchuk Feb 10, 2026
5f30992
Add missing import
ppinchuk Feb 10, 2026
cdfdec9
Break out validation methods for easier subclassing
ppinchuk Feb 10, 2026
026dbe3
Collected text now registered as a cleaned output file
ppinchuk Feb 10, 2026
e02900b
Implement new base interface
ppinchuk Feb 10, 2026
1b237aa
Update plugin implementations
ppinchuk Feb 10, 2026
6128720
Move file
ppinchuk Feb 10, 2026
d8baf99
Add `SchemaOutputLLMCaller` class
ppinchuk Feb 10, 2026
b43ba2b
Use new base extractor functions
ppinchuk Feb 10, 2026
e1b7d9b
Add NoOp implementations for extraction pieces
ppinchuk Feb 10, 2026
0320751
Add load config from GAPs to utilities IO module
ppinchuk Feb 10, 2026
265d5b9
Add `CaseInsensitiveEnum`
ppinchuk Feb 10, 2026
9c99fb1
Remove unused function
ppinchuk Feb 10, 2026
e4aaf4e
Use new loading function
ppinchuk Feb 10, 2026
c0aafc4
question -> query
ppinchuk Feb 10, 2026
54bdd0d
Use new load function
ppinchuk Feb 10, 2026
636e493
Fix imports
ppinchuk Feb 10, 2026
d5a2c5e
Add one-shot plugin components
ppinchuk Feb 10, 2026
18e4184
Add `generate_query_templates`
ppinchuk Feb 10, 2026
975e863
Add MVP of one-shot plugin implementation
ppinchuk Feb 10, 2026
c1588e9
Populate namespace
ppinchuk Feb 10, 2026
015f191
Add output schemas
ppinchuk Feb 10, 2026
f53c39a
Add dependency
ppinchuk Feb 10, 2026
bde6ea2
Allow users to add plugin configs when running the CLI
ppinchuk Feb 10, 2026
7d5bd64
Fix tests
ppinchuk Feb 10, 2026
da8c112
Add one-shot example
ppinchuk Feb 11, 2026
cec29e8
PR review fixes
ppinchuk Feb 11, 2026
894e9e7
Add example to docs
ppinchuk Feb 11, 2026
34b4619
Add new examples
ppinchuk Feb 11, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions compass/_cli/finalize.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
"""COMPASS CLI finalize subcommand"""

import json
from datetime import datetime

import click
from rich.theme import Theme
from rich.console import Console

from compass.utilities import Directories
from compass.utilities.io import load_config
from compass.utilities.jurisdictions import Jurisdiction
from compass.utilities.parsing import load_config
from compass.utilities.finalize import save_run_meta, doc_infos_to_db, save_db
from compass.scripts.process import _initialize_model_params

Expand Down Expand Up @@ -62,8 +61,7 @@ def finalize(config):
start_datetime = datetime.fromtimestamp(dirs.out.stat().st_ctime)
end_datetime = datetime.fromtimestamp(jurisdictions_fp.stat().st_mtime)

with jurisdictions_fp.open("r", encoding="utf-8") as fh:
jurisdictions = json.load(fh)
jurisdictions = load_config(jurisdictions_fp)

console.print("Compiling databases...")
jurisdictions = jurisdictions.get("jurisdictions", [])
Expand Down
17 changes: 15 additions & 2 deletions compass/_cli/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@
from rich.console import Console

from compass.pb import COMPASS_PB
from compass.plugin import create_schema_based_one_shot_extraction_plugin
from compass.scripts.process import process_jurisdictions_with_openai
from compass.utilities.logs import AddLocationFilter
from compass.utilities.parsing import load_config
from compass.utilities.io import load_config


@click.command
Expand All @@ -41,10 +42,22 @@
is_flag=True,
help="Flag to hide progress bars during processing.",
)
def process(config, verbose, no_progress):
@click.option(
"--plugin",
"-p",
required=False,
default=None,
help="One-shot plugin configuration to add to COMPASS before processing",
)
def process(config, verbose, no_progress, plugin):
"""Download and extract ordinances for a list of jurisdictions"""
config = load_config(config)

if plugin is not None:
create_schema_based_one_shot_extraction_plugin(
config=plugin, tech=config["tech"]
)

custom_theme = Theme({"logging.level.trace": "rgb(94,79,162)"})
console = Console(theme=custom_theme)

Expand Down
4 changes: 4 additions & 0 deletions compass/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ def __init__(self, *args, **kwargs):
)


class COMPASSFileNotFoundError(COMPASSError, FileNotFoundError):
    """COMPASS FileNotFoundError

    Inherits from both ``COMPASSError`` and the built-in
    ``FileNotFoundError``, so an ``except`` clause written for either
    base class will also catch this exception.
    """


class COMPASSNotInitializedError(COMPASSError):
"""COMPASS not initialized error"""

Expand Down
4 changes: 2 additions & 2 deletions compass/extraction/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import logging
from warnings import warn

from compass.llm import StructuredLLMCaller
from compass.llm import JSONFromTextLLMCaller
from compass.extraction.date import DateExtractor
from compass.validation import (
ParseChunksWithMemory,
Expand Down Expand Up @@ -170,7 +170,7 @@ async def extract_date(doc, model_config, usage_tracker=None):
)
return doc

date_llm_caller = StructuredLLMCaller(
date_llm_caller = JSONFromTextLLMCaller(
llm_service=model_config.llm_service,
usage_tracker=usage_tracker,
**model_config.llm_call_kwargs,
Expand Down
10 changes: 5 additions & 5 deletions compass/extraction/date.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,20 +37,20 @@ class DateExtractor:
)
"""System message for date extraction LLM calls"""

def __init__(self, structured_llm_caller, text_splitter=None):
def __init__(self, json_llm_caller, text_splitter=None):
"""

Parameters
----------
structured_llm_caller : StructuredLLMCaller
json_llm_caller : JSONFromTextLLMCaller
Instance used for structured validation queries.
text_splitter : LCTextSplitter, optional
Optional text splitter (or subclass instance, or any object
that implements a `split_text` method) to attach to doc
(used for splitting out pages in an HTML document).
By default, ``None``.
"""
self.slc = structured_llm_caller
self.jlc = json_llm_caller
self.text_splitter = text_splitter

async def parse(self, doc):
Expand Down Expand Up @@ -84,7 +84,7 @@ async def _parse(self, doc):
)
if can_check_url_for_date:
logger.debug("Checking URL for date: %s", url)
response = await self.slc.call(
response = await self.jlc.call(
sys_msg=self.SYSTEM_MESSAGE,
content=(
"Please extract the date from the URL for this "
Expand All @@ -105,7 +105,7 @@ async def _parse(self, doc):
if not text:
continue

response = await self.slc.call(
response = await self.jlc.call(
sys_msg=self.SYSTEM_MESSAGE,
content=f"Please extract the date for this ordinance:\n{text}",
usage_sub_label=LLMUsageCategory.DATE_EXTRACTION,
Expand Down
8 changes: 4 additions & 4 deletions compass/extraction/small_wind/ordinance.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,12 +369,12 @@ class SmallWindOrdinanceTextExtractor(PromptBasedTextExtractor):
PROMPTS = [
{
"key": "wind_energy_systems_text",
"out_fn": "{jurisdiction} Wind Ordinance Text.txt",
"out_fn": "{jurisdiction} Wind Ordinance.txt",
"prompt": _WECS_TEXT_EXTRACTION_PROMPT,
},
{
"key": "cleaned_text_for_extraction",
"out_fn": "{jurisdiction} Cleaned Text.txt",
"out_fn": "{jurisdiction} Small Wind Ordinance.txt",
"prompt": _SMALL_WECS_TEXT_EXTRACTION_PROMPT,
},
]
Expand All @@ -398,12 +398,12 @@ class SmallWindPermittedUseDistrictsTextExtractor(PromptBasedTextExtractor):
PROMPTS = [
{
"key": "permitted_use_only_text",
"out_fn": "{jurisdiction} Permitted Use Only.txt",
"out_fn": "{jurisdiction} Permitted Use.txt",
"prompt": _PERMITTED_USES_TEXT_EXTRACTION_PROMPT,
},
{
"key": "districts_text",
"out_fn": "{jurisdiction} Districts.txt",
"out_fn": "{jurisdiction} Permitted Use Districts.txt",
"prompt": _WECS_PERMITTED_USES_TEXT_EXTRACTION_PROMPT,
},
]
Expand Down
4 changes: 2 additions & 2 deletions compass/extraction/small_wind/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ class StructuredSmallWindOrdinanceParser(StructuredSmallWindParser):
a decision-tree-based chain-of-thought prompt on the text for
each value to be extracted.
Key Relationships:
Uses a StructuredLLMCaller for LLM queries and multiple
Uses a JSONFromTextLLMCaller for LLM queries and multiple
AsyncDecisionTree instances to guide the extraction of
individual values.
"""
Expand Down Expand Up @@ -558,7 +558,7 @@ class StructuredSmallWindPermittedUseDistrictsParser(
a decision-tree-based chain-of-thought prompt on the text for
each value to be extracted.
Key Relationships:
Uses a StructuredLLMCaller for LLM queries and multiple
Uses a JSONFromTextLLMCaller for LLM queries and multiple
AsyncDecisionTree instances to guide the extraction of
individual values.
"""
Expand Down
10 changes: 5 additions & 5 deletions compass/extraction/small_wind/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
SmallWindPermittedUseDistrictsTextExtractor.OUT_LABEL
)

SMALL_WIND_QUESTION_TEMPLATES = [
SMALL_WIND_QUERY_TEMPLATES = [
"filetype:pdf {jurisdiction} wind energy conversion system ordinances",
"wind energy conversion system ordinances {jurisdiction}",
"{jurisdiction} wind WECS ordinance",
Expand Down Expand Up @@ -66,8 +66,8 @@ class COMPASSSmallWindExtractor(OrdinanceExtractionPlugin):
IDENTIFIER = "small wind"
"""str: Identifier for extraction task """

QUESTION_TEMPLATES = SMALL_WIND_QUESTION_TEMPLATES
"""list: List of search engine question templates for extraction"""
QUERY_TEMPLATES = SMALL_WIND_QUERY_TEMPLATES
"""list: List of search engine query templates for extraction"""

WEBSITE_KEYWORDS = BEST_SMALL_WIND_ORDINANCE_WEBSITE_URL_KEYWORDS
"""list: List of keywords
Expand All @@ -76,8 +76,8 @@ class COMPASSSmallWindExtractor(OrdinanceExtractionPlugin):
a website scrape for a wind ordinance document.
"""

heuristic = SmallWindHeuristic()
"""BaseHeuristic: Object with a ``check()`` method"""
HEURISTIC = SmallWindHeuristic
"""BaseHeuristic: Class with a ``check()`` method"""

TEXT_COLLECTORS = [
SmallWindOrdinanceTextCollector,
Expand Down
6 changes: 3 additions & 3 deletions compass/extraction/solar/ordinance.py
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ class SolarOrdinanceTextExtractor(PromptBasedTextExtractor):
PROMPTS = [
{
"key": "cleaned_text_for_extraction",
"out_fn": "{jurisdiction} Cleaned Text.txt",
"out_fn": "{jurisdiction} Utility Scale Solar Ordinance.txt",
"prompt": _SEF_TEXT_EXTRACTION_PROMPT,
},
]
Expand All @@ -309,12 +309,12 @@ class SolarPermittedUseDistrictsTextExtractor(PromptBasedTextExtractor):
PROMPTS = [
{
"key": "permitted_use_only_text",
"out_fn": "{jurisdiction} Permitted Use Only.txt",
"out_fn": "{jurisdiction} Permitted Use.txt",
"prompt": _PERMITTED_USES_TEXT_EXTRACTION_PROMPT,
},
{
"key": "districts_text",
"out_fn": "{jurisdiction} Districts.txt",
"out_fn": "{jurisdiction} Permitted Use Districts.txt",
"prompt": _SEF_PERMITTED_USES_TEXT_EXTRACTION_PROMPT,
},
]
Expand Down
4 changes: 2 additions & 2 deletions compass/extraction/solar/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ class StructuredSolarOrdinanceParser(StructuredSolarParser):
a decision-tree-based chain-of-thought prompt on the text for
each value to be extracted.
Key Relationships:
Uses a StructuredLLMCaller for LLM queries and multiple
Uses a JSONFromTextLLMCaller for LLM queries and multiple
AsyncDecisionTree instances to guide the extraction of
individual values.
"""
Expand Down Expand Up @@ -494,7 +494,7 @@ class StructuredSolarPermittedUseDistrictsParser(StructuredSolarParser):
a decision-tree-based chain-of-thought prompt on the text for
each value to be extracted.
Key Relationships:
Uses a StructuredLLMCaller for LLM queries and multiple
Uses a JSONFromTextLLMCaller for LLM queries and multiple
AsyncDecisionTree instances to guide the extraction of
individual values.
"""
Expand Down
10 changes: 5 additions & 5 deletions compass/extraction/solar/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
SolarPermittedUseDistrictsTextExtractor.OUT_LABEL
)

SOLAR_QUESTION_TEMPLATES = [
SOLAR_QUERY_TEMPLATES = [
"filetype:pdf {jurisdiction} solar energy conversion system ordinances",
"solar energy conversion system ordinances {jurisdiction}",
"{jurisdiction} solar energy farm ordinance",
Expand Down Expand Up @@ -67,8 +67,8 @@ class COMPASSSolarExtractor(OrdinanceExtractionPlugin):
IDENTIFIER = "solar"
"""str: Identifier for extraction task """

QUESTION_TEMPLATES = SOLAR_QUESTION_TEMPLATES
"""list: List of search engine question templates for extraction"""
QUERY_TEMPLATES = SOLAR_QUERY_TEMPLATES
"""list: List of search engine query templates for extraction"""

WEBSITE_KEYWORDS = BEST_SOLAR_ORDINANCE_WEBSITE_URL_KEYWORDS
"""list: List of keywords
Expand All @@ -77,8 +77,8 @@ class COMPASSSolarExtractor(OrdinanceExtractionPlugin):
a website scrape for a solar ordinance document.
"""

heuristic = SolarHeuristic()
"""BaseHeuristic: Object with a ``check()`` method"""
HEURISTIC = SolarHeuristic
"""BaseHeuristic: Class with a ``check()`` method"""

TEXT_COLLECTORS = [
SolarOrdinanceTextCollector,
Expand Down
42 changes: 28 additions & 14 deletions compass/extraction/water/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
logger = logging.getLogger(__name__)


WATER_RIGHTS_QUESTION_TEMPLATES = [
WATER_RIGHTS_QUERY_TEMPLATES = [
"{jurisdiction} rules",
"{jurisdiction} management plan",
"{jurisdiction} well permits",
Expand Down Expand Up @@ -66,26 +66,40 @@ class TexasWaterRightsExtractor(BaseExtractionPlugin):
IDENTIFIER = "tx water rights"
"""str: Identifier for extraction task """

QUESTION_TEMPLATES = WATER_RIGHTS_QUESTION_TEMPLATES
"""list: List of search engine question templates for extraction"""

WEBSITE_KEYWORDS = BEST_WATER_RIGHTS_ORDINANCE_WEBSITE_URL_KEYWORDS
"""list: List of keywords

Keywords indicate links which should be prioritized when performing
a website scrape for a wind ordinance document.
"""

heuristic = WaterRightsHeuristic()
"""BaseHeuristic: Object with a ``check()`` method"""

JURISDICTION_DATA_FP = (
importlib.resources.files("compass")
/ "data"
/ "tx_water_districts.csv"
)
""":term:`path-like <path-like object>`: Path to Texas GCW names"""

async def get_query_templates(self):  # noqa: PLR6301
    """Get a list of search engine query templates for extraction

    Query templates can contain the placeholder ``{jurisdiction}``
    which will be replaced with the full jurisdiction name during
    the search engine query.

    Returns
    -------
    list of str
        The module-level ``WATER_RIGHTS_QUERY_TEMPLATES`` list. The
        list object itself is returned (not a copy).
    """
    return WATER_RIGHTS_QUERY_TEMPLATES

async def get_website_keywords(self):  # noqa: PLR6301
    """Get a dict of website search keyword scores

    Dictionary mapping keywords to scores that indicate links which
    should be prioritized when performing a website scrape for a
    document.

    Returns
    -------
    dict
        The module-level
        ``BEST_WATER_RIGHTS_ORDINANCE_WEBSITE_URL_KEYWORDS`` object,
        returned as-is (not a copy).
        NOTE(review): the constant is defined outside this view;
        presumably a ``{keyword: score}`` mapping - confirm.
    """
    return BEST_WATER_RIGHTS_ORDINANCE_WEBSITE_URL_KEYWORDS

async def get_heuristic(self):  # noqa: PLR6301
    """Get a `BaseHeuristic` instance with a `check()` method

    The ``check()`` method should accept a string of text and return
    ``True`` if the text passes the heuristic check and ``False``
    otherwise.

    Returns
    -------
    WaterRightsHeuristic
        A new ``WaterRightsHeuristic`` instance; one is constructed
        on every call.
    """
    return WaterRightsHeuristic()

async def filter_docs(
self,
extraction_context,
Expand Down
8 changes: 4 additions & 4 deletions compass/extraction/wind/ordinance.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,12 +343,12 @@ class WindOrdinanceTextExtractor(PromptBasedTextExtractor):
PROMPTS = [
{
"key": "wind_energy_systems_text",
"out_fn": "{jurisdiction} Wind Ordinance Text.txt",
"out_fn": "{jurisdiction} Wind Ordinance.txt",
"prompt": _WECS_TEXT_EXTRACTION_PROMPT,
},
{
"key": "cleaned_text_for_extraction",
"out_fn": "{jurisdiction} Cleaned Text.txt",
"out_fn": "{jurisdiction} Utility Scale Wind Ordinance.txt",
"prompt": _LARGE_WECS_TEXT_EXTRACTION_PROMPT,
},
]
Expand All @@ -372,12 +372,12 @@ class WindPermittedUseDistrictsTextExtractor(PromptBasedTextExtractor):
PROMPTS = [
{
"key": "permitted_use_only_text",
"out_fn": "{jurisdiction} Permitted Use Only.txt",
"out_fn": "{jurisdiction} Permitted Use.txt",
"prompt": _PERMITTED_USES_TEXT_EXTRACTION_PROMPT,
},
{
"key": "districts_text",
"out_fn": "{jurisdiction} Districts.txt",
"out_fn": "{jurisdiction} Permitted Use Districts.txt",
"prompt": _WECS_PERMITTED_USES_TEXT_EXTRACTION_PROMPT,
},
]
Expand Down
4 changes: 2 additions & 2 deletions compass/extraction/wind/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ class StructuredWindOrdinanceParser(StructuredWindParser):
a decision-tree-based chain-of-thought prompt on the text for
each value to be extracted.
Key Relationships:
Uses a StructuredLLMCaller for LLM queries and multiple
Uses a JSONFromTextLLMCaller for LLM queries and multiple
AsyncDecisionTree instances to guide the extraction of
individual values.
"""
Expand Down Expand Up @@ -497,7 +497,7 @@ class StructuredWindPermittedUseDistrictsParser(StructuredWindParser):
a decision-tree-based chain-of-thought prompt on the text for
each value to be extracted.
Key Relationships:
Uses a StructuredLLMCaller for LLM queries and multiple
Uses a JSONFromTextLLMCaller for LLM queries and multiple
AsyncDecisionTree instances to guide the extraction of
individual values.
"""
Expand Down
Loading
Loading