diff --git a/compass/extraction/apply.py b/compass/extraction/apply.py index ccf0866d8..c36bed35e 100644 --- a/compass/extraction/apply.py +++ b/compass/extraction/apply.py @@ -67,12 +67,16 @@ async def check_for_ordinance_info( chunks = model_config.text_splitter.split_text(doc.text) chunk_parser = ParseChunksWithMemory(chunks, num_to_recall=2) - legal_text_validator = LegalTextValidator( - tech=tech, - llm_service=model_config.llm_service, - usage_tracker=usage_tracker, - doc_is_from_ocr=doc.attrs.get("from_ocr", False), - **model_config.llm_call_kwargs, + legal_text_validator = ( + None + if doc.attrs.get("is_legal_doc", False) + else LegalTextValidator( + tech=tech, + llm_service=model_config.llm_service, + usage_tracker=usage_tracker, + doc_is_from_ocr=doc.attrs.get("from_ocr", False), + **model_config.llm_call_kwargs, + ) ) ordinance_text_collector = ordinance_text_collector_class( @@ -142,6 +146,15 @@ async def extract_date(doc, model_config, usage_tracker=None): the attrs will contain a ``"date"`` key that will contain the parsed date information. """ + if "date" in doc.attrs: + logger.debug( + "Not extracting date for doc from %s. " + "Found existing date in doc attrs: %r", + doc.attrs.get("source"), + doc.attrs["date"], + ) + return doc + date_llm_caller = StructuredLLMCaller( llm_service=model_config.llm_service, usage_tracker=usage_tracker, diff --git a/compass/scripts/download.py b/compass/scripts/download.py index f3adad7b4..5d1bcbfd6 100644 --- a/compass/scripts/download.py +++ b/compass/scripts/download.py @@ -25,6 +25,7 @@ ) from compass.web.website_crawl import COMPASSCrawler, COMPASSLinkScorer from compass.utilities.enums import LLMTasks +from compass.utilities.io import load_local_docs from compass.pb import COMPASS_PB @@ -50,7 +51,7 @@ async def download_known_urls( are applied. By default, ``None``. file_loader_kwargs : dict, optional Dictionary of keyword arguments pairs to initialize - :class:`elm.web.file_loader.AsyncFileLoader`. 
+ :class:`elm.web.file_loader.AsyncWebFileLoader`. By default, ``None``. Returns @@ -93,6 +94,61 @@ async def download_known_urls( return out_docs +async def load_known_docs(jurisdiction, fps, local_file_loader_kwargs=None): + """Load documents from known local paths + + Parameters + ---------- + jurisdiction : Jurisdiction + Jurisdiction instance representing the jurisdiction + corresponding to the documents. + fps : iterable of path-like + Collection of paths to load documents from. + local_file_loader_kwargs : dict, optional + Dictionary of keyword arguments pairs to initialize + :class:`elm.web.file_loader.AsyncLocalFileLoader`. + By default, ``None``. + + Returns + ------- + out_docs : list + List of :obj:`~elm.web.document.BaseDocument` instances + containing documents from the paths, or an empty list if + something went wrong during the retrieval process. + + Notes + ----- + Requires :class:`~compass.services.threaded.TempFileCachePB` + service to be running. + """ + + COMPASS_PB.update_jurisdiction_task( + jurisdiction.full_name, description="Loading known document(s)..." + ) + + local_file_loader_kwargs = local_file_loader_kwargs or {} + local_file_loader_kwargs.update( + {"file_cache_coroutine": TempFileCachePB.call} + ) + async with COMPASS_PB.file_download_prog_bar( + jurisdiction.full_name, len(fps) + ): + try: + out_docs = await load_local_docs(fps, **local_file_loader_kwargs) + except KeyboardInterrupt: + raise + except Exception as e: + msg = ( + "Encountered error of type %r while loading known documents: " + "%r" + ) + err_type = type(e) + logger.exception(msg, err_type, fps) + out_docs = [] + + return out_docs + + async def find_jurisdiction_website( jurisdiction, model_configs, @@ -116,7 +172,7 @@ async def find_jurisdiction_website( for all tasks. file_loader_kwargs : dict, optional Dictionary of keyword arguments pairs to initialize - :class:`elm.web.file_loader.AsyncFileLoader`. If found, the + :class:`elm.web.file_loader.AsyncWebFileLoader`. 
If found, the "pw_launch_kwargs" key in these will also be used to initialize the :class:`elm.web.search.google.PlaywrightGoogleLinkSearch` used for the Google URL search. By default, ``None``. @@ -202,7 +258,7 @@ async def download_jurisdiction_ordinances_from_website( the points are summed up. file_loader_kwargs : dict, optional Dictionary of keyword arguments pairs to initialize - :class:`elm.web.file_loader.AsyncFileLoader`. If found, the + :class:`elm.web.file_loader.AsyncWebFileLoader`. If found, the "pw_launch_kwargs" key in these will also be used to initialize the :class:`elm.web.search.google.PlaywrightGoogleLinkSearch` used for the Google URL search. By default, ``None``. @@ -327,7 +383,7 @@ async def download_jurisdiction_ordinances_from_website_compass_crawl( the points are summed up. file_loader_kwargs : dict, optional Dictionary of keyword arguments pairs to initialize - :class:`elm.web.file_loader.AsyncFileLoader`. If found, the + :class:`elm.web.file_loader.AsyncWebFileLoader`. If found, the "pw_launch_kwargs" key in these will also be used to initialize the :class:`elm.web.search.google.PlaywrightGoogleLinkSearch` used for the Google URL search. By default, ``None``. @@ -426,9 +482,10 @@ async def download_jurisdiction_ordinance_using_search_engine( ordinance document. By default, ``5``. file_loader_kwargs : dict, optional Dictionary of keyword-argument pairs to initialize - :class:`elm.web.file_loader.AsyncFileLoader` with. If found, the - "pw_launch_kwargs" key in these will also be used to initialize - the :class:`elm.web.search.google.PlaywrightGoogleLinkSearch` + :class:`elm.web.file_loader.AsyncWebFileLoader` with. If found, + the "pw_launch_kwargs" key in these will also be used to + initialize the + :class:`elm.web.search.google.PlaywrightGoogleLinkSearch` used for the google URL search. By default, ``None``. 
search_semaphore : :class:`asyncio.Semaphore`, optional Semaphore instance that can be used to limit the number of diff --git a/compass/scripts/process.py b/compass/scripts/process.py index e138dee83..f738dfe34 100644 --- a/compass/scripts/process.py +++ b/compass/scripts/process.py @@ -15,6 +15,7 @@ from compass.scripts.download import ( find_jurisdiction_website, download_known_urls, + load_known_docs, download_jurisdiction_ordinance_using_search_engine, download_jurisdiction_ordinances_from_website, download_jurisdiction_ordinances_from_website_compass_crawl, @@ -65,6 +66,8 @@ OCRPDFLoader, read_pdf_doc, read_pdf_doc_ocr, + read_pdf_file, + read_pdf_file_ocr, ) from compass.services.usage import UsageTracker from compass.services.openai import usage_from_response @@ -77,6 +80,7 @@ OrdDBFileWriter, UsageUpdater, JurisdictionUpdater, + HTMLFileLoader, ) from compass.utilities import ( LLM_COST_REGISTRY, @@ -152,6 +156,7 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 max_num_concurrent_website_searches=10, max_num_concurrent_jurisdictions=25, url_ignore_substrings=None, + known_local_docs=None, known_doc_urls=None, file_loader_kwargs=None, search_engines=None, @@ -163,18 +168,31 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 clean_dir=None, ordinance_file_dir=None, jurisdiction_dbs_dir=None, + perform_se_search=True, perform_website_search=True, llm_costs=None, log_level="INFO", keep_async_logs=False, ): - """Download and extract ordinances for a list of jurisdictions + """Extract ordinances for one or more jurisdiction(s) This function scrapes ordinance documents (PDFs or HTML text) for a - list of specified jurisdictions and processes them using one or more + given set of jurisdictions and processes them using one or more LLM models. Output files, logs, and intermediate artifacts are stored in configurable directories. + The processing has a well-defined order: + + 1. Process any/all known local documents + 2. 
Process any/all known document URLs + 3. Search engine-based search for ordinance documents + 4. Jurisdiction website crawl-based search for ordinance + documents + + Users can disable any of these steps via inputs to this function. If + any step returns a document with extractable ordinance information, + subsequent steps are skipped for that jurisdiction. + Parameters ---------- out_dir : path-like @@ -268,17 +286,49 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 all websites on the NREL domain, and the specific file located at `www.co.delaware.in.us/documents/1649699794_0382.pdf`. By default, ``None``. - known_doc_urls : dict or str, optional + known_local_docs : dict or path-like, optional A dictionary where keys are the jurisdiction codes (as strings) - and values are a string or list of strings representing known - URL's to check for those jurisdictions. If provided, these URL's - will be checked first and if an ordinance document is found, no - further scraping will be performed. This input can also be a - path to a JSON file containing the dictionary of code-to-url - mappings. By default, ``None``. + and values are lists of dictionaries containing information + about each document. The latter dictionaries should contain at + least the key ``"source_fp"`` pointing to the **full** path of + the local document file. All other keys will be added as + attributes to the loaded document instance. You can include the + key ``"is_legal_doc"`` to skip the legal document check for + known documents. Similarly, you can provide the ``"date"`` key, + which is a list of ``[year, month, day]``, some or all of which + can be null, to skip the date extraction step of the processing + pipeline. If this input is provided, local documents will be + checked first. See the top-level documentation of this function + for the full processing of the pipeline. 
This input can also be + a path to a JSON file containing the dictionary of + code-to-document-info mappings. By default, ``None``. + known_doc_urls : dict or path-like, optional + A dictionary where keys are the jurisdiction codes (as strings) + and values are lists of dictionaries containing information + about each document. The latter dictionaries should contain at + least the key ``"source"`` representing the known URL to check + for that document. All other keys will be added as attributes + to the loaded document instance. You can include the key + ``"is_legal_doc"`` to skip the legal document check for known + documents. Similarly, you can provide the ``"date"`` key, which + is a list of ``[year, month, day]``, some or all of which can + be null, to skip the date extraction step of the processing + pipeline. If this input is provided, the known URLs will be + checked before applying the search engine search. See the + top-level documentation of this function for the full processing + order of the pipeline. This input can also be a path to a JSON + file containing the dictionary of code-to-document-info + mappings. + + .. Note:: The same input can be used for both `known_local_docs` + and `known_doc_urls` as long as both ``"source_fp"`` + and ``"source"`` keys are provided in each document + info dictionary. + + By default, ``None``. file_loader_kwargs : dict, optional Dictionary of keyword arguments pairs to initialize - :class:`elm.web.file_loader.AsyncFileLoader`. If found, the + :class:`elm.web.file_loader.AsyncWebFileLoader`. If found, the "pw_launch_kwargs" key in these will also be used to initialize the :class:`elm.web.search.google.PlaywrightGoogleLinkSearch` used for the google URL search. By default, ``None``. @@ -337,6 +387,14 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 stored for each jurisdiction. If not provided, a ``jurisdiction_dbs`` subdirectory will be created inside `out_dir`. By default, ``None``. 
+ perform_se_search : bool, default=True + Option to perform a search engine-based search for ordinance + documents. This is the standard way to collect ordinance + documents, and it is recommended to leave this set to ``True`` + unless you are re-processing local documents. If ``True``, the + search engine approach is used to locate ordinance documents + before falling back to a website crawl-based search (if that has + been selected). By default, ``True``. perform_website_search : bool, default=True Option to fallback to a jurisdiction website crawl-based search for ordinance documents if the search engine approach fails to @@ -396,6 +454,7 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 jdd=jurisdiction_dbs_dir, ) pk = ProcessKwargs( + known_local_docs, known_doc_urls, file_loader_kwargs, td_kwargs, @@ -419,6 +478,7 @@ async def process_jurisdictions_with_openai( # noqa: PLR0917, PLR0913 models=models, web_search_params=wsp, process_kwargs=pk, + perform_se_search=perform_se_search, perform_website_search=perform_website_search, log_level=log_level, ) @@ -438,6 +498,7 @@ def __init__( models, web_search_params=None, process_kwargs=None, + perform_se_search=True, perform_website_search=True, log_level="INFO", ): @@ -447,6 +508,7 @@ def __init__( self.models = models self.web_search_params = web_search_params or WebSearchParams() self.process_kwargs = process_kwargs or ProcessKwargs() + self.perform_se_search = perform_se_search self.perform_website_search = perform_website_search self.log_level = log_level @@ -497,7 +559,7 @@ def jurisdiction_semaphore(self): @cached_property def file_loader_kwargs(self): - """dict: Keyword arguments for `AsyncFileLoader`""" + """dict: Keyword arguments for `AsyncWebFileLoader`""" file_loader_kwargs = _configure_file_loader_kwargs( self.process_kwargs.file_loader_kwargs ) @@ -508,6 +570,34 @@ def file_loader_kwargs(self): ) return file_loader_kwargs + @cached_property + def local_file_loader_kwargs(self): 
+ """dict: Keyword arguments for `AsyncLocalFileLoader`""" + file_loader_kwargs = { + "pdf_read_coroutine": read_pdf_file, + "pdf_read_kwargs": ( + self.process_kwargs.file_loader_kwargs.get("pdf_read_kwargs") + ), + "html_read_kwargs": ( + self.process_kwargs.file_loader_kwargs.get("html_read_kwargs") + ), + } + + if self.web_search_params.pytesseract_exe_fp is not None: + _setup_pytesseract(self.web_search_params.pytesseract_exe_fp) + file_loader_kwargs.update( + {"pdf_ocr_read_coroutine": read_pdf_file_ocr} + ) + return file_loader_kwargs + + @cached_property + def known_local_docs(self): + """dict: Known filepaths by jurisdiction code""" + known_local_docs = self.process_kwargs.known_local_docs or {} + if isinstance(known_local_docs, str): + known_local_docs = load_config(known_local_docs) + return {int(key): val for key, val in known_local_docs.items()} + @cached_property def known_doc_urls(self): """dict: Known URL's keyed by jurisdiction code""" @@ -548,6 +638,7 @@ def _base_services(self): tpe_kwargs=self.tpe_kwargs, ), PDFLoader(**(self.process_kwargs.ppe_kwargs or {})), + HTMLFileLoader(**self.tpe_kwargs), ] if self.web_search_params.pytesseract_exe_fp is not None: @@ -613,6 +704,7 @@ async def _run_all(self, jurisdictions): services = [model.llm_service for model in set(self.models.values())] services += self._base_services _ = self.file_loader_kwargs # init loader kwargs once + _ = self.local_file_loader_kwargs # init local loader kwargs once logger.info("Processing %d jurisdiction(s)", len(jurisdictions)) async with RunningAsyncServices(services): tasks = [] @@ -632,6 +724,7 @@ async def _run_all(self, jurisdictions): self._processed_jurisdiction_info_with_pb( jurisdiction, website, + self.known_local_docs.get(fips), self.known_doc_urls.get(fips), usage_tracker=usage_tracker, ), @@ -670,6 +763,7 @@ async def _process_jurisdiction_with_logging( self, jurisdiction, jurisdiction_website, + known_local_docs=None, known_doc_urls=None, usage_tracker=None, ): 
@@ -687,11 +781,14 @@ async def _process_jurisdiction_with_logging( self.models, self.web_search_params, self.file_loader_kwargs, + local_file_loader_kwargs=self.local_file_loader_kwargs, + known_local_docs=known_local_docs, known_doc_urls=known_doc_urls, browser_semaphore=self.browser_semaphore, crawl_semaphore=self.crawl_semaphore, search_engine_semaphore=self.search_engine_semaphore, jurisdiction_website=jurisdiction_website, + perform_se_search=self.perform_se_search, perform_website_search=self.perform_website_search, usage_tracker=usage_tracker, ).run(), @@ -721,11 +818,14 @@ def __init__( # noqa: PLR0913 web_search_params, file_loader_kwargs, *, + local_file_loader_kwargs=None, + known_local_docs=None, known_doc_urls=None, browser_semaphore=None, crawl_semaphore=None, search_engine_semaphore=None, jurisdiction_website=None, + perform_se_search=True, perform_website_search=True, usage_tracker=None, ): @@ -734,19 +834,22 @@ def __init__( # noqa: PLR0913 self.models = models self.web_search_params = web_search_params self.file_loader_kwargs = file_loader_kwargs + self.local_file_loader_kwargs = local_file_loader_kwargs + self.known_local_docs = known_local_docs self.known_doc_urls = known_doc_urls self.browser_semaphore = browser_semaphore self.crawl_semaphore = crawl_semaphore self.search_engine_semaphore = search_engine_semaphore self.usage_tracker = usage_tracker self.jurisdiction_website = jurisdiction_website + self.perform_se_search = perform_se_search self.perform_website_search = perform_website_search self.validate_user_website_input = True self._jsp = None @cached_property def file_loader_kwargs_no_ocr(self): - """dict: Keyword arguments for `AsyncFileLoader` with no OCR""" + """dict: Keyword arguments for `AsyncWebFileLoader` (no OCR)""" flk = deepcopy(self.file_loader_kwargs) flk.pop("pdf_ocr_read_coroutine", None) return flk @@ -776,35 +879,40 @@ async def run(self): async def _run(self): """Search for docs and parse them for ordinances""" + if 
self.known_local_docs: + doc = await self._try_find_ordinances( + method=self._load_known_local_documents, + ) + if doc is not None: + return doc + if self.known_doc_urls: - docs = await self._download_known_url_documents() - if docs is not None: - COMPASS_PB.update_jurisdiction_task( - self.jurisdiction.full_name, - description="Extracting structured data...", - ) - doc = await self._parse_docs_for_ordinances(docs) - else: - doc = None + doc = await self._try_find_ordinances( + method=self._download_known_url_documents, + ) + if doc is not None: + return doc + if self.perform_se_search: + doc = await self._try_find_ordinances( + method=self._find_documents_using_search_engine, + ) if doc is not None: return doc - docs = await self._find_documents_using_search_engine() - if docs is not None: - COMPASS_PB.update_jurisdiction_task( - self.jurisdiction.full_name, - description="Extracting structured data...", + if self.perform_website_search: + doc = await self._try_find_ordinances( + method=self._find_documents_from_website, ) - doc = await self._parse_docs_for_ordinances(docs) - else: - doc = None + if doc is not None: + return doc - if doc is not None or not self.perform_website_search: - return doc + return None - docs = await self._find_documents_from_website() - if not docs: + async def _try_find_ordinances(self, method, *args, **kwargs): + """Try to find ordinances using specified method""" + docs = await method(*args, **kwargs) + if docs is None: return None COMPASS_PB.update_jurisdiction_task( @@ -813,15 +921,54 @@ async def _run(self): ) return await self._parse_docs_for_ordinances(docs) + async def _load_known_local_documents(self): + """Load local ordinance documents""" + + docs = await load_known_docs( + self.jurisdiction, + [info["source_fp"] for info in self.known_local_docs], + local_file_loader_kwargs=self.local_file_loader_kwargs, + ) + + if not docs: + return None + + _add_known_doc_attrs_to_all_docs( + docs, self.known_local_docs, key="source_fp" + 
) + docs = await filter_ordinance_docs( + docs, + self.jurisdiction, + self.models, + heuristic=self.tech_specs.heuristic, + tech=self.tech_specs.name, + ordinance_text_collector_class=( + self.tech_specs.ordinance_text_collector + ), + permitted_use_text_collector_class=( + self.tech_specs.permitted_use_text_collector + ), + usage_tracker=self.usage_tracker, + check_for_correct_jurisdiction=False, + ) + if not docs: + return None + + for doc in docs: + doc.attrs["jurisdiction"] = self.jurisdiction + doc.attrs["jurisdiction_name"] = self.jurisdiction.full_name + doc.attrs["jurisdiction_website"] = None + doc.attrs["compass_crawl"] = False + + await self._record_usage() + return docs + async def _download_known_url_documents(self): """Download ordinance documents from known URLs""" - if isinstance(self.known_doc_urls, str): - self.known_doc_urls = [self.known_doc_urls] - docs = await download_known_urls( self.jurisdiction, - self.known_doc_urls, + [info["source"] for info in self.known_doc_urls], browser_semaphore=self.browser_semaphore, file_loader_kwargs=self.file_loader_kwargs, ) @@ -829,6 +976,9 @@ async def _download_known_url_documents(self): if not docs: return None + _add_known_doc_attrs_to_all_docs( + docs, self.known_doc_urls, key="source" + ) docs = await filter_ordinance_docs( docs, self.jurisdiction, @@ -1418,3 +1568,21 @@ def _compute_total_cost_from_usage(tracked_usage): ) return total_cost + + +def _add_known_doc_attrs_to_all_docs(docs, doc_infos, key): + """Add user-defined doc attributes to all loaded docs""" + for doc in docs: + source_fp = doc.attrs.get(key) + if not source_fp: + continue + + _add_known_doc_attrs(doc, source_fp, doc_infos, key) + + +def _add_known_doc_attrs(doc, source_fp, doc_infos, key): + """Add user-defined doc attributes to a loaded doc""" + for info in doc_infos: + if str(info[key]) == str(source_fp): + doc.attrs.update(info) + return diff --git a/compass/services/cpu.py b/compass/services/cpu.py index 77135bd82..480be34b0 
100644 --- a/compass/services/cpu.py +++ b/compass/services/cpu.py @@ -3,6 +3,7 @@ import ast import asyncio import contextlib +from pathlib import Path from functools import partial from concurrent.futures import ProcessPoolExecutor @@ -94,6 +95,29 @@ def _read_pdf_ocr(pdf_bytes, tesseract_cmd, **kwargs): return doc +def _read_pdf_file(pdf_fp, **kwargs): + """Utility func so that pdftotext.PDF doesn't have to be pickled""" + with Path(pdf_fp).open("rb") as fh: + pdf_bytes = fh.read() + + pages = read_pdf(pdf_bytes, verbose=False) + return PDFDocument(pages, **kwargs), pdf_bytes + + +def _read_pdf_file_ocr(pdf_fp, tesseract_cmd, **kwargs): + """Utility function that mimics `_read_pdf_file`""" + if tesseract_cmd: + _configure_pytesseract(tesseract_cmd) + + with Path(pdf_fp).open("rb") as fh: + pdf_bytes = fh.read() + + pages = read_pdf_ocr(pdf_bytes, verbose=False) + doc = PDFDocument(_try_decode_ocr_pages(pages), **kwargs) + doc.attrs["from_ocr"] = True + return doc, pdf_bytes + + def _configure_pytesseract(tesseract_cmd): """Set the tesseract_cmd""" import pytesseract # noqa: PLC0415 @@ -130,6 +154,25 @@ async def read_pdf_doc(pdf_bytes, **kwargs): return await PDFLoader.call(_read_pdf, pdf_bytes, **kwargs) +async def read_pdf_file(pdf_fp, **kwargs): + """Read local PDF file in a Process Pool + + Parameters + ---------- + pdf_fp : path-like + Path to PDF file (non-OCR). + **kwargs + Keyword-value arguments to pass to + :class:`elm.web.document.PDFDocument` initializer. + + Returns + ------- + elm.web.document.PDFDocument + PDFDocument instances with pages loaded as text. 
+ """ + return await PDFLoader.call(_read_pdf_file, pdf_fp, **kwargs) + + async def read_pdf_doc_ocr(pdf_bytes, **kwargs): """Read PDF file using OCR (pytesseract) @@ -158,3 +201,33 @@ async def read_pdf_doc_ocr(pdf_bytes, **kwargs): tesseract_cmd=pytesseract.pytesseract.tesseract_cmd, **kwargs, ) + + +async def read_pdf_file_ocr(pdf_fp, **kwargs): + """Read local PDF file using OCR (pytesseract) + + Note that Pytesseract must be set up properly for this method to + work. In particular, the `pytesseract.pytesseract.tesseract_cmd` + attribute must be set to point to the pytesseract exe. + + Parameters + ---------- + pdf_fp : path-like + Path to PDF file (OCR). + **kwargs + Keyword-value arguments to pass to + :class:`elm.web.document.PDFDocument` initializer. + + Returns + ------- + elm.web.document.PDFDocument + PDFDocument instances with pages loaded as text. + """ + import pytesseract # noqa: PLC0415 + + return await OCRPDFLoader.call( + _read_pdf_file_ocr, + pdf_fp, + tesseract_cmd=pytesseract.pytesseract.tesseract_cmd, + **kwargs, + ) diff --git a/compass/services/threaded.py b/compass/services/threaded.py index c5bbb10c9..c91951444 100644 --- a/compass/services/threaded.py +++ b/compass/services/threaded.py @@ -1,6 +1,7 @@ """COMPASS Ordinance Threaded services""" import json +import uuid import shutil import asyncio import hashlib @@ -12,7 +13,7 @@ from datetime import datetime, timedelta from concurrent.futures import ThreadPoolExecutor -from elm.web.document import PDFDocument +from elm.web.document import PDFDocument, HTMLDocument from elm.web.utilities import write_url_doc_to_file from compass import COMPASS_DEBUG_LEVEL @@ -29,6 +30,13 @@ def _cache_file_with_hash(doc, file_content, out_dir, make_name_unique=False): """Cache file and compute its hash""" + if "source" not in doc.attrs: + doc.attrs["source"] = ( + str(doc.attrs["source_fp"]) + if "source_fp" in doc.attrs + else str(uuid.uuid4()) + ) + cache_fp = write_url_doc_to_file( doc=doc, 
file_content=file_content, @@ -456,6 +464,31 @@ async def process( self._is_processing = False +class HTMLFileLoader(ThreadedService): + """Service that loads HTML files from disk""" + + @property + def can_process(self): + """bool: ``True`` because can always read file""" + return True + + async def process(self, html_fp, **kwargs): + """Read HTML file from disk + + Parameters + ---------- + html_fp : path-like + Path to HTML file on disk. + **kwargs + Additional keyword-value argument pairs to pass to + :class:`elm.web.document.HTMLDocument`. + """ + loop = asyncio.get_running_loop() + return await loop.run_in_executor( + self.pool, _read_html_file, html_fp, **kwargs + ) + + def _dump_usage(fp, tracker): """Dump usage to an existing file""" if not Path(fp).exists(): @@ -556,3 +589,30 @@ def _compute_jurisdiction_cost(usage_tracker): ) return total_cost + + +def _read_html_file(html_fp, **kwargs): + """Default read HTML function (runs in main thread)""" + with Path(html_fp).open("r", encoding="utf-8") as fh: + text = fh.read() + + return HTMLDocument([text], **kwargs), text + + +async def read_html_file(html_fp, **kwargs): + """Read HTML file in a threaded pool + + Parameters + ---------- + html_fp : path-like + Path to HTML file on disk. + **kwargs + Keyword-value argument pairs to pass to + :class:`elm.web.document.HTMLDocument`. + + Returns + ------- + elm.web.document.HTMLDocument + HTMLDocument instance with text loaded into page. 
+ """ + return await HTMLFileLoader.call(html_fp, **kwargs) diff --git a/compass/utilities/__init__.py b/compass/utilities/__init__.py index 04754bb38..0251ef138 100644 --- a/compass/utilities/__init__.py +++ b/compass/utilities/__init__.py @@ -20,6 +20,7 @@ ordinances_bool_index, ) from .nt import ProcessKwargs, TechSpec +from .io import load_local_docs RTS_SEPARATORS = [ diff --git a/compass/utilities/io.py b/compass/utilities/io.py new file mode 100644 index 000000000..daa367b7f --- /dev/null +++ b/compass/utilities/io.py @@ -0,0 +1,45 @@ +"""COMPASS I/O utilities""" + +import pprint +import logging + +from elm.web.file_loader import AsyncLocalFileLoader + + +logger = logging.getLogger(__name__) + + +async def load_local_docs(fps, **kwargs): + """Load a document for each input filepath + + Parameters + ---------- + fps : iterable of path-like + Iterable of paths representing documents to load. + kwargs + Keyword-argument pairs to initialize + :class:`elm.web.file_loader.AsyncLocalFileLoader`. + + Returns + ------- + list + List of non-empty document instances containing information from + the files. If a file could not be loaded (i.e. document instance + is empty), it will not be included in the output list. 
+ """ + logger.trace("Loading docs for the following paths:\n%r", fps) + logger.trace( + "kwargs for AsyncLocalFileLoader:\n%s", + pprint.PrettyPrinter().pformat(kwargs), + ) + file_loader = AsyncLocalFileLoader(**kwargs) + docs = await file_loader.fetch_all(*fps) + + page_lens = { + doc.attrs.get("source_fp", "Unknown"): len(doc.pages) for doc in docs + } + logger.debug( + "Loaded the following number of pages for docs:\n%s", + pprint.PrettyPrinter().pformat(page_lens), + ) + return [doc for doc in docs if not doc.empty] diff --git a/compass/utilities/nt.py b/compass/utilities/nt.py index b99b6b897..d937aad32 100644 --- a/compass/utilities/nt.py +++ b/compass/utilities/nt.py @@ -5,6 +5,7 @@ ProcessKwargs = namedtuple( "ProcessKwargs", [ + "known_local_docs", "known_doc_urls", "file_loader_kwargs", "td_kwargs", diff --git a/compass/validation/content.py b/compass/validation/content.py index a7c478797..8e41cbe33 100644 --- a/compass/validation/content.py +++ b/compass/validation/content.py @@ -312,7 +312,7 @@ async def _check_chunk_for_legal_text(self, key, text_chunk): async def parse_by_chunks( chunk_parser, heuristic, - legal_text_validator, + legal_text_validator=None, callbacks=None, min_chunks_to_process=3, ): @@ -334,9 +334,10 @@ async def parse_by_chunks( fast check meant to quickly dispose of chunks of text. Any chunk that fails this check will NOT be passed to the callback parsers. - legal_text_validator : LegalTextValidator + legal_text_validator : LegalTextValidator, optional Instance of `LegalTextValidator` that can be used to validate - each chunk for legal text. + each chunk for legal text. If not provided, the legal text check + will be skipped. By default, ``None``. 
callbacks : list, optional List of async callbacks that take a `chunk_parser` and `index` as inputs and return a boolean determining whether the text @@ -353,14 +354,18 @@ async def parse_by_chunks( for ind, text in enumerate(chunk_parser.text_chunks): passed_heuristic_mem.append(heuristic.check(text)) if ind < min_chunks_to_process: - is_legal = await legal_text_validator.check_chunk( - chunk_parser, ind - ) - if not is_legal: # don't bother checking this chunk - continue + if legal_text_validator is not None: + is_legal = await legal_text_validator.check_chunk( + chunk_parser, ind + ) + if not is_legal: # don't bother checking this chunk + continue # don't bother checking this document - elif not legal_text_validator.is_legal_text: + elif ( + legal_text_validator is not None + and not legal_text_validator.is_legal_text + ): return # hasn't passed heuristic, so don't pass it to callbacks diff --git a/compass/validation/location.py b/compass/validation/location.py index 0d2145c02..0a54682dd 100644 --- a/compass/validation/location.py +++ b/compass/validation/location.py @@ -7,7 +7,7 @@ import asyncio import logging -from elm.web.file_loader import AsyncFileLoader +from elm.web.file_loader import AsyncWebFileLoader from compass.llm.calling import BaseLLMCaller, ChatLLMCaller, LLMCaller from compass.common import setup_async_decision_tree, run_async_tree @@ -267,7 +267,7 @@ def __init__( limits are applied. By default, ``None``. file_loader_kwargs : dict, optional Dictionary of keyword arguments pairs to initialize - :class:`elm.web.file_loader.AsyncFileLoader`. + :class:`elm.web.file_loader.AsyncWebFileLoader`. By default, ``None``. 
**kwargs Additional keyword arguments to pass to the @@ -302,7 +302,7 @@ async def check(self, url, jurisdiction): if url_is_correct_jurisdiction: return True - fl = AsyncFileLoader( + fl = AsyncWebFileLoader( browser_semaphore=self.browser_semaphore, **self.file_loader_kwargs, ) diff --git a/compass/web/website_crawl.py b/compass/web/website_crawl.py index d71694a37..63df168c7 100644 --- a/compass/web/website_crawl.py +++ b/compass/web/website_crawl.py @@ -26,7 +26,7 @@ from playwright._impl._errors import Error as PlaywrightError # noqa: PLC2701 from elm.web.utilities import pw_page from elm.web.document import PDFDocument, HTMLDocument -from elm.web.file_loader import AsyncFileLoader +from elm.web.file_loader import AsyncWebFileLoader from elm.web.website_crawl import ELMLinkScorer, _SCORE_KEY # noqa: PLC2701 @@ -149,9 +149,9 @@ def __init__( contains the link title text. file_loader_kwargs : dict, optional Additional keyword-value argument pairs to pass to the - :class:`~elm.web.file_loader.AsyncFileLoader` class. If this - dictionary contains the ``pw_launch_kwargs`` key, it's value - (assumes to be another dictionary) will be used to + :class:`~elm.web.file_loader.AsyncWebFileLoader` class. If + this dictionary contains the ``pw_launch_kwargs`` key, it's + value (assumes to be another dictionary) will be used to initialize the playwright instances used for the crawl. By default, ``None``. 
already_visited : set, optional @@ -181,7 +181,7 @@ def __init__( file_loader_kwargs = file_loader_kwargs or {} flk = {"verify_ssl": False} flk.update(file_loader_kwargs or {}) - self.afl = AsyncFileLoader(**flk) + self.afl = AsyncWebFileLoader(**flk) self.pw_launch_kwargs = ( file_loader_kwargs.get("pw_launch_kwargs") or {} ) diff --git a/pixi.lock b/pixi.lock index 07d81099c..aea93992f 100644 --- a/pixi.lock +++ b/pixi.lock @@ -20,7 +20,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-25.4.0-pyh71513ae_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.14.2-pyha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.1.0-py313h7033f15_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.2.0-py313h09d1b84_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/bzip2-1.0.8-hda65f42_8.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/c-ares-1.34.5-hb9d3cd8_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.10.5-hbd8a1cb_0.conda @@ -244,7 +244,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/d9/69/4402ea66272dacc10b298cca18ed73e1c0791ff2ae9ed218d3859f9698ac/h5py-3.15.1-cp313-cp313-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl - pypi: https://files.pythonhosted.org/packages/66/da/412cc1711b6c77b7ca852f48b93bae5d8722cdabe86e9427ea2e204dfefd/h5pyd-0.23.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/9a/92/cf3ab0b652b082e66876d08da57fcc6fa2f0e6c70dfbbafbd470bb73eb47/hf_xet-1.2.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: 
https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl @@ -312,7 +312,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/binutils-2.44-hf1166c9_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/binutils_impl_linux-aarch64-2.44-ha36da51_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/binutils_linux-aarch64-2.44-hf1166c9_4.conda - - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-python-1.1.0-py313he352c24_4.conda + - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-python-1.2.0-py313h41095e9_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/bzip2-1.0.8-h4777abc_8.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/c-ares-1.34.5-h86ecc28_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/c-compiler-1.11.0-hdceaead_0.conda @@ -574,7 +574,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/6a/c2/fc6375d07ea3962df7afad7d863fe4bde18bb88530678c20d4c90c18de1d/h5py-3.15.1-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl - pypi: https://files.pythonhosted.org/packages/66/da/412cc1711b6c77b7ca852f48b93bae5d8722cdabe86e9427ea2e204dfefd/h5pyd-0.23.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/46/92/3f7ec4a1b6a65bf45b059b6d4a5d38988f63e193056de2f420137e3c3244/hf_xet-1.2.0-cp37-abi3-manylinux_2_28_aarch64.whl - - pypi: 
https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl @@ -638,7 +638,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-25.4.0-pyh71513ae_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.14.2-pyha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-python-1.1.0-py313h253db18_4.conda + - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-python-1.2.0-py313hd4eab94_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/bzip2-1.0.8-h500dc9f_8.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/c-ares-1.34.5-hf13058a_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.10.5-hbd8a1cb_0.conda @@ -853,7 +853,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/88/b3/40207e0192415cbff7ea1d37b9f24b33f6d38a5a2f5d18a678de78f967ae/h5py-3.15.1-cp313-cp313-macosx_10_13_x86_64.whl - pypi: https://files.pythonhosted.org/packages/66/da/412cc1711b6c77b7ca852f48b93bae5d8722cdabe86e9427ea2e204dfefd/h5pyd-0.23.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/96/2d/22338486473df5923a9ab7107d375dbef9173c338ebef5098ef593d2b560/hf_xet-1.2.0-cp37-abi3-macosx_10_12_x86_64.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl @@ -919,7 +919,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-25.4.0-pyh71513ae_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.14.2-pyha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-python-1.1.0-py313hb4b7877_4.conda + - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-python-1.2.0-py313h79bbab8_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/bzip2-1.0.8-hd037594_8.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/c-ares-1.34.5-h5505292_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.10.5-hbd8a1cb_0.conda @@ -1134,7 +1134,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/31/96/ba99a003c763998035b0de4c299598125df5fc6c9ccf834f152ddd60e0fb/h5py-3.15.1-cp313-cp313-macosx_11_0_arm64.whl - pypi: 
https://files.pythonhosted.org/packages/66/da/412cc1711b6c77b7ca852f48b93bae5d8722cdabe86e9427ea2e204dfefd/h5pyd-0.23.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/7f/8c/c5becfa53234299bc2210ba314eaaae36c2875e0045809b82e40a9544f0c/hf_xet-1.2.0-cp37-abi3-macosx_11_0_arm64.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl @@ -1201,7 +1201,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/asttokens-3.0.0-pyhd8ed1ab_1.conda - conda: https://conda.anaconda.org/conda-forge/noarch/attrs-25.4.0-pyh71513ae_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/beautifulsoup4-4.14.2-pyha770c72_0.conda - - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-python-1.1.0-py313hfe59770_4.conda + - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-python-1.2.0-py313hf510273_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/bzip2-1.0.8-h0ad9c76_8.conda - conda: https://conda.anaconda.org/conda-forge/noarch/ca-certificates-2025.10.5-h4c7d964_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/cachetools-6.2.1-pyhd8ed1ab_0.conda @@ -1409,7 +1409,7 @@ environments: - pypi: 
https://files.pythonhosted.org/packages/e5/ea/fbb258a98863f99befb10ed727152b4ae659f322e1d9c0576f8a62754e81/h5py-3.15.1-cp313-cp313-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/66/da/412cc1711b6c77b7ca852f48b93bae5d8722cdabe86e9427ea2e204dfefd/h5pyd-0.23.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/cb/44/870d44b30e1dcfb6a65932e3e1506c103a8a5aea9103c337e7a53180322c/hf_xet-1.2.0-cp37-abi3-win_amd64.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/20/b0/36bd937216ec521246249be3bf9855081de4c5e06a0c9b4219dbeda50373/importlib_metadata-8.7.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/62/a1/3d680cbfd5f4b8f15abc1d571870c5fc3e594bb582bc3b64ea099db13e56/jinja2-3.1.6-py3-none-any.whl @@ -1568,7 +1568,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.44-h1aa0949_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/leptonica-1.83.1-hb768ceb_6.conda @@ -1902,7 +1902,7 @@ 
environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.17-hc88f144_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.44-hd32f0e1_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/leptonica-1.83.1-h1cde89c_6.conda @@ -2222,7 +2222,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.17-h72f5680_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/leptonica-1.83.1-h19e8429_6.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hcca01a6_1.conda @@ -2520,7 +2520,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: 
https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lcms2-2.17-h7eeda09_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/leptonica-1.83.1-h64fa29b_6.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lerc-4.0.0-hd64df32_1.conda @@ -2817,7 +2817,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lcms2-2.17-hbcf6048_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/leptonica-1.83.1-hb723d09_6.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h6470a55_1.conda @@ -3051,8 +3051,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-6.2.0-pyh29332c3_4.conda - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-with-css-6.2.0-h82add2a_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/blosc-1.21.6-he440d0b_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.59-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.59-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.60-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.60-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-1.1.0-hb03c661_4.conda - conda: 
https://conda.anaconda.org/conda-forge/linux-64/brotli-bin-1.1.0-hb03c661_4.conda @@ -3177,7 +3177,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.44-h1aa0949_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/leptonica-1.83.1-hb768ceb_6.conda @@ -3510,8 +3510,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-6.2.0-pyh29332c3_4.conda - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-with-css-6.2.0-h82add2a_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/blosc-1.21.6-hb4dfabd_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.59-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.59-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.60-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.60-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-1.1.0-he30d5cf_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-bin-1.1.0-he30d5cf_4.conda @@ -3658,7 +3658,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.17-hc88f144_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.44-hd32f0e1_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/leptonica-1.83.1-h1cde89c_6.conda @@ -4003,8 +4003,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-6.2.0-pyh29332c3_4.conda - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-with-css-6.2.0-h82add2a_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/blosc-1.21.6-hd145fbb_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.59-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.59-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.60-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.60-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-1.1.0-h1c43f85_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-bin-1.1.0-h1c43f85_4.conda @@ -4126,7 +4126,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - 
conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.17-h72f5680_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/leptonica-1.83.1-h19e8429_6.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hcca01a6_1.conda @@ -4448,8 +4448,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-6.2.0-pyh29332c3_4.conda - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-with-css-6.2.0-h82add2a_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/blosc-1.21.6-h7dd00d9_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.59-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.59-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.60-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.60-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-1.1.0-h6caf38d_4.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-bin-1.1.0-h6caf38d_4.conda @@ -4571,7 +4571,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lcms2-2.17-h7eeda09_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/leptonica-1.83.1-h64fa29b_6.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/lerc-4.0.0-hd64df32_1.conda @@ -4893,8 +4893,8 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-6.2.0-pyh29332c3_4.conda - conda: https://conda.anaconda.org/conda-forge/noarch/bleach-with-css-6.2.0-h82add2a_4.conda - conda: https://conda.anaconda.org/conda-forge/win-64/blosc-1.21.6-hfd34d9b_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.59-pyhd8ed1ab_0.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.59-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.60-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.60-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-1.1.0-hfd05255_4.conda - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-bin-1.1.0-hfd05255_4.conda @@ -5013,7 +5013,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lcms2-2.17-hbcf6048_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/leptonica-1.83.1-hb723d09_6.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h6470a55_1.conda @@ -5407,7 +5407,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - 
conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.44-h1aa0949_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/leptonica-1.83.1-hb768ceb_6.conda @@ -5741,7 +5741,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.17-hc88f144_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.44-hd32f0e1_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/leptonica-1.83.1-h1cde89c_6.conda @@ -6064,7 +6064,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.17-h72f5680_0.conda - conda: 
https://conda.anaconda.org/conda-forge/osx-64/leptonica-1.83.1-h19e8429_6.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hcca01a6_1.conda @@ -6365,7 +6365,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lcms2-2.17-h7eeda09_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/leptonica-1.83.1-h64fa29b_6.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lerc-4.0.0-hd64df32_1.conda @@ -6665,7 +6665,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lcms2-2.17-hbcf6048_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/leptonica-1.83.1-hb723d09_6.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h6470a55_1.conda @@ -6970,7 +6970,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: 
https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/ld_impl_linux-64-2.44-h1aa0949_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-64/leptonica-1.83.1-hb768ceb_6.conda @@ -7303,7 +7303,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/lcms2-2.17-hc88f144_0.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/ld_impl_linux-aarch64-2.44-hd32f0e1_4.conda - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/leptonica-1.83.1-h1cde89c_6.conda @@ -7624,7 +7624,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/lcms2-2.17-h72f5680_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-64/leptonica-1.83.1-h19e8429_6.conda - 
conda: https://conda.anaconda.org/conda-forge/osx-64/lerc-4.0.0-hcca01a6_1.conda @@ -7924,7 +7924,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lcms2-2.17-h7eeda09_0.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/leptonica-1.83.1-h64fa29b_6.conda - conda: https://conda.anaconda.org/conda-forge/osx-arm64/lerc-4.0.0-hd64df32_1.conda @@ -8223,7 +8223,7 @@ environments: - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-core-1.0.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langchain-text-splitters-1.0.0-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/noarch/langsmith-0.3.45-pyhd8ed1ab_1.conda - - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda + - conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lcms2-2.17-hbcf6048_0.conda - conda: https://conda.anaconda.org/conda-forge/win-64/leptonica-1.83.1-hb723d09_6.conda - conda: https://conda.anaconda.org/conda-forge/win-64/lerc-4.0.0-h6470a55_1.conda @@ -8609,7 +8609,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/d5/4e/e4691508f9477ce67da2015d8c00acd751e6287739123113a9fca6f1604e/frozenlist-1.8.0-cp313-cp313-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl - pypi: https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl - 
pypi: https://files.pythonhosted.org/packages/eb/43/aa9a10d0c971d0a0e353111a97913357f9271fb9a9867ec1053f79ca61be/geoip2-5.1.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/77/93/ecf9f7caa99c71e969091e9a78789f11b2dea5c684917eab7c54a8d13560/google_api_core-2.27.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/54/8a/c75ed5fd7819742201ffffbd61bb081af4819ea882a6b84930fa93f8e96f/google_api_core-2.28.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fa/28/be3b17bd6a190c8c2ec9e4fb65d43e6ecd7b7a1bb19ccc1d9ab4f687a58c/google_api_python_client-2.185.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl @@ -8627,7 +8627,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8c/a2/0d269db0f6163be503775dc8b6a6fa15820cc9fdc866f6ba608d86b721f2/httplib2-0.31.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl @@ -8940,7 +8940,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/40/76/c202df58e3acdf12969a7895fd6f3bc016c642e6726aa63bd3025e0fc71c/frozenlist-1.8.0-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl - pypi: https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/eb/43/aa9a10d0c971d0a0e353111a97913357f9271fb9a9867ec1053f79ca61be/geoip2-5.1.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/77/93/ecf9f7caa99c71e969091e9a78789f11b2dea5c684917eab7c54a8d13560/google_api_core-2.27.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/54/8a/c75ed5fd7819742201ffffbd61bb081af4819ea882a6b84930fa93f8e96f/google_api_core-2.28.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fa/28/be3b17bd6a190c8c2ec9e4fb65d43e6ecd7b7a1bb19ccc1d9ab4f687a58c/google_api_python_client-2.185.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl @@ -8958,7 +8958,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8c/a2/0d269db0f6163be503775dc8b6a6fa15820cc9fdc866f6ba608d86b721f2/httplib2-0.31.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl @@ -9213,7 +9213,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/30/ba/b0b3de23f40bc55a7057bd38434e25c34fa48e17f20ee273bbde5e0650f3/frozenlist-1.8.0-cp313-cp313-macosx_10_13_x86_64.whl - pypi: https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/eb/43/aa9a10d0c971d0a0e353111a97913357f9271fb9a9867ec1053f79ca61be/geoip2-5.1.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/77/93/ecf9f7caa99c71e969091e9a78789f11b2dea5c684917eab7c54a8d13560/google_api_core-2.27.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/54/8a/c75ed5fd7819742201ffffbd61bb081af4819ea882a6b84930fa93f8e96f/google_api_core-2.28.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fa/28/be3b17bd6a190c8c2ec9e4fb65d43e6ecd7b7a1bb19ccc1d9ab4f687a58c/google_api_python_client-2.185.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl @@ -9231,7 +9231,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8c/a2/0d269db0f6163be503775dc8b6a6fa15820cc9fdc866f6ba608d86b721f2/httplib2-0.31.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl @@ -9488,7 +9488,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/0c/ab/6e5080ee374f875296c4243c381bbdef97a9ac39c6e3ce1d5f7d42cb78d6/frozenlist-1.8.0-cp313-cp313-macosx_11_0_arm64.whl - pypi: https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/eb/43/aa9a10d0c971d0a0e353111a97913357f9271fb9a9867ec1053f79ca61be/geoip2-5.1.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/77/93/ecf9f7caa99c71e969091e9a78789f11b2dea5c684917eab7c54a8d13560/google_api_core-2.27.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/54/8a/c75ed5fd7819742201ffffbd61bb081af4819ea882a6b84930fa93f8e96f/google_api_core-2.28.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fa/28/be3b17bd6a190c8c2ec9e4fb65d43e6ecd7b7a1bb19ccc1d9ab4f687a58c/google_api_python_client-2.185.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl @@ -9506,7 +9506,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8c/a2/0d269db0f6163be503775dc8b6a6fa15820cc9fdc866f6ba608d86b721f2/httplib2-0.31.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl @@ -9744,7 +9744,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/d8/cf/174c91dbc9cc49bc7b7aab74d8b734e974d1faa8f191c74af9b7e80848e6/frozenlist-1.8.0-cp313-cp313-win_amd64.whl - pypi: https://files.pythonhosted.org/packages/47/71/70db47e4f6ce3e5c37a607355f80da8860a33226be640226ac52cb05ef2e/fsspec-2025.9.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/eb/43/aa9a10d0c971d0a0e353111a97913357f9271fb9a9867ec1053f79ca61be/geoip2-5.1.0-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/77/93/ecf9f7caa99c71e969091e9a78789f11b2dea5c684917eab7c54a8d13560/google_api_core-2.27.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/54/8a/c75ed5fd7819742201ffffbd61bb081af4819ea882a6b84930fa93f8e96f/google_api_core-2.28.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/fa/28/be3b17bd6a190c8c2ec9e4fb65d43e6ecd7b7a1bb19ccc1d9ab4f687a58c/google_api_python_client-2.185.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/a4/7319a2a8add4cc352be9e3efeff5e2aacee917c85ca2fa1647e29089983c/google_auth-2.41.1-py2.py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/be/8a/fe34d2f3f9470a27b01c9e76226965863f153d5fbe276f83608562e49c04/google_auth_httplib2-0.2.0-py2.py3-none-any.whl @@ -9762,7 +9762,7 @@ environments: - pypi: https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/8c/a2/0d269db0f6163be503775dc8b6a6fa15820cc9fdc866f6ba608d86b721f2/httplib2-0.31.0-py3-none-any.whl - pypi: 
https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl - - pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl + - pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/c3/5b/9512c5fb6c8218332b530f13500c6ff5f3ce3342f35e0dd7be9ac3856fd3/humanize-4.14.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/48/30/47d0bf6072f7252e6521f3447ccfa40b421b6824517f82854703d0f5a98b/hyperframe-6.1.0-py3-none-any.whl - pypi: https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl @@ -10430,7 +10430,7 @@ packages: license: MIT license_family: MIT purls: - - pkg:pypi/argon2-cffi-bindings?source=compressed-mapping + - pkg:pypi/argon2-cffi-bindings?source=hash-mapping size: 34013 timestamp: 1759487134505 - conda: https://conda.anaconda.org/conda-forge/win-64/argon2-cffi-bindings-25.1.0-py313h5ea7bf4_1.conda @@ -10618,7 +10618,7 @@ packages: license: MIT license_family: MIT purls: - - pkg:pypi/attrs?source=compressed-mapping + - pkg:pypi/attrs?source=hash-mapping size: 60101 timestamp: 1759762331492 - conda: https://conda.anaconda.org/conda-forge/noarch/babel-2.17.0-pyhd8ed1ab_0.conda @@ -10846,11 +10846,11 @@ packages: purls: [] size: 49840 timestamp: 1733513605730 -- conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.59-pyhd8ed1ab_0.conda - sha256: dd76eeac294d8999e2bd500654d003ed1d847a88095b317d3c3e894a485be0e8 - md5: 4d8abb8bd813fc0e0e3f89b9d4ab794d +- conda: https://conda.anaconda.org/conda-forge/noarch/boto3-1.40.60-pyhd8ed1ab_0.conda + sha256: fc38d85e42485c5314f1b0ae7cc926f32ea331a9a1a881a4d10beb864cdc63a2 + md5: ee5e22c53ba363648bcc7b543e3a7d5e depends: 
- - botocore >=1.40.59,<1.41.0 + - botocore >=1.40.60,<1.41.0 - jmespath >=0.7.1,<2.0.0 - python >=3.10 - s3transfer >=0.14.0,<0.15.0 @@ -10858,8 +10858,8 @@ packages: license_family: Apache purls: - pkg:pypi/boto3?source=hash-mapping - size: 83931 - timestamp: 1761345176072 + size: 83535 + timestamp: 1761605704421 - pypi: https://files.pythonhosted.org/packages/fc/7b/dce396a3f7078e0432d40a9778602cbf0785ca91e7bcb64e05f19dfb5662/botocore-1.40.49-py3-none-any.whl name: botocore version: 1.40.49 @@ -10871,9 +10871,9 @@ packages: - urllib3>=1.25.4,!=2.2.0,<3 ; python_full_version >= '3.10' - awscrt==0.27.6 ; extra == 'crt' requires_python: '>=3.9' -- conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.59-pyhd8ed1ab_0.conda - sha256: aef6af2c651636292825d0a6aa4fb279185d28376290a7fe784beb449ba84d15 - md5: 191c52f7c125a900e4efa089b959afe6 +- conda: https://conda.anaconda.org/conda-forge/noarch/botocore-1.40.60-pyhd8ed1ab_0.conda + sha256: 84e9fc1920b72d267f1f53da380f8105d6b6ec3fd87174ff14ed2444e5c454a6 + md5: 349ddf56035a409dd24e3095babbf1dd depends: - jmespath >=0.7.1,<2.0.0 - python >=3.10 @@ -10882,9 +10882,9 @@ packages: license: Apache-2.0 license_family: Apache purls: - - pkg:pypi/botocore?source=hash-mapping - size: 8049796 - timestamp: 1761342808252 + - pkg:pypi/botocore?source=compressed-mapping + size: 8108533 + timestamp: 1761597111977 - conda: https://conda.anaconda.org/conda-forge/noarch/branca-0.8.2-pyhd8ed1ab_0.conda sha256: 1acf87c77d920edd098ddc91fa785efc10de871465dee0f463815b176e019e8b md5: 1fcdf88e7a8c296d3df8409bf0690db4 @@ -11065,6 +11065,23 @@ packages: - pkg:pypi/brotli?source=compressed-mapping size: 353639 timestamp: 1756599425945 +- conda: https://conda.anaconda.org/conda-forge/linux-64/brotli-python-1.2.0-py313h09d1b84_0.conda + sha256: 93eeadb5ef4ae211edb01f4a4d837e4b5ceba8ddaefdd68a0c982503c8cc86d1 + md5: dfd94363b679c74937b3926731ee861a + depends: + - __glibc >=2.17,<3.0.a0 + - libgcc >=14 + - libstdcxx >=14 + - python 
>=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + constrains: + - libbrotlicommon 1.2.0 h09219d5_0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/brotli?source=hash-mapping + size: 367767 + timestamp: 1761592405814 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-python-1.1.0-py313he352c24_4.conda sha256: 41f8e857f91fcc0e731dd02d44e8b730750c76dd00bedd0939e25fac7fbf8572 md5: d5993a664b52718233b0d7d8c72f71aa @@ -11082,6 +11099,23 @@ packages: - pkg:pypi/brotli?source=hash-mapping size: 358241 timestamp: 1756599658209 +- conda: https://conda.anaconda.org/conda-forge/linux-aarch64/brotli-python-1.2.0-py313h41095e9_0.conda + sha256: 58106e9512031afaaeedbec27ebe132df71ee51d8987adc9a147e0659536f214 + md5: 9969a2c8b28fded804adfe3edbbc98cd + depends: + - libgcc >=14 + - libstdcxx >=14 + - python >=3.13,<3.14.0a0 + - python >=3.13,<3.14.0a0 *_cp313 + - python_abi 3.13.* *_cp313 + constrains: + - libbrotlicommon 1.2.0 hd4db518_0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/brotli?source=hash-mapping + size: 373445 + timestamp: 1761593044312 - conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-python-1.1.0-py313h253db18_4.conda sha256: fc4db6916598d1c634de85337db6d351d6f1cb8a93679715e0ee572777a5007e md5: 8643345f12d0db3096a8aa0abd74f6e9 @@ -11098,6 +11132,22 @@ packages: - pkg:pypi/brotli?source=hash-mapping size: 369082 timestamp: 1756600456664 +- conda: https://conda.anaconda.org/conda-forge/osx-64/brotli-python-1.2.0-py313hd4eab94_0.conda + sha256: a70711b223c82d92ec9edd8cfef4305d803331d46b0ef48e55a4d63e124c9a0d + md5: ece2240943077fc695e29eb4e5596c77 + depends: + - __osx >=10.13 + - libcxx >=19 + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + constrains: + - libbrotlicommon 1.2.0 h105ed1c_0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/brotli?source=hash-mapping + size: 390098 + timestamp: 1761593175378 - conda: 
https://conda.anaconda.org/conda-forge/osx-arm64/brotli-python-1.1.0-py313hb4b7877_4.conda sha256: a6402a7186ace5c3eb21ed4ce50eda3592c44ce38ab4e9a7ddd57d72b1e61fb3 md5: 9518cd948fc334d66119c16a2106a959 @@ -11115,6 +11165,23 @@ packages: - pkg:pypi/brotli?source=hash-mapping size: 341104 timestamp: 1756600117644 +- conda: https://conda.anaconda.org/conda-forge/osx-arm64/brotli-python-1.2.0-py313h79bbab8_0.conda + sha256: 74cf2c9450519acbdf32bb1ccc0adbd0c50db824ba5da9a27c4ff06d5e6e600b + md5: 213c6812f610efede1b2316540409a65 + depends: + - __osx >=11.0 + - libcxx >=19 + - python >=3.13,<3.14.0a0 + - python >=3.13,<3.14.0a0 *_cp313 + - python_abi 3.13.* *_cp313 + constrains: + - libbrotlicommon 1.2.0 h87ba0bc_0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/brotli?source=hash-mapping + size: 359894 + timestamp: 1761592891981 - conda: https://conda.anaconda.org/conda-forge/win-64/brotli-python-1.1.0-py313hfe59770_4.conda sha256: 0e98ebafd586c4da7d848f9de94770cb27653ba9232a2badb28f8a01f6e48fb5 md5: 477bf04a8a3030368068ccd39b8c5532 @@ -11132,6 +11199,23 @@ packages: - pkg:pypi/brotli?source=compressed-mapping size: 323459 timestamp: 1756600051044 +- conda: https://conda.anaconda.org/conda-forge/win-64/brotli-python-1.2.0-py313hf510273_0.conda + sha256: 29020d8d62652cdd1c841c4b23563efc2558dc6b97e272f63ee6731e0513df94 + md5: 7cdbffd86ca06b75fee15d2762b3616d + depends: + - python >=3.13,<3.14.0a0 + - python_abi 3.13.* *_cp313 + - ucrt >=10.0.20348.0 + - vc >=14.3,<15 + - vc14_runtime >=14.44.35208 + constrains: + - libbrotlicommon 1.2.0 hc82b238_0 + license: MIT + license_family: MIT + purls: + - pkg:pypi/brotli?source=hash-mapping + size: 335623 + timestamp: 1761592891692 - pypi: https://files.pythonhosted.org/packages/8b/53/c60eb5bd26cf8689e361031bebc431437bc988555e80ba52d48c12c1d866/browserforge-1.2.3-py3-none-any.whl name: browserforge version: 1.2.3 @@ -14551,10 +14635,10 @@ packages: purls: [] size: 101935 timestamp: 1761246820068 -- pypi: 
https://files.pythonhosted.org/packages/77/93/ecf9f7caa99c71e969091e9a78789f11b2dea5c684917eab7c54a8d13560/google_api_core-2.27.0-py3-none-any.whl +- pypi: https://files.pythonhosted.org/packages/54/8a/c75ed5fd7819742201ffffbd61bb081af4819ea882a6b84930fa93f8e96f/google_api_core-2.28.0-py3-none-any.whl name: google-api-core - version: 2.27.0 - sha256: 779a380db4e21a4ee3d717cf8efbf324e53900bf37e1ffb273e5348a9916dd42 + version: 2.28.0 + sha256: b4362b0e2e6bc06037cfb0e2b28e2fe0c3f9d760dc311f314d5fb373768c7387 requires_dist: - googleapis-common-protos>=1.56.2,<2.0.0 - protobuf>=3.19.5,!=3.20.0,!=3.20.1,!=4.21.0,!=4.21.1,!=4.21.2,!=4.21.3,!=4.21.4,!=4.21.5,<7.0.0 @@ -15778,10 +15862,10 @@ packages: - pkg:pypi/httpx?source=hash-mapping size: 63082 timestamp: 1733663449209 -- pypi: https://files.pythonhosted.org/packages/6e/5a/221bb56bff551f5f5652c2a0bcb56ad347b8e1f13e95709b3bb9b8a6e545/huggingface_hub-1.0.0-py3-none-any.whl +- pypi: https://files.pythonhosted.org/packages/db/fb/d71f914bc69e6357cbde04db62ef15497cd27926d95f03b4930997c4c390/huggingface_hub-1.0.1-py3-none-any.whl name: huggingface-hub - version: 1.0.0 - sha256: 0f444cfc18ab3e40007a2ba0aa8649389430255c23a2e5f280d43bbcff40e276 + version: 1.0.1 + sha256: 7e255cd9b3432287a34a86933057abb1b341d20b97fb01c40cbd4e053764ae13 requires_dist: - filelock - fsspec>=2023.5.0 @@ -15793,7 +15877,6 @@ packages: - typer-slim - typing-extensions>=3.7.4.3 - hf-xet>=1.2.0,<2.0.0 ; platform_machine == 'AMD64' or platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64' - - aiohttp ; extra == 'all' - authlib>=1.3.2 ; extra == 'all' - fastapi ; extra == 'all' - httpx ; extra == 'all' @@ -15823,7 +15906,6 @@ packages: - types-toml ; extra == 'all' - types-tqdm ; extra == 'all' - types-urllib3 ; extra == 'all' - - aiohttp ; extra == 'dev' - authlib>=1.3.2 ; extra == 'dev' - fastapi ; extra == 'dev' - httpx ; extra == 'dev' @@ -15857,10 +15939,8 @@ packages: - fastai>=2.4 
; extra == 'fastai' - fastcore>=1.3.27 ; extra == 'fastai' - hf-xet>=1.1.3,<2.0.0 ; extra == 'hf-xet' - - aiohttp ; extra == 'inference' - mcp>=1.8.0 ; extra == 'mcp' - typer ; extra == 'mcp' - - aiohttp ; extra == 'mcp' - authlib>=1.3.2 ; extra == 'oauth' - fastapi ; extra == 'oauth' - httpx ; extra == 'oauth' @@ -15869,7 +15949,6 @@ packages: - mypy==1.15.0 ; extra == 'quality' - libcst>=1.4.0 ; extra == 'quality' - ty ; extra == 'quality' - - aiohttp ; extra == 'testing' - authlib>=1.3.2 ; extra == 'testing' - fastapi ; extra == 'testing' - httpx ; extra == 'testing' @@ -16029,7 +16108,7 @@ packages: license: BSD-3-Clause license_family: BSD purls: - - pkg:pypi/idna?source=compressed-mapping + - pkg:pypi/idna?source=hash-mapping size: 50721 timestamp: 1760286526795 - conda: https://conda.anaconda.org/conda-forge/noarch/imagesize-1.4.1-pyhd8ed1ab_0.tar.bz2 @@ -16131,6 +16210,7 @@ packages: constrains: - appnope >=0.1.2 license: BSD-3-Clause + license_family: BSD purls: - pkg:pypi/ipykernel?source=compressed-mapping size: 132289 @@ -16158,6 +16238,7 @@ packages: constrains: - appnope >=0.1.2 license: BSD-3-Clause + license_family: BSD purls: - pkg:pypi/ipykernel?source=compressed-mapping size: 132418 @@ -16185,6 +16266,7 @@ packages: constrains: - appnope >=0.1.2 license: BSD-3-Clause + license_family: BSD purls: - pkg:pypi/ipykernel?source=compressed-mapping size: 133820 @@ -16818,7 +16900,7 @@ packages: license: BSD-3-Clause license_family: BSD purls: - - pkg:pypi/jupyter-core?source=compressed-mapping + - pkg:pypi/jupyter-core?source=hash-mapping size: 65503 timestamp: 1760643864586 - conda: https://conda.anaconda.org/conda-forge/noarch/jupyter_events-0.12.0-pyh29332c3_0.conda @@ -17340,17 +17422,17 @@ packages: - atomicwrites ; extra == 'atomic-cache' - interegular>=0.3.1,<0.4.0 ; extra == 'interegular' requires_python: '>=3.8' -- conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.0-pyhd8ed1ab_0.conda - sha256: 
6370d6a458b4f11a9ab5db7eb05e895f55f276e6aa4c4bbac7dde412c87fae35 - md5: c9ee16acbcea5cc91d9f3eb1d8f903bd +- conda: https://conda.anaconda.org/conda-forge/noarch/lark-1.3.1-pyhd8ed1ab_0.conda + sha256: 49570840fb15f5df5d4b4464db8ee43a6d643031a2bc70ef52120a52e3809699 + md5: 9b965c999135d43a3d0f7bd7d024e26a depends: - python >=3.10 license: MIT license_family: MIT purls: - pkg:pypi/lark?source=compressed-mapping - size: 94267 - timestamp: 1758590674960 + size: 94312 + timestamp: 1761596921009 - conda: https://conda.anaconda.org/conda-forge/linux-64/lcms2-2.17-h717163a_0.conda sha256: d6a61830a354da022eae93fa896d0991385a875c6bba53c82263a289deda9db8 md5: 000e85703f0fd9594c81710dd5066471 @@ -23246,7 +23328,7 @@ packages: timestamp: 1733408419340 - pypi: ./ name: nrel-compass - version: 0.10.1.dev34+g524e3f9.d20251028 + version: 0.10.1.dev13+gd14f410.d20251028 sha256: 981242a42f5d99940fd264b915c33935a6ac37da4e8c037d5d0278bd0a6592e8 requires_dist: - click>=8.1.7,<9 @@ -23588,7 +23670,7 @@ packages: license: BSD-3-Clause license_family: BSD purls: - - pkg:pypi/numpy?source=compressed-mapping + - pkg:pypi/numpy?source=hash-mapping size: 7708038 timestamp: 1761162074399 - conda: https://conda.anaconda.org/conda-forge/osx-64/numpy-2.3.4-py313ha99c057_0.conda @@ -26956,7 +27038,7 @@ packages: license: MIT license_family: MIT purls: - - pkg:pypi/pyparsing?source=compressed-mapping + - pkg:pypi/pyparsing?source=hash-mapping size: 104044 timestamp: 1758436411254 - pypi: https://files.pythonhosted.org/packages/8e/5e/c86a5643653825d3c913719e788e41386bee415c2b87b4f955432f2de6b2/pypdf2-3.0.1-py3-none-any.whl @@ -27158,7 +27240,7 @@ packages: license: MIT license_family: MIT purls: - - pkg:pypi/pytest?source=compressed-mapping + - pkg:pypi/pytest?source=hash-mapping size: 276734 timestamp: 1757011891753 - conda: https://conda.anaconda.org/conda-forge/noarch/pytest-asyncio-0.25.3-pyh29332c3_0.conda @@ -27421,6 +27503,7 @@ packages: - python >=3.10 - python license: BSD-3-Clause + 
license_family: BSD purls: - pkg:pypi/python-dotenv?source=hash-mapping size: 26922 @@ -27692,7 +27775,7 @@ packages: license: BSD-3-Clause license_family: BSD purls: - - pkg:pypi/pyzmq?source=compressed-mapping + - pkg:pypi/pyzmq?source=hash-mapping size: 212218 timestamp: 1757387023399 - conda: https://conda.anaconda.org/conda-forge/linux-aarch64/pyzmq-27.1.0-py312h4552c38_0.conda @@ -31501,7 +31584,7 @@ packages: license: Apache-2.0 license_family: APACHE purls: - - pkg:pypi/websocket-client?source=compressed-mapping + - pkg:pypi/websocket-client?source=hash-mapping size: 61391 timestamp: 1759928175142 - conda: https://conda.anaconda.org/conda-forge/noarch/widgetsnbextension-4.0.14-pyhd8ed1ab_0.conda diff --git a/tests/python/integration/test_integrated.py b/tests/python/integration/test_integrated.py index 76b8e81fc..3bb1dec96 100644 --- a/tests/python/integration/test_integrated.py +++ b/tests/python/integration/test_integrated.py @@ -12,7 +12,7 @@ import openai import elm.web.html_pw from elm.web.search.dux import DuxDistributedGlobalSearch -from elm.web.file_loader import AsyncFileLoader +from elm.web.file_loader import AsyncWebFileLoader from elm.web.document import HTMLDocument from flaky import flaky @@ -220,7 +220,7 @@ async def search_location_with_logs( async def test_async_file_loader_with_temp_cache( monkeypatch, mock_get_methods, sample_file ): - """Test `AsyncFileLoader` with a `TempFileCache` service""" + """Test `AsyncWebFileLoader` with a `TempFileCache` service""" get_meth, get_html = mock_get_methods monkeypatch.setattr(aiohttp.ClientSession, "get", get_meth, raising=True) @@ -232,7 +232,7 @@ async def test_async_file_loader_with_temp_cache( truth = HTMLDocument([content]) async with RunningAsyncServices([TempFileCache()]): - loader = AsyncFileLoader(file_cache_coroutine=TempFileCache.call) + loader = AsyncWebFileLoader(file_cache_coroutine=TempFileCache.call) doc = await loader.fetch("Whatcom") assert doc.text == truth.text assert 
doc.attrs["source"] == "Whatcom" diff --git a/tests/python/unit/utilities/test_utilities_base.py b/tests/python/unit/utilities/test_utilities_base.py index 1f31236a6..5efcd3e98 100644 --- a/tests/python/unit/utilities/test_utilities_base.py +++ b/tests/python/unit/utilities/test_utilities_base.py @@ -1,4 +1,4 @@ -"""Test COMPASS Ordinance logging logic.""" +"""Test COMPASS Ordinance logging logic""" from pathlib import Path diff --git a/tests/python/unit/utilities/test_utilities_io.py b/tests/python/unit/utilities/test_utilities_io.py new file mode 100644 index 000000000..83dd5b048 --- /dev/null +++ b/tests/python/unit/utilities/test_utilities_io.py @@ -0,0 +1,123 @@ +"""Test COMPASS I/O utilities""" + +import os +from pathlib import Path + +import pytest + +from compass.utilities.io import load_local_docs +from compass.services.cpu import ( + PDFLoader, + OCRPDFLoader, + read_pdf_file, + read_pdf_file_ocr, +) +from compass.services.provider import RunningAsyncServices +from compass.services.threaded import read_html_file, HTMLFileLoader +from compass.exceptions import COMPASSNotInitializedError + + +PYT_CMD = os.getenv("TESSERACT_CMD") + + +@pytest.mark.asyncio +async def test_basic_load_pdf(test_data_files_dir): + """Test basic loading of local PDF document""" + test_fp = test_data_files_dir / "Caneadea New York.pdf" + + docs = await load_local_docs([test_fp]) + assert len(docs) == 1 + doc = docs[0] + assert not doc.empty + assert Path(doc.attrs.get("source_fp")) == test_fp + assert len(doc.pages) == 3 + + +@pytest.mark.asyncio +async def test_basic_load_html(test_data_files_dir): + """Test basic loading of local HTML document""" + test_fp = test_data_files_dir / "Whatcom.txt" + + docs = await load_local_docs([test_fp]) + assert len(docs) == 1 + doc = docs[0] + assert not doc.empty + assert Path(doc.attrs.get("source_fp")) == test_fp + assert len(doc.pages) == 1 + + +@pytest.mark.asyncio +async def test_basic_load_pdf_with_service(test_data_files_dir): + """Test 
basic loading of local PDF document with service""" + test_fp = test_data_files_dir / "Caneadea New York.pdf" + + with pytest.raises( + COMPASSNotInitializedError, + match=r"Must initialize the queue for 'PDFLoader'.", + ): + await read_pdf_file(test_fp) + + async with RunningAsyncServices([PDFLoader()]): + doc, __ = await read_pdf_file(test_fp) + doc_2 = await load_local_docs( + [test_fp], pdf_read_coroutine=read_pdf_file + ) + + assert not doc.empty + assert not doc_2[0].empty + assert doc.text == doc_2[0].text + + +@pytest.mark.skipif( + not PYT_CMD, reason="requires PyTesseract command to be set" +) +@pytest.mark.asyncio +async def test_basic_load_ocr_pdf_with_service(test_data_files_dir): + """Test basic loading of local PDF document with service""" + import pytesseract # noqa: PLC0415 + + pytesseract.pytesseract.tesseract_cmd = PYT_CMD + + test_fp = test_data_files_dir / "Sedgwick Kansas.pdf" + + with pytest.raises( + COMPASSNotInitializedError, + match=r"Must initialize the queue for 'OCRPDFLoader'.", + ): + await read_pdf_file_ocr(test_fp) + + async with RunningAsyncServices([OCRPDFLoader()]): + doc, __ = await read_pdf_file_ocr(test_fp) + doc_2 = await load_local_docs( + [test_fp], pdf_ocr_read_coroutine=read_pdf_file_ocr + ) + + assert not doc.empty + assert not doc_2[0].empty + assert doc.text == doc_2[0].text + + +@pytest.mark.asyncio +async def test_basic_load_html_with_service(test_data_files_dir): + """Test basic loading of local HTML document with service""" + test_fp = test_data_files_dir / "Whatcom.txt" + + with pytest.raises( + COMPASSNotInitializedError, + match=r"Must initialize the queue for 'HTMLFileLoader'.", + ): + await read_html_file(test_fp) + + async with RunningAsyncServices([HTMLFileLoader()]): + doc, __ = await read_html_file(test_fp) + doc_2 = await load_local_docs( + [test_fp], html_read_coroutine=read_html_file + ) + + assert not doc.empty + assert not doc_2[0].empty + assert doc.text == doc_2[0].text + + +if __name__ == 
"__main__": + pytest.main(["-q", "--show-capture=all", Path(__file__), "-rapP"]) diff --git a/tests/python/unit/validation/test_validation_location.py b/tests/python/unit/validation/test_validation_location.py index a0f697995..c51c76fc4 100644 --- a/tests/python/unit/validation/test_validation_location.py +++ b/tests/python/unit/validation/test_validation_location.py @@ -399,6 +399,7 @@ async def test_doc_text_matches_jurisdiction_pdf( ), ], ) +@pytest.mark.asyncio async def test_doc_text_matches_jurisdiction_ocr( oai_llm_service, test_data_files_dir, loc, doc_fn, truth ):