From 3a5fbaddb7427d7c5b9d54c0c87583b009e40afa Mon Sep 17 00:00:00 2001
From: rmuil1
Date: Mon, 2 Feb 2026 16:02:59 +0000
Subject: [PATCH 1/2] make project argument consistent across commands

---
 README.md                            | 11 +++++++
 example_projects/eg0-basic/README.md |  2 +-
 src/pythinfer/cli.py                 | 44 ++++++++++++++++++----------
 3 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index c88d717..0349a8c 100644
--- a/README.md
+++ b/README.md
@@ -34,12 +34,23 @@ A distinction is made between 'external' and 'internal' files. See below.
    This will create a `pythinfer.yaml` project file in the project folder, merge all RDF files it finds, perform inference, and then execute the SPARQL query against the inferred graph.
 
+1. To use a specific project file, use the `--project` option before the command:
+
+   ```bash
+   uvx ~/git/pythinfer --project pythinfer_celebrity.yaml query select_who_knows_whom.rq
+   ```
+
 1. Edit the `pythinfer.yaml` file to specify which files to include, try again.
 
 Have fun.
 
 ![Demo of executing eg0 in CLI](demo-eg0.gif)
 
 ## Command Line Interface
 
+### Global Options
+
+- `--project` / `-p`: Specify the path to a project configuration file. If not provided, pythinfer will search for `pythinfer.yaml` in the current directory and parent directories, or create a new project if none is found.
+- `--verbose` / `-v`: Enable verbose (DEBUG) logging output.
+
 ### Common Options
 
 - `--extra-export`: allows specifying extra export formats beyond the default trig. Can be used to 'flatten' quads to triples when exporting (by exporting to ttl or nt as well as trig)
diff --git a/example_projects/eg0-basic/README.md b/example_projects/eg0-basic/README.md
index 8fddb9c..b0c62c7 100644
--- a/example_projects/eg0-basic/README.md
+++ b/example_projects/eg0-basic/README.md
@@ -75,7 +75,7 @@ To demonstrate custom inference rules with SPARQL, a CONSTRUCT query `eg_rule_to
 (BTW: the CONSTRUCT query file is named `eg_rule...` instead of `infer...` to avoid being picked up by the automatic project creation in the test suite.)
 
 ```sh
-uv run pythinfer query --no-cache --project pythinfer_celebrity.yaml select_who_knows_whom.rq
+uv run pythinfer --project pythinfer_celebrity.yaml query --no-cache select_who_knows_whom.rq
 ```
 
 NB: beware of [bug #33](https://github.com/robertmuil/pythinfer/issues/33): hence the `--no-cache` flag.
diff --git a/src/pythinfer/cli.py b/src/pythinfer/cli.py
index c1e8dd2..396f445 100644
--- a/src/pythinfer/cli.py
+++ b/src/pythinfer/cli.py
@@ -2,7 +2,7 @@
 
 import logging
 from collections.abc import Sequence
-from datetime import UTC, datetime
+from contextvars import ContextVar
 from pathlib import Path
 from typing import Annotated
 
@@ -19,6 +19,24 @@
 )
 from pythinfer.rdflibplus import DatasetView, graph_lengths
 
+ProjectOption = Annotated[
+    Path | None,
+    typer.Option(
+        "--project",
+        "-p",
+        help="Path to project configuration file (pythinfer.yaml)",
+    ),
+]
+
+VerboseOption = Annotated[
+    bool,
+    typer.Option(
+        "--verbose",
+        "-v",
+        help="Enable verbose (DEBUG) logging output",
+    ),
+]
+
 ExtraExportFormatOption = Annotated[
     list[str] | None,
     typer.Option(
@@ -32,6 +50,9 @@
 app = typer.Typer()
 logger = logging.getLogger(__name__)
 
+# Context variable to store the project path (thread-safe alternative to global)
+_project_path_var: ContextVar[Path | None] = ContextVar("project_path", default=None)
+
 
 def echo_success(msg: str) -> None:  # noqa: D103 - self-explanatory function
     typer.secho(msg, fg=typer.colors.GREEN)
@@ -73,14 +94,11 @@ def configure_logging(*, verbose: bool) -> None:
 @app.callback()
 def main_callback(
     *,
-    verbose: bool = typer.Option(
-        False,  # noqa: FBT003
-        "--verbose",
-        "-v",
-        help="Enable verbose (DEBUG) logging output",
-    ),
+    project: ProjectOption = None,
+    verbose: VerboseOption = False,
 ) -> None:
     """Global options for pythinfer CLI."""
+    _project_path_var.set(project)
     configure_logging(verbose=verbose)
 
 
@@ -108,7 +126,6 @@ def create(
 
 @app.command()
 def merge(
-    config: Path | None = None,
     output: Path | None = None,
     *,
     export_external: bool = False,
@@ -117,14 +134,13 @@ def merge(
     """Merge graphs as specified in the config file and save.
 
     Args:
-        config: path to the project configuration file
        output: path for data to be saved to (defaults to `derived/merged.trig`)
         export_external: whether to include external graphs in output
         extra_export_format: additional export format(s) (besides trig),
             can be specified multiple times
 
     """
-    project = load_project(config)
+    project = load_project(_project_path_var.get())
     ds, external_graph_ids = merge_graphs(
         project,
         output=output or True,
@@ -137,7 +153,6 @@ def merge(
 
 @app.command()
 def infer(
-    config: Path | None = None,
     backend: str = "owlrl",
     output: Path | None = None,
     *,
@@ -150,7 +165,6 @@ def infer(
     """Run inference backends on merged graph.
 
     Args:
-        config: path to Project defining the inputs
         backend: OWL inference engine to use
         output: output path for final inferences (None for project-based default)
         include_unwanted_triples: include all valid inferences, even unhelpful
@@ -161,7 +175,7 @@ def infer(
             can be specified multiple times
 
     """
-    project = load_project(config)
+    project = load_project(_project_path_var.get())
 
     # Force no_cache when extra export formats requested, otherwise exports won't happen
     if extra_export_format and not no_cache:
@@ -212,7 +226,6 @@ def infer(
 
 @app.command()
 def query(
     query: str,
-    project: Path | None = None,
     graph: list[str] | None = None,
     *,
     no_cache: bool = False,
@@ -224,7 +237,6 @@ def query(
 
     Args:
         query: path to the query file to execute, or the query string itself
-        project: Path to project file (defaults to project selection process)
         graph: IRI for graph to include (can be specified multiple times)
         no_cache: whether to skip loading from cache and re-run inference
 
@@ -235,7 +247,7 @@ def query(
     else:
         query_contents = str(query)
 
-    ds, _ = infer(project, no_cache=no_cache)
+    ds, _ = infer(no_cache=no_cache)
 
     view = ds
     if graph:

From 98816831593371d1115a64054bd0aa0e468a10aa Mon Sep 17 00:00:00 2001
From: rmuil1
Date: Mon, 2 Feb 2026 16:32:39 +0000
Subject: [PATCH 2/2] fixes #33: project included in derived folder to avoid cache collision

---
 src/pythinfer/inout.py                    |   8 +-
 tests/integration/test_cache_isolation.py | 193 ++++++++++++++++++++++
 2 files changed, 199 insertions(+), 2 deletions(-)
 create mode 100644 tests/integration/test_cache_isolation.py

diff --git a/src/pythinfer/inout.py b/src/pythinfer/inout.py
index 62eece3..c8b63d2 100644
--- a/src/pythinfer/inout.py
+++ b/src/pythinfer/inout.py
@@ -263,8 +263,12 @@ def to_yaml_file(self, output_path: Path) -> None:
 
     @property
     def path_output(self) -> Path:
-        """Path to the output folder."""
-        return self.path_self.parent / "derived"
+        """Path to the output folder.
+
+        Includes the project file stem to avoid cache collisions when multiple
+        project files exist in the same directory.
+        """
+        return self.path_self.parent / "derived" / self.path_self.stem
 
     @property
     def paths_all_input(self) -> list[Path]:
diff --git a/tests/integration/test_cache_isolation.py b/tests/integration/test_cache_isolation.py
new file mode 100644
index 0000000..2acb358
--- /dev/null
+++ b/tests/integration/test_cache_isolation.py
@@ -0,0 +1,193 @@
+"""Integration tests for cache isolation with multiple project files."""
+
+import os
+import shutil
+from pathlib import Path
+
+import pytest
+
+from pythinfer.infer import load_cache, run_inference_backend
+from pythinfer.inout import COMBINED_FULL_FILESTEM, load_project
+from pythinfer.merge import merge_graphs
+
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+
+
+class TestCacheIsolation:
+    """Test that different project files in the same directory have isolated caches."""
+
+    @pytest.fixture
+    def eg0_temp_dir(self, tmp_path: Path) -> Path:
+        """Create temporary copy of eg0-basic to avoid modifying the repository."""
+        shutil.copytree(
+            PROJECT_ROOT / "example_projects" / "eg0-basic", tmp_path / "eg0-basic"
+        )
+
+        return tmp_path / "eg0-basic"
+
+    def test_separate_cache_directories_for_different_projects(
+        self, eg0_temp_dir: Path
+    ) -> None:
+        """Test that different project files create separate cache directories.
+
+        This verifies the fix for the bug where the --project argument would use
+        the wrong cache if a cache existed for the default pythinfer.yaml.
+ """ + # Verify example project exists and has both config files + default_config = eg0_temp_dir / "pythinfer.yaml" + celebrity_config = eg0_temp_dir / "pythinfer_celebrity.yaml" + + assert default_config.exists(), "pythinfer.yaml not found" + assert celebrity_config.exists(), "pythinfer_celebrity.yaml not found" + + # Load both projects + default_project = load_project(default_config) + celebrity_project = load_project(celebrity_config) + + # Verify they have different output paths based on project file stem + default_output = default_project.path_output + celebrity_output = celebrity_project.path_output + + assert default_output == eg0_temp_dir / "derived" / "pythinfer" + assert celebrity_output == eg0_temp_dir / "derived" / "pythinfer_celebrity" + assert default_output != celebrity_output + + def test_different_inference_results_with_different_projects( + self, eg0_temp_dir: Path + ) -> None: + """Test that different project files produce different inference results. + + The celebrity project includes an additional SPARQL inference rule + that the default project does not, so they should have different + inferred triples and cache files. + """ + original_cwd = Path.cwd() + try: + os.chdir(eg0_temp_dir) + + # Default project inference + default_project = load_project(None) # Uses discovery + default_project.owl_backend = "owlrl" + default_ds, default_external_ids = merge_graphs( + default_project, + output=True, + export_external=False, + extra_export_formats=None, + ) + run_inference_backend( + default_ds, + default_external_ids, + default_project, + None, + include_unwanted_triples=False, + export_full=True, + export_external_inferences=False, + extra_export_formats=None, + ) + default_count = len(default_ds) + + # Celebrity project inference + celebrity_project = load_project(Path("pythinfer_celebrity.yaml")) + celebrity_project.owl_backend = "owlrl" + celebrity_ds, celebrity_external_ids = merge_graphs( + celebrity_project, + output=True, + export_external=False, + extra_export_formats=None, + ) + run_inference_backend( + celebrity_ds, + celebrity_external_ids, + celebrity_project, + None, + include_unwanted_triples=False, + export_full=True, + export_external_inferences=False, + extra_export_formats=None, + ) + celebrity_count = len(celebrity_ds) + + # Verify different numbers of triples (celebrity has more due + # to extra inference) + assert default_count > 0 + assert celebrity_count > 0 + assert celebrity_count > default_count, ( + f"Celebrity project should have more inferences ({celebrity_count}) " + f"than default ({default_count})" + ) + + # Verify cache files exist in separate directories + default_cache = ( + default_project.path_output / f"{COMBINED_FULL_FILESTEM}.trig" + ) + celebrity_cache = ( + celebrity_project.path_output / f"{COMBINED_FULL_FILESTEM}.trig" + ) + + assert default_cache.exists(), ( + f"Default cache not found at {default_cache}" + ) + assert celebrity_cache.exists(), ( + f"Celebrity cache not found at {celebrity_cache}" + ) + + # Verify they're in different directories + assert default_cache.parent != celebrity_cache.parent + + finally: + os.chdir(original_cwd) + + def test_cache_not_mixed_between_projects(self, eg0_temp_dir: Path) -> None: + """Test that loading project doesn't confuse caches between projects. + + This is the specific bug scenario: if we run infer with default project, + then run infer with celebrity project, the celebrity project should not + load the default project's cache. 
+ """ + original_cwd = Path.cwd() + try: + os.chdir(eg0_temp_dir) + + # Step 1: Run inference for default project (creates cache) + default_project = load_project(None) + default_project.owl_backend = "owlrl" + default_ds, default_external_ids = merge_graphs( + default_project, + output=True, + export_external=False, + ) + run_inference_backend( + default_ds, + default_external_ids, + default_project, + None, + include_unwanted_triples=False, + export_full=True, + export_external_inferences=False, + ) + + # Verify default cache was created + default_cache = load_cache(default_project) + assert default_cache is not None, ( + "Default project cache should exist" + ) + default_triple_count = len(default_cache) + + # Step 2: Load celebrity project and verify it doesn't use + # default cache + celebrity_project = load_project(Path("pythinfer_celebrity.yaml")) + celebrity_cache = load_cache(celebrity_project) + + # If cache was incorrectly shared, this assertion would fail + # because celebrity cache would have same triple count as default + if celebrity_cache is not None: + celebrity_triple_count = len(celebrity_cache) + # Celebrity has more triples due to additional inference + assert celebrity_triple_count > default_triple_count, ( + f"Celebrity cache should have more triples " + f"({celebrity_triple_count}) than default " + f"({default_triple_count}), but got fewer. " + f"This suggests the wrong cache is being used." + ) + finally: + os.chdir(original_cwd)