From 3a5fbaddb7427d7c5b9d54c0c87583b009e40afa Mon Sep 17 00:00:00 2001
From: rmuil1
Date: Mon, 2 Feb 2026 16:02:59 +0000
Subject: [PATCH 1/2] make project argument consistent across commands

---
 README.md                            | 11 +++++++
 example_projects/eg0-basic/README.md |  2 +-
 src/pythinfer/cli.py                 | 44 ++++++++++++++++++----------
 3 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/README.md b/README.md
index c88d717..0349a8c 100644
--- a/README.md
+++ b/README.md
@@ -34,12 +34,23 @@ A distinction is made between 'external' and 'internal' files. See below.
    This will create a `pythinfer.yaml` project file in the project folder, merge all RDF files it finds, perform inference, and then execute the SPARQL query against the inferred graph.
 
+1. To use a specific project file, use the `--project` option before the command:
+
+   ```bash
+   uvx ~/git/pythinfer --project pythinfer_celebrity.yaml query select_who_knows_whom.rq
+   ```
+
 1. Edit the `pythinfer.yaml` file to specify which files to include, try again.
 
 Have fun.
 
 ![Demo of executing eg0 in CLI](demo-eg0.gif)
 
 ## Command Line Interface
 
+### Global Options
+
+- `--project` / `-p`: Specify the path to a project configuration file. If not provided, pythinfer will search for `pythinfer.yaml` in the current directory and parent directories, or create a new project if none is found.
+- `--verbose` / `-v`: Enable verbose (DEBUG) logging output.
+
 ### Common Options
 
 - `--extra-export`: allows specifying extra export formats beyond the default trig. Can be used to 'flatten' quads to triples when exporting (by exporting to ttl or nt as well as trig)
diff --git a/example_projects/eg0-basic/README.md b/example_projects/eg0-basic/README.md
index 8fddb9c..b0c62c7 100644
--- a/example_projects/eg0-basic/README.md
+++ b/example_projects/eg0-basic/README.md
@@ -75,7 +75,7 @@ To demonstrate custom inference rules with SPARQL, a CONSTRUCT query `eg_rule_to
 (BTW: the CONSTRUCT query file is named `eg_rule...` instead of `infer...` to avoid being picked up by the automatic project creation in the test suite.)
 
 ```sh
-uv run pythinfer query --no-cache --project pythinfer_celebrity.yaml select_who_knows_whom.rq
+uv run pythinfer --project pythinfer_celebrity.yaml query --no-cache select_who_knows_whom.rq
 ```
 
 NB: beware of [bug #33](https://github.com/robertmuil/pythinfer/issues/33): hence the `--no-cache` flag.
diff --git a/src/pythinfer/cli.py b/src/pythinfer/cli.py
index c1e8dd2..396f445 100644
--- a/src/pythinfer/cli.py
+++ b/src/pythinfer/cli.py
@@ -2,7 +2,7 @@
 
 import logging
 from collections.abc import Sequence
-from datetime import UTC, datetime
+from contextvars import ContextVar
 from pathlib import Path
 from typing import Annotated
 
@@ -19,6 +19,24 @@
 )
 from pythinfer.rdflibplus import DatasetView, graph_lengths
 
+ProjectOption = Annotated[
+    Path | None,
+    typer.Option(
+        "--project",
+        "-p",
+        help="Path to project configuration file (pythinfer.yaml)",
+    ),
+]
+
+VerboseOption = Annotated[
+    bool,
+    typer.Option(
+        "--verbose",
+        "-v",
+        help="Enable verbose (DEBUG) logging output",
+    ),
+]
+
 ExtraExportFormatOption = Annotated[
     list[str] | None,
     typer.Option(
@@ -32,6 +50,9 @@
 app = typer.Typer()
 logger = logging.getLogger(__name__)
 
+# Context variable to store the project path (thread-safe alternative to global)
+_project_path_var: ContextVar[Path | None] = ContextVar("project_path", default=None)
+
 
 def echo_success(msg: str) -> None:  # noqa: D103 - self-explanatory function
     typer.secho(msg, fg=typer.colors.GREEN)
@@ -73,14 +94,11 @@ def configure_logging(*, verbose: bool) -> None:
 @app.callback()
 def main_callback(
     *,
-    verbose: bool = typer.Option(
-        False,  # noqa: FBT003
-        "--verbose",
-        "-v",
-        help="Enable verbose (DEBUG) logging output",
-    ),
+    project: ProjectOption = None,
+    verbose: VerboseOption = False,
 ) -> None:
     """Global options for pythinfer CLI."""
+    _project_path_var.set(project)
     configure_logging(verbose=verbose)
 
 
@@ -108,7 +126,6 @@ def create(
 
 @app.command()
 def merge(
-    config: Path | None = None,
     output: Path | None = None,
     *,
     export_external: bool = False,
@@ -117,14 +134,13 @@ def merge(
     """Merge graphs as specified in the config file and save.
 
     Args:
-        config: path to the project configuration file
        output: path for data to be saved to (defaults to `derived/merged.trig`)
         export_external: whether to include external graphs in output
         extra_export_format: additional export format(s) (besides trig),
             can be specified multiple times
 
     """
-    project = load_project(config)
+    project = load_project(_project_path_var.get())
     ds, external_graph_ids = merge_graphs(
         project,
         output=output or True,
@@ -137,7 +153,6 @@ def merge(
 
 @app.command()
 def infer(
-    config: Path | None = None,
     backend: str = "owlrl",
     output: Path | None = None,
     *,
@@ -150,7 +165,6 @@ def infer(
     """Run inference backends on merged graph.
 
     Args:
-        config: path to Project defining the inputs
         backend: OWL inference engine to use
         output: output path for final inferences (None for project-based default)
         include_unwanted_triples: include all valid inferences, even unhelpful
@@ -161,7 +175,7 @@ def infer(
             can be specified multiple times
 
     """
-    project = load_project(config)
+    project = load_project(_project_path_var.get())
 
     # Force no_cache when extra export formats requested, otherwise exports won't happen
     if extra_export_format and not no_cache:
@@ -212,7 +226,6 @@ def infer(
 
 @app.command()
 def query(
     query: str,
-    project: Path | None = None,
     graph: list[str] | None = None,
     *,
     no_cache: bool = False,
@@ -224,7 +237,6 @@ def query(
 
     Args:
         query: path to the query file to execute, or the query string itself
-        project: Path to project file (defaults to project selection process)
         graph: IRI for graph to include (can be specified multiple times)
         no_cache: whether to skip loading from cache and re-run inference
 
@@ -235,7 +247,7 @@ def query(
     else:
         query_contents = str(query)
 
-    ds, _ = infer(project, no_cache=no_cache)
+    ds, _ = infer(no_cache=no_cache)
 
     view = ds
     if graph:

From 98816831593371d1115a64054bd0aa0e468a10aa Mon Sep 17 00:00:00 2001
From: rmuil1
Date: Mon, 2 Feb 2026 16:32:39 +0000
Subject: [PATCH 2/2] fixes #33: project included in derived folder to avoid cache collision

---
 src/pythinfer/inout.py                    |   8 +-
 tests/integration/test_cache_isolation.py | 193 ++++++++++++++++++++++
 2 files changed, 199 insertions(+), 2 deletions(-)
 create mode 100644 tests/integration/test_cache_isolation.py

diff --git a/src/pythinfer/inout.py b/src/pythinfer/inout.py
index 62eece3..c8b63d2 100644
--- a/src/pythinfer/inout.py
+++ b/src/pythinfer/inout.py
@@ -263,8 +263,12 @@ def to_yaml_file(self, output_path: Path) -> None:
 
     @property
     def path_output(self) -> Path:
-        """Path to the output folder."""
-        return self.path_self.parent / "derived"
+        """Path to the output folder.
+
+        Includes the project file stem to avoid cache collisions when multiple
+        project files exist in the same directory.
+        """
+        return self.path_self.parent / "derived" / self.path_self.stem
 
     @property
     def paths_all_input(self) -> list[Path]:
diff --git a/tests/integration/test_cache_isolation.py b/tests/integration/test_cache_isolation.py
new file mode 100644
index 0000000..2acb358
--- /dev/null
+++ b/tests/integration/test_cache_isolation.py
@@ -0,0 +1,193 @@
+"""Integration tests for cache isolation with multiple project files."""
+
+import os
+import shutil
+from pathlib import Path
+
+import pytest
+
+from pythinfer.infer import load_cache, run_inference_backend
+from pythinfer.inout import COMBINED_FULL_FILESTEM, load_project
+from pythinfer.merge import merge_graphs
+
+PROJECT_ROOT = Path(__file__).parent.parent.parent
+
+
+class TestCacheIsolation:
+    """Test that different project files in the same directory have isolated caches."""
+
+    @pytest.fixture
+    def eg0_temp_dir(self, tmp_path: Path) -> Path:
+        """Create temporary copy of eg0-basic to avoid modifying the repository."""
+        shutil.copytree(
+            PROJECT_ROOT / "example_projects" / "eg0-basic", tmp_path / "eg0-basic"
+        )
+
+        return tmp_path / "eg0-basic"
+
+    def test_separate_cache_directories_for_different_projects(
+        self, eg0_temp_dir: Path
+    ) -> None:
+        """Test that different project files create separate cache directories.
+
+        This verifies the fix for the bug where the --project argument would use
+        the wrong cache if a cache existed for the default pythinfer.yaml.
+ """ + # Verify example project exists and has both config files + default_config = eg0_temp_dir / "pythinfer.yaml" + celebrity_config = eg0_temp_dir / "pythinfer_celebrity.yaml" + + assert default_config.exists(), "pythinfer.yaml not found" + assert celebrity_config.exists(), "pythinfer_celebrity.yaml not found" + + # Load both projects + default_project = load_project(default_config) + celebrity_project = load_project(celebrity_config) + + # Verify they have different output paths based on project file stem + default_output = default_project.path_output + celebrity_output = celebrity_project.path_output + + assert default_output == eg0_temp_dir / "derived" / "pythinfer" + assert celebrity_output == eg0_temp_dir / "derived" / "pythinfer_celebrity" + assert default_output != celebrity_output + + def test_different_inference_results_with_different_projects( + self, eg0_temp_dir: Path + ) -> None: + """Test that different project files produce different inference results. + + The celebrity project includes an additional SPARQL inference rule + that the default project does not, so they should have different + inferred triples and cache files. + """ + original_cwd = Path.cwd() + try: + os.chdir(eg0_temp_dir) + + # Default project inference + default_project = load_project(None) # Uses discovery + default_project.owl_backend = "owlrl" + default_ds, default_external_ids = merge_graphs( + default_project, + output=True, + export_external=False, + extra_export_formats=None, + ) + run_inference_backend( + default_ds, + default_external_ids, + default_project, + None, + include_unwanted_triples=False, + export_full=True, + export_external_inferences=False, + extra_export_formats=None, + ) + default_count = len(default_ds) + + # Celebrity project inference + celebrity_project = load_project(Path("pythinfer_celebrity.yaml")) + celebrity_project.owl_backend = "owlrl" + celebrity_ds, celebrity_external_ids = merge_graphs( + celebrity_project, + output=True, + export_external=False, + extra_export_formats=None, + ) + run_inference_backend( + celebrity_ds, + celebrity_external_ids, + celebrity_project, + None, + include_unwanted_triples=False, + export_full=True, + export_external_inferences=False, + extra_export_formats=None, + ) + celebrity_count = len(celebrity_ds) + + # Verify different numbers of triples (celebrity has more due + # to extra inference) + assert default_count > 0 + assert celebrity_count > 0 + assert celebrity_count > default_count, ( + f"Celebrity project should have more inferences ({celebrity_count}) " + f"than default ({default_count})" + ) + + # Verify cache files exist in separate directories + default_cache = ( + default_project.path_output / f"{COMBINED_FULL_FILESTEM}.trig" + ) + celebrity_cache = ( + celebrity_project.path_output / f"{COMBINED_FULL_FILESTEM}.trig" + ) + + assert default_cache.exists(), ( + f"Default cache not found at {default_cache}" + ) + assert celebrity_cache.exists(), ( + f"Celebrity cache not found at {celebrity_cache}" + ) + + # Verify they're in different directories + assert default_cache.parent != celebrity_cache.parent + + finally: + os.chdir(original_cwd) + + def test_cache_not_mixed_between_projects(self, eg0_temp_dir: Path) -> None: + """Test that loading project doesn't confuse caches between projects. + + This is the specific bug scenario: if we run infer with default project, + then run infer with celebrity project, the celebrity project should not + load the default project's cache. 
+ """ + original_cwd = Path.cwd() + try: + os.chdir(eg0_temp_dir) + + # Step 1: Run inference for default project (creates cache) + default_project = load_project(None) + default_project.owl_backend = "owlrl" + default_ds, default_external_ids = merge_graphs( + default_project, + output=True, + export_external=False, + ) + run_inference_backend( + default_ds, + default_external_ids, + default_project, + None, + include_unwanted_triples=False, + export_full=True, + export_external_inferences=False, + ) + + # Verify default cache was created + default_cache = load_cache(default_project) + assert default_cache is not None, ( + "Default project cache should exist" + ) + default_triple_count = len(default_cache) + + # Step 2: Load celebrity project and verify it doesn't use + # default cache + celebrity_project = load_project(Path("pythinfer_celebrity.yaml")) + celebrity_cache = load_cache(celebrity_project) + + # If cache was incorrectly shared, this assertion would fail + # because celebrity cache would have same triple count as default + if celebrity_cache is not None: + celebrity_triple_count = len(celebrity_cache) + # Celebrity has more triples due to additional inference + assert celebrity_triple_count > default_triple_count, ( + f"Celebrity cache should have more triples " + f"({celebrity_triple_count}) than default " + f"({default_triple_count}), but got fewer. " + f"This suggests the wrong cache is being used." + ) + finally: + os.chdir(original_cwd)