From 250c9530346c9ba6ea4324986f615674d8336a93 Mon Sep 17 00:00:00 2001
From: rmuil1 <rmuil1@bloomberg.net>
Date: Mon, 2 Feb 2026 19:25:37 +0000
Subject: [PATCH 1/9] use urn scheme for named graphs

tests don't work fully yet
---
 README.md                                     |  4 +-
 .../derived/expected_inferred_wanted.trig     |  2 +-
 .../eg0-basic/derived/expected_merged.trig    |  4 +-
 src/pythinfer/infer.py                        | 94 ++++++++++++++++---
 src/pythinfer/merge.py                        | 59 ++++++++++--
 5 files changed, 140 insertions(+), 23 deletions(-)

diff --git a/README.md b/README.md
index f74ebfc..fa429c2 100644
--- a/README.md
+++ b/README.md
@@ -362,4 +362,6 @@ The `example_projects` folder contains contrived examples, but this has also bee
 1. check and raise error or at least warning if default_union is set in underlying Dataset of DatasetView
 1. document and/or fix serialisation: canon longTurtle is not great with the way it orders things, so we might need to call out to riot unfortunately.
 1. consider changing the distinction from interal/external to data/vocabulary (where vocab includes taxonomies or ontologies) - basically the ABox/TBox distinction where CBox is part of TBox.
-1. add support for ASK query
+1. add better output support for ASK query
+1. add option to remove project name from named graphs, for easier specification:
+   1. e.g. `<urn:pythinfer:inferences:owl>` which is easy to remember and specify on command-line.
diff --git a/example_projects/eg0-basic/derived/expected_inferred_wanted.trig b/example_projects/eg0-basic/derived/expected_inferred_wanted.trig
index ba12791..a804dc6 100644
--- a/example_projects/eg0-basic/derived/expected_inferred_wanted.trig
+++ b/example_projects/eg0-basic/derived/expected_inferred_wanted.trig
@@ -1,6 +1,6 @@
 @prefix : <http://example.org/> .
 @prefix foaf: <http://xmlns.com/foaf/0.1/> .
 
-<inferences_owl> {
+<urn:pythinfer:eg0-basic:inferences:owl> {
     :Alice foaf:knows :Bob .
 }
diff --git a/example_projects/eg0-basic/derived/expected_merged.trig b/example_projects/eg0-basic/derived/expected_merged.trig
index 3269bb8..00ca617 100644
--- a/example_projects/eg0-basic/derived/expected_merged.trig
+++ b/example_projects/eg0-basic/derived/expected_merged.trig
@@ -2,7 +2,7 @@
 @prefix foaf: <http://xmlns.com/foaf/0.1/> .
 @prefix owl: <http://www.w3.org/2002/07/owl#> .
 
-<basic-data.ttl> {
+<urn:pythinfer:eg0-basic:file:basic-data.ttl> {
     :Bob a foaf:Person ;
         foaf:knows :Alice ;
         foaf:name "Bob Jones" .
@@ -12,7 +12,7 @@
         foaf:name "Alice Smith" .
 }
 
-<basic-model.ttl> {
+<urn:pythinfer:eg0-basic:file:basic-model.ttl> {
     foaf:knows a owl:SymmetricProperty .
 }
 
diff --git a/src/pythinfer/infer.py b/src/pythinfer/infer.py
index e51bb42..6db90dd 100755
--- a/src/pythinfer/infer.py
+++ b/src/pythinfer/infer.py
@@ -30,11 +30,22 @@
     export_dataset,
     load_sparql_inference_queries,
 )
+from pythinfer.merge import PYTHINFER_NS
 from pythinfer.rdflibplus import DatasetView
 
-IRI_EXTERNAL_INFERENCES: URIRef = URIRef("inferences_external")  # type: ignore[bad-assignment]
-IRI_OWL_INFERENCES: URIRef = URIRef("inferences_owl")  # type: ignore[bad-assignment]
-IRI_SPARQL_INFERENCES: URIRef = URIRef("inferences_sparql")  # type: ignore[bad-assignment]
+
+def _create_inference_urn(project_name: str, inference_type: str) -> URIRef:
+    """Create a stable URN identifier for an inference graph.
+
+    Args:
+        project_name: Name of the project
+        inference_type: Type of inference ('external', 'owl', or 'sparql')
+
+    Returns:
+        URN for the inference graph, e.g.:
+        urn:pythinfer:eg0-basic:inferences:owl
+    """
+    return PYTHINFER_NS[f"{project_name}:inferences:{inference_type}"]
 
 MAX_REASONING_ROUNDS = 5
 SCRIPT_DIR = Path(__file__).parent
@@ -259,7 +270,7 @@ def filter_triples(
 
 
 def _generate_external_inferences(
-    ds: Dataset, external_graph_ids: list[IdentifiedNode]
+    ds: Dataset, external_graph_ids: list[IdentifiedNode], project: Project
 ) -> Graph:
     """Generate inferences from external vocabularies only (step 2).
 
@@ -269,6 +280,7 @@ def _generate_external_inferences(
     Args:
         ds: Dataset containing all graphs.
         external_graph_ids: List of graph identifiers that are external.
+        project: The project configuration.
 
     Returns:
         Graph containing external inferences.
@@ -286,13 +298,35 @@ def _generate_external_inferences(
     info("  Temporary dataset created with %d triples in default graph", len(temp_ds))
 
     # Create inferences graph in temp dataset (must share same store)
-    temp_inferences = temp_ds.graph(IRI_EXTERNAL_INFERENCES)
+    iri_external = _create_inference_urn(project.name, "external")
+    temp_inferences = temp_ds.graph(iri_external)
 
     apply_owlrl_inference(temp_ds, temp_inferences)
 
-    g_external_inferences = ds.graph(IRI_EXTERNAL_INFERENCES)
+    g_external_inferences = ds.graph(iri_external)
     for s, p, o in temp_inferences:
         g_external_inferences.add((s, p, o))
+
+    # Add provenance metadata for external inference graph
+    from rdflib import DCTERMS
+    g_external_inferences.add(
+        (iri_external, RDF.type, PYTHINFER_NS["InferenceGraph"])
+    )
+    g_external_inferences.add(
+        (
+            iri_external,
+            PYTHINFER_NS["inferenceType"],
+            PYTHINFER_NS["ExternalReasoner"],
+        )
+    )
+    g_external_inferences.add(
+        (
+            iri_external,
+            DCTERMS.description,
+            Literal("Inferences generated by OWL-RL over external vocabularies"),
+        )
+    )
+
     info("  External inferences generated: %d triples", len(g_external_inferences))
     return g_external_inferences
 
@@ -406,7 +440,7 @@ def run_inference_backend(
     sparql_queries = load_sparql_inference_queries(project.paths_sparql_inference or [])
 
     # Step 2: Generate external inferences (once - this is the "noise floor")
-    g_external_inferences = _generate_external_inferences(ds, external_graph_ids)
+    g_external_inferences = _generate_external_inferences(ds, external_graph_ids, project)
 
     # Steps 3-5: Iterate full inferences + heuristics until convergence
     info(
@@ -414,8 +448,43 @@ def run_inference_backend(
         MAX_REASONING_ROUNDS,
     )
 
-    g_inferences_owl = ds.graph(IRI_OWL_INFERENCES)
-    g_inferences_sparql = ds.graph(IRI_SPARQL_INFERENCES)
+    iri_owl = _create_inference_urn(project.name, "owl")
+    iri_sparql = _create_inference_urn(project.name, "sparql")
+    g_inferences_owl = ds.graph(iri_owl)
+    g_inferences_sparql = ds.graph(iri_sparql)
+
+    # Add provenance metadata for inference graphs
+    from rdflib import DCTERMS
+    g_inferences_owl.add((iri_owl, RDF.type, PYTHINFER_NS["InferenceGraph"]))
+    g_inferences_owl.add(
+        (iri_owl, PYTHINFER_NS["inferenceType"], PYTHINFER_NS["OWLRL"])
+    )
+    g_inferences_owl.add(
+        (
+            iri_owl,
+            DCTERMS.description,
+            Literal("Inferences generated by OWL-RL reasoner"),
+        )
+    )
+
+    g_inferences_sparql.add(
+        (iri_sparql, RDF.type, PYTHINFER_NS["InferenceGraph"])
+    )
+    g_inferences_sparql.add(
+        (
+            iri_sparql,
+            PYTHINFER_NS["inferenceType"],
+            PYTHINFER_NS["SPARQL"],
+        )
+    )
+    g_inferences_sparql.add(
+        (
+            iri_sparql,
+            DCTERMS.description,
+            Literal("Inferences generated by SPARQL CONSTRUCT queries"),
+        )
+    )
+
     iteration = 0
     previous_triple_count = len(ds)  # Count triples in entire dataset
 
@@ -491,9 +560,10 @@ def run_inference_backend(
         len(g_inferences_owl) + len(g_inferences_sparql),
     )
 
+    iri_external = _create_inference_urn(project.name, "external")
     all_external_ids: list[IdentifiedNode] = [
         *external_graph_ids,
-        IRI_EXTERNAL_INFERENCES,
+        iri_external,
     ]
 
     output_file = output or project.path_output / f"{INFERRED_WANTED_FILESTEM}.trig"
@@ -501,8 +571,8 @@ def run_inference_backend(
 
     output_ds = DatasetView(
         ds,
-        [IRI_OWL_INFERENCES, IRI_SPARQL_INFERENCES]
-        + ([IRI_EXTERNAL_INFERENCES] if export_external_inferences else []),
+        [iri_owl, iri_sparql]
+        + ([iri_external] if export_external_inferences else []),
     )
 
     export_dataset(
diff --git a/src/pythinfer/merge.py b/src/pythinfer/merge.py
index 6c8f7ed..9cae30f 100644
--- a/src/pythinfer/merge.py
+++ b/src/pythinfer/merge.py
@@ -3,7 +3,7 @@
 import logging
 from pathlib import Path
 
-from rdflib import Dataset, IdentifiedNode
+from rdflib import DCTERMS, RDF, Dataset, IdentifiedNode, Namespace, URIRef
 
 from pythinfer.inout import MERGED_FILESTEM, Project, export_dataset
 from pythinfer.rdflibplus import DatasetView
@@ -12,10 +12,34 @@
 info = logger.info
 dbg = debug = logger.debug
 
+# URN namespace for pythinfer graph identifiers
+# Format: urn:pythinfer:{project-name}:file:{relative-path}
+#     or: urn:pythinfer:{project-name}:inferences:{type}
+PYTHINFER_NS = Namespace("urn:pythinfer:")
 
-# NB: in the below we are using the file *name* only as the named graph identifier.
-# This assumes that input files have unique names even if in different directories,
-# which is likely an invalid assumption...
+
+def _create_graph_urn(project: Project, file_path: Path) -> URIRef:
+    """Create a stable URN identifier for a source file's named graph.
+
+    Uses project name and relative path to create a URN that is:
+    - Stable across re-parsing
+    - Portable within a project
+    - Explicitly non-dereferenceable
+    - Informative about the source
+
+    Args:
+        project: The pythinfer project
+        file_path: Path to the source file
+
+    Returns:
+        URN for the named graph, e.g.:
+        urn:pythinfer:eg0-basic:file:basic-model.ttl
+    """
+    rel_path = file_path.relative_to(project.path_self.parent)
+    # Normalize to forward slashes and replace with colons for URN structure
+    # Use colons to maintain hierarchical structure in URN
+    path_str = str(rel_path).replace("\\", "/").replace("/", ":")
+    return PYTHINFER_NS[f"{project.name}:file:{path_str}"]
 
 
 def merge_graphs(
@@ -43,24 +67,45 @@ def merge_graphs(
 
     """
     ds = Dataset()
+    ds.bind("pythinfer", PYTHINFER_NS)
+    ds.bind("dcterms", DCTERMS)
     external_gids: list[IdentifiedNode] = []
 
     # Load external vocabulary files (ephemeral - used for inference only)
     for src in project.paths_vocab_ext:
-        g = ds.graph(src.name)
+        graph_urn = _create_graph_urn(project, src)
+        g = ds.graph(graph_urn)
         g.parse(src, format="turtle")
+
+        # Add provenance metadata to the graph
+        g.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
+        g.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
+        g.add((graph_urn, PYTHINFER_NS["sourceType"], PYTHINFER_NS["ExternalVocabulary"]))
+
         external_gids.append(g.identifier)
 
     # Load internal vocabulary files
     for src in project.paths_vocab_int:
-        g = ds.graph(src.name)
+        graph_urn = _create_graph_urn(project, src)
+        g = ds.graph(graph_urn)
         g.parse(src, format="turtle")
 
+        # Add provenance metadata
+        g.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
+        g.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
+        g.add((graph_urn, PYTHINFER_NS["sourceType"], PYTHINFER_NS["InternalVocabulary"]))
+
     # Load data files
     for src in project.paths_data:
-        g = ds.graph(src.name)
+        graph_urn = _create_graph_urn(project, src)
+        g = ds.graph(graph_urn)
         g.parse(src, format="turtle")
 
+        # Add provenance metadata
+        g.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
+        g.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
+        g.add((graph_urn, PYTHINFER_NS["sourceType"], PYTHINFER_NS["DataGraph"]))
+
     if output:
         if isinstance(output, bool):
             output_file = project.path_output / f"{MERGED_FILESTEM}.trig"

From 8d72ab648a50c3ad520590826def1ddb781b925e Mon Sep 17 00:00:00 2001
From: rmuil1 <rmuil1@bloomberg.net>
Date: Tue, 3 Feb 2026 19:33:31 +0000
Subject: [PATCH 2/9] use folder of project file for project name if none
 provided

---
 src/pythinfer/inout.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/pythinfer/inout.py b/src/pythinfer/inout.py
index c8b63d2..fc41f50 100644
--- a/src/pythinfer/inout.py
+++ b/src/pythinfer/inout.py
@@ -209,7 +209,7 @@ def from_yaml(config_path: Path | str) -> "Project":
         # Add path_self to the config dict before validation
         cfg["path_self"] = _config_path
         if "name" not in cfg:
-            cfg["name"] = _config_path.stem
+            cfg["name"] = _config_path.parent.stem
 
         # Let Pydantic handle validation and field normalization
         # Pass config_dir through context for path resolution in validators

From b3f6da0d71b00da480a60f5180103edc164a3391 Mon Sep 17 00:00:00 2001
From: rmuil1 <rmuil1@bloomberg.net>
Date: Tue, 3 Feb 2026 21:03:19 +0000
Subject: [PATCH 3/9] consolidate graph iri minting and update expected example
 files

---
 .../eg0-basic/derived/expected_merged.trig    | 18 -----
 .../eg0-basic/expected/expected-0-merged.trig | 30 +++++++
 .../expected-2-inferred-wanted.trig}          |  2 +-
 .../expected-0-merged.trig}                   | 13 ++-
 .../expected-2-inferred-wanted.trig}          |  3 +-
 .../expected-2-inferred-wanted.trig}          |  6 +-
 src/pythinfer/infer.py                        | 81 +++++--------------
 src/pythinfer/inout.py                        | 59 +++++++++++++-
 src/pythinfer/merge.py                        | 56 +++----------
 tests/e2e/test_e2e_from_cli.py                | 32 ++++----
 10 files changed, 154 insertions(+), 146 deletions(-)
 delete mode 100644 example_projects/eg0-basic/derived/expected_merged.trig
 create mode 100644 example_projects/eg0-basic/expected/expected-0-merged.trig
 rename example_projects/eg0-basic/{derived/expected_inferred_wanted.trig => expected/expected-2-inferred-wanted.trig} (68%)
 rename example_projects/eg1-ancestors/{derived/expected_merged.trig => expected/expected-0-merged.trig} (79%)
 rename example_projects/eg1-ancestors/{derived/expected_inferred_wanted.trig => expected/expected-2-inferred-wanted.trig} (89%)
 rename example_projects/eg2-projects/{derived/expected_inferred_wanted.trig => expected/expected-2-inferred-wanted.trig} (98%)

diff --git a/example_projects/eg0-basic/derived/expected_merged.trig b/example_projects/eg0-basic/derived/expected_merged.trig
deleted file mode 100644
index 00ca617..0000000
--- a/example_projects/eg0-basic/derived/expected_merged.trig
+++ /dev/null
@@ -1,18 +0,0 @@
-@prefix : <http://example.org/> .
-@prefix foaf: <http://xmlns.com/foaf/0.1/> .
-@prefix owl: <http://www.w3.org/2002/07/owl#> .
-
-<urn:pythinfer:eg0-basic:file:basic-data.ttl> {
-    :Bob a foaf:Person ;
-        foaf:knows :Alice ;
-        foaf:name "Bob Jones" .
-
-    :Alice a foaf:Person ;
-        foaf:age 30 ;
-        foaf:name "Alice Smith" .
-}
-
-<urn:pythinfer:eg0-basic:file:basic-model.ttl> {
-    foaf:knows a owl:SymmetricProperty .
-}
-
diff --git a/example_projects/eg0-basic/expected/expected-0-merged.trig b/example_projects/eg0-basic/expected/expected-0-merged.trig
new file mode 100644
index 0000000..adf0572
--- /dev/null
+++ b/example_projects/eg0-basic/expected/expected-0-merged.trig
@@ -0,0 +1,30 @@
+@prefix : <http://example.org/> .
+@prefix dcterms: <http://purl.org/dc/terms/> .
+@prefix foaf: <http://xmlns.com/foaf/0.1/> .
+@prefix owl: <http://www.w3.org/2002/07/owl#> .
+@prefix pythinfer: <http://pythinfer.local/> .
+
+@base <http://pythinfer.local/eg0-basic/> .
+
+<file/basic-data.ttl> {
+    :Bob a foaf:Person ;
+        foaf:knows :Alice ;
+        foaf:name "Bob Jones" .
+
+    :Alice a foaf:Person ;
+        foaf:age 30 ;
+        foaf:name "Alice Smith" .
+}
+
+<file/basic-model.ttl> {
+    foaf:knows a owl:SymmetricProperty .
+}
+
+<provenance> {
+    <file/basic-data.ttl> a pythinfer:SourceGraph ;
+        dcterms:source <file:///$PROJ_FOLDER/pythinfer/example_projects/eg0-basic/basic-data.ttl> ;
+        .
+    <file/basic-model.ttl> a pythinfer:SourceGraph ;
+        dcterms:source <file:///$PROJ_FOLDER/pythinfer/example_projects/eg0-basic/basic-model.ttl> ;
+        .
+}
\ No newline at end of file
diff --git a/example_projects/eg0-basic/derived/expected_inferred_wanted.trig b/example_projects/eg0-basic/expected/expected-2-inferred-wanted.trig
similarity index 68%
rename from example_projects/eg0-basic/derived/expected_inferred_wanted.trig
rename to example_projects/eg0-basic/expected/expected-2-inferred-wanted.trig
index a804dc6..d00c58e 100644
--- a/example_projects/eg0-basic/derived/expected_inferred_wanted.trig
+++ b/example_projects/eg0-basic/expected/expected-2-inferred-wanted.trig
@@ -1,6 +1,6 @@
 @prefix : <http://example.org/> .
 @prefix foaf: <http://xmlns.com/foaf/0.1/> .
 
-<urn:pythinfer:eg0-basic:inferences:owl> {
+<http://pythinfer.local/eg0-basic/inferences/owl> {
     :Alice foaf:knows :Bob .
 }
diff --git a/example_projects/eg1-ancestors/derived/expected_merged.trig b/example_projects/eg1-ancestors/expected/expected-0-merged.trig
similarity index 79%
rename from example_projects/eg1-ancestors/derived/expected_merged.trig
rename to example_projects/eg1-ancestors/expected/expected-0-merged.trig
index 1e8d3e6..a3774e8 100644
--- a/example_projects/eg1-ancestors/derived/expected_merged.trig
+++ b/example_projects/eg1-ancestors/expected/expected-0-merged.trig
@@ -4,8 +4,11 @@
 @prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .
 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
 @prefix skos: <http://www.w3.org/2004/02/skos/core#> .
+@prefix pythinfer: <http://pythinfer.local/> .
 
-<ancestors-model.ttl> {
+@base <http://pythinfer.local/eg1-ancestors/> .
+
+<file/ancestors-model.ttl> {
     ex:childOf rdfs:label "child of" ;
         owl:inverseOf ex:parentOf .
 
@@ -33,7 +36,7 @@
         rdfs:label "Person" .
 }
 
-<ancestors-data.ttl> {
+<file/ancestors-data.ttl> {
     ex:Alice a ex:Person ;
         ex:parentOf ex:Bob,
             ex:Carol .
@@ -48,3 +51,9 @@
 
     ex:Eve a ex:Person .
 }
+
+<provenance> {
+    <file/ancestors-data.ttl> a pythinfer:SourceGraph .
+    <file/ancestors-model.ttl> a pythinfer:SourceGraph .
+    <file/../../vocabs-external/skos.ttl> a pythinfer:SourceGraph .
+}
\ No newline at end of file
diff --git a/example_projects/eg1-ancestors/derived/expected_inferred_wanted.trig b/example_projects/eg1-ancestors/expected/expected-2-inferred-wanted.trig
similarity index 89%
rename from example_projects/eg1-ancestors/derived/expected_inferred_wanted.trig
rename to example_projects/eg1-ancestors/expected/expected-2-inferred-wanted.trig
index a355100..c49f1be 100644
--- a/example_projects/eg1-ancestors/derived/expected_inferred_wanted.trig
+++ b/example_projects/eg1-ancestors/expected/expected-2-inferred-wanted.trig
@@ -1,6 +1,7 @@
 @prefix ex: <http://example.org/ancestor/> .
 
-<inferences_owl> {
+@base <http://pythinfer.local/eg1-ancestors/> .
+<inferences/owl> {
     ex:David ex:childOf ex:Bob ;
         ex:descendantOf ex:Alice,
             ex:Bob .
diff --git a/example_projects/eg2-projects/derived/expected_inferred_wanted.trig b/example_projects/eg2-projects/expected/expected-2-inferred-wanted.trig
similarity index 98%
rename from example_projects/eg2-projects/derived/expected_inferred_wanted.trig
rename to example_projects/eg2-projects/expected/expected-2-inferred-wanted.trig
index 202a2a0..27d80cc 100644
--- a/example_projects/eg2-projects/derived/expected_inferred_wanted.trig
+++ b/example_projects/eg2-projects/expected/expected-2-inferred-wanted.trig
@@ -7,7 +7,9 @@
 @prefix ptp: <http://www.example.org/pythinfer/project/> .
 @prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
 
-<inferences_sparql> {
+@base <http://pythinfer.local/eg2-projects/> .
+
+<inferences/sparql> {
     # This is the most important SPARQL-based inference: the relationship between projA and projB
     eg:relationship-projA-projB a ptp:ProjectRelationship ;
         ptp:hasParticipant eg:projA,
@@ -47,7 +49,7 @@
 }
 
 
-<inferences_owl> {
+<inferences/owl> {
     eg:projA a dcat:Catalog,
             prov:Activity ;
         ptp:hasDataSource _:b0,
diff --git a/src/pythinfer/infer.py b/src/pythinfer/infer.py
index 6db90dd..6254752 100755
--- a/src/pythinfer/infer.py
+++ b/src/pythinfer/infer.py
@@ -9,6 +9,7 @@
 from owlrl import DeductiveClosure
 from owlrl.OWLRL import OWLRL_Semantics
 from rdflib import (
+    DCTERMS,
     OWL,
     RDF,
     RDFS,
@@ -30,23 +31,10 @@
     export_dataset,
     load_sparql_inference_queries,
 )
-from pythinfer.merge import PYTHINFER_NS
+from pythinfer.inout import PYTHINFER_NS
 from pythinfer.rdflibplus import DatasetView
 
 
-def _create_inference_urn(project_name: str, inference_type: str) -> URIRef:
-    """Create a stable URN identifier for an inference graph.
-
-    Args:
-        project_name: Name of the project
-        inference_type: Type of inference ('external', 'owl', or 'sparql')
-
-    Returns:
-        URN for the inference graph, e.g.:
-        urn:pythinfer:eg0-basic:inferences:owl
-    """
-    return PYTHINFER_NS[f"{project_name}:inferences:{inference_type}"]
-
 MAX_REASONING_ROUNDS = 5
 SCRIPT_DIR = Path(__file__).parent
 logger = logging.getLogger(__name__)
@@ -298,8 +286,9 @@ def _generate_external_inferences(
     info("  Temporary dataset created with %d triples in default graph", len(temp_ds))
 
     # Create inferences graph in temp dataset (must share same store)
-    iri_external = _create_inference_urn(project.name, "external")
+    iri_external = project.inference_gid("external")
     temp_inferences = temp_ds.graph(iri_external)
+    g_provenance = ds.graph(project.provenance_gid)
 
     apply_owlrl_inference(temp_ds, temp_inferences)
 
@@ -308,25 +297,10 @@ def _generate_external_inferences(
         g_external_inferences.add((s, p, o))
 
     # Add provenance metadata for external inference graph
-    from rdflib import DCTERMS
-    g_external_inferences.add(
-        (iri_external, RDF.type, PYTHINFER_NS["InferenceGraph"])
-    )
-    g_external_inferences.add(
-        (
-            iri_external,
-            PYTHINFER_NS["inferenceType"],
-            PYTHINFER_NS["ExternalReasoner"],
-        )
-    )
-    g_external_inferences.add(
-        (
-            iri_external,
-            DCTERMS.description,
-            Literal("Inferences generated by OWL-RL over external vocabularies"),
-        )
+    g_provenance.add((iri_external, RDF.type, PYTHINFER_NS["InferenceGraph"]))
+    g_provenance.add(
+        (iri_external, PYTHINFER_NS["inferenceEngine"], Literal("owlrl"))
     )
-
     info("  External inferences generated: %d triples", len(g_external_inferences))
     return g_external_inferences
 
@@ -440,7 +414,9 @@ def run_inference_backend(
     sparql_queries = load_sparql_inference_queries(project.paths_sparql_inference or [])
 
     # Step 2: Generate external inferences (once - this is the "noise floor")
-    g_external_inferences = _generate_external_inferences(ds, external_graph_ids, project)
+    g_external_inferences = _generate_external_inferences(
+        ds, external_graph_ids, project
+    )
 
     # Steps 3-5: Iterate full inferences + heuristics until convergence
     info(
@@ -448,43 +424,28 @@ def run_inference_backend(
         MAX_REASONING_ROUNDS,
     )
 
-    iri_owl = _create_inference_urn(project.name, "owl")
-    iri_sparql = _create_inference_urn(project.name, "sparql")
+    iri_owl = project.inference_gid("owl")
+    iri_sparql = project.inference_gid("sparql")
     g_inferences_owl = ds.graph(iri_owl)
     g_inferences_sparql = ds.graph(iri_sparql)
+    g_provenance = ds.graph(project.provenance_gid)
 
     # Add provenance metadata for inference graphs
-    from rdflib import DCTERMS
-    g_inferences_owl.add((iri_owl, RDF.type, PYTHINFER_NS["InferenceGraph"]))
-    g_inferences_owl.add(
-        (iri_owl, PYTHINFER_NS["inferenceType"], PYTHINFER_NS["OWLRL"])
-    )
-    g_inferences_owl.add(
-        (
-            iri_owl,
-            DCTERMS.description,
-            Literal("Inferences generated by OWL-RL reasoner"),
-        )
+    g_provenance.add((iri_owl, RDF.type, PYTHINFER_NS["InferenceGraph"]))
+    g_provenance.add(
+        (iri_owl, PYTHINFER_NS["inferenceEngine"], Literal(project.owl_backend))
     )
 
-    g_inferences_sparql.add(
+    g_provenance.add(
         (iri_sparql, RDF.type, PYTHINFER_NS["InferenceGraph"])
     )
-    g_inferences_sparql.add(
+    g_provenance.add(
         (
             iri_sparql,
-            PYTHINFER_NS["inferenceType"],
-            PYTHINFER_NS["SPARQL"],
+            PYTHINFER_NS["inferenceEngine"],
+            Literal("SPARQL CONSTRUCT"),
         )
     )
-    g_inferences_sparql.add(
-        (
-            iri_sparql,
-            DCTERMS.description,
-            Literal("Inferences generated by SPARQL CONSTRUCT queries"),
-        )
-    )
-
     iteration = 0
     previous_triple_count = len(ds)  # Count triples in entire dataset
 
@@ -560,7 +521,7 @@ def run_inference_backend(
         len(g_inferences_owl) + len(g_inferences_sparql),
     )
 
-    iri_external = _create_inference_urn(project.name, "external")
+    iri_external = project.inference_gid("external")
     all_external_ids: list[IdentifiedNode] = [
         *external_graph_ids,
         iri_external,
diff --git a/src/pythinfer/inout.py b/src/pythinfer/inout.py
index fc41f50..3ed158a 100644
--- a/src/pythinfer/inout.py
+++ b/src/pythinfer/inout.py
@@ -14,10 +14,17 @@
     field_validator,
     model_validator,
 )
-from rdflib import Dataset, Graph
+from rdflib import Dataset, Graph, Namespace, URIRef
 
 logger = logging.getLogger(__name__)
 
+# Base namespace for pythinfer graph identifiers and potentially other IRIs.
+# The original intent was to use a URN base (`urn:pythinfer:`), in the form:
+#   urn:pythinfer:{project-name}:file:{relative-path}
+#   urn:pythinfer:{project-name}:inferences:{type}
+# However, parsing the resulting TTL complained (e.g. no slash after the colon).
+PYTHINFER_NS = Namespace("http://pythinfer.local/")
+
 PROJECT_FILE_NAME = "pythinfer.yaml"
 MAX_DISCOVERY_SEARCH_DEPTH = 10
 
@@ -117,7 +124,7 @@ class Project(BaseModel):
 
     @model_validator(mode="before")
     @classmethod
-    def normalize_field_names(cls, data: dict) -> dict:
+    def normalize_field_names(cls, data: dict[str, str]) -> dict[str, str]:
         """Normalize field names to accept multiple spellings."""
         if not isinstance(data, dict):
             return data
@@ -136,10 +143,9 @@ def normalize_field_names(cls, data: dict) -> dict:
             "sparql_inference": "paths_sparql_inference",
             "paths_sparql_inference": "paths_sparql_inference",
             "owl-backend": "owl_backend",
-            "owl_backend": "owl_backend",
         }
 
-        normalized = {}
+        normalized: dict[str, str] = {}
         for key, value in data.items():
             # Use canonical name if it's an alias, otherwise keep original
             canonical_key = field_aliases.get(key, key)
@@ -280,6 +286,51 @@ def paths_all(self) -> list[Path]:
         """List of all paths (input + SPARQL inference) - cache checking."""
         return self.paths_all_input + (self.paths_sparql_inference or [])
 
+    @property
+    def namespace(self) -> Namespace:
+        """The IRI Namespace associated with this Project."""
+        # TODO: normalise name to be appropriate for an IRI.
+        return Namespace(PYTHINFER_NS[self.name] + "/")
+
+    @property
+    def provenance_gid(self) -> URIRef:
+        """The IRI to use for the provenance named graph for this Project."""
+        return self.namespace["provenance"]
+
+    def source_file_gid(self, file_path: Path) -> URIRef:
+        """Create a stable identifier for a source file's named graph.
+
+        Uses project name and relative path to create an IRI that is:
+        - Stable across re-parsing
+        - Portable within a project
+        - Informative about the source
+        - No longer explicitly non-dereferenceable (URNs don't currently work)
+
+        Args:
+            file_path: Path to the source file
+
+        Returns:
+            IRI for the named graph, e.g.:
+            http://pythinfer.local/eg0-basic/file/basic-model.ttl
+
+        """
+        rel_path = file_path.relative_to(self.path_self.parent)
+        # NOTE: a URN scheme would require replacing the path separators with
+        # colons to keep the hierarchical structure within the URN.
+        return self.namespace[f"file/{rel_path}"]
+
+    def inference_gid(self, inference_type: str) -> URIRef:
+        """Create a stable identifier for an inference graph.
+
+        Args:
+            inference_type: Type of inference ('external', 'owl', or 'sparql')
+
+        Returns:
+            IRI for the inference graph, e.g.:
+            http://pythinfer.local/eg0-basic/inferences/owl
+
+        """
+        return self.namespace[f"inferences/{inference_type}"]
 
 def discover_project(start_path: Path, _current_depth: int = 0) -> Path:
     """Discover a pythinfer project by searching for a config file.
diff --git a/src/pythinfer/merge.py b/src/pythinfer/merge.py
index 9cae30f..bf2ec21 100644
--- a/src/pythinfer/merge.py
+++ b/src/pythinfer/merge.py
@@ -3,45 +3,15 @@
 import logging
 from pathlib import Path
 
-from rdflib import DCTERMS, RDF, Dataset, IdentifiedNode, Namespace, URIRef
+from rdflib import DCTERMS, RDF, Dataset, IdentifiedNode, URIRef
 
-from pythinfer.inout import MERGED_FILESTEM, Project, export_dataset
+from pythinfer.inout import MERGED_FILESTEM, PYTHINFER_NS, Project, export_dataset
 from pythinfer.rdflibplus import DatasetView
 
 logger = logging.getLogger(__name__)
 info = logger.info
 dbg = debug = logger.debug
 
-# URN namespace for pythinfer graph identifiers
-# Format: urn:pythinfer:{project-name}:file:{relative-path}
-#     or: urn:pythinfer:{project-name}:inferences:{type}
-PYTHINFER_NS = Namespace("urn:pythinfer:")
-
-
-def _create_graph_urn(project: Project, file_path: Path) -> URIRef:
-    """Create a stable URN identifier for a source file's named graph.
-
-    Uses project name and relative path to create a URN that is:
-    - Stable across re-parsing
-    - Portable within a project
-    - Explicitly non-dereferenceable
-    - Informative about the source
-
-    Args:
-        project: The pythinfer project
-        file_path: Path to the source file
-
-    Returns:
-        URN for the named graph, e.g.:
-        urn:pythinfer:eg0-basic:file:basic-model.ttl
-    """
-    rel_path = file_path.relative_to(project.path_self.parent)
-    # Normalize to forward slashes and replace with colons for URN structure
-    # Use colons to maintain hierarchical structure in URN
-    path_str = str(rel_path).replace("\\", "/").replace("/", ":")
-    return PYTHINFER_NS[f"{project.name}:file:{path_str}"]
-
-
 def merge_graphs(
     project: Project,
     *,
@@ -70,41 +40,39 @@ def merge_graphs(
     ds.bind("pythinfer", PYTHINFER_NS)
     ds.bind("dcterms", DCTERMS)
     external_gids: list[IdentifiedNode] = []
+    g_provenance = ds.graph(project.provenance_gid)
 
     # Load external vocabulary files (ephemeral - used for inference only)
     for src in project.paths_vocab_ext:
-        graph_urn = _create_graph_urn(project, src)
+        graph_urn = project.source_file_gid(src)
         g = ds.graph(graph_urn)
         g.parse(src, format="turtle")
 
         # Add provenance metadata to the graph
-        g.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
-        g.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
-        g.add((graph_urn, PYTHINFER_NS["sourceType"], PYTHINFER_NS["ExternalVocabulary"]))
+        g_provenance.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
+        g_provenance.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
 
         external_gids.append(g.identifier)
 
     # Load internal vocabulary files
     for src in project.paths_vocab_int:
-        graph_urn = _create_graph_urn(project, src)
+        graph_urn = project.source_file_gid(src)
         g = ds.graph(graph_urn)
         g.parse(src, format="turtle")
 
         # Add provenance metadata
-        g.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
-        g.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
-        g.add((graph_urn, PYTHINFER_NS["sourceType"], PYTHINFER_NS["InternalVocabulary"]))
+        g_provenance.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
+        g_provenance.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
 
     # Load data files
     for src in project.paths_data:
-        graph_urn = _create_graph_urn(project, src)
+        graph_urn = project.source_file_gid(src)
         g = ds.graph(graph_urn)
         g.parse(src, format="turtle")
 
         # Add provenance metadata
-        g.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
-        g.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
-        g.add((graph_urn, PYTHINFER_NS["sourceType"], PYTHINFER_NS["DataGraph"]))
+        g_provenance.add((graph_urn, RDF.type, PYTHINFER_NS["SourceGraph"]))
+        g_provenance.add((graph_urn, DCTERMS.source, URIRef(src.resolve().as_uri())))
 
     if output:
         if isinstance(output, bool):
diff --git a/tests/e2e/test_e2e_from_cli.py b/tests/e2e/test_e2e_from_cli.py
index 313e22a..1a50c0c 100644
--- a/tests/e2e/test_e2e_from_cli.py
+++ b/tests/e2e/test_e2e_from_cli.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 
 import pytest
-from rdflib import Dataset
+from rdflib import DCTERMS, Dataset
 from rdflib.compare import graph_diff, isomorphic
 from typer.testing import CliRunner
 
@@ -39,15 +39,11 @@ def test_cli_command(
         if (command == "merge")
         else f"{INFERRED_WANTED_FILESTEM}.trig"
     )
-    expected_file = (
-        "expected_merged.trig"
-        if (command == "merge")
-        else "expected_inferred_wanted.trig"
-    )
+    expected_file = "expected-" + actual_file
 
     # Path to expected and actual output files
-    expected_file_path = project_dir / "derived" / expected_file
-    actual_file_path = project_dir / "derived" / actual_file
+    expected_file_path = project_dir / "expected" / expected_file
+    actual_file_path = project_dir / "derived" / "test_cli_command" / actual_file
 
     # Ensure expected file exists
     assert expected_file_path.exists(), (
@@ -58,16 +54,19 @@ def test_cli_command(
     if actual_file_path.exists():
         actual_file_path.unlink()
 
+    # Make sure intermediate output folder exists
+    actual_file_path.parent.mkdir(exist_ok=True)
+
     # Run the command using CliRunner but with proper working directory
     # Save current working directory and change to project directory
     original_cwd = Path.cwd()
+    runner = CliRunner()
+    cmd_args = [command, "--output", str(actual_file_path)]
+    # Disable cache for infer command to ensure fresh runs
+    if command == "infer":
+        cmd_args.append("--no-cache")
+    os.chdir(project_dir)
     try:
-        os.chdir(project_dir)
-        runner = CliRunner()
-        cmd_args = [command, "--output", str(actual_file_path)]
-        # Disable cache for infer command to ensure fresh runs
-        if command == "infer":
-            cmd_args.append("--no-cache")
         result = runner.invoke(app, cmd_args)
     finally:
         os.chdir(original_cwd)
@@ -103,6 +102,11 @@ def test_cli_command(
         expected_graph = expected_ds.graph(graph_id)
         actual_graph = actual_ds.graph(graph_id)
 
+        if graph_id.endswith("provenance"):
+            # Remove source information, as this will differ by execution environment
+            expected_graph.remove((None, DCTERMS.source, None))
+            actual_graph.remove((None, DCTERMS.source, None))
+
         if not isomorphic(expected_graph, actual_graph):
             # Compute the difference to show what's missing/extra
             in_both, in_expected_only, in_actual_only = graph_diff(

From cc79b051e3fa5397c995597ee08ca11b5e1a4490 Mon Sep 17 00:00:00 2001
From: rmuil1 <rmuil1@bloomberg.net>
Date: Tue, 3 Feb 2026 23:00:10 +0000
Subject: [PATCH 4/9] make parents of derived folder in test

---
 tests/e2e/test_e2e_from_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/test_e2e_from_cli.py b/tests/e2e/test_e2e_from_cli.py
index 1a50c0c..54ffa4d 100644
--- a/tests/e2e/test_e2e_from_cli.py
+++ b/tests/e2e/test_e2e_from_cli.py
@@ -55,7 +55,7 @@ def test_cli_command(
         actual_file_path.unlink()
 
     # Make sure intermediate output folder exists
-    actual_file_path.parent.mkdir(exist_ok=True)
+    actual_file_path.parent.mkdir(parents=True, exist_ok=True)
 
     # Run the command using CliRunner but with proper working directory
     # Save current working directory and change to project directory

From 677d7cda3f58b072f31d5b32d20d25f403cf2cae Mon Sep 17 00:00:00 2001
From: rmuil1 <rmuil1@bloomberg.net>
Date: Tue, 3 Feb 2026 23:04:01 +0000
Subject: [PATCH 5/9] trying to fix missing file in CI/CD

---
 tests/e2e/test_e2e_from_cli.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/e2e/test_e2e_from_cli.py b/tests/e2e/test_e2e_from_cli.py
index 54ffa4d..7a9e068 100644
--- a/tests/e2e/test_e2e_from_cli.py
+++ b/tests/e2e/test_e2e_from_cli.py
@@ -65,8 +65,8 @@ def test_cli_command(
     # Disable cache for infer command to ensure fresh runs
     if command == "infer":
         cmd_args.append("--no-cache")
-    os.chdir(project_dir)
     try:
+        os.chdir(project_dir)
         result = runner.invoke(app, cmd_args)
     finally:
         os.chdir(original_cwd)

From fb43d1e6f7785df134d277705164a5db6386ccb4 Mon Sep 17 00:00:00 2001
From: Robert Muil <robertmuil@gmail.com>
Date: Fri, 6 Feb 2026 12:37:21 +0000
Subject: [PATCH 6/9] update github actions

upgrade actions/checkout and actions/setup-python and astral-sh/setup-uv
---
 .github/workflows/test.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index ce44e96..5f358c8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -16,15 +16,15 @@ jobs:
         python-version: ["3.10", "3.11", "3.12"]
 
     steps:
-      - uses: actions/checkout@v4
+      - uses: actions/checkout@v6
 
       - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
 
       - name: Install uv
-        uses: astral-sh/setup-uv@v2
+        uses: astral-sh/setup-uv@v7
 
       - name: Install dependencies
         run: uv sync

From 1a7096a4d35b47801272c07de172a60fabf8cc60 Mon Sep 17 00:00:00 2001
From: Robert Muil <robertmuil@gmail.com>
Date: Fri, 6 Feb 2026 16:56:24 +0000
Subject: [PATCH 7/9] debugging action

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 5f358c8..43ba2d4 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,7 +30,7 @@ jobs:
         run: uv sync
 
       - name: Run tests with coverage
-        run: uv run pytest tests/ --cov=src/pythinfer --cov-report=xml --cov-report=term-missing
+        run: pwd && find ./ && uv run pytest tests/ --cov=src/pythinfer --cov-report=xml --cov-report=term-missing
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4

From b801ae231062a86f7e3bcef032711e7f8263a704 Mon Sep 17 00:00:00 2001
From: rmuil1 <rmuil1@bloomberg.net>
Date: Fri, 6 Feb 2026 20:22:08 +0000
Subject: [PATCH 8/9] attempted fix to brittle relative path handling

---
 src/pythinfer/inout.py | 23 ++++++++++++++++++++---
 1 file changed, 20 insertions(+), 3 deletions(-)

diff --git a/src/pythinfer/inout.py b/src/pythinfer/inout.py
index 3ed158a..bf4d4b6 100644
--- a/src/pythinfer/inout.py
+++ b/src/pythinfer/inout.py
@@ -227,10 +227,11 @@ def _path_to_yaml_str(self, path: Path) -> str:
         If the path is relative to the project file's directory, store it
         relative for better portability. Otherwise, store as absolute path.
         """
-        project_dir = self.path_self.parent
+        resolved_path = path.resolve()
+        resolved_project_dir = self.path_self.resolve().parent
         try:
             # Try to make it relative to the project directory
-            rel_path = path.relative_to(project_dir)
+            rel_path = resolved_path.relative_to(resolved_project_dir)
             return str(rel_path)
         except ValueError:
             # Path is not relative to project_dir, store as-is
@@ -314,7 +315,23 @@ def source_file_gid(self, file_path: Path) -> URIRef:
             http://pythinfer.local/eg0-basic/file/basic-model.ttl
 
         """
-        rel_path = file_path.relative_to(self.path_self.parent)
+        # Resolve both paths so symlinks and relative-path differences between
+        # environments do not matter; as_posix() keeps "/" separators on Windows
+        resolved_file_path = file_path.resolve()
+        resolved_project_dir = self.path_self.resolve().parent
+
+        try:
+            rel_path = resolved_file_path.relative_to(resolved_project_dir).as_posix()
+        except ValueError:
+            # File resolves outside the project directory; try the path as
+            # written before falling back to the bare file name
+            try:
+                # The unresolved path may still sit lexically under the project
+                rel_path = file_path.relative_to(self.path_self.parent).as_posix()
+            except ValueError:
+                # Last resort: bare file name (NOTE: same-named files will collide)
+                rel_path = resolved_file_path.name
+
         # Note, to use a URN, we'd need to replace with colons for URN structure
         # Use colons to maintain hierarchical structure in URN
         return self.namespace[f"file/{rel_path}"]

From 743f2cd347327030e6cbad609967ba6ede821c90 Mon Sep 17 00:00:00 2001
From: Robert Muil <robertmuil@gmail.com>
Date: Fri, 6 Feb 2026 20:27:16 +0000
Subject: [PATCH 9/9] Rewind diagnostics from test.yml

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 43ba2d4..5f358c8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -30,7 +30,7 @@ jobs:
         run: uv sync
 
       - name: Run tests with coverage
-        run: pwd && find ./ && uv run pytest tests/ --cov=src/pythinfer --cov-report=xml --cov-report=term-missing
+        run: uv run pytest tests/ --cov=src/pythinfer --cov-report=xml --cov-report=term-missing
 
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4