diff --git a/cwltool/provenance_profile.py b/cwltool/provenance_profile.py index 1eec5bb6b..0c1445cd2 100644 --- a/cwltool/provenance_profile.py +++ b/cwltool/provenance_profile.py @@ -312,7 +312,7 @@ def record_process_end( self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri, when) def _add_nested_annotations( - self, annotation_key, annotation_value, e: ProvEntity + self, annotation_key: str, annotation_value: Any, e: ProvEntity ) -> ProvEntity: """Propagate input data annotations to provenance.""" # Change https:// into http:// first @@ -398,13 +398,13 @@ def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, st # Identify all schema annotations schema_annotations = dict( - [(v, value[v]) for v in value.keys() if "schema.org" in v] + [(v, value[v]) for v in value.keys() if v.startswith("https://schema.org")] ) # Transfer SCHEMA annotations to provenance for s in schema_annotations: if "additionalType" in s: - additional_type = schema_annotations[s].split(sep="/")[ + additional_type = cast(str, schema_annotations[s]).split(sep="/")[ -1 ] # find better method? file_entity.add_attributes({PROV_TYPE: SCHEMA[additional_type]}) @@ -527,13 +527,13 @@ def declare_directory(self, value: CWLObjectType) -> ProvEntity: # Identify all schema annotations schema_annotations = dict( - [(v, value[v]) for v in value.keys() if "schema.org" in v] + [(v, value[v]) for v in value.keys() if v.startswith("https://schema.org")] ) # Transfer SCHEMA annotations to provenance for s in schema_annotations: if "additionalType" in s: - additional_type = schema_annotations[s].split(sep="/")[ + additional_type = cast(str, schema_annotations[s]).split(sep="/")[ -1 ] # find better method? 
coll.add_attributes({PROV_TYPE: SCHEMA[additional_type]}) diff --git a/tests/test_provenance.py b/tests/test_provenance.py index 4adb5e5c1..cfb80ccb8 100644 --- a/tests/test_provenance.py +++ b/tests/test_provenance.py @@ -81,6 +81,28 @@ def test_revsort_workflow(tmp_path: Path) -> None: check_provenance(folder) +@needs_docker +def test_revsort_label_annotations(tmp_path: Path) -> None: + """Affirm that file format annotations (here an IANA media type) from the input object make it into CWLProv as schema.org encodingFormat.""" + base_path = cwltool( + tmp_path, + get_data("tests/wf/revsort.cwl"), + get_data("tests/wf/revsort-job.json"), + ) + prov_file = base_path / "metadata" / "provenance" / "primary.cwlprov.nt" + arcp_root = find_arcp(base_path) + g = Graph() + with open(prov_file, "rb") as f: + g.parse(file=f, format="nt", publicID=arcp_root) + mime_having_objects = list(g.subjects(SCHEMA.encodingFormat)) + assert len(mime_having_objects) == 2 + for obj in mime_having_objects: + assert ( + cast(Literal, list(g.objects(obj, SCHEMA.encodingFormat))[0]).value + == "https://www.iana.org/assignments/media-types/text/plain" + ) + + @needs_docker def test_nested_workflow(tmp_path: Path) -> None: check_provenance(cwltool(tmp_path, get_data("tests/wf/nested.cwl")), nested=True)