Merge pull request galaxyproject#18884 from jmchilton/test_format

Implement Pydantic model for workflow test format.
bgruening · Sep 25, 2024 · 3c92414 · 3c92414
2 parents 82b8893 + a38f610
commit 3c92414
Show file tree

Hide file tree

Showing 20 changed files with 1,076 additions and 418 deletions.
diff --git a/lib/galaxy/tool_util/models.py b/lib/galaxy/tool_util/models.py
@@ -5,11 +5,23 @@
 """
 
 from typing import (
+    Any,
+    Dict,
     List,
     Optional,
+    Union,
 )
 
-from pydantic import BaseModel
+from pydantic import (
+    AnyUrl,
+    BaseModel,
+    ConfigDict,
+    RootModel,
+)
+from typing_extensions import (
+    NotRequired,
+    TypedDict,
+)
 
 from .parameters import (
     input_models_for_tool_source,
@@ -18,13 +30,15 @@
 from .parser.interface import (
     Citation,
     HelpContent,
+    OutputCompareType,
     ToolSource,
     XrefDict,
 )
 from .parser.output_models import (
     from_tool_source,
     ToolOutput,
 )
+from .verify.assertion_models import assertions
 
 
 class ParsedTool(BaseModel):
@@ -73,3 +87,85 @@ def parse_tool(tool_source: ToolSource) -> ParsedTool:
         xrefs=xrefs,
         help=help,
     )
+
+
+# Base class for the test-format models in this module: ``extra="forbid"`` makes Pydantic reject
+# any key that is not a declared field instead of silently ignoring it.
+class StrictModel(BaseModel):
+
+    model_config = ConfigDict(
+        extra="forbid",
+    )
+
+
+# Fields shared by the dataset-level assertion models that subclass this. Every field is optional
+# (defaults to None) and, via StrictModel, undeclared keys are rejected.
+class BaseTestOutputModel(StrictModel):
+    # file / path / location: alternative references to the expected file.
+    # NOTE(review): presumably "file" is the Galaxy-style key and "path"/"location" the CWL-style
+    # ones, as handled in verify_file_contents_against_dict - confirm.
+    file: Optional[str] = None
+    path: Optional[str] = None
+    # Must parse as a URL (AnyUrl).
+    location: Optional[AnyUrl] = None
+    ftype: Optional[str] = None
+    sort: Optional[bool] = None
+    # Restricted to the members of OutputCompareType.
+    compare: Optional[OutputCompareType] = None
+    checksum: Optional[str] = None
+    # Free-form mapping; values are not validated here.
+    metadata: Optional[Dict[str, Any]] = None
+    # Shape is defined by ``assertions`` from .verify.assertion_models.
+    asserts: Optional[assertions] = None
+    # NOTE(review): delta / delta_frac / lines_diff / sort / decompress presumably mirror the
+    # comparison attributes accepted for tool test outputs - confirm semantics against the verify code.
+    delta: Optional[int] = None
+    delta_frac: Optional[float] = None
+    lines_diff: Optional[int] = None
+    decompress: Optional[bool] = None
+
+
+# Assertions for a dataset output; adds no fields of its own to BaseTestOutputModel.
+class TestDataOutputAssertions(BaseTestOutputModel):
+    pass
+
+
+# Assertions for a collection element that itself holds elements. The quoted annotations are
+# forward references: the TestCollectionElementAssertion union is only defined further down, after
+# which model_rebuild() is called on this class to resolve them.
+# NOTE(review): ``elements`` and ``element_tests`` carry the same type; whether one is an alias of
+# the other is not visible here - confirm.
+class TestCollectionCollectionElementAssertions(StrictModel):
+    elements: Optional[Dict[str, "TestCollectionElementAssertion"]] = None
+    element_tests: Optional[Dict[str, "TestCollectionElementAssertion"]] = None
+
+
+# Assertions for a collection element that is a dataset; adds no fields of its own to
+# BaseTestOutputModel.
+class TestCollectionDatasetElementAssertions(BaseTestOutputModel):
+    pass
+
+
+# A collection element is validated as either the dataset-element model or the
+# nested-collection-element model.
+TestCollectionElementAssertion = Union[
+    TestCollectionDatasetElementAssertions, TestCollectionCollectionElementAssertions
+]
+# Now that the union exists, resolve the "TestCollectionElementAssertion" forward references used
+# in the class body of TestCollectionCollectionElementAssertions.
+TestCollectionCollectionElementAssertions.model_rebuild()
+
+
+# Collection-level attributes that can be asserted; the only declared one is the collection type.
+class CollectionAttributes(StrictModel):
+    collection_type: Optional[str] = None
+
+
+# Assertions for a collection output: per-element assertions plus collection-level attributes.
+# All fields are optional and default to None.
+class TestCollectionOutputAssertions(StrictModel):
+    elements: Optional[Dict[str, TestCollectionElementAssertion]] = None
+    # The TestCollectionElementAssertion union is already defined at this point, so it is referenced
+    # directly (as ``elements`` does) rather than through a forward-reference string.
+    element_tests: Optional[Dict[str, TestCollectionElementAssertion]] = None
+    attributes: Optional[CollectionAttributes] = None
+
+
+# A bare scalar given as the expected value of an output.
+TestOutputLiteral = Union[bool, int, float, str]
+
+# What may be asserted about one output: collection assertions, dataset assertions or a literal.
+TestOutputAssertions = Union[TestCollectionOutputAssertions, TestDataOutputAssertions, TestOutputLiteral]
+
+# Free-form job description; its keys and values are not validated by these models.
+JobDict = Dict[str, Any]
+
+
+# One entry of a test file: an optional description, a free-form job dict and the assertions to
+# apply to the outputs.
+class TestJob(StrictModel):
+    # Defaults to None so the key may be omitted, matching ``doc: NotRequired[str]`` in TestJobDict.
+    # Without a default, Pydantic v2 treats an Optional field as required (nullable but not
+    # omittable), which would reject test entries that carry no ``doc``.
+    doc: Optional[str] = None
+    job: JobDict
+    # NOTE(review): keys are presumably output names/labels - confirm against the verify code.
+    outputs: Dict[str, TestOutputAssertions]
+
+
+# A whole test file: the top-level value is a list of TestJob entries.
+Tests = RootModel[List[TestJob]]
+
+# TODO: typed dict versions of all the above for verify code - make this Dict[str, Any] here more
+# specific.
+OutputChecks = Union[TestOutputLiteral, Dict[str, Any]]
+OutputsDict = Dict[str, OutputChecks]
+
+
+# TypedDict counterpart of TestJob for code that works on plain dicts; only ``outputs`` is required.
+# NOTE(review): ``job`` is NotRequired here but has no default on the TestJob model - confirm which
+# of the two is intended.
+class TestJobDict(TypedDict):
+    doc: NotRequired[str]
+    job: NotRequired[JobDict]
+    outputs: OutputsDict
+
+
+# Plain-dict form of a whole test file: a list of TestJobDict entries.
+TestDicts = List[TestJobDict]
diff --git a/lib/galaxy/tool_util/parser/interface.py b/lib/galaxy/tool_util/parser/interface.py
@@ -5,6 +5,7 @@
     ABCMeta,
     abstractmethod,
 )
+from enum import Enum
 from os.path import join
 from typing import (
     Any,
@@ -49,9 +50,18 @@ class AssertionDict(TypedDict):
 XmlInt = Union[str, int]
 
 
+# Comparison methods accepted for the ``compare`` attribute of a test output. Mixing in ``str``
+# makes each member compare equal to its plain string value.
+class OutputCompareType(str, Enum):
+    diff = "diff"
+    re_match = "re_match"
+    sim_size = "sim_size"
+    re_match_multiline = "re_match_multiline"
+    contains = "contains"
+    image_diff = "image_diff"
+
+
 class ToolSourceTestOutputAttributes(TypedDict):
     object: NotRequired[Optional[Any]]
-    compare: str
+    compare: OutputCompareType
     lines_diff: int
     delta: int
     delta_frac: Optional[float]

diff --git a/lib/galaxy/tool_util/parser/xml.py b/lib/galaxy/tool_util/parser/xml.py
@@ -43,6 +43,7 @@
     DynamicOptions,
     HelpContent,
     InputSource,
+    OutputCompareType,
     PageSource,
     PagesSource,
     RequiredFiles,
@@ -834,7 +835,7 @@ def __parse_test_attributes(
         value_object = json.loads(attrib.pop("value_json"))
 
     # Method of comparison
-    compare: str = attrib.pop("compare", "diff").lower()
+    compare: OutputCompareType = cast(OutputCompareType, attrib.pop("compare", "diff").lower())
     # Number of lines to allow to vary in logs (for dates, etc)
     lines_diff: int = int(attrib.pop("lines_diff", "0"))
     # Allow a file size to vary if sim_size compare

diff --git a/lib/galaxy/tool_util/validate_test_format.py b/lib/galaxy/tool_util/validate_test_format.py
@@ -0,0 +1,40 @@
+#!/usr/bin/env python
+
+import argparse
+import sys
+
+import yaml
+
+from galaxy.tool_util.models import Tests
+
+# Help text for the command-line interface; passed to argparse as ``description`` in arg_parser().
+DESCRIPTION = """
+A small utility to verify the Planemo test format.
+
+This script doesn't use semantic information about tools or workflows so only
+the structure of the file is checked and things like inputs matching up is not
+included.
+"""
+
+
+def validate_test_file(test_file: str) -> None:
+    """Load ``test_file`` as YAML and validate its structure against the ``Tests`` model.
+
+    :param test_file: path of the test file to check.
+    :raises pydantic.ValidationError: if the parsed content does not match ``Tests``.
+    """
+    # Decode as UTF-8 explicitly instead of relying on the platform's default encoding.
+    with open(test_file, encoding="utf-8") as f:
+        test_data = yaml.safe_load(f)
+    Tests.model_validate(test_data)
+
+
+def arg_parser() -> argparse.ArgumentParser:
+    """Build the command-line parser, which takes a single positional ``test_file`` argument."""
+    cli = argparse.ArgumentParser(description=DESCRIPTION)
+    cli.add_argument("test_file")
+    return cli
+
+
+def main(argv=None) -> None:
+    """Command-line entry point: parse the arguments and validate the given test file.
+
+    :param argv: argument list to parse; ``sys.argv[1:]`` is used when this is None.
+    """
+    cli_args = sys.argv[1:] if argv is None else argv
+    options = arg_parser().parse_args(cli_args)
+    validate_test_file(options.test_file)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/lib/galaxy/tool_util/verify/__init__.py b/lib/galaxy/tool_util/verify/__init__.py
@@ -597,6 +597,8 @@ def files_image_diff(file1: str, file2: str, attributes: Optional[Dict[str, Any]
 # TODO: After tool-util with this included is published, fefactor planemo.test._check_output
 # to use this function. There is already a comment there about breaking fewer abstractions.
 # https://github.com/galaxyproject/planemo/blob/master/planemo/test/_check_output.py
+# TODO: Also migrate the logic for checking non-dictionaries out of Planemo - this function now
+# does that check also.
 def verify_file_path_against_dict(
     get_filename: GetFilenameT,
     get_location: GetLocationT,
@@ -621,30 +623,38 @@ def verify_file_contents_against_dict(
     test_properties,
     test_data_target_dir: Optional[str] = None,
 ) -> None:
-    # Support Galaxy-like file location (using "file") or CWL-like ("path" or "location").
-    expected_file = test_properties.get("file", None)
-    if expected_file is None:
-        expected_file = test_properties.get("path", None)
-    if expected_file is None:
-        location = test_properties.get("location")
-        if location:
-            if location.startswith(("http://", "https://")):
-                assert get_location
-                expected_file = get_location(location)
-            else:
-                expected_file = location.split("file://", 1)[-1]
-
-    if "asserts" in test_properties:
-        test_properties["assert_list"] = to_test_assert_list(test_properties["asserts"])
-    verify(
-        item_label,
-        output_content,
-        attributes=test_properties,
-        filename=expected_file,
-        get_filename=get_filename,
-        keep_outputs_dir=test_data_target_dir,
-        verify_extra_files=None,
-    )
+    expected_file: Optional[str] = None
+    if isinstance(test_properties, dict):
+        # Support Galaxy-like file location (using "file") or CWL-like ("path" or "location").
+        expected_file = test_properties.get("file", None)
+        if expected_file is None:
+            expected_file = test_properties.get("path", None)
+        if expected_file is None:
+            location = test_properties.get("location")
+            if location:
+                if location.startswith(("http://", "https://")):
+                    assert get_location
+                    expected_file = get_location(location)
+                else:
+                    expected_file = location.split("file://", 1)[-1]
+
+        if "asserts" in test_properties:
+            test_properties["assert_list"] = to_test_assert_list(test_properties["asserts"])
+        verify(
+            item_label,
+            output_content,
+            attributes=test_properties,
+            filename=expected_file,
+            get_filename=get_filename,
+            keep_outputs_dir=test_data_target_dir,
+            verify_extra_files=None,
+        )
+    else:
+        output_value = json.loads(output_content.decode("utf-8"))
+        if test_properties != output_value:
+            template = "Output [%s] value [%s] does not match expected value [%s]."
+            message = template % (item_label, output_value, test_properties)
+            raise AssertionError(message)
 
 
 __all__ = [