Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/galaxy/tool_util/verify/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,9 @@ def get_filename(filename: str) -> str:
assertions = attributes.get("assert_list", None)
if assertions is not None:
try:
verify_assertions(output_content, attributes["assert_list"], attributes.get("decompress", False))
# Auto-detect separator based on file type
sep = "," if attributes.get("ftype") == "csv" else "\t"
verify_assertions(output_content, attributes["assert_list"], attributes.get("decompress", False), sep=sep)
except AssertionError as err:
errmsg = f"{item_label} different than expected\n"
errmsg += unicodify(err)
Expand Down
13 changes: 10 additions & 3 deletions lib/galaxy/tool_util/verify/asserts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import (
Callable,
Dict,
Optional,
Tuple,
)

Expand Down Expand Up @@ -41,7 +42,9 @@
assertion_functions: Dict[str, Callable] = {k: v[1] for (k, v) in assertion_module_and_functions.items()}


def verify_assertions(data: bytes, assertion_description_list: list, decompress: bool = False):
def verify_assertions(
data: bytes, assertion_description_list: list, decompress: bool = False, sep: Optional[str] = None
):
"""This function takes a list of assertions and a string to check
these assertions against."""
if decompress:
Expand All @@ -51,10 +54,10 @@ def verify_assertions(data: bytes, assertion_description_list: list, decompress:
with get_fileobj(tmpfh.name, mode="rb", compressed_formats=None) as fh:
data = fh.read()
for assertion_description in assertion_description_list:
verify_assertion(data, assertion_description)
verify_assertion(data, assertion_description, sep=sep)


def verify_assertion(data: bytes, assertion_description):
def verify_assertion(data: bytes, assertion_description, sep: Optional[str] = None):
tag = assertion_description["tag"]
assert_function_name = "assert_" + tag
assert_function = assertion_functions.get(assert_function_name)
Expand Down Expand Up @@ -103,5 +106,9 @@ def verify_assertion(data: bytes, assertion_description):
if "children" in assert_function_args:
args["children"] = assertion_description["children"]

# Only set sep if the assertion accepts it and it's not already specified in XML
if "sep" in assert_function_args and sep is not None and "sep" not in assertion_description["attributes"]:
args["sep"] = sep

# TODO: Verify all needed function arguments are specified.
assert_function(**args)
7 changes: 4 additions & 3 deletions lib/galaxy/tool_util/verify/asserts/tabular.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from ._util import _assert_number

Sep = Annotated[str, AssertionParameter("Separator defining columns, default: tab")]
Sep = Annotated[str, AssertionParameter("Separator defining columns, default: tab (or comma for csv)")]
Comment = Annotated[
str,
AssertionParameter(
Expand Down Expand Up @@ -53,9 +53,10 @@ def assert_has_n_columns(
Number of columns can optionally also be specified with ``delta``. Alternatively the
range of expected occurrences can be specified by ``min`` and/or ``max``.

Optionally a column separator (``sep``, default is ``\t``) `and comment character(s)
Optionally a column separator (``sep``) and comment character(s)
can be specified (``comment``, default is empty string). The first non-comment
line is used for determining the number of columns.
line is used for determining the number of columns. The default separator is
tab for most tabular data types, but comma for csv files.
"""
first_line = get_first_line(output, comment)
n_columns = len(first_line.split(sep))
Expand Down
7 changes: 4 additions & 3 deletions lib/galaxy/tool_util/xsd/galaxy.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -2606,9 +2606,10 @@ For instance, ``<has_n_columns n="3"/>``. The assertion tests only the first lin
Number of columns can optionally also be specified with ``delta``. Alternatively the
range of expected occurrences can be specified by ``min`` and/or ``max``.

Optionally a column separator (``sep``, default is `` ``) `and comment character(s)
Optionally a column separator (``sep``) and comment character(s)
can be specified (``comment``, default is empty string). The first non-comment
line is used for determining the number of columns.
line is used for determining the number of columns. The default separator is
tab for most tabular data types, but comma for csv files.

$attribute_list::5]]></xs:documentation>
</xs:annotation>
Expand All @@ -2635,7 +2636,7 @@ $attribute_list::5]]></xs:documentation>
</xs:attribute>
<xs:attribute name="sep" type="xs:string" use="optional">
<xs:annotation>
<xs:documentation xml:lang="en"><![CDATA[Separator defining columns, default: tab]]></xs:documentation>
<xs:documentation xml:lang="en"><![CDATA[Separator defining columns, default: tab (or comma for csv)]]></xs:documentation>
</xs:annotation>
</xs:attribute>
<xs:attribute name="comment" type="xs:string" use="optional">
Expand Down
76 changes: 76 additions & 0 deletions test/unit/tool_util/test_verify_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,3 +93,79 @@ def test_sim_size_failure_still_updates(tmp_path):
assert assertion_error
assert (tmp_path / filename).exists()
assert (tmp_path / filename).open("rb").read() == b"expected"


def test_csv_ftype_auto_sep():
    """Check that has_n_columns counts comma-separated columns when ftype is 'csv'."""
    content = b"col1,col2,col3\n"
    # The assertion carries no explicit ``sep``, so the separator has to be
    # derived from the output's ftype.
    n_columns_assertion = {
        "tag": "has_n_columns",
        "attributes": {"n": "3"},
        "children": [],
    }

    # verify() raises if the assertion fails; returning normally is the pass condition.
    verify(
        "csv test",
        content,
        attributes={"ftype": "csv", "assert_list": [n_columns_assertion]},
        filename=None,
        get_filecontent=t_data_downloader_for(content),
    )


def test_tabular_ftype_auto_sep():
    """Check that has_n_columns counts tab-separated columns when ftype is 'tabular'."""
    content = b"col1\tcol2\tcol3\n"
    # No explicit ``sep`` here either: a non-csv ftype should keep the tab separator.
    n_columns_assertion = {
        "tag": "has_n_columns",
        "attributes": {"n": "3"},
        "children": [],
    }

    # verify() raises if the assertion fails; returning normally is the pass condition.
    verify(
        "tabular test",
        content,
        attributes={"ftype": "tabular", "assert_list": [n_columns_assertion]},
        filename=None,
        get_filecontent=t_data_downloader_for(content),
    )


def test_csv_ftype_explicit_sep_override():
    """Check that an explicit ``sep`` on the assertion wins over the ftype='csv' separator."""
    # Deliberately tab-separated content, even though the ftype below says csv.
    content = b"col1\tcol2\tcol3\n"
    # The explicit sep="\t" must take precedence over the comma implied by ftype.
    n_columns_assertion = {
        "tag": "has_n_columns",
        "attributes": {"n": "3", "sep": "\t"},
        "children": [],
    }

    # If the csv separator were applied instead, the line would count as a single
    # column and verify() would raise.
    verify(
        "csv with explicit sep test",
        content,
        attributes={"ftype": "csv", "assert_list": [n_columns_assertion]},
        filename=None,
        get_filecontent=t_data_downloader_for(content),
    )
Loading