Skip to content

Commit 16e18d4

Browse files
authored
Merge pull request #19395 from mvdbeek/alternative_format_source_fix
Alternative `format_source` fix
2 parents e2636d7 + f28ea20 commit 16e18d4

File tree

15 files changed

+136
-51
lines changed

15 files changed

+136
-51
lines changed

lib/galaxy/model/__init__.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -6829,9 +6829,10 @@ def dataset_elements_and_identifiers(self, identifiers=None):
68296829
def first_dataset_element(self) -> Optional["DatasetCollectionElement"]:
68306830
for element in self.elements:
68316831
if element.is_collection:
6832-
first_element = element.child_collection.first_dataset_element
6833-
if first_element:
6834-
return first_element
6832+
if element.child_collection:
6833+
first_element = element.child_collection.first_dataset_element
6834+
if first_element:
6835+
return first_element
68356836
else:
68366837
return element
68376838
return None
@@ -7003,7 +7004,7 @@ class HistoryDatasetCollectionAssociation(
70037004
create_time: Mapped[datetime] = mapped_column(default=now, nullable=True)
70047005
update_time: Mapped[datetime] = mapped_column(default=now, onupdate=now, index=True, nullable=True)
70057006

7006-
collection = relationship("DatasetCollection")
7007+
collection: Mapped["DatasetCollection"] = relationship("DatasetCollection")
70077008
history: Mapped[Optional["History"]] = relationship(back_populates="dataset_collections")
70087009

70097010
copied_from_history_dataset_collection_association = relationship(
@@ -7421,18 +7422,18 @@ class DatasetCollectionElement(Base, Dictifiable, Serializable):
74217422
element_index: Mapped[Optional[int]]
74227423
element_identifier: Mapped[Optional[str]] = mapped_column(Unicode(255))
74237424

7424-
hda = relationship(
7425+
hda: Mapped[Optional["HistoryDatasetAssociation"]] = relationship(
74257426
"HistoryDatasetAssociation",
74267427
primaryjoin=(lambda: DatasetCollectionElement.hda_id == HistoryDatasetAssociation.id),
74277428
)
7428-
ldda = relationship(
7429+
ldda: Mapped[Optional["LibraryDatasetDatasetAssociation"]] = relationship(
74297430
"LibraryDatasetDatasetAssociation",
74307431
primaryjoin=(lambda: DatasetCollectionElement.ldda_id == LibraryDatasetDatasetAssociation.id),
74317432
)
7432-
child_collection = relationship(
7433+
child_collection: Mapped[Optional["DatasetCollection"]] = relationship(
74337434
"DatasetCollection", primaryjoin=(lambda: DatasetCollectionElement.child_collection_id == DatasetCollection.id)
74347435
)
7435-
collection = relationship(
7436+
collection: Mapped[DatasetCollection] = relationship(
74367437
"DatasetCollection",
74377438
primaryjoin=(lambda: DatasetCollection.id == DatasetCollectionElement.dataset_collection_id),
74387439
back_populates="elements",

lib/galaxy/tool_util/parser/interface.py

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,6 +37,12 @@
3737
ResourceRequirement,
3838
ToolRequirements,
3939
)
40+
from galaxy.tool_util.parser.output_objects import (
41+
ToolOutput,
42+
ToolOutputCollection,
43+
)
44+
from galaxy.tools import Tool
45+
4046

4147
NOT_IMPLEMENTED_MESSAGE = "Galaxy tool format does not yet support this tool feature."
4248

@@ -331,7 +337,9 @@ def parse_provided_metadata_file(self):
331337
return "galaxy.json"
332338

333339
@abstractmethod
334-
def parse_outputs(self, tool):
340+
def parse_outputs(
341+
self, tool: Optional["Tool"]
342+
) -> Tuple[Dict[str, "ToolOutput"], Dict[str, "ToolOutputCollection"]]:
335343
"""Return a pair of output and output collections ordered
336344
dictionaries for use by Tool.
337345
"""

lib/galaxy/tool_util/parser/output_models.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88
from typing import (
99
List,
1010
Optional,
11+
Sequence,
1112
Union,
1213
)
1314

@@ -105,7 +106,7 @@ class FilePatternDatasetCollectionDescription(DatasetCollectionDescription):
105106
ToolOutput = Annotated[ToolOutputT, Field(discriminator="type")]
106107

107108

108-
def from_tool_source(tool_source: ToolSource) -> List[ToolOutput]:
109+
def from_tool_source(tool_source: ToolSource) -> Sequence[ToolOutput]:
109110
tool_outputs, tool_output_collections = tool_source.parse_outputs(None)
110111
outputs = []
111112
for tool_output in tool_outputs.values():

lib/galaxy/tool_util/parser/output_objects.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -281,7 +281,7 @@ def __init__(
281281
self.collection = True
282282
self.default_format = default_format
283283
self.structure = structure
284-
self.outputs: Dict[str, str] = {}
284+
self.outputs: Dict[str, ToolOutput] = {}
285285

286286
self.inherit_format = inherit_format
287287
self.inherit_metadata = inherit_metadata

lib/galaxy/tools/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,10 @@
8383
PageSource,
8484
ToolSource,
8585
)
86+
from galaxy.tool_util.parser.output_objects import (
87+
ToolOutput,
88+
ToolOutputCollection,
89+
)
8690
from galaxy.tool_util.parser.util import (
8791
parse_profile_version,
8892
parse_tool_version_with_defaults,
@@ -847,6 +851,8 @@ def __init__(
847851
self.tool_errors = None
848852
# Parse XML element containing configuration
849853
self.tool_source = tool_source
854+
self.outputs: Dict[str, ToolOutput] = {}
855+
self.output_collections: Dict[str, ToolOutputCollection] = {}
850856
self._is_workflow_compatible = None
851857
self.__help = None
852858
self.__tests: Optional[str] = None

lib/galaxy/tools/actions/__init__.py

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
cast,
1010
Dict,
1111
List,
12+
MutableMapping,
1213
Optional,
1314
Set,
1415
Tuple,
@@ -533,7 +534,7 @@ def handle_output(name, output, hidden=None):
533534
output,
534535
wrapped_params.params,
535536
inp_data,
536-
inp_dataset_collections,
537+
input_collections,
537538
input_ext,
538539
python_template_version=tool.python_template_version,
539540
execution_cache=execution_cache,
@@ -1156,7 +1157,7 @@ def determine_output_format(
11561157
output: "ToolOutput",
11571158
parameter_context,
11581159
input_datasets,
1159-
input_dataset_collections,
1160+
input_dataset_collections: MutableMapping[str, model.HistoryDatasetCollectionAssociation],
11601161
random_input_ext,
11611162
python_template_version="3",
11621163
execution_cache=None,
@@ -1198,7 +1199,7 @@ def determine_output_format(
11981199

11991200
if collection_name in input_dataset_collections:
12001201
try:
1201-
input_collection = input_dataset_collections[collection_name][0][0]
1202+
input_collection = input_dataset_collections[collection_name]
12021203
input_collection_collection = input_collection.collection
12031204
if element_index is None:
12041205
# just pick the first HDA

lib/galaxy/tools/evaluation.py

Lines changed: 42 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@
3333
MinimalToolApp,
3434
)
3535
from galaxy.tool_util.data import TabularToolDataTable
36+
from galaxy.tools.actions import determine_output_format
3637
from galaxy.tools.parameters import (
3738
visit_input_values,
3839
wrapped_json,
@@ -64,6 +65,7 @@
6465
safe_makedirs,
6566
unicodify,
6667
)
68+
from galaxy.util.path import StrPath
6769
from galaxy.util.template import (
6870
fill_template,
6971
InputNotFoundSyntaxError,
@@ -102,7 +104,7 @@ def __init__(self, *args: object, tool_id: Optional[str], tool_version: str, is_
102104
self.is_latest = is_latest
103105

104106

105-
def global_tool_logs(func, config_file: str, action_str: str, tool: "Tool"):
107+
def global_tool_logs(func, config_file: Optional[StrPath], action_str: str, tool: "Tool"):
106108
try:
107109
return func()
108110
except Exception as e:
@@ -130,7 +132,7 @@ class ToolEvaluator:
130132
job: model.Job
131133
materialize_datasets: bool = True
132134

133-
def __init__(self, app: MinimalToolApp, tool, job, local_working_directory):
135+
def __init__(self, app: MinimalToolApp, tool: "Tool", job, local_working_directory):
134136
self.app = app
135137
self.job = job
136138
self.tool = tool
@@ -186,6 +188,9 @@ def set_compute_environment(self, compute_environment: ComputeEnvironment, get_s
186188
out_data,
187189
output_collections=out_collections,
188190
)
191+
# late update of format_source outputs
192+
self._eval_format_source(job, inp_data, out_data)
193+
189194
self.execute_tool_hooks(inp_data=inp_data, out_data=out_data, incoming=incoming)
190195

191196
def execute_tool_hooks(self, inp_data, out_data, incoming):
@@ -275,6 +280,23 @@ def _materialize_objects(
275280

276281
return undeferred_objects
277282

283+
def _eval_format_source(
284+
self,
285+
job: model.Job,
286+
inp_data: Dict[str, Optional[model.DatasetInstance]],
287+
out_data: Dict[str, model.DatasetInstance],
288+
):
289+
for output_name, output in out_data.items():
290+
if (
291+
(tool_output := self.tool.outputs.get(output_name))
292+
and (tool_output.format_source or tool_output.change_format)
293+
and output.extension == "expression.json"
294+
):
295+
input_collections = {jtidca.name: jtidca.dataset_collection for jtidca in job.input_dataset_collections}
296+
ext = determine_output_format(tool_output, self.param_dict, inp_data, input_collections, None)
297+
if ext:
298+
output.extension = ext
299+
278300
def _replaced_deferred_objects(
279301
self,
280302
inp_data: Dict[str, Optional[model.DatasetInstance]],
@@ -364,6 +386,9 @@ def do_walk(inputs, input_values):
364386
do_walk(inputs, input_values)
365387

366388
def __populate_wrappers(self, param_dict, input_datasets, job_working_directory):
389+
390+
element_identifier_mapper = ElementIdentifierMapper(input_datasets)
391+
367392
def wrap_input(input_values, input):
368393
value = input_values[input.name]
369394
if isinstance(input, DataToolParameter) and input.multiple:
@@ -380,26 +405,26 @@ def wrap_input(input_values, input):
380405

381406
elif isinstance(input, DataToolParameter):
382407
dataset = input_values[input.name]
383-
wrapper_kwds = dict(
408+
element_identifier = element_identifier_mapper.identifier(dataset, param_dict)
409+
input_values[input.name] = DatasetFilenameWrapper(
410+
dataset=dataset,
384411
datatypes_registry=self.app.datatypes_registry,
385412
tool=self.tool,
386413
name=input.name,
387414
compute_environment=self.compute_environment,
415+
identifier=element_identifier,
416+
formats=input.formats,
388417
)
389-
element_identifier = element_identifier_mapper.identifier(dataset, param_dict)
390-
if element_identifier:
391-
wrapper_kwds["identifier"] = element_identifier
392-
wrapper_kwds["formats"] = input.formats
393-
input_values[input.name] = DatasetFilenameWrapper(dataset, **wrapper_kwds)
394418
elif isinstance(input, DataCollectionToolParameter):
395419
dataset_collection = value
396-
wrapper_kwds = dict(
420+
wrapper = DatasetCollectionWrapper(
421+
job_working_directory=job_working_directory,
422+
has_collection=dataset_collection,
397423
datatypes_registry=self.app.datatypes_registry,
398424
compute_environment=self.compute_environment,
399425
tool=self.tool,
400426
name=input.name,
401427
)
402-
wrapper = DatasetCollectionWrapper(job_working_directory, dataset_collection, **wrapper_kwds)
403428
input_values[input.name] = wrapper
404429
elif isinstance(input, SelectToolParameter):
405430
if input.multiple:
@@ -409,14 +434,13 @@ def wrap_input(input_values, input):
409434
)
410435
else:
411436
input_values[input.name] = InputValueWrapper(
412-
input, value, param_dict, profile=self.tool and self.tool.profile
437+
input, value, param_dict, profile=self.tool and self.tool.profile or None
413438
)
414439

415440
# HACK: only wrap if check_values is not false, this deals with external
416441
# tools where the inputs don't even get passed through. These
417442
# tools (e.g. UCSC) should really be handled in a special way.
418443
if self.tool.check_values:
419-
element_identifier_mapper = ElementIdentifierMapper(input_datasets)
420444
self.__walk_inputs(self.tool.inputs, param_dict, wrap_input)
421445

422446
def __populate_input_dataset_wrappers(self, param_dict, input_datasets):
@@ -443,13 +467,13 @@ def __populate_input_dataset_wrappers(self, param_dict, input_datasets):
443467
param_dict[name] = wrapper
444468
continue
445469
if not isinstance(param_dict_value, ToolParameterValueWrapper):
446-
wrapper_kwds = dict(
470+
param_dict[name] = DatasetFilenameWrapper(
471+
dataset=data,
447472
datatypes_registry=self.app.datatypes_registry,
448473
tool=self.tool,
449474
name=name,
450475
compute_environment=self.compute_environment,
451476
)
452-
param_dict[name] = DatasetFilenameWrapper(data, **wrapper_kwds)
453477

454478
def __populate_output_collection_wrappers(self, param_dict, output_collections, job_working_directory):
455479
tool = self.tool
@@ -460,14 +484,15 @@ def __populate_output_collection_wrappers(self, param_dict, output_collections,
460484
# message = message_template % ( name, tool.output_collections )
461485
# raise AssertionError( message )
462486

463-
wrapper_kwds = dict(
487+
wrapper = DatasetCollectionWrapper(
488+
job_working_directory=job_working_directory,
489+
has_collection=out_collection,
464490
datatypes_registry=self.app.datatypes_registry,
465491
compute_environment=self.compute_environment,
466492
io_type="output",
467493
tool=tool,
468494
name=name,
469495
)
470-
wrapper = DatasetCollectionWrapper(job_working_directory, out_collection, **wrapper_kwds)
471496
param_dict[name] = wrapper
472497
# TODO: Handle nested collections...
473498
for element_identifier, output_def in tool.output_collections[name].outputs.items():
@@ -662,6 +687,7 @@ def _build_command_line(self):
662687
if interpreter:
663688
# TODO: path munging for cluster/dataset server relocatability
664689
executable = command_line.split()[0]
690+
assert self.tool.tool_dir
665691
tool_dir = os.path.abspath(self.tool.tool_dir)
666692
abs_executable = os.path.join(tool_dir, executable)
667693
command_line = command_line.replace(executable, f"{interpreter} {shlex.quote(abs_executable)}", 1)

lib/galaxy/tools/parameters/basic.py

Lines changed: 21 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -1998,6 +1998,7 @@ def do_validate(v):
19981998
dataset_count += 1
19991999
do_validate(v.hda)
20002000
else:
2001+
assert v.child_collection
20012002
for dataset_instance in v.child_collection.dataset_instances:
20022003
dataset_count += 1
20032004
do_validate(dataset_instance)
@@ -2176,33 +2177,39 @@ def from_json(self, value, trans, other_values=None):
21762177
dataset_matcher_factory = get_dataset_matcher_factory(trans)
21772178
dataset_matcher = dataset_matcher_factory.dataset_matcher(self, other_values)
21782179
for v in rval:
2180+
value_to_check: Union[
2181+
DatasetInstance, DatasetCollection, DatasetCollectionElement, HistoryDatasetCollectionAssociation
2182+
] = v
21792183
if isinstance(v, DatasetCollectionElement):
21802184
if hda := v.hda:
2181-
v = hda
2185+
value_to_check = hda
21822186
elif ldda := v.ldda:
2183-
v = ldda
2187+
value_to_check = ldda
21842188
elif collection := v.child_collection:
2185-
v = collection
2186-
elif not v.collection and v.collection.populated_optimized:
2189+
value_to_check = collection
2190+
elif v.collection and not v.collection.populated_optimized:
21872191
raise ParameterValueError("the selected collection has not been populated.", self.name)
21882192
else:
21892193
raise ParameterValueError("Collection element in unexpected state", self.name)
2190-
if isinstance(v, DatasetInstance):
2191-
if v.deleted:
2194+
if isinstance(value_to_check, DatasetInstance):
2195+
if value_to_check.deleted:
21922196
raise ParameterValueError("the previously selected dataset has been deleted.", self.name)
2193-
elif v.dataset and v.dataset.state in [Dataset.states.ERROR, Dataset.states.DISCARDED]:
2197+
elif value_to_check.dataset and value_to_check.dataset.state in [
2198+
Dataset.states.ERROR,
2199+
Dataset.states.DISCARDED,
2200+
]:
21942201
raise ParameterValueError(
21952202
"the previously selected dataset has entered an unusable state", self.name
21962203
)
2197-
match = dataset_matcher.hda_match(v)
2204+
match = dataset_matcher.hda_match(value_to_check)
21982205
if match and match.implicit_conversion:
2199-
v.implicit_conversion = True # type:ignore[union-attr]
2200-
elif isinstance(v, HistoryDatasetCollectionAssociation):
2201-
if v.deleted:
2206+
value_to_check.implicit_conversion = True # type:ignore[attr-defined]
2207+
elif isinstance(value_to_check, HistoryDatasetCollectionAssociation):
2208+
if value_to_check.deleted:
22022209
raise ParameterValueError("the previously selected dataset collection has been deleted.", self.name)
2203-
v = v.collection
2204-
if isinstance(v, DatasetCollection):
2205-
if v.elements_deleted:
2210+
value_to_check = value_to_check.collection
2211+
if isinstance(value_to_check, DatasetCollection):
2212+
if value_to_check.elements_deleted:
22062213
raise ParameterValueError(
22072214
"the previously selected dataset collection has elements that are deleted.", self.name
22082215
)

0 commit comments

Comments
 (0)