Merge branch 'release_23.1' into dev
mvdbeek committed Sep 10, 2023
2 parents 19faa82 + cd50265 commit e79ba43
Showing 3 changed files with 89 additions and 31 deletions.
25 changes: 14 additions & 11 deletions lib/galaxy/jobs/__init__.py
@@ -1765,18 +1765,21 @@ def _finish_dataset(self, output_name, dataset, job, context, final_job_state, r
metadata_set_successfully = self.external_output_metadata.external_metadata_set_successfully(
dataset, output_name, self.sa_session, working_directory=self.working_directory
)
if retry_internally and not metadata_set_successfully:
# If Galaxy was expected to sniff type and didn't - do so.
if dataset.ext == "_sniff_":
extension = sniff.handle_uploaded_dataset_file(
dataset.dataset.file_name, self.app.datatypes_registry
)
dataset.extension = extension
if not metadata_set_successfully:
if self.tool.tool_type == "expression":
dataset._state = model.Dataset.states.OK
elif retry_internally:
# If Galaxy was expected to sniff type and didn't - do so.
if dataset.ext == "_sniff_":
extension = sniff.handle_uploaded_dataset_file(
dataset.dataset.file_name, self.app.datatypes_registry
)
dataset.extension = extension

# call datatype.set_meta directly for the initial set_meta call during dataset creation
dataset.datatype.set_meta(dataset, overwrite=False)
elif job.states.ERROR != final_job_state and not metadata_set_successfully:
dataset._state = model.Dataset.states.FAILED_METADATA
# call datatype.set_meta directly for the initial set_meta call during dataset creation
dataset.datatype.set_meta(dataset, overwrite=False)
else:
dataset._state = model.Dataset.states.FAILED_METADATA
else:
self.external_output_metadata.load_metadata(
dataset,
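Note on the hunk above: when the external metadata run did not succeed, outputs of expression tools are now marked OK instead of falling through to FAILED_METADATA, while the existing in-process retry (sniffing plus datatype.set_meta) is kept for everything else. The following is a hedged, self-contained sketch of that decision logic only; the function name and the plain-string states are illustrative stand-ins, not Galaxy identifiers.

    # Illustrative sketch (not Galaxy API): how the reworked branch resolves an
    # output whose external metadata run did not succeed.
    def resolve_failed_metadata(tool_type: str, retry_internally: bool) -> str:
        if tool_type == "expression":
            # Expression tool outputs carry no real file content to sniff or
            # set metadata on, so the dataset is marked OK rather than failed.
            return "ok"
        if retry_internally:
            # Galaxy falls back to sniffing the datatype and calling
            # datatype.set_meta() in-process.
            return "retry_set_meta_in_process"
        return "failed_metadata"

    assert resolve_failed_metadata("expression", retry_internally=False) == "ok"
    assert resolve_failed_metadata("data", retry_internally=True) == "retry_set_meta_in_process"
    assert resolve_failed_metadata("data", retry_internally=False) == "failed_metadata"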
56 changes: 36 additions & 20 deletions lib/galaxy/metadata/set_metadata.py
@@ -16,6 +16,7 @@
import os
import sys
import traceback
from functools import partial
from pathlib import Path
from typing import Optional

@@ -42,6 +43,7 @@
from galaxy.job_execution.setup import TOOL_PROVIDED_JOB_METADATA_KEYS
from galaxy.model import (
Dataset,
DatasetInstance,
HistoryDatasetAssociation,
Job,
store,
@@ -75,6 +77,12 @@
MAX_STDIO_READ_BYTES = 100 * 10**6 # 100 MB


def reset_external_filename(dataset_instance: DatasetInstance):
assert dataset_instance.dataset
dataset_instance.dataset.external_filename = None
dataset_instance.dataset.extra_files_path = None


def set_validated_state(dataset_instance):
datatype_validation = validate(dataset_instance)

@@ -101,9 +109,7 @@ def set_meta_with_tool_provided(
extension = dataset_instance.extension
if extension == "_sniff_":
try:
extension = sniff.handle_uploaded_dataset_file(
dataset_instance.dataset.external_filename, datatypes_registry
)
extension = sniff.handle_uploaded_dataset_file(dataset_instance.dataset.file_name, datatypes_registry)
# We need to both set the extension so it is available to set_meta
# and record it in the metadata so it can be reloaded on the server
# side and the model updated (see MetadataCollection.{from,to}_JSON_dict)
@@ -371,6 +377,7 @@ def set_meta(new_dataset_instance, file_dict):
set_meta_kwds = stringify_dictionary_keys(
json.load(open(filename_kwds))
) # load kwds; need to ensure our keywords are not unicode
object_store_update_actions = []
try:
is_deferred = bool(unnamed_is_deferred.get(dataset_instance_id))
dataset.metadata_deferred = is_deferred
@@ -392,7 +399,9 @@ def set_meta(new_dataset_instance, file_dict):
if not link_data_only:
# Only set external filename if we're dealing with files in job working directory.
# Fixes link_data_only uploads
dataset.dataset.external_filename = external_filename
if not object_store:
# overriding the external filename would break pushing to object stores
dataset.dataset.external_filename = external_filename
# We derive extra_files_dir_name from external_filename, because OutputsToWorkingDirectoryPathRewriter
# always rewrites the path to include the uuid, even if store_by is set to id, and the extra files
# rewrite is derived from the dataset path (since https://github.com/galaxyproject/galaxy/pull/16541).
@@ -423,16 +432,6 @@ def set_meta(new_dataset_instance, file_dict):
setattr(dataset.metadata, metadata_name, metadata_file_override)
if output_dict.get("validate", False):
set_validated_state(dataset)
if dataset_instance_id not in unnamed_id_to_path:
# We're going to run through set_metadata in collect_dynamic_outputs with more contextual metadata,
# so skip set_meta here.
set_meta(dataset, file_dict)
if extended_metadata_collection:
collect_extra_files(object_store, dataset, ".")
dataset_state = "deferred" if (is_deferred and final_job_state == "ok") else final_job_state
if not dataset.state == dataset.states.ERROR:
# Don't overwrite failed state (for invalid content) here
dataset.state = dataset.dataset.state = dataset_state

if extended_metadata_collection:
if not object_store or not export_store:
@@ -441,7 +440,22 @@
if not is_deferred and not link_data_only and os.path.getsize(external_filename):
# Here we might be updating a disk based objectstore when outputs_to_working_directory is used,
# or a remote object store from its cache path.
object_store.update_from_file(dataset.dataset, file_name=external_filename, create=True)
object_store_update_actions.append(
partial(
object_store.update_from_file, dataset.dataset, file_name=external_filename, create=True
)
)
object_store_update_actions.append(partial(reset_external_filename, dataset))
object_store_update_actions.append(partial(export_store.add_dataset, dataset))
if dataset_instance_id not in unnamed_id_to_path:
object_store_update_actions.append(partial(collect_extra_files, object_store, dataset, "."))
dataset_state = "deferred" if (is_deferred and final_job_state == "ok") else final_job_state
if not dataset.state == dataset.states.ERROR:
# Don't overwrite failed state (for invalid content) here
dataset.state = dataset.dataset.state = dataset_state
# We're going to run through set_metadata in collect_dynamic_outputs with more contextual metadata,
# so only run set_meta for fixed outputs
set_meta(dataset, file_dict)
# TODO: merge expression_context into tool_provided_metadata so we don't have to special case this (here and in _finish_dataset)
meta = tool_provided_metadata.get_dataset_meta(output_name, dataset.dataset.id, dataset.dataset.uuid)
if meta:
@@ -472,19 +486,21 @@ def set_meta(new_dataset_instance, file_dict):
if context_key in context:
context_value = context[context_key]
setattr(dataset, context_key, context_value)
# We only want to persist the external_filename if the dataset has been linked in.
if not is_deferred and not link_data_only:
dataset.dataset.external_filename = None
dataset.dataset.extra_files_path = None
export_store.add_dataset(dataset)
else:
if dataset_instance_id not in unnamed_id_to_path:
# We're going to run through set_metadata in collect_dynamic_outputs with more contextual metadata,
# so only run set_meta for fixed outputs
set_meta(dataset, file_dict)
dataset.metadata.to_JSON_dict(filename_out) # write out results of set_meta

with open(filename_results_code, "w+") as tf:
json.dump((True, "Metadata has been set successfully"), tf) # setting metadata has succeeded
except Exception:
with open(filename_results_code, "w+") as tf:
json.dump((False, traceback.format_exc()), tf) # setting metadata has failed somehow
finally:
for action in object_store_update_actions:
action()

if export_store:
export_store.push_metadata_files()
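Note on set_metadata.py: the recurring change in this file is that side effects (object store updates, resetting external_filename, registering the dataset with the export store) are no longer run inline but queued as functools.partial callables in object_store_update_actions and drained in a finally block, so they run after the results-code file is written, even when metadata collection raises. A minimal, self-contained sketch of that pattern follows; push_to_store, write_results, and set_metadata_like are made-up names for illustration, not Galaxy code.

    import json
    from functools import partial

    def push_to_store(name: str, path: str) -> None:
        # Illustrative stand-in for object_store.update_from_file(...)
        print(f"pushing {path} to the object store as {name}")

    def write_results(path: str, payload) -> None:
        with open(path, "w") as fh:
            json.dump(payload, fh)

    def set_metadata_like(do_work, results_path: str) -> None:
        deferred_actions = []
        try:
            # Queue side effects instead of running them inline.
            deferred_actions.append(partial(push_to_store, "output1", "/tmp/output1.dat"))
            do_work()  # the actual metadata work; may raise
            write_results(results_path, (True, "Metadata has been set successfully"))
        except Exception:
            write_results(results_path, (False, "setting metadata failed"))
        finally:
            # Drain the queue whether or not the work above raised.
            for action in deferred_actions:
                action()

Compared with calling the store update directly where the work happens, queueing partials keeps those updates out of the exception path: any action queued before a failure still runs, and it runs exactly once, after the success/failure result has been recorded.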
39 changes: 39 additions & 0 deletions lib/galaxy_test/api/test_workflows.py
@@ -1938,6 +1938,45 @@ def test_workflow_metadata_validation_0(self):
history_id=history_id,
)

def test_run_workflow_pick_value_bam_pja(self):
# Makes sure that setting metadata on expression tool data outputs
# doesn't break result evaluation.
with self.dataset_populator.test_history() as history_id:
self._run_workflow(
"""class: GalaxyWorkflow
inputs:
some_file:
type: data
steps:
pick_value:
tool_id: pick_value
in:
style_cond|type_cond|pick_from_0|value:
source: some_file
out:
data_param:
change_datatype: bam
tool_state:
style_cond:
__current_case__: 2
pick_style: first_or_error
type_cond:
__current_case__: 4
param_type: data
pick_from:
- __index__: 0
value:
__class__: RuntimeValue
""",
test_data="""
some_file:
value: 1.bam
file_type: bam
type: File
""",
history_id=history_id,
)

def test_run_workflow_simple_conditional_step(self):
with self.dataset_populator.test_history() as history_id:
summary = self._run_workflow(
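For context on what the new test exercises: pick_value is an expression tool, and the change_datatype: bam output action forces metadata to be set on its data output, which appears to be exactly the case the lib/galaxy/jobs/__init__.py change now resolves to OK instead of FAILED_METADATA. Below is a rough sketch of the first_or_error pick style named in the tool state, assuming its usual "first non-null value, else fail" semantics; this is an illustration, not the tool's implementation.

    from typing import Optional, Sequence, TypeVar

    T = TypeVar("T")

    def pick_first_or_error(candidates: Sequence[Optional[T]]) -> T:
        # Return the first non-null candidate; fail if every candidate is unset.
        for candidate in candidates:
            if candidate is not None:
                return candidate
        raise ValueError("no value to pick")

    assert pick_first_or_error([None, "1.bam"]) == "1.bam"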
