diff --git a/doc/source/admin/galaxy_options.rst b/doc/source/admin/galaxy_options.rst index 2ce7e0882bf5..010844a4fa18 100644 --- a/doc/source/admin/galaxy_options.rst +++ b/doc/source/admin/galaxy_options.rst @@ -1167,18 +1167,6 @@ :Type: str -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -``delay_tool_initialization`` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -:Description: - Set this to true to delay parsing of tool inputs and outputs until - they are needed. This results in faster startup times but uses - more memory when using forked Galaxy processes. -:Default: ``false`` -:Type: bool - - ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ``biotools_content_directory`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ diff --git a/lib/galaxy/app.py b/lib/galaxy/app.py index c33207390d23..d16592821cde 100644 --- a/lib/galaxy/app.py +++ b/lib/galaxy/app.py @@ -646,6 +646,8 @@ def __init__(self, **kwargs) -> None: ("application stack", self._shutdown_application_stack), ] self._register_singleton(StructuredApp, self) # type: ignore[type-abstract] + if kwargs.get("is_webapp"): + self.is_webapp = kwargs["is_webapp"] # A lot of postfork initialization depends on the server name, ensure it is set immediately after forking before other postfork functions self.application_stack.register_postfork_function(self.application_stack.set_postfork_server_name, self) self.config.reload_sanitize_allowlist(explicit="sanitize_allowlist_file" in kwargs) diff --git a/lib/galaxy/app_unittest_utils/galaxy_mock.py b/lib/galaxy/app_unittest_utils/galaxy_mock.py index e36efe49996c..6d5c5233f47d 100644 --- a/lib/galaxy/app_unittest_utils/galaxy_mock.py +++ b/lib/galaxy/app_unittest_utils/galaxy_mock.py @@ -104,6 +104,7 @@ class MockApp(di.Container, GalaxyDataTestApp): history_manager: HistoryManager job_metrics: JobMetrics stop: bool + is_webapp: bool = True def __init__(self, config=None, **kwargs) -> None: super().__init__() @@ -227,7 +228,7 @@ def __init__(self, **kwargs): # set by MockDir self.enable_tool_document_cache = False - 
self.delay_tool_initialization = True + self.tool_cache_data_dir = os.path.join(self.root, "tool_cache") self.external_chown_script = None self.check_job_script_integrity = False self.check_job_script_integrity_count = 0 diff --git a/lib/galaxy/app_unittest_utils/tools_support.py b/lib/galaxy/app_unittest_utils/tools_support.py index 17486ec3a9e2..38d491d5d7a3 100644 --- a/lib/galaxy/app_unittest_utils/tools_support.py +++ b/lib/galaxy/app_unittest_utils/tools_support.py @@ -109,7 +109,6 @@ def setup_app(self): def __setup_tool(self): tool_source = get_tool_source(self.tool_file) self.tool = create_tool_from_source(self.app, tool_source, config_file=self.tool_file) - self.tool.assert_finalized() if getattr(self, "tool_action", None): self.tool.tool_action = self.tool_action return self.tool diff --git a/lib/galaxy/config/sample/galaxy.yml.sample b/lib/galaxy/config/sample/galaxy.yml.sample index a027b52c6e43..3f4d369e9981 100644 --- a/lib/galaxy/config/sample/galaxy.yml.sample +++ b/lib/galaxy/config/sample/galaxy.yml.sample @@ -899,11 +899,6 @@ galaxy: # this option will be resolved with respect to <data_dir>. #tool_search_index_dir: tool_search_index - # Set this to true to delay parsing of tool inputs and outputs until - # they are needed. This results in faster startup times but uses more - # memory when using forked Galaxy processes. - #delay_tool_initialization: false - # Point Galaxy at a repository consisting of a copy of the bio.tools # database (e.g. https://github.com/bio-tools/content/) to resolve # bio.tools data for tool metadata. diff --git a/lib/galaxy/config/schemas/config_schema.yml b/lib/galaxy/config/schemas/config_schema.yml index 7d7b07796bcb..e2f2c7320acf 100644 --- a/lib/galaxy/config/schemas/config_schema.yml +++ b/lib/galaxy/config/schemas/config_schema.yml @@ -846,15 +846,6 @@ mapping: desc: Directory in which the toolbox search index is stored. 
- delay_tool_initialization: - type: bool - default: false - required: false - desc: | - Set this to true to delay parsing of tool inputs and outputs until they are needed. - This results in faster startup times but uses more memory when using forked Galaxy - processes. - biotools_content_directory: type: str required: false diff --git a/lib/galaxy/structured_app.py b/lib/galaxy/structured_app.py index f21de41d4aca..d18638c97c09 100644 --- a/lib/galaxy/structured_app.py +++ b/lib/galaxy/structured_app.py @@ -73,6 +73,7 @@ class BasicSharedApp(Container): class MinimalToolApp(Protocol): + is_webapp: bool name: str # Leave config as Any: in a full Galaxy app this is a GalaxyAppConfiguration object, but this is mostly dynamically # generated, and here we want to also allow other kinds of configuration objects (e.g. a Bunch). diff --git a/lib/galaxy/tool_shed/tools/tool_validator.py b/lib/galaxy/tool_shed/tools/tool_validator.py index 1220acacc213..9b94813584a2 100644 --- a/lib/galaxy/tool_shed/tools/tool_validator.py +++ b/lib/galaxy/tool_shed/tools/tool_validator.py @@ -89,7 +89,6 @@ def load_tool_from_config(self, repository_id, full_path): repository_id=repository_id, allow_code_files=False, ) - tool.assert_finalized(raise_if_invalid=True) valid = True error_message = None except KeyError as e: diff --git a/lib/galaxy/tool_util/parser/interface.py b/lib/galaxy/tool_util/parser/interface.py index a21c427672a5..aba4142edb46 100644 --- a/lib/galaxy/tool_util/parser/interface.py +++ b/lib/galaxy/tool_util/parser/interface.py @@ -270,7 +270,7 @@ def parse_stdio(self): return [], [] @abstractmethod - def parse_help(self): + def parse_help(self) -> Optional[str]: """Return RST definition of help text for tool or None if the tool doesn't define help text. 
""" diff --git a/lib/galaxy/tool_util/parser/output_objects.py b/lib/galaxy/tool_util/parser/output_objects.py index 9cf29a420778..da819cab5172 100644 --- a/lib/galaxy/tool_util/parser/output_objects.py +++ b/lib/galaxy/tool_util/parser/output_objects.py @@ -5,6 +5,8 @@ Optional, ) +from typing_extensions import TypedDict + from galaxy.util import Element from galaxy.util.dictifiable import Dictifiable from .output_actions import ToolOutputActionGroup @@ -14,6 +16,14 @@ ) +class ChangeFormatModel(TypedDict): + value: Optional[str] + format: Optional[str] + input: Optional[str] + input_dataset: Optional[str] + check_attribute: Optional[str] + + class ToolOutputBase(Dictifiable): def __init__( self, @@ -84,7 +94,7 @@ def __init__( self.actions = actions # Initialize default values - self.change_format: List[Element] = [] + self.change_format: List[ChangeFormatModel] = [] self.implicit = implicit self.from_work_dir: Optional[str] = None self.dataset_collector_descriptions: List[DatasetCollectionDescription] = [] diff --git a/lib/galaxy/tool_util/parser/xml.py b/lib/galaxy/tool_util/parser/xml.py index 1b854c5b993d..e468a14ad67d 100644 --- a/lib/galaxy/tool_util/parser/xml.py +++ b/lib/galaxy/tool_util/parser/xml.py @@ -5,6 +5,8 @@ import re import uuid from typing import ( + cast, + Iterable, List, Optional, ) @@ -38,6 +40,7 @@ from .output_actions import ToolOutputActionGroup from .output_collection_def import dataset_collector_descriptions_from_elem from .output_objects import ( + ChangeFormatModel, ToolExpressionOutput, ToolOutput, ToolOutputCollection, @@ -62,6 +65,56 @@ def inject_validates(inject): return match is not None +def destroy_tree(tree): + root = tree.getroot() + + node_tracker = {root: [0, None]} + + for node in root.iterdescendants(): + parent = node.getparent() + node_tracker[node] = [node_tracker[parent][0] + 1, parent] + + node_tracker = sorted( + [(depth, parent, child) for child, (depth, parent) in node_tracker.items()], key=lambda x: x[0], 
reverse=True + ) + + for _, parent, child in node_tracker: + if parent is None: + break + parent.remove(child) + + del tree + + +def parse_change_format(change_format: Iterable[Element]) -> List[ChangeFormatModel]: + change_models: List[ChangeFormatModel] = [] + for change_elem in change_format: + change_elem = cast(Element, change_elem) + for when_elem in change_elem.findall("when"): + when_elem = cast(Element, when_elem) + value: Optional[str] = when_elem.get("value", None) + format_: Optional[str] = when_elem.get("format", None) + check: Optional[str] = when_elem.get("input", None) + input_dataset: Optional[str] = None + check_attribute: Optional[str] = None + if check is not None: + if "$" not in check: + check = f"${check}" + else: + input_dataset = when_elem.get("input_dataset", None) + check_attribute = when_elem.get("attribute", None) + change_models.append( + ChangeFormatModel( + value=value, + format=format_, + input=check, + input_dataset=input_dataset, + check_attribute=check_attribute, + ) + ) + return change_models + + class XmlToolSource(ToolSource): """Responsible for parsing a tool from classic Galaxy representation.""" @@ -70,13 +123,19 @@ class XmlToolSource(ToolSource): def __init__(self, xml_tree: ElementTree, source_path=None, macro_paths=None): self.xml_tree = xml_tree - self.root = xml_tree.getroot() + self.root = self.xml_tree.getroot() self._source_path = source_path self._macro_paths = macro_paths or [] self.legacy_defaults = self.parse_profile() == "16.01" + self._string = xml_to_string(self.root) def to_string(self): - return xml_to_string(self.root) + return self._string + + def mem_optimize(self): + destroy_tree(self.xml_tree) + self.root = None + self._xml_tree = None def parse_version(self): return self.root.get("version", None) @@ -446,7 +505,7 @@ def _parse_output( elif auto_format: output_format = "_sniff_" output.format = output_format - output.change_format = data_elem.findall("change_format") + output.change_format = 
parse_change_format(data_elem.findall("change_format")) output.format_source = data_elem.get("format_source", default_format_source) output.default_identifier_source = data_elem.get("default_identifier_source", "None") output.metadata_source = data_elem.get("metadata_source", default_metadata_source) @@ -1155,21 +1214,16 @@ def parse_static_options(self): >>> xis.parse_static_options() [('a', 'a', True), ('b', 'b', False)] """ - static_options = list() + + deduplicated_static_options = {} + elem = self.input_elem for option in elem.findall("option"): value = option.get("value") text = option.text or value selected = string_as_bool(option.get("selected", False)) - present = False - for i, o in enumerate(static_options): - if o[1] == value: - present = True - static_options[i] = (text, value, selected) - break - if not present: - static_options.append((text, value, selected)) - return static_options + deduplicated_static_options[value] = (text, value, selected) + return list(deduplicated_static_options.values()) def parse_optional(self, default=None): """Return boolean indicating whether parameter is optional.""" diff --git a/lib/galaxy/tools/__init__.py b/lib/galaxy/tools/__init__.py index 175dc0b4e85b..1ba71eab3101 100644 --- a/lib/galaxy/tools/__init__.py +++ b/lib/galaxy/tools/__init__.py @@ -9,7 +9,6 @@ import re import tarfile import tempfile -import threading from collections.abc import MutableMapping from pathlib import Path from typing import ( @@ -80,6 +79,7 @@ ToolSection, ) from galaxy.tool_util.toolbox.views.sources import StaticToolBoxViewSources +from galaxy.tool_util.verify.interactor import ToolTestDescription from galaxy.tool_util.verify.test_data import TestDataNotFoundError from galaxy.tool_util.version import ( LegacyVersion, @@ -181,7 +181,6 @@ "but node or nodejs could not be found. 
Please contact the Galaxy adminstrator" ) -HELP_UNINITIALIZED = threading.Lock() MODEL_TOOLS_PATH = os.path.abspath(os.path.dirname(__file__)) # Tools that require Galaxy's Python environment to be preserved. GALAXY_LIB_TOOLS_UNVERSIONED = [ @@ -510,10 +509,7 @@ def create_tool(self, config_file, tool_cache_data_dir=None, **kwds): cache.set(config_file, tool_source) else: tool_source = self.get_expanded_tool_source(config_file) - tool = self._create_tool_from_source(tool_source, config_file=config_file, **kwds) - if not self.app.config.delay_tool_initialization: - tool.assert_finalized(raise_if_invalid=True) - return tool + return self._create_tool_from_source(tool_source, config_file=config_file, **kwds) def get_expanded_tool_source(self, config_file, **kwargs): try: @@ -704,8 +700,7 @@ class Tool(Dictifiable): tool_action: ToolAction tool_type_local = False dict_collection_visible_keys = ["id", "name", "version", "description", "labels"] - __help: Optional[threading.Lock] - __help_by_page: Union[threading.Lock, List[str]] + __help: Optional[Template] job_search: "JobSearch" version: str @@ -764,6 +759,7 @@ def __init__( self.changeset_revision = None self.installed_changeset_revision = None self.sharable_url = None + self.npages = 0 # The tool.id value will be the value of guid, but we'll keep the # guid attribute since it is useful to have. self.guid = guid @@ -779,57 +775,21 @@ def __init__( # Parse XML element containing configuration self.tool_source = tool_source self._is_workflow_compatible = None - self.finalized = False + self.__help = None + self.__tests: Optional[str] = None try: self.parse(tool_source, guid=guid, dynamic=dynamic) except Exception as e: global_tool_errors.add_error(config_file, "Tool Loading", e) raise e + mem_optimize = getattr(self.tool_source, "mem_optimize", None) + if mem_optimize is not None: + mem_optimize() # The job search is only relevant in a galaxy context, and breaks # loading tools into the toolshed for validation. 
if self.app.name == "galaxy": self.job_search = self.app.job_search - def __getattr__(self, name): - lazy_attributes = { - "action", - "check_values", - "display_by_page", - "enctype", - "has_multiple_pages", - "inputs", - "inputs_by_page", - "last_page", - "method", - "npages", - "nginx_upload", - "target", - "template_macro_params", - "outputs", - "output_collections", - } - if name in lazy_attributes: - self.assert_finalized() - return getattr(self, name) - raise AttributeError(name) - - def assert_finalized(self, raise_if_invalid=False): - if self.finalized is False: - try: - self.parse_inputs(self.tool_source) - self.parse_outputs(self.tool_source) - self.finalized = True - except Exception: - toolbox = getattr(self.app, "toolbox", None) - if toolbox: - toolbox.remove_tool_by_id(self.id) - if raise_if_invalid: - raise - else: - log.warning( - "An error occured while parsing the tool wrapper xml, the tool is not functional", exc_info=True - ) - def remove_from_cache(self): source_path = self.tool_source.source_path if source_path: @@ -1142,7 +1102,13 @@ def parse(self, tool_source: ToolSource, guid=None, dynamic=False): self.hidden = tool_source.parse_hidden() self.license = tool_source.parse_license() self.creator = tool_source.parse_creator() + self.parse_inputs(self.tool_source) + self.parse_outputs(self.tool_source) + self.raw_help = None + if self.app.is_webapp: + self.raw_help = self.__get_help_with_images(tool_source.parse_help()) + self.parse_tests() self.__parse_legacy_features(tool_source) # Load any tool specific options (optional) @@ -1157,9 +1123,6 @@ def parse(self, tool_source: ToolSource, guid=None, dynamic=False): self.provided_metadata_file = tool_source.parse_provided_metadata_file() self.provided_metadata_style = tool_source.parse_provided_metadata_style() - # Parse tool help - self.parse_help(tool_source) - # Parse result handling for tool exit codes and stdout/stderr messages: self.parse_stdio(tool_source) @@ -1179,10 +1142,8 @@ def 
parse(self, tool_source: ToolSource, guid=None, dynamic=False): try: expressions.find_engine(self.app.config) except Exception: - message = REQUIRES_JS_RUNTIME_MESSAGE % self.tool_id or self.tool_uuid + message = REQUIRES_JS_RUNTIME_MESSAGE % self.id or getattr(self, "uuid", "unknown tool id") raise Exception(message) - # Tests - self.__parse_tests(tool_source) # Requirements (dependencies) requirements, containers, resource_requirements = tool_source.parse_requirements_and_containers() @@ -1221,6 +1182,8 @@ def parse(self, tool_source: ToolSource, guid=None, dynamic=False): self._macro_paths = tool_source.macro_paths self.ports = tool_source.parse_interactivetool() + self._is_workflow_compatible = self.check_workflow_compatible(self.tool_source) + def __parse_legacy_features(self, tool_source): self.code_namespace: Dict[str, str] = {} self.hook_map: Dict[str, str] = {} @@ -1268,10 +1231,6 @@ def __parse_legacy_features(self, tool_source): for key, value in uihints_elem.attrib.items(): self.uihints[key] = value - def __parse_tests(self, tool_source): - self.__tests_source = tool_source - self.__tests_populated = False - def __parse_config_files(self, tool_source): self.config_files = [] if not hasattr(tool_source, "root"): @@ -1310,21 +1269,20 @@ def __parse_trackster_conf(self, tool_source): if trackster_conf is not None: self.trackster_conf = TracksterConfig.parse(trackster_conf) + def parse_tests(self): + tests_source = self.tool_source + if tests_source: + try: + self.__tests = json.dumps([t.to_dict() for t in parse_tests(self, tests_source)], indent=None) + except Exception: + self.__tests = None + log.exception("Failed to parse tool tests for tool '%s'", self.id) + @property def tests(self): - self.assert_finalized() - if not self.__tests_populated: - tests_source = self.__tests_source - if tests_source: - try: - self.__tests = parse_tests(self, tests_source) - except Exception: - self.__tests = None - log.exception("Failed to parse tool tests for tool '%s'", 
self.id) - else: - self.__tests = None - self.__tests_populated = True - return self.__tests + if self.__tests: + return [ToolTestDescription(d) for d in json.loads(self.__tests)] + return None @property def _repository_dir(self): @@ -1467,17 +1425,6 @@ def parse_inputs(self, tool_source: ToolSource): self.input_required = True break - def parse_help(self, tool_source): - """ - Parse the help text for the tool. Formatted in reStructuredText, but - stored as Mako to allow for dynamic image paths. - This implementation supports multiple pages. - """ - # TODO: Allow raw HTML or an external link. - self.__help = HELP_UNINITIALIZED - self.__help_by_page = HELP_UNINITIALIZED - self.__help_source = tool_source - def parse_outputs(self, tool_source): """ Parse <outputs> elements and fill in self.outputs (keyed by name) @@ -1693,6 +1640,19 @@ def populate_tool_shed_info(self, tool_shed_repository): self.app, self.tool_shed, self.repository_owner, self.repository_name ) + @property + def help(self) -> Template: + try: + return Template( + rst_to_html(self.raw_help), + input_encoding="utf-8", + default_filters=["decode.utf8"], + encoding_errors="replace", + ) + except Exception: + log.info("Exception while parsing help for tool with id '%s'", self.id) + return Template("", input_encoding="utf-8") + @property def biotools_reference(self) -> Optional[str]: """Return a bio.tools ID if external reference to it is found. 
@@ -1701,88 +1661,23 @@ def biotools_reference(self) -> Optional[str]: """ return biotools_reference(self.xrefs) - @property - def help(self): - if self.__help is HELP_UNINITIALIZED: - self.__ensure_help() - return self.__help - - @property - def help_by_page(self): - if self.__help_by_page is HELP_UNINITIALIZED: - self.__ensure_help() - return self.__help_by_page - - @property - def raw_help(self): - # may return rst (or Markdown in the future) - tool_source = self.__help_source - help_text = tool_source.parse_help() - return help_text - - def __ensure_help(self): - with HELP_UNINITIALIZED: - if self.__help is HELP_UNINITIALIZED: - self.__inititalize_help() - - def __inititalize_help(self): - tool_source = self.__help_source - self.__help = None - __help_by_page = [] - help_footer = "" - help_text = tool_source.parse_help() - if help_text is not None: - try: - if help_text.find(".. image:: ") >= 0 and (self.tool_shed_repository or self.repository_id): - help_text = set_image_paths( - self.app, - help_text, - encoded_repository_id=self.repository_id, - tool_shed_repository=self.tool_shed_repository, - tool_id=self.old_id, - tool_version=self.version, - ) - except Exception: - log.exception( - "Exception in parse_help, so images may not be properly displayed for tool with id '%s'", self.id - ) - try: - self.__help = Template( - rst_to_html(help_text), - input_encoding="utf-8", - default_filters=["decode.utf8"], - encoding_errors="replace", + def __get_help_with_images(self, raw_help: Optional[str]): + help_text = raw_help or "" + try: + if help_text.find(".. 
image:: ") >= 0 and (self.tool_shed_repository or self.repository_id): + return set_image_paths( + self.app, + help_text, + encoded_repository_id=self.repository_id, + tool_shed_repository=self.tool_shed_repository, + tool_id=self.old_id, + tool_version=self.version, ) - except Exception: - log.exception("Exception while parsing help for tool with id '%s'", self.id) - - # Handle deprecated multi-page help text in XML case. - if hasattr(tool_source, "root"): - help_elem = tool_source.root.find("help") - help_header = help_text - help_pages = help_elem.findall("page") - # Multiple help page case - if help_pages: - for help_page in help_pages: - __help_by_page.append(help_page.text) - help_footer = help_footer + help_page.tail - # Each page has to rendered all-together because of backreferences allowed by rst - try: - __help_by_page = [ - Template( - rst_to_html(help_header + x + help_footer), - input_encoding="utf-8", - default_filters=["decode.utf8"], - encoding_errors="replace", - ) - for x in __help_by_page - ] - except Exception: - log.exception("Exception while parsing multi-page help for tool with id '%s'", self.id) - # Pad out help pages to match npages ... could this be done better? - while len(__help_by_page) < self.npages: - __help_by_page.append(self.__help) - self.__help_by_page = __help_by_page + except Exception: + log.exception( + "Exception in parse_help, so images may not be properly displayed for tool with id '%s'", self.id + ) + return help_text def find_output_def(self, name): # name is JobToOutputDatasetAssociation name. 
@@ -1799,12 +1694,7 @@ def find_output_def(self, name): @property def is_workflow_compatible(self): - is_workflow_compatible = self._is_workflow_compatible - if is_workflow_compatible is None: - is_workflow_compatible = self.check_workflow_compatible(self.tool_source) - if self.finalized: - self._is_workflow_compatible = is_workflow_compatible - return is_workflow_compatible + return self._is_workflow_compatible def check_workflow_compatible(self, tool_source): """ @@ -1813,7 +1703,7 @@ def check_workflow_compatible(self, tool_source): """ # Multiple page tools are not supported -- we're eliminating most # of these anyway - if self.finalized and self.has_multiple_pages: + if self.has_multiple_pages: return False # This is probably the best bet for detecting external web tools # right now @@ -3518,6 +3408,10 @@ def _get_new_elements(self, history, elements_to_copy): new_elements[element_identifier] = copied_value return new_elements + @staticmethod + def element_is_valid(element: model.DatasetCollectionElement): + return element.element_object.is_ok + def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds): collection = incoming["input"] diff --git a/lib/galaxy/tools/actions/__init__.py b/lib/galaxy/tools/actions/__init__.py index 934604a8ef8e..4f9aa4e1b55d 100644 --- a/lib/galaxy/tools/actions/__init__.py +++ b/lib/galaxy/tools/actions/__init__.py @@ -40,6 +40,7 @@ if TYPE_CHECKING: from galaxy.model import DatasetInstance + from galaxy.tool_util.parser.output_objects import ToolOutput log = logging.getLogger(__name__) @@ -1131,7 +1132,7 @@ def get_ext_or_implicit_ext(hda): def determine_output_format( - output, + output: "ToolOutput", parameter_context, input_datasets, input_dataset_collections, @@ -1144,7 +1145,6 @@ def determine_output_format( wrappers, a map of the input datasets (name => HDA), and the last input extensions in the tool form. - TODO: Don't deal with XML here - move this logic into ToolOutput. 
TODO: Make the input extension used deterministic instead of random. """ # the type should match the input @@ -1206,47 +1206,40 @@ def determine_output_format( log.debug("Exception while trying to determine format_source: %s", e) # process change_format tags - if output.change_format is not None: - new_format_set = False - for change_elem in output.change_format: - for when_elem in change_elem.findall("when"): - check = when_elem.get("input", None) - if check is not None: - try: - if "$" not in check: - # allow a simple name or more complex specifications - check = "${%s}" % check - if fill_template( - check, context=parameter_context, python_template_version=python_template_version - ) == when_elem.get("value", None): - ext = when_elem.get("format", ext) - except Exception: - # bad tag input value; possibly referencing a param within a different conditional when block or other nonexistent grouping construct - continue - else: - check = when_elem.get("input_dataset", None) - if check is not None: - check = input_datasets.get(check, None) - # At this point check is a HistoryDatasetAssociation object. - check_format = when_elem.get("format", ext) - check_value = when_elem.get("value", None) - check_attribute = when_elem.get("attribute", None) - if check is not None and check_value is not None and check_attribute is not None: - # See if the attribute to be checked belongs to the HistoryDatasetAssociation object. - if hasattr(check, check_attribute): - if str(getattr(check, check_attribute)) == str(check_value): - ext = check_format - new_format_set = True - break - # See if the attribute to be checked belongs to the metadata associated with the - # HistoryDatasetAssociation object. 
- if check.metadata is not None: - metadata_value = check.metadata.get(check_attribute, None) - if metadata_value is not None: - if str(metadata_value) == str(check_value): - ext = check_format - new_format_set = True - break - if new_format_set: - break + if output.change_format: + for change_format_model in output.change_format: + input_check = change_format_model.get("input") + if input_check is not None: + try: + if ( + fill_template( + input_check, context=parameter_context, python_template_version=python_template_version + ) + == change_format_model["value"] + ): + if change_format_model["format"]: + return change_format_model["format"] + except Exception: + # bad tag input value; possibly referencing a param within a different conditional when block or other nonexistent grouping construct + continue + else: + input_dataset_check = change_format_model.get("input_dataset") + if input_dataset_check is not None: + dataset = input_datasets.get(input_dataset_check) + # At this point check is a HistoryDatasetAssociation object. + check_format = change_format_model["format"] or ext + check_value = change_format_model["value"] + check_attribute = change_format_model["check_attribute"] + if dataset is not None and check_value is not None and check_attribute is not None: + # See if the attribute to be checked belongs to the HistoryDatasetAssociation object. + if hasattr(dataset, check_attribute): + if str(getattr(dataset, check_attribute)) == str(check_value): + return check_format + # See if the attribute to be checked belongs to the metadata associated with the + # HistoryDatasetAssociation object. 
+ if dataset.metadata is not None: + metadata_value = dataset.metadata.get(check_attribute) + if metadata_value is not None: + if str(metadata_value) == str(check_value): + return check_format return ext diff --git a/lib/galaxy/tools/remote_tool_eval.py b/lib/galaxy/tools/remote_tool_eval.py index 074a99f17e97..cc3f668b7657 100644 --- a/lib/galaxy/tools/remote_tool_eval.py +++ b/lib/galaxy/tools/remote_tool_eval.py @@ -49,6 +49,7 @@ class ToolApp(MinimalToolApp): """Dummy App that allows loading tools""" name = "tool_app" + is_webapp = False def __init__( self, diff --git a/lib/galaxy/tools/repositories.py b/lib/galaxy/tools/repositories.py index f8cfc8f43532..c5e17273d0a0 100644 --- a/lib/galaxy/tools/repositories.py +++ b/lib/galaxy/tools/repositories.py @@ -12,6 +12,8 @@ class ValidationContext: """Minimal App object for tool validation.""" + is_webapp = True + def __init__( self, app_name, diff --git a/lib/galaxy/tools/test.py b/lib/galaxy/tools/test.py index 695f6c9ef7f7..b097483ca4a7 100644 --- a/lib/galaxy/tools/test.py +++ b/lib/galaxy/tools/test.py @@ -2,6 +2,7 @@ import os import os.path from typing import ( + Iterable, List, Tuple, Union, @@ -23,13 +24,13 @@ log = logging.getLogger(__name__) -def parse_tests(tool, tests_source): +def parse_tests(tool, tests_source) -> Iterable[ToolTestDescription]: """ Build ToolTestDescription objects for each "<test>" elements and return default interactor (if any). 
""" raw_tests_dict = tests_source.parse_tests_to_dict() - tests = [] + tests: List[ToolTestDescription] = [] for i, raw_test_dict in enumerate(raw_tests_dict.get("tests", [])): test = description_from_tool_object(tool, i, raw_test_dict) tests.append(test) diff --git a/lib/galaxy/util/__init__.py b/lib/galaxy/util/__init__.py index 680df532d56c..37e814f7aff4 100644 --- a/lib/galaxy/util/__init__.py +++ b/lib/galaxy/util/__init__.py @@ -86,13 +86,6 @@ def __new__(cls, element=None, file=None) -> etree.ElementTree: ElementTree, ) -try: - import docutils.core as docutils_core - import docutils.writers.html4css1 as docutils_html4css1 -except ImportError: - docutils_core = None # type: ignore[assignment] - docutils_html4css1 = None # type: ignore[assignment] - from .custom_logging import get_logger from .inflection import Inflector from .path import ( # noqa: F401 @@ -101,6 +94,7 @@ def __new__(cls, element=None, file=None) -> etree.ElementTree: safe_relpath, StrPath, ) +from .rst_to_html import rst_to_html # noqa: F401 try: shlex_join = shlex.join # type: ignore[attr-defined] @@ -956,33 +950,6 @@ def update(self, values): self.__dict__.update(values) -def rst_to_html(s, error=False): - """Convert a blob of reStructuredText to HTML""" - log = get_logger("docutils") - - if docutils_core is None: - raise Exception("Attempted to use rst_to_html but docutils unavailable.") - - class FakeStream: - def write(self, str): - if len(str) > 0 and not str.isspace(): - if error: - raise Exception(str) - log.warning(str) - - settings_overrides = { - "embed_stylesheet": False, - "template": os.path.join(os.path.dirname(__file__), "docutils_template.txt"), - "warning_stream": FakeStream(), - "doctitle_xform": False, # without option, very different rendering depending on - # number of sections in help content. 
- } - - return unicodify( - docutils_core.publish_string(s, writer=docutils_html4css1.Writer(), settings_overrides=settings_overrides) - ) - - def xml_text(root, name=None): """Returns the text inside an element""" if name is not None: diff --git a/lib/galaxy/util/rst_to_html.py b/lib/galaxy/util/rst_to_html.py new file mode 100644 index 000000000000..1141202bb585 --- /dev/null +++ b/lib/galaxy/util/rst_to_html.py @@ -0,0 +1,68 @@ +import functools +import os + +try: + import docutils.core + import docutils.io + import docutils.utils + import docutils.writers.html4css1 +except ImportError: + docutils = None # type: ignore[assignment] + +from .custom_logging import get_logger + + +class FakeStream: + def __init__(self, error): + self.__error = error + + log_ = get_logger("docutils") + + def write(self, str): + if len(str) > 0 and not str.isspace(): + if self.__error: + raise Exception(str) + self.log_.warning(str) + + +@functools.lru_cache(maxsize=None) +def get_publisher(error=False): + docutils_writer = docutils.writers.html4css1.Writer() + docutils_template_path = os.path.join(os.path.dirname(__file__), "docutils_template.txt") + no_report_level = docutils.utils.Reporter.SEVERE_LEVEL + 1 + settings_overrides = { + "embed_stylesheet": False, + "template": docutils_template_path, + "warning_stream": FakeStream(error), + "doctitle_xform": False, # without option, very different rendering depending on + # number of sections in help content. 
+ "halt_level": no_report_level, + "output_encoding": "unicode", + } + + if not error: + # in normal operation we don't want noisy warnings, that's tool author business + settings_overrides["report_level"] = no_report_level + + Publisher = docutils.core.Publisher + pub = Publisher( + parser=None, + writer=docutils_writer, + settings=None, + source_class=docutils.io.StringInput, + destination_class=docutils.io.StringOutput, + ) + pub.set_components("standalone", "restructuredtext", "pseudoxml") + pub.process_programmatic_settings(None, settings_overrides, None) + return pub + + +@functools.lru_cache(maxsize=None) +def rst_to_html(s, error=False): + if docutils is None: + raise Exception("Attempted to use rst_to_html but docutils unavailable.") + + publisher = get_publisher(error=error) + publisher.set_source(s, None) + publisher.set_destination(None, None) + return publisher.publish(enable_exit_status=False) diff --git a/lib/galaxy/web_stack/gunicorn_config.py b/lib/galaxy/web_stack/gunicorn_config.py index ec63f9e48569..86e81ccc9a15 100644 --- a/lib/galaxy/web_stack/gunicorn_config.py +++ b/lib/galaxy/web_stack/gunicorn_config.py @@ -1,10 +1,15 @@ """ Gunicorn config file based on https://gist.github.com/hynek/ba655c8756924a5febc5285c712a7946 """ +import gc import os import sys +def is_preload_app(): + return "--preload" in os.environ.get("GUNICORN_CMD_ARGS", "") or "--preload" in sys.argv + + def on_starting(server): """ Attach a set of IDs that can be temporarily re-used. @@ -45,6 +50,13 @@ def on_reload(server): server._worker_id_overload = set(range(1, server.cfg.workers + 1)) +def when_ready(server): + # freeze objects after preloading app + if is_preload_app(): + gc.freeze() + print("Objects frozen in perm gen: ", gc.get_freeze_count()) + + def pre_fork(server, worker): """ Attach the next free worker_id before forking off. 
@@ -58,7 +70,8 @@ def post_fork(server, worker): """ os.environ["GUNICORN_WORKER_ID"] = str(worker._worker_id) os.environ["GUNICORN_LISTENERS"] = ",".join(str(bind) for bind in server.LISTENERS) - if "--preload" in os.environ.get("GUNICORN_CMD_ARGS", "") or "--preload" in sys.argv: + if is_preload_app(): + gc.enable() from galaxy.web_stack import GunicornApplicationStack GunicornApplicationStack.late_postfork_event.set() diff --git a/lib/galaxy/webapps/galaxy/buildapp.py b/lib/galaxy/webapps/galaxy/buildapp.py index e0d81771d7fc..236da06dd71d 100644 --- a/lib/galaxy/webapps/galaxy/buildapp.py +++ b/lib/galaxy/webapps/galaxy/buildapp.py @@ -56,7 +56,7 @@ def app_pair(global_conf, load_app_kwds=None, wsgi_preflight=True, **kwargs): galaxy.app.app = app else: try: - app = galaxy.app.UniverseApplication(global_conf=global_conf, **kwargs) + app = galaxy.app.UniverseApplication(global_conf=global_conf, is_webapp=True, **kwargs) galaxy.app.app = app except Exception: traceback.print_exc() diff --git a/lib/galaxy/workflow/modules.py b/lib/galaxy/workflow/modules.py index 302e7696fb50..20a8e4f3f639 100644 --- a/lib/galaxy/workflow/modules.py +++ b/lib/galaxy/workflow/modules.py @@ -1905,11 +1905,10 @@ def get_all_outputs(self, data_only=False): formats = ["input"] # default to special name "input" which remove restrictions on connections else: formats = [tool_output.format] - for change_elem in tool_output.change_format: - for when_elem in change_elem.findall("when"): - format = when_elem.get("format", None) - if format and format not in formats: - formats.append(format) + for change_format_model in tool_output.change_format: + format = change_format_model["format"] + if format and format not in formats: + formats.append(format) if tool_output.label: try: params = make_dict_copy(self.state.inputs) diff --git a/lib/galaxy_test/driver/driver_util.py b/lib/galaxy_test/driver/driver_util.py index f5be4fdc0042..b57691741f04 100644 --- a/lib/galaxy_test/driver/driver_util.py +++ 
b/lib/galaxy_test/driver/driver_util.py @@ -585,7 +585,7 @@ def build_galaxy_app(simple_kwargs) -> GalaxyUniverseApplication: simple_kwargs["global_conf"]["__file__"] = "lib/galaxy/config/sample/galaxy.yml.sample" simple_kwargs = load_app_properties(kwds=simple_kwargs) # Build the Universe Application - app = GalaxyUniverseApplication(**simple_kwargs) + app = GalaxyUniverseApplication(**simple_kwargs, is_webapp=True) log.info("Embedded Galaxy application started") global install_context diff --git a/test/unit/app/tools/test_actions.py b/test/unit/app/tools/test_actions.py index 85cc8ca63702..1e6b69528ac7 100644 --- a/test/unit/app/tools/test_actions.py +++ b/test/unit/app/tools/test_actions.py @@ -10,6 +10,7 @@ from galaxy.model.base import transaction from galaxy.objectstore import BaseObjectStore from galaxy.tool_util.parser.output_objects import ToolOutput +from galaxy.tool_util.parser.xml import parse_change_format from galaxy.tools.actions import ( DefaultToolAction, determine_output_format, @@ -243,7 +244,7 @@ def quick_output( test_output.format = format test_output.format_source = format_source if change_format_xml: - test_output.change_format = XML(change_format_xml).findall("change_format") + test_output.change_format = parse_change_format(XML(change_format_xml).findall("change_format")) else: test_output.change_format = [] return test_output diff --git a/test/unit/app/tools/test_tool_deserialization.py b/test/unit/app/tools/test_tool_deserialization.py index 1eab7d54156a..e7ce9f8cdf08 100644 --- a/test/unit/app/tools/test_tool_deserialization.py +++ b/test/unit/app/tools/test_tool_deserialization.py @@ -42,6 +42,7 @@ class ToolApp(GalaxyDataTestApp): name = "galaxy" biotools_metadata_source = None job_search = None + is_webapp = True @pytest.fixture diff --git a/test/unit/workflows/test_modules.py b/test/unit/workflows/test_modules.py index 414f6b8f40c2..27658e9fb3e2 100644 --- a/test/unit/workflows/test_modules.py +++ 
b/test/unit/workflows/test_modules.py @@ -480,7 +480,6 @@ def __mock_tool( params_from_strings=mock.Mock(), check_and_update_param_values=mock.Mock(), to_json=_to_json, - assert_finalized=lambda: None, ) return tool