Skip to content

Commit

Permalink
Merge pull request galaxyproject#16536 from mvdbeek/tool_mem_tweaks
Browse files Browse the repository at this point in the history
Tweak tool memory use and optimize shared memory when using preload
  • Loading branch information
jmchilton committed Aug 23, 2023
2 parents 99f7936 + 2ea0e2e commit 152c802
Show file tree
Hide file tree
Showing 25 changed files with 285 additions and 306 deletions.
12 changes: 0 additions & 12 deletions doc/source/admin/galaxy_options.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1167,18 +1167,6 @@
:Type: str


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``delay_tool_initialization``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

:Description:
Set this to true to delay parsing of tool inputs and outputs until
they are needed. This results in faster startup times but uses
more memory when using forked Galaxy processes.
:Default: ``false``
:Type: bool


~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
``biotools_content_directory``
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Expand Down
2 changes: 2 additions & 0 deletions lib/galaxy/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -646,6 +646,8 @@ def __init__(self, **kwargs) -> None:
("application stack", self._shutdown_application_stack),
]
self._register_singleton(StructuredApp, self) # type: ignore[type-abstract]
if kwargs.get("is_webapp"):
self.is_webapp = kwargs["is_webapp"]
# A lot of postfork initialization depends on the server name, ensure it is set immediately after forking before other postfork functions
self.application_stack.register_postfork_function(self.application_stack.set_postfork_server_name, self)
self.config.reload_sanitize_allowlist(explicit="sanitize_allowlist_file" in kwargs)
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy/app_unittest_utils/galaxy_mock.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class MockApp(di.Container, GalaxyDataTestApp):
history_manager: HistoryManager
job_metrics: JobMetrics
stop: bool
is_webapp: bool = True

def __init__(self, config=None, **kwargs) -> None:
super().__init__()
Expand Down Expand Up @@ -227,7 +228,7 @@ def __init__(self, **kwargs):

# set by MockDir
self.enable_tool_document_cache = False
self.delay_tool_initialization = True
self.tool_cache_data_dir = os.path.join(self.root, "tool_cache")
self.external_chown_script = None
self.check_job_script_integrity = False
self.check_job_script_integrity_count = 0
Expand Down
1 change: 0 additions & 1 deletion lib/galaxy/app_unittest_utils/tools_support.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def setup_app(self):
def __setup_tool(self):
tool_source = get_tool_source(self.tool_file)
self.tool = create_tool_from_source(self.app, tool_source, config_file=self.tool_file)
self.tool.assert_finalized()
if getattr(self, "tool_action", None):
self.tool.tool_action = self.tool_action
return self.tool
Expand Down
5 changes: 0 additions & 5 deletions lib/galaxy/config/sample/galaxy.yml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -899,11 +899,6 @@ galaxy:
# this option will be resolved with respect to <data_dir>.
#tool_search_index_dir: tool_search_index

# Set this to true to delay parsing of tool inputs and outputs until
# they are needed. This results in faster startup times but uses more
# memory when using forked Galaxy processes.
#delay_tool_initialization: false

# Point Galaxy at a repository consisting of a copy of the bio.tools
# database (e.g. https://github.com/bio-tools/content/) to resolve
# bio.tools data for tool metadata.
Expand Down
9 changes: 0 additions & 9 deletions lib/galaxy/config/schemas/config_schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -846,15 +846,6 @@ mapping:
desc:
Directory in which the toolbox search index is stored.

delay_tool_initialization:
type: bool
default: false
required: false
desc: |
Set this to true to delay parsing of tool inputs and outputs until they are needed.
This results in faster startup times but uses more memory when using forked Galaxy
processes.
biotools_content_directory:
type: str
required: false
Expand Down
1 change: 1 addition & 0 deletions lib/galaxy/structured_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ class BasicSharedApp(Container):


class MinimalToolApp(Protocol):
is_webapp: bool
name: str
# Leave config as Any: in a full Galaxy app this is a GalaxyAppConfiguration object, but this is mostly dynamically
# generated, and here we want to also allow other kinds of configuration objects (e.g. a Bunch).
Expand Down
1 change: 0 additions & 1 deletion lib/galaxy/tool_shed/tools/tool_validator.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,6 @@ def load_tool_from_config(self, repository_id, full_path):
repository_id=repository_id,
allow_code_files=False,
)
tool.assert_finalized(raise_if_invalid=True)
valid = True
error_message = None
except KeyError as e:
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/tool_util/parser/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ def parse_stdio(self):
return [], []

@abstractmethod
def parse_help(self):
def parse_help(self) -> Optional[str]:
"""Return RST definition of help text for tool or None if the tool
doesn't define help text.
"""
Expand Down
12 changes: 11 additions & 1 deletion lib/galaxy/tool_util/parser/output_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
Optional,
)

from typing_extensions import TypedDict

from galaxy.util import Element
from galaxy.util.dictifiable import Dictifiable
from .output_actions import ToolOutputActionGroup
Expand All @@ -14,6 +16,14 @@
)


class ChangeFormatModel(TypedDict):
    """Plain-data record for one ``<when>`` child of a ``<change_format>`` element.

    The field notes below describe how ``parse_change_format`` populates the
    record; every field is ``None`` when the corresponding value is absent.
    """

    # Content of the ``value`` attribute.
    value: Optional[str]
    # Content of the ``format`` attribute.
    format: Optional[str]
    # Content of the ``input`` attribute, prefixed with ``$`` when it contains none.
    input: Optional[str]
    # Content of the ``input_dataset`` attribute; only read when ``input`` is absent.
    input_dataset: Optional[str]
    # Content of the ``attribute`` attribute; only read when ``input`` is absent.
    check_attribute: Optional[str]


class ToolOutputBase(Dictifiable):
def __init__(
self,
Expand Down Expand Up @@ -84,7 +94,7 @@ def __init__(
self.actions = actions

# Initialize default values
self.change_format: List[Element] = []
self.change_format: List[ChangeFormatModel] = []
self.implicit = implicit
self.from_work_dir: Optional[str] = None
self.dataset_collector_descriptions: List[DatasetCollectionDescription] = []
Expand Down
80 changes: 67 additions & 13 deletions lib/galaxy/tool_util/parser/xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
import re
import uuid
from typing import (
cast,
Iterable,
List,
Optional,
)
Expand Down Expand Up @@ -38,6 +40,7 @@
from .output_actions import ToolOutputActionGroup
from .output_collection_def import dataset_collector_descriptions_from_elem
from .output_objects import (
ChangeFormatModel,
ToolExpressionOutput,
ToolOutput,
ToolOutputCollection,
Expand All @@ -62,6 +65,56 @@ def inject_validates(inject):
return match is not None


def destroy_tree(tree):
    """Detach every element below the root of ``tree`` from its parent.

    Elements are unlinked deepest-first, so a node is always removed from its
    parent before that parent is removed from its own parent. Within one depth
    the elements are removed in the order ``iterdescendants()`` yielded them.
    The root element itself stays attached to ``tree``.

    :param tree: object exposing ``getroot()``; the elements it returns must
        provide ``iterdescendants()``, ``getparent()`` and ``remove()``.
    :returns: ``None``. A tree whose ``getroot()`` returns ``None`` is left
        untouched instead of raising ``AttributeError``.
    """
    root = tree.getroot()
    if root is None:
        # Nothing hangs off this tree, so there is nothing to detach.
        return

    # Record (depth, parent, node) for every descendant before mutating
    # anything: the tree must not change while it is being walked.
    # NOTE: relies on a parent being yielded before its own descendants.
    depth_of = {root: 0}
    links = []
    for node in root.iterdescendants():
        parent = node.getparent()
        depth = depth_of[parent] + 1
        depth_of[node] = depth
        links.append((depth, parent, node))

    # The sort is stable, so nodes sharing a depth keep their walk order.
    links.sort(key=lambda link: link[0], reverse=True)
    for _, parent, node in links:
        parent.remove(node)


def parse_change_format(change_format: Iterable[Element]) -> List[ChangeFormatModel]:
    """Convert ``<change_format>`` elements into plain ``ChangeFormatModel`` records.

    One record is produced per ``<when>`` child, in document order.

    :param change_format: the ``<change_format>`` elements to read.
    :returns: the records built from the ``value``, ``format``, ``input``,
        ``input_dataset`` and ``attribute`` attributes of each ``<when>``.
    """
    models: List[ChangeFormatModel] = []
    for container in change_format:
        for when in cast(Element, container).findall("when"):
            when = cast(Element, when)
            referenced_input: Optional[str] = when.get("input", None)
            dataset_name: Optional[str] = None
            attribute_name: Optional[str] = None
            if referenced_input is None:
                # Without an ``input`` reference the check targets a dataset attribute instead.
                dataset_name = when.get("input_dataset", None)
                attribute_name = when.get("attribute", None)
            elif "$" not in referenced_input:
                # Bare parameter names are turned into ``$name`` references.
                referenced_input = f"${referenced_input}"
            models.append(
                ChangeFormatModel(
                    value=when.get("value", None),
                    format=when.get("format", None),
                    input=referenced_input,
                    input_dataset=dataset_name,
                    check_attribute=attribute_name,
                )
            )
    return models


class XmlToolSource(ToolSource):
"""Responsible for parsing a tool from classic Galaxy representation."""

Expand All @@ -70,13 +123,19 @@ class XmlToolSource(ToolSource):

def __init__(self, xml_tree: ElementTree, source_path=None, macro_paths=None):
self.xml_tree = xml_tree
self.root = xml_tree.getroot()
self.root = self.xml_tree.getroot()
self._source_path = source_path
self._macro_paths = macro_paths or []
self.legacy_defaults = self.parse_profile() == "16.01"
self._string = xml_to_string(self.root)

def to_string(self):
return xml_to_string(self.root)
return self._string

def mem_optimize(self):
    """Empty the parsed XML tree and drop the references this instance holds to it.

    Safe to call more than once: after the first call there is no tree left
    to empty and the call only re-clears the attributes.
    """
    tree = self.xml_tree
    if tree is not None:
        destroy_tree(tree)
    self.root = None
    # ``__init__`` stores the tree as ``xml_tree``; clearing only ``_xml_tree``
    # would leave the emptied tree referenced by this instance.
    self.xml_tree = None
    # Kept so that anything reading ``_xml_tree`` still sees ``None``.
    self._xml_tree = None

def parse_version(self):
return self.root.get("version", None)
Expand Down Expand Up @@ -446,7 +505,7 @@ def _parse_output(
elif auto_format:
output_format = "_sniff_"
output.format = output_format
output.change_format = data_elem.findall("change_format")
output.change_format = parse_change_format(data_elem.findall("change_format"))
output.format_source = data_elem.get("format_source", default_format_source)
output.default_identifier_source = data_elem.get("default_identifier_source", "None")
output.metadata_source = data_elem.get("metadata_source", default_metadata_source)
Expand Down Expand Up @@ -1155,21 +1214,16 @@ def parse_static_options(self):
>>> xis.parse_static_options()
[('a', 'a', True), ('b', 'b', False)]
"""
static_options = list()

deduplicated_static_options = {}

elem = self.input_elem
for option in elem.findall("option"):
value = option.get("value")
text = option.text or value
selected = string_as_bool(option.get("selected", False))
present = False
for i, o in enumerate(static_options):
if o[1] == value:
present = True
static_options[i] = (text, value, selected)
break
if not present:
static_options.append((text, value, selected))
return static_options
deduplicated_static_options[value] = (text, value, selected)
return list(deduplicated_static_options.values())

def parse_optional(self, default=None):
"""Return boolean indicating whether parameter is optional."""
Expand Down
Loading

0 comments on commit 152c802

Please sign in to comment.