Skip to content

Commit

Permalink
Merge branch 'release_23.1' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
mvdbeek committed Sep 25, 2023
2 parents 75c5977 + 602cdd4 commit 09ff121
Show file tree
Hide file tree
Showing 15 changed files with 87 additions and 36 deletions.
10 changes: 2 additions & 8 deletions lib/galaxy/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@
# absolute_import needed for tool_shed package.

import configparser
import ipaddress
import json
import locale
import logging
Expand Down Expand Up @@ -43,6 +42,7 @@
string_as_bool,
unicodify,
)
from galaxy.util.config_parsers import parse_allowlist_ips
from galaxy.util.custom_logging import LOGLV_TRACE
from galaxy.util.dynamic import HasDynamicProperties
from galaxy.util.facts import get_facts
Expand Down Expand Up @@ -915,13 +915,7 @@ def _process_config(self, kwargs: Dict[str, Any]) -> None:
self.tool_secret = kwargs.get("tool_secret", "")
self.metadata_strategy = kwargs.get("metadata_strategy", "directory")
self.use_remote_user = self.use_remote_user or self.single_user
self.fetch_url_allowlist_ips = [
ipaddress.ip_network(unicodify(ip.strip())) # If it has a slash, assume 127.0.0.1/24 notation
if "/" in ip
else ipaddress.ip_address(unicodify(ip.strip())) # Otherwise interpret it as an ip address.
for ip in kwargs.get("fetch_url_allowlist", "").split(",")
if len(ip.strip()) > 0
]
self.fetch_url_allowlist_ips = parse_allowlist_ips(listify(kwargs.get("fetch_url_allowlist")))
self.job_queue_cleanup_interval = int(kwargs.get("job_queue_cleanup_interval", "5"))

# Fall back to legacy job_working_directory config variable if set.
Expand Down
15 changes: 8 additions & 7 deletions lib/galaxy/files/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
PluginKind,
)
from galaxy.util import plugin_config
from galaxy.util.config_parsers import parse_allowlist_ips
from galaxy.util.dictifiable import Dictifiable

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -250,12 +251,12 @@ def from_app_config(config):
# Formalize what we read in from config to create a more clear interface
# for this component.
kwds = {}
kwds["symlink_allowlist"] = getattr(config, "user_library_import_symlink_allowlist", [])
kwds["fetch_url_allowlist"] = getattr(config, "fetch_url_allowlist", [])
kwds["library_import_dir"] = getattr(config, "library_import_dir", None)
kwds["user_library_import_dir"] = getattr(config, "user_library_import_dir", None)
kwds["ftp_upload_dir"] = getattr(config, "ftp_upload_dir", None)
kwds["ftp_upload_purge"] = getattr(config, "ftp_upload_purge", True)
kwds["symlink_allowlist"] = config.user_library_import_symlink_allowlist
kwds["fetch_url_allowlist"] = [str(ip) for ip in config.fetch_url_allowlist_ips]
kwds["library_import_dir"] = config.library_import_dir
kwds["user_library_import_dir"] = config.user_library_import_dir
kwds["ftp_upload_dir"] = config.ftp_upload_dir
kwds["ftp_upload_purge"] = config.ftp_upload_purge
return ConfiguredFileSourcesConfig(**kwds)

def to_dict(self):
Expand All @@ -272,7 +273,7 @@ def to_dict(self):
def from_dict(as_dict):
return ConfiguredFileSourcesConfig(
symlink_allowlist=as_dict["symlink_allowlist"],
fetch_url_allowlist=as_dict["fetch_url_allowlist"],
fetch_url_allowlist=parse_allowlist_ips(as_dict["fetch_url_allowlist"]),
library_import_dir=as_dict["library_import_dir"],
user_library_import_dir=as_dict["user_library_import_dir"],
ftp_upload_dir=as_dict["ftp_upload_dir"],
Expand Down
13 changes: 12 additions & 1 deletion lib/galaxy/files/sources/drs.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,21 @@ def __init__(self, **kwd: Unpack[FilesSourceProperties]):
self._force_http = props.pop("force_http", False)
self._props = props

@property
def _allowlist(self):
    """IP allowlist taken from the file-sources configuration.

    Forwarded to ``fetch_drs_to_file`` so resolved access URLs can be
    validated against it.
    """
    config = self._file_sources_config
    return config.fetch_url_allowlist

def _realize_to(self, source_path, native_path, user_context=None, opts: Optional[FilesSourceOptions] = None):
    """Materialize the DRS object at ``source_path`` into the local file ``native_path``.

    Serialization properties may carry per-user HTTP headers; those are
    separated out and the rest of the fetch is delegated to
    ``fetch_drs_to_file``.
    """
    serialization_props = self._serialization_props(user_context)
    http_headers = serialization_props.pop("http_headers", None) or {}
    fetch_drs_to_file(
        source_path,
        native_path,
        user_context,
        fetch_url_allowlist=self._allowlist,
        headers=http_headers,
        force_http=self._force_http,
    )

def _write_from(self, target_path, native_path, user_context=None, opts: Optional[FilesSourceOptions] = None):
    # DRS is a retrieval protocol in this plugin: writing back to a DRS
    # server is deliberately unsupported.
    raise NotImplementedError()
Expand Down
5 changes: 4 additions & 1 deletion lib/galaxy/files/sources/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import (
cast,
Dict,
List,
Optional,
)

Expand All @@ -15,6 +16,7 @@
get_charset_from_http_headers,
stream_to_open_named_file,
)
from galaxy.util.config_parsers import IpAllowedListEntryT
from . import (
BaseFilesSource,
FilesSourceOptions,
Expand All @@ -28,6 +30,7 @@
class HTTPFilesSourceProperties(FilesSourceProperties, total=False):
url_regex: str
http_headers: Dict[str, str]
fetch_url_allowlist: List[IpAllowedListEntryT]


class HTTPFilesSource(BaseFilesSource):
Expand Down Expand Up @@ -63,7 +66,7 @@ def _realize_to(

with urllib.request.urlopen(req, timeout=DEFAULT_SOCKET_TIMEOUT) as page:
# Verify url post-redirects is still allowlisted
validate_non_local(page.geturl(), self._allowlist)
validate_non_local(page.geturl(), self._allowlist or extra_props.get("fetch_url_allowlist") or [])
f = open(native_path, "wb") # fd will be .close()ed in stream_to_open_named_file
return stream_to_open_named_file(
page, f.fileno(), native_path, source_encoding=get_charset_from_http_headers(page.headers)
Expand Down
9 changes: 2 additions & 7 deletions lib/galaxy/files/uris.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@
from typing import (
List,
Optional,
Union,
)
from urllib.parse import urlparse

Expand All @@ -23,6 +22,7 @@
stream_to_open_named_file,
unicodify,
)
from galaxy.util.config_parsers import IpAllowedListEntryT

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -66,11 +66,6 @@ def stream_to_file(stream, suffix="", prefix="", dir=None, text=False, **kwd):
return stream_to_open_named_file(stream, fd, temp_name, **kwd)


IpAddressT = Union[ipaddress.IPv4Address, ipaddress.IPv6Address]
IpNetworkT = Union[ipaddress.IPv4Network, ipaddress.IPv6Network]
IpAllowedListEntryT = Union[IpAddressT, IpNetworkT]


def validate_uri_access(uri: str, is_admin: bool, ip_allowlist: List[IpAllowedListEntryT]) -> None:
"""Perform uniform checks on supplied URIs.
Expand Down Expand Up @@ -128,7 +123,7 @@ def validate_non_local(uri: str, ip_allowlist: List[IpAllowedListEntryT]) -> str
parsed_url = parsed_url[:idx]

# safe to log out, no credentials/request path, just an IP + port
log.debug("parsed url, port: %s : %s", parsed_url, port)
log.debug("parsed url %s, port: %s", parsed_url, port)
# Call getaddrinfo to resolve hostname into tuples containing IPs.
addrinfo = socket.getaddrinfo(parsed_url, port)
# Get the IP addresses that this entry resolves to (uniquely)
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@
ToolEvaluator,
)
from galaxy.util import (
etree,
parse_xml_string,
RWXRWXRWX,
safe_makedirs,
Expand Down Expand Up @@ -641,7 +642,7 @@ def get_tool_resource_xml(self, tool_id, tool_type):
field_name, self.resource_parameters
)
raise KeyError(message)
fields.append(self.resource_parameters[field_name])
fields.append(etree.fromstring(self.resource_parameters[field_name]))

if fields:
conditional_element = parse_xml_string(self.JOB_RESOURCE_CONDITIONAL_XML)
Expand Down
6 changes: 6 additions & 0 deletions lib/galaxy/model/unittest_utils/data_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ def __init__(self, root=None, **kwd):
self.file_path = os.path.join(self.data_dir, "files")
self.server_name = "main"
self.enable_quotas = False
self.user_library_import_symlink_allowlist = []
self.fetch_url_allowlist_ips = []
self.library_import_dir = None
self.user_library_import_dir = None
self.ftp_upload_dir = None
self.ftp_upload_purge = False

def __del__(self):
if self._remove_root:
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy/util/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,7 +979,7 @@ def parse_resource_parameters(resource_param_file):
resource_definitions_root = resource_definitions.getroot()
for parameter_elem in resource_definitions_root.findall("param"):
name = parameter_elem.get("name")
resource_parameters[name] = parameter_elem
resource_parameters[name] = etree.tostring(parameter_elem)

return resource_parameters

Expand Down
21 changes: 21 additions & 0 deletions lib/galaxy/util/config_parsers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import ipaddress
from typing import (
List,
Union,
)

from galaxy.util import unicodify

IpAddressT = Union[ipaddress.IPv4Address, ipaddress.IPv6Address]
IpNetworkT = Union[ipaddress.IPv4Network, ipaddress.IPv6Network]
IpAllowedListEntryT = Union[IpAddressT, IpNetworkT]


def parse_allowlist_ips(fetch_url_allowlist: List[str]) -> List[IpAllowedListEntryT]:
    """Parse allowlist entries into ``ipaddress`` objects.

    :param fetch_url_allowlist: raw entries, e.g. ``["127.0.0.1", "10.0.0.0/8"]``.
        Blank / whitespace-only entries are ignored.
    :return: a list of ``IPv4Address``/``IPv6Address`` objects for plain
        addresses and ``IPv4Network``/``IPv6Network`` objects for entries
        containing a ``/`` (CIDR notation, e.g. ``127.0.0.0/24``).
    :raises ValueError: if an entry is not a valid address or network.
    """
    parsed: List[IpAllowedListEntryT] = []
    for raw_entry in fetch_url_allowlist:
        # Normalize once per entry instead of re-stripping inside the
        # condition, the network branch and the address branch.
        entry = unicodify(raw_entry).strip()
        if not entry:
            continue
        if "/" in entry:
            # A slash means CIDR notation, e.g. 127.0.0.1/24.
            parsed.append(ipaddress.ip_network(entry))
        else:
            # Otherwise interpret it as a single IP address.
            parsed.append(ipaddress.ip_address(entry))
    return parsed
8 changes: 7 additions & 1 deletion lib/galaxy/util/drs.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import time
from os import PathLike
from typing import (
List,
Optional,
Tuple,
Union,
Expand All @@ -17,6 +18,7 @@
from galaxy.files.sources.http import HTTPFilesSourceProperties
from galaxy.files.uris import stream_url_to_file
from galaxy.util import DEFAULT_SOCKET_TIMEOUT
from galaxy.util.config_parsers import IpAllowedListEntryT

TargetPathT = Union[str, PathLike]

Expand Down Expand Up @@ -81,6 +83,7 @@ def fetch_drs_to_file(
force_http=False,
retry_options: Optional[RetryOptions] = None,
headers: Optional[dict] = None,
fetch_url_allowlist: Optional[List[IpAllowedListEntryT]] = None,
):
"""Fetch contents of drs:// URI to a target path."""
if not drs_uri.startswith("drs://"):
Expand All @@ -107,7 +110,10 @@ def fetch_drs_to_file(
access_url, access_headers = _get_access_info(get_url, access_method, headers=headers)
opts = FilesSourceOptions()
if access_method["type"] == "https":
extra_props: HTTPFilesSourceProperties = {"http_headers": access_headers or {}}
extra_props: HTTPFilesSourceProperties = {
"http_headers": access_headers or {},
"fetch_url_allowlist": fetch_url_allowlist or [],
}
opts.extra_props = extra_props
else:
opts.extra_props = {}
Expand Down
3 changes: 2 additions & 1 deletion lib/galaxy_test/api/test_drs.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
ConfiguredFileSourcesConfig,
DictFileSourcesUserContext,
)
from galaxy.util.config_parsers import parse_allowlist_ips
from galaxy.util.drs import (
fetch_drs_to_file,
RetryOptions,
Expand All @@ -32,7 +33,7 @@


def user_context_fixture():
file_sources_config = ConfiguredFileSourcesConfig()
file_sources_config = ConfiguredFileSourcesConfig(fetch_url_allowlist=parse_allowlist_ips(["127.0.0.0/24"]))
file_sources = ConfiguredFileSources(file_sources_config, load_stock_plugins=True)
user_context = DictFileSourcesUserContext(
preferences={
Expand Down
2 changes: 1 addition & 1 deletion lib/galaxy_test/driver/driver_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def setup_galaxy_config(
logging=LOGGING_CONFIG_DEFAULT,
monitor_thread_join_timeout=5,
object_store_store_by="uuid",
fetch_url_allowlist="127.0.0.1",
fetch_url_allowlist=["127.0.0.0/24"],
job_handler_monitor_sleep=0.2,
job_runner_monitor_sleep=0.2,
workflow_monitor_sleep=0.2,
Expand Down
2 changes: 1 addition & 1 deletion packages/files/setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,4 @@ python_requires = >=3.7

[options.packages.find]
exclude =
tests*
tests*
4 changes: 3 additions & 1 deletion test/unit/files/test_drs.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,17 @@ def drs_repo_handler(request):
content_type="application/json",
)

test_url = "drs://drs.example.org/314159"

def check_specific_header(request, **kwargs):
assert request.full_url == "https://my.respository.org/myfile.txt"
assert request.headers["Authorization"] == "Basic Z2E0Z2g6ZHJz"
response: Any = io.StringIO("hello drs world")
response.headers = {}
response.geturl = lambda: test_url
return response

with mock.patch.object(urllib.request, "urlopen", new=check_specific_header):
test_url = "drs://drs.example.org/314159"
user_context = user_context_fixture()
file_sources = configured_file_sources(FILE_SOURCES_CONF)
file_source_pair = file_sources.get_file_source_path(test_url)
Expand Down
20 changes: 15 additions & 5 deletions test/unit/files/test_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,16 @@


def test_file_source_http_specific():
test_url = "https://www.usegalaxy.org/myfile.txt"

def check_specific_header(request, **kwargs):
assert request.headers["Authorization"] == "Bearer IBearTokens"
response: Any = io.StringIO("hello specific world")
response.headers = {}
response.geturl = lambda: test_url
return response

with mock.patch.object(urllib.request, "urlopen", new=check_specific_header):
test_url = "https://www.usegalaxy.org/myfile.txt"
user_context = user_context_fixture()
file_sources = configured_file_sources(FILE_SOURCES_CONF)
file_source_pair = file_sources.get_file_source_path(test_url)
Expand All @@ -39,14 +41,16 @@ def check_specific_header(request, **kwargs):


def test_file_source_another_http_specific():
test_url = "http://www.galaxyproject.org/anotherfile.txt"

def check_another_header(request, **kwargs):
assert request.headers["Another_header"] == "found"
response: Any = io.StringIO("hello another world")
response.geturl = lambda: test_url
response.headers = {}
return response

with mock.patch.object(urllib.request, "urlopen", new=check_another_header):
test_url = "http://www.galaxyproject.org/anotherfile.txt"
user_context = user_context_fixture()
file_sources = configured_file_sources(FILE_SOURCES_CONF)
file_source_pair = file_sources.get_file_source_path(test_url)
Expand All @@ -58,14 +62,16 @@ def check_another_header(request, **kwargs):


def test_file_source_http_generic():
test_url = "https://www.elsewhere.org/myfile.txt"

def check_generic_headers(request, **kwargs):
assert not request.headers
response: Any = io.StringIO("hello generic world")
response.headers = {}
response.geturl = lambda: test_url
return response

with mock.patch.object(urllib.request, "urlopen", new=check_generic_headers):
test_url = "https://www.elsewhere.org/myfile.txt"
user_context = user_context_fixture()
file_sources = configured_file_sources(FILE_SOURCES_CONF)
file_source_pair = file_sources.get_file_source_path(test_url)
Expand All @@ -77,14 +83,16 @@ def check_generic_headers(request, **kwargs):


def test_file_source_ftp_url():
test_url = "ftp://ftp.gnu.org/README"

def check_generic_headers(request, **kwargs):
assert not request.headers
response: Any = io.StringIO("This is ftp.gnu.org, the FTP server of the the GNU project.")
response.headers = {}
response.geturl = lambda: test_url
return response

with mock.patch.object(urllib.request, "urlopen", new=check_generic_headers):
test_url = "ftp://ftp.gnu.org/README"
user_context = user_context_fixture()
file_sources = configured_file_sources(FILE_SOURCES_CONF)
file_source_pair = file_sources.get_file_source_path(test_url)
Expand All @@ -108,14 +116,16 @@ def test_file_source_http_without_stock_generic():


def test_file_source_http_without_stock_specific():
test_url = "https://www.usegalaxy.org/myfile2.txt"

def check_specific_header(request, **kwargs):
assert request.headers["Authorization"] == "Bearer IBearTokens"
response: Any = io.StringIO("hello specific world 2")
response.headers = {}
response.geturl = lambda: test_url
return response

with mock.patch.object(urllib.request, "urlopen", new=check_specific_header):
test_url = "https://www.usegalaxy.org/myfile2.txt"
user_context = user_context_fixture()
file_sources = configured_file_sources(FILE_SOURCES_CONF_WITHOUT_STOCK)
file_source_pair = file_sources.get_file_source_path(test_url)
Expand Down

0 comments on commit 09ff121

Please sign in to comment.