Skip to content

Commit

Permalink
Tighten TRS url check
Browse files Browse the repository at this point in the history
Fixes
https://sentry.galaxyproject.org/share/issue/5b8d495edd6545278d4d93d0ed69ac3a/:
```
Message
Uncaught exception in exposed API method:
Stack Trace

Newest

gaierror: [Errno -2] Name or service not known
  File "urllib3/connection.py", line 174, in _new_conn
    conn = connection.create_connection(
  File "urllib3/util/connection.py", line 72, in create_connection
    for res in socket.getaddrinfo(host, port, family, socket.SOCK_STREAM):
  File "socket.py", line 962, in getaddrinfo
    for res in _socket.getaddrinfo(host, port, family, type, proto, flags):
NewConnectionError: <urllib3.connection.HTTPConnection object at 0x7f8e35406450>: Failed to establish a new connection: [Errno -2] Name or service not known
  File "urllib3/connectionpool.py", line 715, in urlopen
    httplib_response = self._make_request(
  File "urllib3/connectionpool.py", line 416, in _make_request
    conn.request(method, url, **httplib_request_kw)
  File "urllib3/connection.py", line 244, in request
    super(HTTPConnection, self).request(method, url, body=body, headers=headers)
  File "http/client.py", line 1286, in request
    self._send_request(method, url, body, headers, encode_chunked)
  File "http/client.py", line 1332, in _send_request
    self.endheaders(body, encode_chunked=encode_chunked)
  File "http/client.py", line 1281, in endheaders
    self._send_output(message_body, encode_chunked=encode_chunked)
  File "http/client.py", line 1041, in _send_output
    self.send(msg)
  File "http/client.py", line 979, in send
    self.connect()
  File "urllib3/connection.py", line 205, in connect
    conn = self._new_conn()
  File "urllib3/connection.py", line 186, in _new_conn
    raise NewConnectionError(
MaxRetryError: HTTPConnectionPool(host='https', port=80): Max retries exceeded with url: //training.galaxyproject.org/training-material//api/ga4gh/trs/v2/tools/variant-analysis-microbial-variants/versions/microbial_variant_calling/GALAXY/descriptor (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f8e35406450>: Failed to establish a new connection: [Errno -2] Name or service not known'))
  File "requests/adapters.py", line 486, in send
    resp = conn.urlopen(
  File "urllib3/connectionpool.py", line 799, in urlopen
    retries = retries.increment(
  File "urllib3/util/retry.py", line 592, in increment
    raise MaxRetryError(_pool, url, error or ResponseError(cause))
ConnectionError: HTTPConnectionPool(host='https', port=80): Max retries exceeded with url: //training.galaxyproject.org/training-material//api/ga4gh/trs/v2/tools/variant-analysis-microbial-variants/versions/microbial_variant_calling/GALAXY/descriptor (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x7f8e35406450>: Failed to establish a new connection: [Errno -2] Name or service not known'))
  File "galaxy/web/framework/decorators.py", line 346, in decorator
    rval = func(self, trans, *args, **kwargs)
  File "galaxy/webapps/galaxy/api/workflows.py", line 261, in create
    archive_data = server.get_version_descriptor(trs_tool_id, trs_version_id)
  File "galaxy/workflow/trs_proxy.py", line 115, in get_version_descriptor
    return self._get(trs_api_url)["content"]
  File "galaxy/workflow/trs_proxy.py", line 126, in _get
    response = requests.get(url, params=params, timeout=DEFAULT_SOCKET_TIMEOUT)
  File "galaxy/util/requests.py", line 29, in wrapper
    rval = f(*args, **kwargs)
  File "requests/api.py", line 73, in get
    return request("get", url, params=params, **kwargs)
  File "requests/api.py", line 59, in request
    return session.request(method=method, url=url, **kwargs)
  File "requests/sessions.py", line 589, in request
    resp = self.send(prep, **send_kwargs)
  File "requests/sessions.py", line 703, in send
    r = adapter.send(request, **kwargs)
  File "requests/adapters.py", line 519, in send
    raise ConnectionError(e, request=request)
```
which happened for https://usegalaxy.org/workflows/trs_import?run_form=true&trs_url=http%3A%2F%2Flocalhost%3A4000%2F%2Ftraining-material%2F%2Fapi%2Fga4gh%2Ftrs%2Fv2%2Ftools%2Fvariant-analysis-microbial-variants%2Fversions%2Fmicrobial_variant_calling"
  • Loading branch information
mvdbeek committed Sep 18, 2024
1 parent aad1caf commit 64e74b8
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 16 deletions.
6 changes: 4 additions & 2 deletions lib/galaxy/webapps/galaxy/api/workflows.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,15 +243,17 @@ def create(self, trans: GalaxyWebTransaction, payload=None, **kwd):
trs_version_id = None
import_source = None
if "trs_url" in payload:
parts = self.app.trs_proxy.match_url(payload["trs_url"])
parts = self.app.trs_proxy.match_url(
payload["trs_url"], trans.app.config.fetch_url_allowlist_ips
)
if parts:
server = self.app.trs_proxy.server_from_url(parts["trs_base_url"])
trs_tool_id = parts["tool_id"]
trs_version_id = parts["version_id"]
payload["trs_tool_id"] = trs_tool_id
payload["trs_version_id"] = trs_version_id
else:
raise exceptions.MessageException("Invalid TRS URL.")
raise exceptions.RequestParameterInvalidException(f"Invalid TRS URL {payload['trs_url']}.")
else:
trs_server = payload.get("trs_server")
server = self.app.trs_proxy.get_server(trs_server)
Expand Down
18 changes: 16 additions & 2 deletions lib/galaxy/workflow/trs_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,21 @@
import os
import re
import urllib.parse
from typing import List

import yaml

from galaxy.exceptions import MessageException
from galaxy.exceptions import (
MessageException,
RequestParameterInvalidException,
)
from galaxy.files.uris import validate_non_local
from galaxy.util import (
asbool,
DEFAULT_SOCKET_TIMEOUT,
requests,
)
from galaxy.util.config_parsers import IpAllowedListEntryT
from galaxy.util.search import parse_filters

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -73,7 +79,15 @@ def get_server(self, trs_server):
def server_from_url(self, trs_url):
return TrsServer(trs_url)

def match_url(self, url):
def match_url(self, url, ip_allowlist: List[IpAllowedListEntryT]):
if url.lstrip().startswith("file://"):
# requests doesn't know what to do with file:// anyway, but just in case we swap
# out the implementation
raise RequestParameterInvalidException("Invalid TRS URL %s", url)
validate_non_local(url, ip_allowlist=ip_allowlist or [])
return self._match_url(url)

def _match_url(self, url):
if matches := re.match(TRS_URL_REGEX, url):
match_dict = matches.groupdict()
match_dict["tool_id"] = urllib.parse.unquote(match_dict["tool_id"])
Expand Down
32 changes: 20 additions & 12 deletions test/unit/workflows/test_trs_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import pytest
import yaml

from galaxy.exceptions import ConfigDoesNotAllowException
from galaxy.workflow.trs_proxy import (
GA4GH_GALAXY_DESCRIPTOR,
parse_search_kwds,
Expand Down Expand Up @@ -49,19 +50,19 @@ def test_proxy():

def test_match_url():
proxy = TrsProxy()
valid_dockstore = proxy.match_url(
valid_dockstore = proxy._match_url(
"https://dockstore.org/api/ga4gh/trs/v2/tools/"
"quay.io%2Fcollaboratory%2Fdockstore-tool-bedtools-genomecov/versions/0.3"
"quay.io%2Fcollaboratory%2Fdockstore-tool-bedtools-genomecov/versions/0.3",
)
assert valid_dockstore
assert valid_dockstore["trs_base_url"] == "https://dockstore.org/api"
# Should unquote
assert valid_dockstore["tool_id"] == "quay.io/collaboratory/dockstore-tool-bedtools-genomecov"
assert valid_dockstore["version_id"] == "0.3"

valid_dockstore_unescaped = proxy.match_url(
valid_dockstore_unescaped = proxy._match_url(
"https://dockstore.org/api/ga4gh/trs/v2/tools/"
"#workflow/github.com/jmchilton/galaxy-workflow-dockstore-example-1/mycoolworkflow/versions/master"
"#workflow/github.com/jmchilton/galaxy-workflow-dockstore-example-1/mycoolworkflow/versions/master",
)
assert valid_dockstore_unescaped
assert valid_dockstore_unescaped["trs_base_url"] == "https://dockstore.org/api"
Expand All @@ -71,40 +72,47 @@ def test_match_url():
)
assert valid_dockstore_unescaped["version_id"] == "master"

valid_workflow_hub = proxy.match_url("https://workflowhub.eu/ga4gh/trs/v2/tools/344/versions/1")
valid_workflow_hub = proxy._match_url("https://workflowhub.eu/ga4gh/trs/v2/tools/344/versions/1")
assert valid_workflow_hub
assert valid_workflow_hub["trs_base_url"] == "https://workflowhub.eu"
assert valid_workflow_hub["tool_id"] == "344"
assert valid_workflow_hub["version_id"] == "1"

valid_arbitrary_trs = proxy.match_url(
valid_arbitrary_trs = proxy._match_url(
"https://my-trs-server.golf/stuff/ga4gh/trs/v2/tools/hello-world/versions/version-1"
)
assert valid_arbitrary_trs
assert valid_arbitrary_trs["trs_base_url"] == "https://my-trs-server.golf/stuff"
assert valid_arbitrary_trs["tool_id"] == "hello-world"
assert valid_arbitrary_trs["version_id"] == "version-1"

ignore_extra = proxy.match_url(
"https://workflowhub.eu/ga4gh/trs/v2/tools/344/versions/1/CWL/descriptor/ro-crate-metadata.json"
ignore_extra = proxy._match_url(
"https://workflowhub.eu/ga4gh/trs/v2/tools/344/versions/1/CWL/descriptor/ro-crate-metadata.json",
)
assert ignore_extra
assert ignore_extra["trs_base_url"] == "https://workflowhub.eu"
assert ignore_extra["tool_id"] == "344"
assert ignore_extra["version_id"] == "1"

invalid = proxy.match_url("https://workflowhub.eu/workflows/1")
invalid = proxy._match_url("https://workflowhub.eu/workflows/1")
assert invalid is None

missing_version = proxy.match_url("https://workflowhub.eu/ga4gh/trs/v2/tools/344")
missing_version = proxy._match_url("https://workflowhub.eu/ga4gh/trs/v2/tools/344")
assert missing_version is None

blank = proxy.match_url("")
blank = proxy.match_url("", [])
assert blank is None

not_url = proxy.match_url("1234")
not_url = proxy.match_url("1234", [])
assert not_url is None

expected_exception = None
try:
proxy.match_url("https://localhost", [])
except ConfigDoesNotAllowException as e:
expected_exception = e
assert expected_exception, "matching url against localhost should fail"


def test_server_from_url():
proxy = TrsProxy()
Expand Down

0 comments on commit 64e74b8

Please sign in to comment.