From 4574603e309fbd14737afff9200fed5ce783ff6d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Mar=C3=ADa=20Fern=C3=A1ndez?= Date: Wed, 7 Dec 2022 02:36:44 +0100 Subject: [PATCH] Implemented "trs" scheme, based on https://github.com/ga4gh/tool-registry-service-schemas/issues/164 Tested with dockstore and WorkflowHub (it returns a 500 HTTP error code) --- wfexs_backend/fetchers/trs_files.py | 135 +++++++++++++++++++++++----- 1 file changed, 112 insertions(+), 23 deletions(-) diff --git a/wfexs_backend/fetchers/trs_files.py b/wfexs_backend/fetchers/trs_files.py index 17e66841..46e8a193 100644 --- a/wfexs_backend/fetchers/trs_files.py +++ b/wfexs_backend/fetchers/trs_files.py @@ -23,8 +23,7 @@ from typing import ( cast, - Mapping, - Optional, + TYPE_CHECKING, ) from urllib import parse @@ -33,27 +32,39 @@ from . import fetchClassicURL, FetcherException from ..common import ( - AbsPath, ContentKind, - ProtocolFetcher, - ProtocolFetcherReturn, - SecurityContextConfig, - URIType, URIWithMetadata, ) +if TYPE_CHECKING: + from typing import ( + List, + Mapping, + MutableSequence, + Optional, + Sequence, + ) + + from ..common import ( + AbsPath, + ProtocolFetcher, + ProtocolFetcherReturn, + SecurityContextConfig, + URIType, + ) INTERNAL_TRS_SCHEME_PREFIX = "wfexs.trs.files" +TRS_SCHEME_PREFIX = "trs" TRS_FILES_SUFFIX = "/files" TRS_DESCRIPTOR_INFIX = "/descriptor/" def fetchTRSFiles( - remote_file: URIType, - cachedFilename: AbsPath, - secContext: Optional[SecurityContextConfig] = None, -) -> ProtocolFetcherReturn: + remote_file: "URIType", + cachedFilename: "AbsPath", + secContext: "Optional[SecurityContextConfig]" = None, +) -> "ProtocolFetcherReturn": """ Method to download contents from TRS files related to a tool @@ -63,14 +74,94 @@ def fetchTRSFiles( """ parsedInputURL = parse.urlparse(remote_file) + path_steps: "List[str]" = parsedInputURL.path.split("/") embedded_remote_file = parsedInputURL.path - if not embedded_remote_file.endswith(TRS_FILES_SUFFIX): - metadata_url = cast(URIType, embedded_remote_file + TRS_FILES_SUFFIX) - else: - metadata_url = cast(URIType, embedded_remote_file) - descriptor_base_url = ( - embedded_remote_file[0 : -len(TRS_FILES_SUFFIX)] + TRS_DESCRIPTOR_INFIX + metadata_array: "MutableSequence[URIWithMetadata]" = [] + if parsedInputURL.scheme == INTERNAL_TRS_SCHEME_PREFIX: + # TODO: Improve this code + if not embedded_remote_file.endswith(TRS_FILES_SUFFIX): + metadata_url = cast("URIType", embedded_remote_file + TRS_FILES_SUFFIX) + descriptor_base_url = embedded_remote_file + TRS_DESCRIPTOR_INFIX + else: + metadata_url = cast("URIType", embedded_remote_file) + descriptor_base_url = ( + embedded_remote_file[0 : -len(TRS_FILES_SUFFIX)] + TRS_DESCRIPTOR_INFIX + ) + elif parsedInputURL.scheme == TRS_SCHEME_PREFIX: + # TRS official scheme + if len(path_steps) < 3 or path_steps[0] != "": + raise FetcherException( + f"Ill-formed TRS CURIE {remote_file}. It should be in the format of {TRS_SCHEME_PREFIX}://id/version or {TRS_SCHEME_PREFIX}://prefix-with-slashes/id/version" + ) + + version_steps = path_steps[0:-2] + version_steps.extend( + ["ga4gh", "trs", "v2", "tools", path_steps[-2], "versions", path_steps[-1]] + ) + version_metadata_url = cast( + "URIType", + parse.urlunparse( + parse.ParseResult( + scheme="https", + netloc=parsedInputURL.netloc, + path="/".join(version_steps), + params="", + query="", + fragment="", + ) + ), + ) + version_meta = { + "fetched": version_metadata_url, + "payload": None, + } + metadata_array.append(URIWithMetadata(remote_file, version_meta)) + try: + metaio = io.BytesIO() + _, metametaio, _ = fetchClassicURL(version_metadata_url, metaio) + version_metadata = json.loads(metaio.getvalue().decode("utf-8")) + version_meta["payload"] = version_metadata + metadata_array.extend(metametaio) + + except urllib.error.HTTPError as he: + raise FetcherException( + f"Error fetching or processing TRS version metadata for {remote_file} : {he.code} {he.reason}" + ) from he + + # At last, we can finish building the URL + new_path_steps = version_steps + [ + version_metadata["descriptor_type"][0], + "files", + ] + + metadata_url = cast( + "URIType", + parse.urlunparse( + parse.ParseResult( + scheme="https", + netloc=parsedInputURL.netloc, + path="/".join(new_path_steps), + params="", + query="", + fragment="", + ) + ), + ) + + descriptor_steps = version_steps + [ + version_metadata["descriptor_type"][0], + "descriptor", + ] + descriptor_base_url = parse.urlunparse( + parse.ParseResult( + scheme="https", + netloc=parsedInputURL.netloc, + path="/".join(descriptor_steps) + "/", + params="", + query="", + fragment="", + ) ) topMeta = { @@ -80,15 +171,12 @@ def fetchTRSFiles( "remote_workflow_entrypoint": None, } metadata_array = [URIWithMetadata(remote_file, topMeta)] - metaio = None try: metaio = io.BytesIO() _, metametaio, _ = fetchClassicURL(metadata_url, metaio) metadata = json.loads(metaio.getvalue().decode("utf-8")) topMeta["payload"] = metadata metadata_array.extend(metametaio) - - metaio = None except urllib.error.HTTPError as he: raise FetcherException( "Error fetching or processing TRS files metadata for {} : {} {}".format( @@ -104,8 +192,8 @@ def fetchTRSFiles( if file_rel_path is not None: emptyWorkflow = False - file_url = cast(URIType, descriptor_base_url + file_rel_path) - absfile = cast(AbsPath, os.path.join(cachedFilename, file_rel_path)) + file_url = cast("URIType", descriptor_base_url + file_rel_path) + absfile = cast("AbsPath", os.path.join(cachedFilename, file_rel_path)) # Intermediate path creation reldir = os.path.dirname(file_rel_path) @@ -146,6 +234,7 @@ def fetchTRSFiles( # These are schemes from identifiers.org -SCHEME_HANDLERS: Mapping[str, ProtocolFetcher] = { +SCHEME_HANDLERS: "Mapping[str, ProtocolFetcher]" = { INTERNAL_TRS_SCHEME_PREFIX: fetchTRSFiles, + TRS_SCHEME_PREFIX: fetchTRSFiles, }