diff --git a/python/src/ark_resolver/ark_url_rust.py b/python/src/ark_resolver/ark_url_rust.py new file mode 100644 index 0000000..d0abe94 --- /dev/null +++ b/python/src/ark_resolver/ark_url_rust.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python3 + +# Copyright © 2015 - 2025 Swiss National Data and Service Center for the Humanities and/or DaSCH Service Platform contributors. +# SPDX-License-Identifier: Apache-2.0 + +import base64 +import uuid +import logging +from string import Template +from urllib import parse + +from ark_resolver.ark_url import ArkUrlException +import ark_resolver.check_digit as check_digit_py + + +################################################################################################# +# Tools for generating and parsing DSP ARK URLs. + + + +class ArkUrlInfo: + """ + Represents the information retrieved from a DSP ARK ID. + """ + + def __init__(self, settings, ark_id): + self.settings = settings + + match = settings.match_ark_path(ark_id) + + if match: + # Yes. Is it a version 1 ARK ID? + self.url_version = int(match[0]) + else: + # No. Is it a version 0 ARK ID? + match = settings.match_v0_ark_path(ark_id) + + # If NOT None!, then it is a version 0 ARK ID. + if match is not None: + self.url_version = 0 + + if match is None: + raise ArkUrlException(f"Invalid ARK ID: {ark_id}") + + # Which version of ARK ID did we match? + if self.url_version == settings.dsp_ark_version: + # Version 1. 
+ self.project_id = match[1] + escaped_resource_id_with_check_digit = match[2] + + if escaped_resource_id_with_check_digit is not None: + # TODO: move to rust + self.resource_id = unescape_and_validate_uuid( + ark_url=ark_id, + escaped_uuid=escaped_resource_id_with_check_digit + ) + + escaped_value_id_with_check_digit = match[3] + + if escaped_value_id_with_check_digit is not None: + self.value_id = unescape_and_validate_uuid( + ark_url=ark_id, + escaped_uuid=escaped_value_id_with_check_digit + ) + else: + self.value_id = None + + self.timestamp = match[4] + else: + self.resource_id = None + self.value_id = None + self.timestamp = None + elif self.url_version == 0: + # Version 0. + self.project_id = match[0].upper() + self.resource_id = match[1] + self.value_id = None + + submitted_timestamp = match[2] + + if submitted_timestamp is None or len(submitted_timestamp) < 8: + self.timestamp = None + else: + self.timestamp = submitted_timestamp + + project_config = self.settings.get_project_config(self.project_id) + + if project_config: + if not project_config.get_boolean("AllowVersion0"): + raise ArkUrlException(f"Invalid ARK ID (version 0 not allowed): {ark_id}") + # else: although project not found, it is still a valid ARK ID + + else: + raise ArkUrlException(f"Invalid ARK ID {ark_id}. The version of the ARK ID doesn't match the version defined in the settings.") + + self.template_dict = { + "url_version": self.url_version, + "project_id": self.project_id, + "resource_id": self.resource_id, + "timestamp": self.timestamp + } + + def to_redirect_url(self) -> str: + """ + Checks if the object that it is called on is the top level object which is redirected to TopLevelObjectURL. + If not, returns the redirect URL of either a PHP-SALSAH or DSP object. 
+ """ + if self.project_id is None: + # return the redirect URL of the top level object + return self.settings.default_config.get("TopLevelObjectUrl") + else: + project_config = self.settings.get_project_config(self.project_id) + + if project_config.get_boolean("UsePhp"): + # return the redirect URL of a PHP-SALSAH object + return self.to_php_redirect_url(project_config) + else: + # return the redirect URL of a DSP object + return self.to_dsp_redirect_url(project_config) + + def to_resource_iri(self) -> str: + """ + Converts an ARK URL to a DSP resource IRI. In case of an ARK URL version 0 the UUID for the IRI needs to be of + version 5 and created from the DaSCH specific namespace and the resource_id coming from the ARK URL. This is for + objects that have been migrated from salsah.org to DSP. + """ + project_config = self.settings.get_project_config(self.project_id) + resource_iri_template = Template(project_config.get("DSPResourceIri")) + + template_dict = self.template_dict.copy() + template_dict["host"] = project_config.get("Host") + + if self.url_version == 0: + # in case of an ARK URL version 0, the resource_id generated from the salsah ID has to be converted to a + # base64 UUID version 5 + generic_namespace_url = uuid.NAMESPACE_URL + dasch_uuid_ns = uuid.uuid5(generic_namespace_url, "https://dasch.swiss") # cace8b00-717e-50d5-bcb9-486f39d733a2 + resource_id = template_dict["resource_id"] + dsp_iri = base64.urlsafe_b64encode(uuid.uuid5(dasch_uuid_ns, resource_id).bytes).decode("utf-8") + # remove the padding ('==') from the end of the string + dsp_iri = dsp_iri[:-2] + template_dict["resource_id"] = dsp_iri + + return resource_iri_template.substitute(template_dict) + + def to_dsp_redirect_url(self, project_config) -> str: + """ + In case it's called on a DSP object (either version 0 or version 1), converts an ARK URL to the URL that the + client should be redirected to according to its type (project, resource, or value) + """ + + resource_iri_template = 
Template(project_config.get("DSPResourceIri")) + project_iri_template = Template(project_config.get("DSPProjectIri")) + + template_dict = self.template_dict.copy() + template_dict["host"] = project_config.get("Host") + + # it's a project + if self.resource_id is None: + request_template = Template(project_config.get("DSPProjectRedirectUrl")) + template_dict["project_host"] = project_config.get("ProjectHost") + # it's a resource + elif self.value_id is None: + if self.timestamp is None: + request_template = Template(project_config.get("DSPResourceRedirectUrl")) + else: + request_template = Template(project_config.get("DSPResourceVersionRedirectUrl")) + # it's a value + elif self.value_id: + template_dict["value_id"] = self.value_id + if self.timestamp is None: + request_template = Template(project_config.get("DSPValueRedirectUrl")) + else: + request_template = Template(project_config.get("DSPValueVersionRedirectUrl")) + + # in case of a version 0 ARK URL, convert the resource ID to a UUID (base64 encoded) + if self.url_version == 0: + res_iri = self.to_resource_iri() + template_dict["resource_id"] = res_iri.split("/")[-1] + + # add the DSP resource IRI to the template_dict + resource_iri = resource_iri_template.substitute(template_dict) + url_encoded_resource_iri = parse.quote(resource_iri, safe="") + template_dict["resource_iri"] = url_encoded_resource_iri + + # add the DSP project IRI to the template_dict + project_iri = project_iri_template.substitute(template_dict) + url_encoded_project_iri = parse.quote(project_iri, safe="") + template_dict["project_iri"] = url_encoded_project_iri + + return request_template.substitute(template_dict) + + def to_php_redirect_url(self, project_config) -> str: + """ + In case it's called on a PHP-SALSAH object, converts the ARK URL to the URL that the client should be + redirected to. 
+ """ + template_dict = self.template_dict.copy() + template_dict["host"] = project_config.get("Host") + + # it's a resource + if self.resource_id is not None: + try: + resource_int_id = (int(self.resource_id, 16) // self.settings.resource_int_id_factor) - 1 + except ValueError: + logging.exception(f"Invalid resource ID: {self.resource_id}") + raise ArkUrlException(f"Invalid resource ID: {self.resource_id}") + + template_dict["resource_int_id"] = resource_int_id + + if self.timestamp is None: + request_template = Template(project_config.get("PhpResourceRedirectUrl")) + else: + request_template = Template(project_config.get("PhpResourceVersionRedirectUrl")) + + # The PHP server only takes timestamps in the format YYYYMMDD + template_dict["timestamp"] = self.timestamp[0:8] + + # it's a project + else: + request_template = Template(project_config.get("DSPProjectRedirectUrl")) + template_dict["project_host"] = project_config.get("ProjectHost") + + return request_template.substitute(template_dict) + + +def add_check_digit_and_escape(uuid) -> str: + """ + Adds a check digit to a Base64-encoded UUID, and escapes the result. + """ + check_digit = check_digit_py.calculate_check_digit(uuid) + uuid_with_check_digit = uuid + check_digit + + # Escape '-' as '=' in the resource ID and check digit, because '-' can be ignored in ARK URLs. + return uuid_with_check_digit.replace('-', '=') + + +def unescape_and_validate_uuid(ark_url, escaped_uuid) -> str: + """ + Unescapes a Base64-encoded UUID, validates its check digit, and returns the unescaped UUID without the check digit. + """ + # '-' is escaped as '=' in the UUID and check digit, because '-' can be ignored in ARK URLs. 
+ unescaped_uuid = escaped_uuid.replace('=', '-') + + if not check_digit_py.is_valid(unescaped_uuid): + raise ArkUrlException(f"Invalid ARK ID: {ark_url}") + + return unescaped_uuid[0:-1] + + +class ArkUrlFormatter: + """ + Handles formatting of DSP resource IRIs into ARK URLs + """ + + def __init__(self, settings): + self.settings = settings + + def resource_iri_to_ark_url(self, resource_iri, value_id=None, timestamp=None) -> str: + """ + Converts a DSP resource IRI to an ARK URL. + """ + # checks if given resource IRI is valid and matches (i.e. tokenizes) it into project_id and resource_id + match = self.settings.match_resource_iri(resource_iri) + + if match is None: + raise ArkUrlException("Invalid resource IRI: {}".format(resource_iri)) + + project_id = match[0] + resource_id = match[1] + escaped_resource_id_with_check_digit = add_check_digit_and_escape(resource_id) + + # checks if there is a value_id + if value_id is not None: + escaped_value_id_with_check_digit = add_check_digit_and_escape(value_id) + else: + escaped_value_id_with_check_digit = None + + # formats and returns the ARK URL + return self.format_ark_url( + project_id=project_id, + resource_id_with_check_digit=escaped_resource_id_with_check_digit, + value_id_with_check_digit=escaped_value_id_with_check_digit, + timestamp=timestamp + ) + + def format_ark_url(self, + project_id, + resource_id_with_check_digit, + value_id_with_check_digit, + timestamp) -> str: + """ + Formats and returns a DSP ARK URL from the given parameters and configuration. + """ + if self.settings.ark_config.get_boolean("ArkHttpsProxy"): + protocol = "https" + else: + protocol = "http" + + url = "{}://{}/ark:/{}/{}/{}/{}".format( + protocol, + self.settings.ark_config.get("ArkExternalHost"), + self.settings.ark_config.get("ArkNaan"), + self.settings.dsp_ark_version, + project_id, + resource_id_with_check_digit + ) + + # If there's a value UUID, add it. 
+ if value_id_with_check_digit is not None: + url += "/" + value_id_with_check_digit + + # If there's a timestamp, add it as an object variant. + if timestamp is not None: + url += "." + timestamp + + return url diff --git a/python/src/ark_resolver/health.py b/python/src/ark_resolver/health.py new file mode 100644 index 0000000..e81a4f8 --- /dev/null +++ b/python/src/ark_resolver/health.py @@ -0,0 +1,33 @@ +from sanic import Blueprint, json +import time +import os + +health_bp = Blueprint("health", url_prefix="/health") + +# Store service start time for uptime calculation +start_time = time.time() + +async def check_database(): + """Simulate a database check (Replace with actual DB check)""" + return "ok" + +async def check_external_api(): + """Simulate an external API check (Replace with real API health check)""" + return "ok" + +@health_bp.get("/") +async def health(request): + """Health check endpoint""" + db_status = await check_database() + api_status = await check_external_api() + + return json({ + "status": "ok", + "version": os.getenv("VERSION", "0.1.0"), + "build": os.getenv("GIT_COMMIT_HASH", "unknown"), + "uptime": int(time.time() - start_time), + "dependencies": { + "database": db_status, + "external_api": api_status + } + }) diff --git a/python/test/test_arkResolver.py b/python/tests/test_ark_url_rust.py similarity index 64% rename from python/test/test_arkResolver.py rename to python/tests/test_ark_url_rust.py index 8ae54f2..dc18617 100644 --- a/python/test/test_arkResolver.py +++ b/python/tests/test_ark_url_rust.py @@ -1,22 +1,18 @@ import os -import pickle import pytest +import _rust -from ark_resolver.ark_url import ArkUrlFormatter, ArkUrlInfo, ArkUrlException +from ark_resolver import ark_url_rust @pytest.fixture(scope="module") def settings(): - """Loads settings from a pickle file.""" - try: - with open(os.path.join("python", "test", "settings.pkl"), "rb") as mock_settings: - return pickle.load(mock_settings) - except FileNotFoundError: - # When 
running tests directly from the IDE, the working directory might be different - with open("settings.pkl", "rb") as mock_settings: - return pickle.load(mock_settings) + """Loads settings.""" + config_path = "python/src/ark_resolver/ark-config.ini" + os.environ['ARK_REGISTRY'] = 'python/src/ark_resolver/ark-registry.ini' + return _rust.load_settings(config_path) def test_ark_url_formatter(settings): - ark_url_formatter = ArkUrlFormatter(settings) + ark_url_formatter = ark_url_rust.ArkUrlFormatter(settings) # generate an ARK URL from a resource IRI without a timestamp resource_iri = "http://rdfh.ch/0001/cmfk1DMHRBiR4-_6HXpEFA" ark_url = ark_url_formatter.resource_iri_to_ark_url(resource_iri=resource_iri) @@ -38,127 +34,110 @@ def test_ark_url_formatter(settings): def test_ark_url_info_redirect_top_level_object(settings): # parse and redirect an ARK URL representing the top-level object - ark_url_info = ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://dasch.swiss" def test_ark_url_info_redirect_project(settings): # parse and redirect an ARK URL of a project with default project host, i.e. 
without specified project host - ark_url_info = ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1/0003") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0003") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://meta.dasch.swiss/projects/0003" # parse and redirect an ARK URL of a project with a specific project host - ark_url_info = ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1/0004") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0004") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://other-meta.dasch.swiss/projects/0004" def test_ark_url_info_redirect_resource(settings): # parse and redirect an ARK URL of a DSP resource without a timestamp - ark_url_info = ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn") - redirect_url = ark_url_info.to_redirect_url() - assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA" - - # parse and redirect an ARK HTTP URL of a DSP resource without a timestamp - ark_url_info = ArkUrlInfo(settings, "http://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA" # parse and redirect an ARK URL of a DSP resource with a timestamp with a fractional part - ark_url_info = ArkUrlInfo(settings, - "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622513Z") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622513Z") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA?version=20180604T085622513Z" # parse and redirect an ARK URL of a DSP resource with a timestamp without a fractional part - 
ark_url_info = ArkUrlInfo(settings, - "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622Z") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622Z") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA?version=20180604T085622Z" # parse an ARK URL of a DSP resource without a timestamp and redirect it to a customized location - ark_url_info = ArkUrlInfo(settings, - "https://ark.example.org/ark:/00000/1/0005/0_sWRg5jT3S0PLxakX9ffg1") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0005/0_sWRg5jT3S0PLxakX9ffg1") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://0.0.0.0:4200/resources/0005/0_sWRg5jT3S0PLxakX9ffg" def test_ark_url_info_redirect_value(settings): # parse an ARK URL of a DSP value without a timestamp and redirect it to a customized location - ark_url_info = ArkUrlInfo(settings, - "https://ark.example.org/ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://0.0.0.0:4200/resources/0005/SQkTPdHdTzq_gqbwj6QR-A/-SSbnPK3Q7WWxzBT1UPpRg" # parse and redirect an ARK URL of a DSP value with a timestamp - ark_url_info = ArkUrlInfo(settings, - "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu.20180604T085622Z") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu.20180604T085622Z") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA/pLlW4ODASumZfZFbJdpw1g?version=20180604T085622Z" - # parse an ARK URL of a DSP value without a timestamp and redirect it to a customized location - 
ark_url_info = ArkUrlInfo(settings, - "https://ark.example.org/ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo") - redirect_url = ark_url_info.to_redirect_url() - assert redirect_url == "http://0.0.0.0:4200/resources/0005/SQkTPdHdTzq_gqbwj6QR-A/-SSbnPK3Q7WWxzBT1UPpRg" - def test_ark_url_info_redirect_salsah_resource(settings): # parse and redirect a version 1 ARK URL of a PHP-SALSAH resource without a timestamp - ark_url_info = ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1/0803/751e0b8am") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0803/751e0b8am") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://data.dasch.swiss/resources/1" # parse and redirect a version 1 ARK URL of a PHP-SALSAH resource with a timestamp - ark_url_info = ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1/0803/751e0b8am.20190118T102919Z") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0803/751e0b8am.20190118T102919Z") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://data.dasch.swiss/resources/1?citdate=20190118" # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource without a timestamp - ark_url_info = ArkUrlInfo(settings, "http://ark.example.org/ark:/00000/080e-76bb2132d30d6-0") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/080e-76bb2132d30d6-0") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://data.dasch.swiss/resources/2126045" # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource with a timestamp - ark_url_info = ArkUrlInfo(settings, "http://ark.example.org/ark:/00000/080e-76bb2132d30d6-0.20190129") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/080e-76bb2132d30d6-0.20190129") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://data.dasch.swiss/resources/2126045?citdate=20190129" # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource with a 
timestamp that's too short - ark_url_info = ArkUrlInfo(settings, "http://ark.example.org/ark:/00000/080e-76bb2132d30d6-0.2019111") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/080e-76bb2132d30d6-0.2019111") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://data.dasch.swiss/resources/2126045" def test_ark_url_info_redirect_salsah_project(settings): # parse and redirect an ARK URL of a project on Salsah with default project host, i.e. without specified project host - ark_url_info = ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1/0803") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0803") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://meta.dasch.swiss/projects/0803" # parse and redirect an ARK URL of a project on Salsah with a specific project host - ark_url_info = ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1/0006") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0006") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://other-meta.dasch.swiss/projects/0006" def test_ark_url_info_redirect_salsah_ark(settings): # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource which is on DSP (migrated from salsah to DSP) without a timestamp - ark_url_info = ArkUrlInfo(settings, "http://ark.example.org/ark:/00000/0002-779b9990a0c3f-6e") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/0002-779b9990a0c3f-6e") redirect_url = ark_url_info.to_redirect_url() assert redirect_url == "http://0.0.0.0:4200/resource/0002/Ef9heHjPWDS7dMR_gGax2Q" # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource which is on DSP (migrated from salsah to DSP) with a timestamp - ark_url_info = ArkUrlInfo(settings, "http://ark.example.org/ark:/00000/0002-779b9990a0c3f-6e.20190129") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/0002-779b9990a0c3f-6e.20190129") redirect_url = 
ark_url_info.to_redirect_url() assert redirect_url == "http://0.0.0.0:4200/resource/0002/Ef9heHjPWDS7dMR_gGax2Q?version=20190129" def test_conversion_to_resource_iri_with_ark_version_0(settings): # convert a version 0 ARK URL to a DSP resource IRI - ark_url_info = ArkUrlInfo(settings, "http://ark.example.org/ark:/00000/0002-751e0b8a-6.2021519") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/0002-751e0b8a-6.2021519") resource_iri = ark_url_info.to_resource_iri() assert resource_iri == "http://rdfh.ch/0002/70aWaB2kWsuiN6ujYgM0ZQ" def test_conversion_to_resource_iri_with_ark_version_1(settings): # convert a version 1 ARK URL to a DSP resource IRI - ark_url_info = ArkUrlInfo(settings, - "https://ark.example.org/ark:/00000/1/0002/0_sWRg5jT3S0PLxakX9ffg1.20210712T074927466631Z") + ark_url_info = ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0002/0_sWRg5jT3S0PLxakX9ffg1.20210712T074927466631Z") resource_iri = ark_url_info.to_resource_iri() assert resource_iri == "http://rdfh.ch/0002/0_sWRg5jT3S0PLxakX9ffg" @@ -166,7 +145,20 @@ def test_reject_ark_with_wrong_digit(settings): # reject an ARK URL that doesn't pass check digit validation rejected = False try: - ArkUrlInfo(settings, "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBir4=_6HXpEFAn") - except ArkUrlException: + ark_url_rust.ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBir4=_6HXpEFAn") + except ark_url_rust.ArkUrlException: rejected = True assert rejected + +def test_ark_url_settings(settings): + assert settings.ark_config.get("ArkNaan") == "00000" + assert settings.ark_config.get("ArkExternalHost") == "ark.example.org" + assert settings.ark_config.get("ArkInternalHost") == "0.0.0.0" + assert settings.ark_config.get("ArkInternalPort") == "3336" + assert settings.ark_config.get("ArkHttpsProxy") == "true" + assert settings.get_default_config("TopLevelObjectUrl") == "http://dasch.swiss" + assert settings.get_default_config("TopLevelObjectUrl") == "http://dasch.swiss" + assert 
settings.get_default_config("ProjectHost") == "meta.dasch.swiss" + assert settings.get_project_config("0003").get("ProjectHost") == "meta.dasch.swiss" + assert settings.get_project_config("080e").get("Host") == "data.dasch.swiss" + assert settings.get_project_config("080E").get("Host") == "data.dasch.swiss" \ No newline at end of file diff --git a/src/ark_url_settings.rs b/src/ark_url_settings.rs new file mode 100644 index 0000000..ba528b3 --- /dev/null +++ b/src/ark_url_settings.rs @@ -0,0 +1,357 @@ +use config::{Config, File, FileFormat}; +use pyo3::prelude::*; +use regex::Regex; +use std::collections::HashMap; +use std::env; +use crate::parsing::{ark_path_regex, resource_iri_regex, v0_ark_path_regex}; + +struct ArkConfig { + ark_external_host: String, + ark_internal_host: String, + ark_internal_port: String, + ark_naan: String, + ark_https_proxy: String, + ark_registry: String, + ark_github_secret: String, +} + +impl Into<ConfigWrapper> for ArkConfig { + fn into(self) -> ConfigWrapper { + let mut map = HashMap::new(); + map.insert("ArkExternalHost".to_string(), self.ark_external_host); + map.insert("ArkInternalHost".to_string(), self.ark_internal_host); + map.insert("ArkInternalPort".to_string(), self.ark_internal_port); + map.insert("ArkNaan".to_string(), self.ark_naan); + map.insert("ArkHttpsProxy".to_string(), self.ark_https_proxy); + map.insert("ArkRegistry".to_string(), self.ark_registry); + map.insert("ArkGithubSecret".to_string(), self.ark_github_secret); + + ConfigWrapper { + config: map, + } + } +} + +/// Wrapper for config settings, providing helper methods +#[pyclass] +#[derive(Debug, Clone, Default)] +pub struct ConfigWrapper { + config: HashMap<String, String>, +} + +impl From<HashMap<String, String>> for ConfigWrapper { + fn from(config: HashMap<String, String>) -> Self { + Self { config } + } +} + + +#[pymethods] +impl ConfigWrapper { + #[new] + pub fn new(config: HashMap<String, String>) -> Self { + Self { config } + } + + pub fn get(&self, key: &str) -> Option<&String> { + self.config.get(key) + } + + pub fn get_boolean(&self, 
key: &str) -> PyResult<bool> { + match self.config.get(key) { + Some(value) => match value.as_str() { + "true" | "1" => Ok(true), + "false" | "0" => Ok(false), + _ => Err(pyo3::exceptions::PyValueError::new_err(format!( + "Invalid boolean value for key '{}': {}", + key, value + ))), + }, + None => Ok(false), + } + } +} + +#[pyclass] +#[derive(Debug)] +pub struct ArkUrlSettings { + #[pyo3(get)] + ark_config: ConfigWrapper, + #[pyo3(get)] + default_config: HashMap<String, String>, + registry: HashMap<String, ConfigWrapper>, + #[pyo3(get)] + dsp_ark_version: u8, + #[pyo3(get)] + resource_int_id_factor: u32, + resource_iri_regex: Regex, + ark_path_regex: Regex, + v0_ark_path_regex: Regex, +} + +#[pymethods] +impl ArkUrlSettings { + #[new] + #[pyo3(text_signature = "(config_path)")] + pub fn new(config_path: String) -> PyResult<Self> { + let settings = new_impl(config_path).map_err(pyo3::exceptions::PyIOError::new_err)?; + Ok(settings) + } + + #[pyo3(text_signature = "(self, key)")] + pub fn get_default_config(&self, key: &str) -> Option<String> { + self.default_config.get(key).map(|s| s.to_string()) + } + + /// Get a project configuration section from the registry + #[pyo3(text_signature = "(self, project_id)")] + pub fn get_project_config(&self, project_id: &str) -> Option<ConfigWrapper> { + let mut defaults = self.default_config.clone(); + self.registry.get(&project_id.to_lowercase())? 
+ .clone() + .config + .into_iter() + .for_each(|(k, v)| { + defaults.insert(k, v); + }); + Some(defaults.into()) + } + + /// Check resource IRI + #[pyo3(text_signature = "(self, resource_iri)")] + pub fn match_resource_iri(&self, resource_iri: &str) -> Option<(String, String)> { + self.resource_iri_regex + .captures(resource_iri) + .map(|captures| { + ( + captures.get(1).map_or("", |m| m.as_str()).to_string(), + captures.get(2).map_or("", |m| m.as_str()).to_string(), + ) + }) + } + + /// Check structure and extract ARK path components + /// Returns a tuple with ARK version, project ID, resource ID, value ID and timestamp + #[pyo3(text_signature = "(self, ark_path)")] + pub fn match_ark_path( + &self, + ark_path: &str, + ) -> Option<( + Option<String>, + Option<String>, + Option<String>, + Option<String>, + Option<String>, + )> { + let (processed, timestamp) = if let Some(index) = ark_path.find('.') { + (&ark_path[..index], Some(&ark_path[index + 1..])) + } else { + (ark_path, None) + }; + + self.ark_path_regex.captures(processed).map(|captures| { + ( + captures.get(1).map(|m| m.as_str().to_string()), + captures.get(2).map(|m| m.as_str().to_string()), + captures.get(3).map(|m| m.as_str().to_string()), + captures.get(4).map(|m| m.as_str().to_string()), + timestamp.map(|m| m.to_string()), + ) + }) + } + + /// Check if a URL matches the V0 ARK path regex + /// Returns a tuple with project ID, resource ID and optional timestamp + #[pyo3(text_signature = "(self, v0_ark_path)")] + pub fn match_v0_ark_path( + &self, + v0_ark_path: &str, + ) -> Option<(Option<String>, Option<String>, Option<String>)> { + self.v0_ark_path_regex + .captures(v0_ark_path) + .map(|captures| { + ( + captures.get(1).map(|m| m.as_str().to_string()), + captures.get(2).map(|m| m.as_str().to_string()), + captures.get(3).map(|m| m.as_str().to_string()), + ) + }) + } +} + +/// Expose a direct function for Python to load settings +#[pyfunction] +pub fn load_settings(config_path: String) -> PyResult<ArkUrlSettings> { + ArkUrlSettings::new(config_path) +} + +fn new_impl(_config_path: 
String) -> Result<ArkUrlSettings, String> { + + let registry_path = env::var("ARK_REGISTRY").unwrap_or("python/src/ark_resolver/ark-registry.ini".to_string()); + + let ark_config: ConfigWrapper = ArkConfig { + ark_external_host: env::var("ARK_EXTERNAL_HOST").unwrap_or("ark.example.org".to_string()), + ark_internal_host: env::var("ARK_INTERNAL_HOST").unwrap_or("0.0.0.0".to_string()), + ark_internal_port: env::var("ARK_INTERNAL_PORT").unwrap_or("3336".to_string()), + ark_naan: env::var("ARK_NAAN").unwrap_or("00000".to_string()), + ark_https_proxy: env::var("ARK_HTTPS_PROXY").unwrap_or("true".to_string()), + ark_registry: registry_path.clone(), + ark_github_secret: env::var("ARK_GITHUB_SECRET").unwrap_or("".to_string()), + }.into(); + + let registry_ini = Config::builder() + .add_source(File::with_name(&registry_path).format(FileFormat::Ini)) + .build() + .map_err(|e| e.to_string())?; + + // Deserialize into a nested map + let raw_registry: HashMap<String, serde_json::Value> = + registry_ini.try_deserialize().map_err(|e| e.to_string())?; + + // Extract DEFAULT section separately + let default_section = raw_registry + .get("DEFAULT") + .and_then(|v| v.as_object()) + .map(|default_map| { + default_map + .iter() + .map(|(k, v)| (k.clone(), v.as_str().unwrap_or("").to_string())) + .collect::<HashMap<String, String>>() + }) + .unwrap_or_default(); // Use empty map if no DEFAULT section + + let mut registry: HashMap<String, ConfigWrapper> = HashMap::new(); + + // Convert JSON-like structure into Rust nested HashMap + for (section, value) in raw_registry.iter() { + if section == "DEFAULT" { + continue; // Skip DEFAULT section + } + let mut section_map = HashMap::new(); // Start with an empty map + + if let Some(inner_map) = value.as_object() { + for (key, inner_value) in inner_map { + section_map.insert(key.clone(), inner_value.as_str().unwrap_or("").to_string()); + } + } + + registry.insert(section.to_lowercase(), ConfigWrapper::new(section_map)); + } + + let default_ark_naan = "00000".to_string(); + let ark_naan = default_section + .get("ArkNaan") + 
.unwrap_or(&default_ark_naan); + + Ok(ArkUrlSettings { + ark_config, + default_config: default_section.clone().into(), + registry: registry.clone(), + dsp_ark_version: 1, + resource_int_id_factor: 982451653, + resource_iri_regex: resource_iri_regex(), + ark_path_regex: ark_path_regex(ark_naan), + v0_ark_path_regex: v0_ark_path_regex(ark_naan), + }) +} + +#[cfg(test)] +mod tests { + use crate::ark_url_settings::{ + new_impl + }; + + #[test] + fn test_match_ark_path_impl() { + let settings = new_impl("python/src/ark_resolver/ark-config.ini".to_string()).unwrap(); + + // project + let captures = settings.match_ark_path("ark:/00000/1/0003").unwrap(); + assert_eq!( + captures, + ( + Some("1".to_string()), + Some("0003".to_string()), + None, + None, + None + ) + ); + + // resource + let captures = settings + .match_ark_path("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn") + .unwrap(); + assert_eq!( + captures, + ( + Some("1".to_string()), + Some("0001".to_string()), + Some("cmfk1DMHRBiR4=_6HXpEFAn".to_string()), + None, + None + ) + ); + + // resource with timestamp + let captures = settings + .match_ark_path("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622Z") + .unwrap(); + assert_eq!( + captures, + ( + Some("1".to_string()), + Some("0001".to_string()), + Some("cmfk1DMHRBiR4=_6HXpEFAn".to_string()), + None, + Some("20180604T085622Z".to_string()) + ) + ); + + // resource with value + let captures = settings + .match_ark_path("ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo") + .unwrap(); + assert_eq!( + captures, + ( + Some("1".to_string()), + Some("0005".to_string()), + Some("SQkTPdHdTzq_gqbwj6QR=AR".to_string()), + Some("=SSbnPK3Q7WWxzBT1UPpRgo".to_string()), + None + ) + ); + + // resource with value and timestamp + let captures = settings.match_ark_path("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu.20180604T085622Z").unwrap(); + assert_eq!( + captures, + ( + Some("1".to_string()), + Some("0001".to_string()), + 
Some("cmfk1DMHRBiR4=_6HXpEFAn".to_string()), + Some("pLlW4ODASumZfZFbJdpw1gu".to_string()), + Some("20180604T085622Z".to_string()) + ) + ); + } + + #[test] + fn test_settings() { + let settings = new_impl("python/src/ark_resolver/ark-config.ini".to_string()).unwrap(); + + assert_eq!(settings.ark_config.get("ArkNaan"), Some(&"00000".to_string())); + assert_eq!(settings.ark_config.get("ArkExternalHost"), Some(&"ark.example.org".to_string())); + assert_eq!(settings.ark_config.get("ArkInternalHost"), Some(&"0.0.0.0".to_string())); + assert_eq!(settings.ark_config.get("ArkInternalPort"), Some(&"3336".to_string())); + assert_eq!(settings.ark_config.get("ArkHttpsProxy"), Some(&"true".to_string())); + assert_eq!(settings.default_config.get("TopLevelObjectUrl"), Some(&"http://dasch.swiss".to_string())); + assert_eq!(settings.get_default_config("TopLevelObjectUrl"), Some("http://dasch.swiss".to_string())); + assert_eq!(settings.get_project_config("0003").unwrap().get("ProjectHost"), Some(&"meta.dasch.swiss".to_string())); + assert_eq!(settings.get_project_config("080e").unwrap().get("Host"), Some(&"data.dasch.swiss".to_string())); + assert_eq!(settings.get_project_config("080E").unwrap().get("Host"), Some(&"data.dasch.swiss".to_string())); + + + } +} diff --git a/src/base64url_ckeck_digit.rs b/src/base64url_ckeck_digit.rs new file mode 100644 index 0000000..c63196d --- /dev/null +++ b/src/base64url_ckeck_digit.rs @@ -0,0 +1,8 @@ +use base64::{engine::general_purpose::URL_SAFE, Engine as _}; +use pyo3::{pyfunction, PyResult}; + +/// Encodes input using base64url and returns the result +#[pyfunction] +pub fn base64url_check_digit(data: &str) -> PyResult { + Ok(URL_SAFE.encode(data)) +} diff --git a/src/lib.rs b/src/lib.rs index 11d4ef7..6f292d1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,33 +1,29 @@ +use crate::ark_url_settings::ArkUrlSettings; +use crate::ark_url_settings::load_settings; +use crate::base64url_ckeck_digit::base64url_check_digit; use pyo3::prelude::*; use 
pyo3::types::PyModule; use pyo3::wrap_pyfunction_bound; -use base64::{engine::general_purpose::URL_SAFE, Engine as _}; +use tracing_subscriber::prelude::*; + +mod ark_url_settings; +mod base64url_ckeck_digit; +mod parsing; -/// Encodes input using base64url and returns the result #[pyfunction] -fn base64url_check_digit(data: &str) -> PyResult { - Ok(URL_SAFE.encode(data)) +pub fn initialize_tracing(py_impl: Bound<'_, PyAny>) { + tracing_subscriber::registry() + .with(pyo3_python_tracing_subscriber::PythonCallbackLayerBridge::new(py_impl)) + .init(); } -/// Create Python module and add function +/// Create Python module and add the functions and classes to it #[pymodule] fn _rust(py: Python, m: &Bound<'_, PyModule>) -> PyResult<()> { m.add_function(wrap_pyfunction_bound!(base64url_check_digit, py)?)?; + m.add_function(wrap_pyfunction_bound!(load_settings, py)?)?; + m.add_function(wrap_pyfunction!(initialize_tracing, m)?)?; + m.add_class::()?; + Ok(()) } - - -pub fn add(left: u64, right: u64) -> u64 { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} diff --git a/src/parsing.rs b/src/parsing.rs new file mode 100644 index 0000000..68fc7c3 --- /dev/null +++ b/src/parsing.rs @@ -0,0 +1,143 @@ +use regex::Regex; + +const PROJECT_ID_PATTERN: &str = "([0-9A-Fa-f]{4})"; +const ENCODED_UUID_PATTERN: &str = "([A-Za-z0-9=_]+)"; // Allows base64-like characters. The '-' is not allowed in the encoded UUID. 
+const TIMESTAMP_PATTERN: &str = r"([0-9]{8}T[0-9]{6,15}Z)"; + +pub fn resource_iri_regex() -> Regex { + Regex::new(&format!( + r"^http://rdfh.ch/{}/([A-Za-z0-9_-]+)$", + PROJECT_ID_PATTERN + )) + .unwrap() +} +// FIXME: This regex excludes timestamps +pub fn ark_path_regex(ark_naan: &str) -> Regex { + let ark_path_pattern = format!( + r"^ark:/{}/([0-9]+)(?:/{}(?:/{}(?:/{})?)?)?$", + ark_naan, PROJECT_ID_PATTERN, ENCODED_UUID_PATTERN, ENCODED_UUID_PATTERN + ); + Regex::new(&ark_path_pattern).unwrap() +} + +pub fn v0_ark_path_regex(ark_naan: &str) -> Regex { + let v0_ark_path_pattern = format!( + r"ark:/{}/([0-9A-Fa-f]+)-([A-Za-z0-9]+)-[A-Za-z0-9]+(?:\.([0-9]{{6,8}}))?", + ark_naan + ); + Regex::new(&format!(r"^{}$", v0_ark_path_pattern)).unwrap() +} + +#[cfg(test)] +mod tests { + use crate::parsing::{ + resource_iri_regex, PROJECT_ID_PATTERN, TIMESTAMP_PATTERN, + }; + use regex::Regex; + + #[test] + fn test_timestamp_pattern() { + let re = Regex::new(&format!(r"^{}$", TIMESTAMP_PATTERN)).unwrap(); + assert!(re.is_match("20180604T085622Z")); + assert!(re.is_match("20180604T085622513Z")); + assert!(re.is_match("20190118T102919Z")); + } + + #[test] + fn test_project_id_regex() { + let re = Regex::new(&format!(r"^{}$", PROJECT_ID_PATTERN)).unwrap(); + assert!(re.is_match("0000")); + assert!(re.is_match("fFfF")); + assert!(re.is_match("FFFF")); + assert!(!re.is_match("00000")); + assert!(!re.is_match("FFFFF")); + } + + #[test] + fn test_resource_iri_regex() { + let re = resource_iri_regex(); + assert!(re.is_match("http://rdfh.ch/0002/0_sWRg5jT3S0PLxakX9ffg")); + + let captures = re + .captures("http://rdfh.ch/0002/0_sWRg5jT3S0PLxakX9ffg") + .unwrap(); + assert_eq!(captures.get(1).unwrap().as_str(), "0002"); + assert_eq!(captures.get(2).unwrap().as_str(), "0_sWRg5jT3S0PLxakX9ffg"); + } + + #[test] + fn test_ark_path_regex() { + let re = super::ark_path_regex("00000"); + assert!(re.is_match("ark:/00000/1")); + assert!(re.is_match("ark:/00000/1/0003")); + 
assert!(re.is_match("ark:/00000/1/0003/cmfk1DMHRBiR4=_6HXpEFAn")); + // assert!(re.is_match("ark:/00000/1/0003/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622Z")); + // assert!(re.is_match("ark:/00000/1/0003/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622513Z")); + assert!(re.is_match("ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo")); + // assert!(re.is_match("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu.20180604T085622Z")); + assert!(re.is_match("ark:/00000/1/0803/751e0b8am")); + // assert!(re.is_match("ark:/00000/1/0803/751e0b8am.20190118T102919Z")); + + // Version 0 ARK paths that should not match + assert!(!re.is_match("ark:/00000/0002-779b9990a0c3f-6e")); + assert!(!re.is_match("ark:/00000/0002-779b9990a0c3f-6e.20190129")); + + // project + let captures = re.captures("ark:/00000/1/0003").unwrap(); + assert_eq!(captures.get(1).unwrap().as_str(), "1"); + assert_eq!(captures.get(2).unwrap().as_str(), "0003"); + + // resource + let captures = re + .captures("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn") + .unwrap(); + assert_eq!(captures.get(1).unwrap().as_str(), "1"); + assert_eq!(captures.get(2).unwrap().as_str(), "0001"); + assert_eq!(captures.get(3).unwrap().as_str(), "cmfk1DMHRBiR4=_6HXpEFAn"); + + // resource with timestamp + // let captures = re.captures("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622Z").unwrap(); + // assert_eq!(captures.get(1).unwrap().as_str(), "1"); + // assert_eq!(captures.get(2).unwrap().as_str(), "0001"); + // assert_eq!(captures.get(3).unwrap().as_str(), "cmfk1DMHRBiR4=_6HXpEFAn"); + // assert_eq!(captures.get(4).unwrap().as_str(), "20180604T085622Z"); + + // resource with value + let captures = re + .captures("ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo") + .unwrap(); + assert_eq!(captures.get(1).unwrap().as_str(), "1"); + assert_eq!(captures.get(2).unwrap().as_str(), "0005"); + assert_eq!(captures.get(3).unwrap().as_str(), "SQkTPdHdTzq_gqbwj6QR=AR"); + 
assert_eq!(captures.get(4).unwrap().as_str(), "=SSbnPK3Q7WWxzBT1UPpRgo"); + + // resource with value and timestamp + // let captures = re.captures("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu.20180604T085622Z").unwrap(); + // assert_eq!(captures.get(1).unwrap().as_str(), "1"); + // assert_eq!(captures.get(2).unwrap().as_str(), "0001"); + // assert_eq!(captures.get(3).unwrap().as_str(), "cmfk1DMHRBiR4=_6HXpEFAn"); + // assert_eq!(captures.get(4).unwrap().as_str(), "pLlW4ODASumZfZFbJdpw1gu"); + // assert_eq!(captures.get(5).unwrap().as_str(), "20180604T085622Z"); + } + + #[test] + fn test_v0_ark_path_regex() { + let re = super::v0_ark_path_regex("00000"); + assert!(re.is_match("ark:/00000/0002-779b9990a0c3f-6e")); + assert!(re.is_match("ark:/00000/0002-779b9990a0c3f-6e.20190129")); + assert!(re.is_match("ark:/00000/080e-76bb2132d30d6-0")); + assert!(re.is_match("ark:/00000/080e-76bb2132d30d6-0.20190129")); + assert!(re.is_match("ark:/00000/080e-76bb2132d30d6-0.2019111")); + + // Version 1 ARK paths that should not match + assert!(!re.is_match("ark:/00000/1/0003")); + assert!(!re.is_match("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn")); + assert!(!re.is_match("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622Z")); + assert!(!re.is_match("ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622513Z")); + assert!(!re.is_match("ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo")); + assert!(!re.is_match( + "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu.20180604T085622Z" + )); + } + +} \ No newline at end of file diff --git a/tests/smoke_test.rs b/tests/smoke_test.rs new file mode 100644 index 0000000..b5584ff --- /dev/null +++ b/tests/smoke_test.rs @@ -0,0 +1,34 @@ +use assert_cmd::Command; +use std::{thread, time::Duration}; + +#[test] +fn smoke_test() { + // Step 1: Start the service using Docker + let mut cmd = Command::new("docker-compose"); + cmd.args(["up", "-d"]).assert().success(); + + // Step 2: 
Wait for service to be available + let health_url = "http://localhost:3336/health"; + let mut success = false; + for _ in 0..10 { + // Try for ~30 seconds + match reqwest::blocking::get(health_url) { + Ok(response) if response.status().is_success() => { + success = true; + break; + } + _ => { + println!("Waiting for service..."); + thread::sleep(Duration::from_secs(3)); + } + } + } + + assert!(success, "Service did not become healthy in time!"); + + // Step 3: Stop the service + Command::new("docker-compose") + .args(["down"]) + .assert() + .success(); +}