From 14184b7ff6a5cfc49442aca29d582aa73e82c3c7 Mon Sep 17 00:00:00 2001 From: Ivan Subotic <400790+subotic@users.noreply.github.com> Date: Mon, 24 Feb 2025 16:12:33 +0100 Subject: [PATCH] refactor: existing python code refactor: existing python code refactor: existing python code revert unintended change feat: rust code fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging refactor: existing python code revert unintended change feat: rust code fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging refactor: exiting python code fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging refactor: existing python code revert unintended change feat: rust code fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging refactor: existing python code refactor: existing python code refactor: existing python code refactor: existing python code --- python/{ => src}/ark_resolver/__init__.py | 0 python/{ => src}/ark_resolver/ark-config.ini | 0 .../{ => src}/ark_resolver/ark-registry.ini | 2 +- python/{ => src}/ark_resolver/ark.py | 125 +++++++++++--- python/{ => src}/ark_resolver/ark_url.py | 57 +++---- .../ark_resolver/check_digit.py} | 0 python/test/create_mock_settings.py | 15 -- python/{test => tests}/__init__.py | 0 python/tests/test_ark_url.py | 161 ++++++++++++++++++ .../test_ckeck_digit.py} | 16 +- 10 files changed, 296 insertions(+), 80 deletions(-) rename python/{ => src}/ark_resolver/__init__.py (100%) rename python/{ => src}/ark_resolver/ark-config.ini (100%) rename python/{ => src}/ark_resolver/ark-registry.ini (99%) rename python/{ => src}/ark_resolver/ark.py (62%) rename python/{ => src}/ark_resolver/ark_url.py (88%) rename python/{ark_resolver/base64url_check_digit.py => src/ark_resolver/check_digit.py} (100%) delete mode 100644 python/test/create_mock_settings.py rename python/{test => tests}/__init__.py (100%) create mode 100644 python/tests/test_ark_url.py rename python/{test/test_base64.py => tests/test_ckeck_digit.py} (62%) diff --git a/python/ark_resolver/__init__.py b/python/src/ark_resolver/__init__.py similarity index 100% rename from python/ark_resolver/__init__.py rename to python/src/ark_resolver/__init__.py diff --git a/python/ark_resolver/ark-config.ini b/python/src/ark_resolver/ark-config.ini similarity index 100% rename from python/ark_resolver/ark-config.ini rename to python/src/ark_resolver/ark-config.ini diff --git a/python/ark_resolver/ark-registry.ini b/python/src/ark_resolver/ark-registry.ini similarity index 99% rename from python/ark_resolver/ark-registry.ini rename to python/src/ark_resolver/ark-registry.ini index d9600a3..660a6de 100644 --- a/python/ark_resolver/ark-registry.ini +++ b/python/src/ark_resolver/ark-registry.ini @@ -41,7 +41,7 @@ PhpResourceVersionRedirectUrl : http://$host/resources/$resource_int_id?citdate= ProjectHost : meta.dasch.swiss ############################################################################ -# anything test project +# anything tests project [0001] diff --git a/python/ark_resolver/ark.py b/python/src/ark_resolver/ark.py similarity index 62% rename from python/ark_resolver/ark.py rename to python/src/ark_resolver/ark.py index a62c5b5..9d016a9 100755 --- a/python/ark_resolver/ark.py +++ b/python/src/ark_resolver/ark.py @@ -19,13 +19,41 @@ from asyncio import sleep from io import StringIO +from opentelemetry import trace +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ( + BatchSpanProcessor, + ConsoleSpanExporter, +) + +import sentry_sdk +from sentry_sdk.integrations.asyncio import AsyncioIntegration +from sentry_sdk.integrations.sanic import SanicIntegration +from sentry_sdk.integrations.rust_tracing import RustTracingIntegration + import requests from sanic import HTTPResponse, Sanic, response from sanic.log import logger from sanic_cors import CORS -import ark_resolver.base64url_check_digit as base64url_check_digit_py -from ark_resolver.ark_url import (ArkUrlException, ArkUrlFormatter, ArkUrlInfo, ArkUrlSettings) +import _rust +from ark_resolver import ark_url +import ark_resolver.check_digit as check_digit_py +import ark_resolver.health + + +################################################################################################# +# OpenTelemetry + +provider = TracerProvider() +processor = BatchSpanProcessor(ConsoleSpanExporter()) +provider.add_span_processor(processor) + +# Sets the global default tracer provider +trace.set_tracer_provider(provider) + +# Creates a tracer from the global tracer provider +tracer = trace.get_tracer("my.tracer.name") ################################################################################################# # Server implementation. @@ -35,8 +63,53 @@ app = Sanic('ark_resolver') CORS(app) +# Register health check route +app.blueprint(ark_resolver.health.health_bp) + + +@app.before_server_start +async def init_sentry(_): + sentry_dsn = os.environ.get("ARK_SENTRY_DSN", None) + sentry_debug = os.environ.get("ARK_SENTRY_DEBUG", "False") + sentry_environment = os.environ.get("ARK_SENTRY_ENVIRONMENT", None) + sentry_release = os.environ.get("ARK_SENTRY_RELEASE", None) + if sentry_dsn: + sentry_sdk.init( + dsn=sentry_dsn, + debug=sentry_debug, + environment=sentry_environment, + release=sentry_release, + # Add data like request headers and IP for users; + # see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info + send_default_pii=True, + # Set traces_sample_rate to 1.0 to capture 100% + # of transactions for tracing. + traces_sample_rate=1.0, + # Set profiles_sample_rate to 1.0 to profile 100% + # of sampled transactions. + # We recommend adjusting this value in production. + profiles_sample_rate=1.0, + instrumenter="otel", + integrations=[ + AsyncioIntegration(), + RustTracingIntegration( + "_rust", + _rust.initialize_tracing, + include_tracing_fields=True, + ), + SanicIntegration( + # Configure the Sanic integration so that we generate + # transactions for all HTTP status codes, including 404 + unsampled_statuses=None, + ), + ], + ) + logger.info("Sentry initialized.") + else: + logger.info("No SENTRY_DSN found in environment variables. Sentry will not be initialized.") + -def get_config() -> str: +def get_safe_config() -> str: """ Returns the app's configuration """ @@ -55,20 +128,22 @@ def get_config() -> str: return safe_config_output.getvalue() +@tracer.start_as_current_span("config_get") @app.get("/config") -async def config_get(_) -> HTTPResponse: +async def safe_config_get(_) -> HTTPResponse: """ Returns the app's configuration """ - return response.text(get_config()) + return response.text(get_safe_config()) +@tracer.start_as_current_span("config_head") @app.head("/config") -async def config_head(_) -> HTTPResponse: +async def safe_config_head(_) -> HTTPResponse: """ Returns only the head of the config response """ - config_str = get_config() + config_str = get_safe_config() headers = { "Content-Length": str(len(config_str)), @@ -78,6 +153,7 @@ async def config_head(_) -> HTTPResponse: return response.text("", headers=headers) +@tracer.start_as_current_span("reload") @app.post("/reload") async def reload(req) -> HTTPResponse: """ @@ -108,23 +184,28 @@ async def reload(req) -> HTTPResponse: else: return response.text("Unauthorized", status=401) - +@tracer.start_as_current_span("redirect") @app.get('/') async def catch_all(_, path="") -> HTTPResponse: """ Catch all URL. Tries to redirect the given ARK ID. """ try: - redirect_url = ArkUrlInfo(settings=app.config.settings, ark_url=path, path_only=True).to_redirect_url() - except ArkUrlException as ex: + redirect_url = ark_url.ArkUrlInfo(settings=app.config.settings, ark_id=path).to_redirect_url() + + except ark_url.ArkUrlException as ex: + logger.error(f"Invalid ARK ID: {path}", exc_info=ex) return response.text(body=ex.message, status=400) - except base64url_check_digit_py.CheckDigitException as ex: + except check_digit_py.CheckDigitException as ex: + logger.error(f"Invalid ARK ID: {path}", exc_info=ex) return response.text(body=ex.message, status=400) - except KeyError: + except KeyError as ex: + logger.error(f"Invalid ARK ID: {path}", exc_info=ex) return response.text(body="Invalid ARK ID", status=400) + logger.info(f"Redirecting {path} to {redirect_url}") return response.redirect(redirect_url) @@ -158,7 +239,7 @@ def reload_config() -> None: ################################################################################################# # Loading of config and registry files. -def load_settings(config_path: str) -> ArkUrlSettings: +def load_settings(config_path: str) -> ark_url.ArkUrlSettings: """ Loads configuration from given path and returns an ArkUrlSettings. """ @@ -187,7 +268,7 @@ def load_settings(config_path: str) -> ArkUrlSettings: else: config.read_file(open(registry_path)) - settings = ArkUrlSettings(config) + settings = ark_url.ArkUrlSettings(config) return settings @@ -205,8 +286,10 @@ def main() -> None: parser.add_argument("-c", "--config", help="config file (default {})".format(default_config_path)) group = parser.add_mutually_exclusive_group() group.add_argument("-s", "--server", help="start server", action="store_true") - group.add_argument("-i", "--iri", help="print the converted ARK URL from a given DSP resource IRI (add -v and -d optionally)") - group.add_argument("-a", "--ark", help="print the converted DSP resource IRI (requires -r) or DSP URL from a given ARK URL") + group.add_argument("-i", "--iri", + help="print the converted ARK URL from a given DSP resource IRI (add -v and -d optionally)") + group.add_argument("-a", "--ark", + help="print the converted DSP resource IRI (requires -r) or DSP URL from a given ARK ID") parser.add_argument("-r", "--resource", help="generate resource IRI", action="store_true") parser.add_argument("-v", "--value", help="value UUID (has to be provided with -i)") parser.add_argument("-d", "--date", help="DSP ARK timestamp (has to be provided with -i)") @@ -226,20 +309,20 @@ def main() -> None: server(settings) elif args.iri: # prints the converted ARK URL from a given DSP resource IRI - print(ArkUrlFormatter(settings).resource_iri_to_ark_url(args.iri, args.value, args.date)) + print(ark_url.ArkUrlFormatter(settings).resource_iri_to_ark_url(args.iri, args.value, args.date)) elif args.ark: if args.resource: # prints the converted DSP resource IRI from a given ARK URL - print(ArkUrlInfo(settings, args.ark).to_resource_iri()) + print(ark_url.ArkUrlInfo(settings, args.ark).to_resource_iri()) else: # prints the converted DSP URL from a given ARK URL - print(ArkUrlInfo(settings, args.ark).to_redirect_url()) + print(ark_url.ArkUrlInfo(settings, args.ark).to_redirect_url()) else: parser.print_help() - except ArkUrlException as ex: + except ark_url.ArkUrlException as ex: print(ex.message) exit(1) - except base64url_check_digit_py.CheckDigitException as ex: + except check_digit_py.CheckDigitException as ex: print(ex.message) exit(1) diff --git a/python/ark_resolver/ark_url.py b/python/src/ark_resolver/ark_url.py similarity index 88% rename from python/ark_resolver/ark_url.py rename to python/src/ark_resolver/ark_url.py index 18be6dc..8b51042 100644 --- a/python/ark_resolver/ark_url.py +++ b/python/src/ark_resolver/ark_url.py @@ -9,7 +9,7 @@ from string import Template from urllib import parse -import ark_resolver.base64url_check_digit as base64url_check_digit_py +import ark_resolver.check_digit as check_digit_py ################################################################################################# @@ -56,42 +56,27 @@ def __init__(self, message): class ArkUrlInfo: """ - Represents the information retrieved from a DSP ARK URL. + Represents the information retrieved from a DSP ARK ID. """ - def __init__(self, settings, ark_url, path_only=False): + def __init__(self, settings, ark_id): self.settings = settings - # Are we matching just the path part of the URL? - # TODO: path_only=True should be tested in unit tests - if path_only: - # Yes. Is it a version 1 ARK ID? - match = settings.ark_path_regex.match(ark_url) - - if match: - # Yes. - self.url_version = int(match.group(1)) - else: - # No. Is it a version 0 ARK ID? - match = settings.v0_ark_path_regex.match(ark_url) - - if match is not None: - self.url_version = 0 + match = settings.ark_path_regex.match(ark_id) + if match: + # Yes. Is it a version 1 ARK ID? + self.url_version = int(match.group(1)) else: - # We are matching a whole URL. Does it contain a version 1 ARK ID? - match = settings.ark_url_regex.match(ark_url) + # No. Is it a version 0 ARK ID? + match = settings.v0_ark_path_regex.match(ark_id) + + # If NOT None!, then it is a version 0 ARK ID. if match is not None: - # Yes. - self.url_version = int(match.group(1)) - else: - # No. Does it contain a version 0 ARK ID? - match = settings.v0_ark_url_regex.match(ark_url) + self.url_version = 0 - if match is not None: - self.url_version = 0 if match is None: - raise ArkUrlException(f"Invalid ARK ID: {ark_url}") + raise ArkUrlException(f"Invalid ARK ID: {ark_id}") # Which version of ARK ID did we match? if self.url_version == settings.dsp_ark_version: @@ -101,7 +86,7 @@ def __init__(self, settings, ark_url, path_only=False): if escaped_resource_id_with_check_digit is not None: self.resource_id = unescape_and_validate_uuid( - ark_url=ark_url, + ark_url=ark_id, escaped_uuid=escaped_resource_id_with_check_digit ) @@ -109,7 +94,7 @@ def __init__(self, settings, ark_url, path_only=False): if escaped_value_id_with_check_digit is not None: self.value_id = unescape_and_validate_uuid( - ark_url=ark_url, + ark_url=ark_id, escaped_uuid=escaped_value_id_with_check_digit ) else: @@ -136,9 +121,10 @@ def __init__(self, settings, ark_url, path_only=False): project_config = self.settings.config[self.project_id] if not project_config.getboolean("AllowVersion0"): - raise ArkUrlException(f"Invalid ARK ID (version 0 not allowed): {ark_url}") + raise ArkUrlException(f"Invalid ARK ID (version 0 not allowed): {ark_id}") else: - raise ArkUrlException(f"Invalid ARK ID {ark_url}. The version of the ARK ID doesn't match the version defined in the settings.") + raise ArkUrlException( + f"Invalid ARK ID {ark_id}. The version of the ARK ID doesn't match the version defined in the settings.") self.template_dict = { "url_version": self.url_version, @@ -181,7 +167,8 @@ def to_resource_iri(self) -> str: # in case of an ARK URL version 0, the resource_id generated from the salsah ID has to be converted to a # base64 UUID version 5 generic_namespace_url = uuid.NAMESPACE_URL - dasch_uuid_ns = uuid.uuid5(generic_namespace_url, "https://dasch.swiss") # cace8b00-717e-50d5-bcb9-486f39d733a2 + dasch_uuid_ns = uuid.uuid5(generic_namespace_url, + "https://dasch.swiss") # cace8b00-717e-50d5-bcb9-486f39d733a2 resource_id = template_dict["resource_id"] dsp_iri = base64.urlsafe_b64encode(uuid.uuid5(dasch_uuid_ns, resource_id).bytes).decode("utf-8") # remove the padding ('==') from the end of the string @@ -273,7 +260,7 @@ def add_check_digit_and_escape(uuid) -> str: """ Adds a check digit to a Base64-encoded UUID, and escapes the result. """ - check_digit = base64url_check_digit_py.calculate_check_digit(uuid) + check_digit = check_digit_py.calculate_check_digit(uuid) uuid_with_check_digit = uuid + check_digit # Escape '-' as '=' in the resource ID and check digit, because '-' can be ignored in ARK URLs. @@ -287,7 +274,7 @@ def unescape_and_validate_uuid(ark_url, escaped_uuid) -> str: # '-' is escaped as '=' in the UUID and check digit, because '-' can be ignored in ARK URLs. unescaped_uuid = escaped_uuid.replace('=', '-') - if not base64url_check_digit_py.is_valid(unescaped_uuid): + if not check_digit_py.is_valid(unescaped_uuid): raise ArkUrlException(f"Invalid ARK ID: {ark_url}") return unescaped_uuid[0:-1] diff --git a/python/ark_resolver/base64url_check_digit.py b/python/src/ark_resolver/check_digit.py similarity index 100% rename from python/ark_resolver/base64url_check_digit.py rename to python/src/ark_resolver/check_digit.py diff --git a/python/test/create_mock_settings.py b/python/test/create_mock_settings.py deleted file mode 100644 index 0521ab8..0000000 --- a/python/test/create_mock_settings.py +++ /dev/null @@ -1,15 +0,0 @@ -"""Creates a pickle file 'settings.pkl' with the configuration in ark-registry.ini. This file can then be used in the -unit tests. Run it from inside ark-resolver with 'python3 python/test/create_mock_settings.py' """ - -import pickle as pkl -import os -import test - -from ark_resolver.ark import load_settings - -if __name__ == "__main__": - config_path = "python/ark_resolver/ark-config.ini" - os.environ['ARK_REGISTRY'] = 'python/ark_resolver/ark-registry.ini' - settings = load_settings(config_path) - with open("python/test/settings.pkl", 'wb') as file: - pkl.dump(settings, file) diff --git a/python/test/__init__.py b/python/tests/__init__.py similarity index 100% rename from python/test/__init__.py rename to python/tests/__init__.py diff --git a/python/tests/test_ark_url.py b/python/tests/test_ark_url.py new file mode 100644 index 0000000..abb813f --- /dev/null +++ b/python/tests/test_ark_url.py @@ -0,0 +1,161 @@ +import os +import pytest + +from ark_resolver.ark_url import ArkUrlFormatter, ArkUrlInfo, ArkUrlException +from ark_resolver import ark + +@pytest.fixture(scope="module") +def settings(): + """Loads settings.""" + config_path = "python/src/ark_resolver/ark-config.ini" + os.environ['ARK_REGISTRY'] = 'python/src/ark_resolver/ark-registry.ini' + return ark.load_settings(config_path) + +def test_ark_url_formatter(settings): + ark_url_formatter = ArkUrlFormatter(settings) + # generate an ARK URL from a resource IRI without a timestamp + resource_iri = "http://rdfh.ch/0001/cmfk1DMHRBiR4-_6HXpEFA" + ark_url = ark_url_formatter.resource_iri_to_ark_url(resource_iri=resource_iri) + assert ark_url == "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn" + + # generate an ARK URL from a resource IRI with a timestamp + ark_url = ark_url_formatter.resource_iri_to_ark_url(resource_iri=resource_iri, timestamp="20180604T085622513Z") + assert ark_url == "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622513Z" + + # generate an ARK URL from a resource IRI and value UUID without a timestamp + value_id = "pLlW4ODASumZfZFbJdpw1g" + ark_url = ark_url_formatter.resource_iri_to_ark_url(resource_iri=resource_iri, value_id=value_id) + assert ark_url == "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu" + + # generate an ARK URL from a resource IRI and value UUID with a timestamp + ark_url = ark_url_formatter.resource_iri_to_ark_url(resource_iri=resource_iri, value_id=value_id, + timestamp="20180604T085622513Z") + assert ark_url == "https://ark.example.org/ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu.20180604T085622513Z" + +def test_ark_url_info_redirect_top_level_object(settings): + # parse and redirect an ARK URL representing the top-level object + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://dasch.swiss" + +def test_ark_url_info_redirect_project(settings): + # parse and redirect an ARK URL of a project with default project host, i.e. without specified project host + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0003") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://meta.dasch.swiss/projects/0003" + + # parse and redirect an ARK URL of a project with a specific project host + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0004") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://other-meta.dasch.swiss/projects/0004" + +def test_ark_url_info_redirect_resource(settings): + # parse and redirect an ARK URL of a DSP resource without a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA" + + # parse and redirect an ARK HTTP URL of a DSP resource without a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA" + + # parse and redirect an ARK URL of a DSP resource with a timestamp with a fractional part + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622513Z") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA?version=20180604T085622513Z" + + # parse and redirect an ARK URL of a DSP resource with a timestamp without a fractional part + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn.20180604T085622Z") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA?version=20180604T085622Z" + + # parse an ARK URL of a DSP resource without a timestamp and redirect it to a customized location + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0005/0_sWRg5jT3S0PLxakX9ffg1") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resources/0005/0_sWRg5jT3S0PLxakX9ffg" + +def test_ark_url_info_redirect_value(settings): + # parse an ARK URL of a DSP value without a timestamp and redirect it to a customized location + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resources/0005/SQkTPdHdTzq_gqbwj6QR-A/-SSbnPK3Q7WWxzBT1UPpRg" + + # parse and redirect an ARK URL of a DSP value with a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBiR4=_6HXpEFAn/pLlW4ODASumZfZFbJdpw1gu.20180604T085622Z") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resource/0001/cmfk1DMHRBiR4-_6HXpEFA/pLlW4ODASumZfZFbJdpw1g?version=20180604T085622Z" + + # parse an ARK URL of a DSP value without a timestamp and redirect it to a customized location + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0005/SQkTPdHdTzq_gqbwj6QR=AR/=SSbnPK3Q7WWxzBT1UPpRgo") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resources/0005/SQkTPdHdTzq_gqbwj6QR-A/-SSbnPK3Q7WWxzBT1UPpRg" + +def test_ark_url_info_redirect_salsah_resource(settings): + # parse and redirect a version 1 ARK URL of a PHP-SALSAH resource without a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0803/751e0b8am") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://data.dasch.swiss/resources/1" + + # parse and redirect a version 1 ARK URL of a PHP-SALSAH resource with a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0803/751e0b8am.20190118T102919Z") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://data.dasch.swiss/resources/1?citdate=20190118" + + # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource without a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/080e-76bb2132d30d6-0") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://data.dasch.swiss/resources/2126045" + + # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource with a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/080e-76bb2132d30d6-0.20190129") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://data.dasch.swiss/resources/2126045?citdate=20190129" + + # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource with a timestamp that's too short + ark_url_info = ArkUrlInfo(settings, "ark:/00000/080e-76bb2132d30d6-0.2019111") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://data.dasch.swiss/resources/2126045" + +def test_ark_url_info_redirect_salsah_project(settings): + # parse and redirect an ARK URL of a project on Salsah with default project host, i.e. without specified project host + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0803") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://meta.dasch.swiss/projects/0803" + + # parse and redirect an ARK URL of a project on Salsah with a specific project host + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0006") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://other-meta.dasch.swiss/projects/0006" + +def test_ark_url_info_redirect_salsah_ark(settings): + # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource which is on DSP (migrated from salsah to DSP) without a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/0002-779b9990a0c3f-6e") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resource/0002/Ef9heHjPWDS7dMR_gGax2Q" + + # parse and redirect a version 0 ARK URL of a PHP-SALSAH resource which is on DSP (migrated from salsah to DSP) with a timestamp + ark_url_info = ArkUrlInfo(settings, "ark:/00000/0002-779b9990a0c3f-6e.20190129") + redirect_url = ark_url_info.to_redirect_url() + assert redirect_url == "http://0.0.0.0:4200/resource/0002/Ef9heHjPWDS7dMR_gGax2Q?version=20190129" + +def test_conversion_to_resource_iri_with_ark_version_0(settings): + # convert a version 0 ARK URL to a DSP resource IRI + ark_url_info = ArkUrlInfo(settings, "ark:/00000/0002-751e0b8a-6.2021519") + resource_iri = ark_url_info.to_resource_iri() + assert resource_iri == "http://rdfh.ch/0002/70aWaB2kWsuiN6ujYgM0ZQ" + +def test_conversion_to_resource_iri_with_ark_version_1(settings): + # convert a version 1 ARK URL to a DSP resource IRI + ark_url_info = ArkUrlInfo(settings, "ark:/00000/1/0002/0_sWRg5jT3S0PLxakX9ffg1.20210712T074927466631Z") + resource_iri = ark_url_info.to_resource_iri() + assert resource_iri == "http://rdfh.ch/0002/0_sWRg5jT3S0PLxakX9ffg" + +def test_reject_ark_with_wrong_digit(settings): + # reject an ARK URL that doesn't pass check digit validation + rejected = False + try: + ArkUrlInfo(settings, "ark:/00000/1/0001/cmfk1DMHRBir4=_6HXpEFAn") + except ArkUrlException: + rejected = True + assert rejected diff --git a/python/test/test_base64.py b/python/tests/test_ckeck_digit.py similarity index 62% rename from python/test/test_base64.py rename to python/tests/test_ckeck_digit.py index 43224b1..3780512 100644 --- a/python/test/test_base64.py +++ b/python/tests/test_ckeck_digit.py @@ -1,33 +1,33 @@ -import ark_resolver.base64url_check_digit as base64url_check_digit_py +from ark_resolver import check_digit as ckeck_digit_py def test_base64url_check_digit(): correct_resource_id = "cmfk1DMHRBiR4-_6HXpEFA" # reject a string without a check digit - assert not base64url_check_digit_py.is_valid(correct_resource_id) + assert not ckeck_digit_py.is_valid(correct_resource_id) # calculate a check digit for a string and validate it correct_resource_id_check_digit = "n" - check_digit = base64url_check_digit_py.calculate_check_digit(correct_resource_id) + check_digit = ckeck_digit_py.calculate_check_digit(correct_resource_id) assert check_digit == correct_resource_id_check_digit correct_resource_id_with_correct_check_digit = correct_resource_id + check_digit - assert base64url_check_digit_py.is_valid(correct_resource_id_with_correct_check_digit) + assert ckeck_digit_py.is_valid(correct_resource_id_with_correct_check_digit) # reject a string with an incorrect check digit correct_resource_id_with_incorrect_check_digit = correct_resource_id + "m" - assert not base64url_check_digit_py.is_valid(correct_resource_id_with_incorrect_check_digit) + assert not ckeck_digit_py.is_valid(correct_resource_id_with_incorrect_check_digit) # reject a string with a missing character resource_id_with_missing_character = "cmfk1DMHRBiR4-6HXpEFA" resource_id_with_missing_character_and_correct_check_digit = resource_id_with_missing_character + correct_resource_id_check_digit - assert not base64url_check_digit_py.is_valid(resource_id_with_missing_character_and_correct_check_digit) + assert not ckeck_digit_py.is_valid(resource_id_with_missing_character_and_correct_check_digit) # reject a string with an incorrect character resource_id_with_incorrect_character = "cmfk1DMHRBir4-_6HXpEFA" resource_id_with_incorrect_character_and_correct_check_digit = resource_id_with_incorrect_character + correct_resource_id_check_digit - assert not base64url_check_digit_py.is_valid(resource_id_with_incorrect_character_and_correct_check_digit) + assert not ckeck_digit_py.is_valid(resource_id_with_incorrect_character_and_correct_check_digit) # reject a string with swapped characters resource_id_with_swapped_characters = "cmfk1DMHRBiR4_-6HXpEFA" resource_id_with_swapped_characters_and_correct_check_digit = resource_id_with_swapped_characters + correct_resource_id_check_digit - assert not base64url_check_digit_py.is_valid(resource_id_with_swapped_characters_and_correct_check_digit) + assert not ckeck_digit_py.is_valid(resource_id_with_swapped_characters_and_correct_check_digit)