From e9dda396e9b025cad77bd93bf1ce0eb22f0d3fce Mon Sep 17 00:00:00 2001 From: Ivan Subotic <400790+subotic@users.noreply.github.com> Date: Sat, 22 Feb 2025 16:50:35 +0100 Subject: [PATCH] refactor: existing python code revert unintended change feat: rust code fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging fix configuration loading feat: fix settings (rust) fix configuration loading feat: fix settings (rust) feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging feat: rust code debugging refactor: existing python code refactor: existing python code refactor: existing python code refactor: existing python code --- python/{ => src}/ark_resolver/__init__.py | 0 python/{ => src}/ark_resolver/ark-config.ini | 0 .../{ => src}/ark_resolver/ark-registry.ini | 2 +- python/{ => src}/ark_resolver/ark.py | 62 ++++++++++--------- python/{ => src}/ark_resolver/ark_url.py | 15 +++-- .../ark_resolver/check_digit.py} | 0 python/{test => tests}/__init__.py | 0 python/{test => tests}/test_ark_url.py | 8 +-- .../test_ckeck_digit.py} | 16 ++--- 9 files changed, 54 insertions(+), 49 deletions(-) rename python/{ => src}/ark_resolver/__init__.py (100%) rename python/{ => src}/ark_resolver/ark-config.ini (100%) rename python/{ => src}/ark_resolver/ark-registry.ini (99%) rename python/{ => src}/ark_resolver/ark.py (84%) rename python/{ => src}/ark_resolver/ark_url.py (95%) rename python/{ark_resolver/base64url_check_digit.py => src/ark_resolver/check_digit.py} (100%) rename python/{test => tests}/__init__.py (100%) rename python/{test => tests}/test_ark_url.py (97%) rename python/{test/test_base64.py => tests/test_ckeck_digit.py} (62%) diff --git a/python/ark_resolver/__init__.py b/python/src/ark_resolver/__init__.py similarity index 100% rename from python/ark_resolver/__init__.py rename to python/src/ark_resolver/__init__.py diff --git a/python/ark_resolver/ark-config.ini b/python/src/ark_resolver/ark-config.ini similarity index 100% rename from python/ark_resolver/ark-config.ini rename to python/src/ark_resolver/ark-config.ini diff --git a/python/ark_resolver/ark-registry.ini b/python/src/ark_resolver/ark-registry.ini similarity index 99% rename from python/ark_resolver/ark-registry.ini rename to python/src/ark_resolver/ark-registry.ini index d9600a3..660a6de 100644 --- a/python/ark_resolver/ark-registry.ini +++ b/python/src/ark_resolver/ark-registry.ini @@ -41,7 +41,7 @@ PhpResourceVersionRedirectUrl : http://$host/resources/$resource_int_id?citdate= ProjectHost : meta.dasch.swiss ############################################################################ -# anything test project +# anything tests project [0001] diff --git a/python/ark_resolver/ark.py b/python/src/ark_resolver/ark.py similarity index 84% rename from python/ark_resolver/ark.py rename to python/src/ark_resolver/ark.py index 48c8613..9d016a9 100755 --- a/python/ark_resolver/ark.py +++ b/python/src/ark_resolver/ark.py @@ -29,19 +29,18 @@ import sentry_sdk from sentry_sdk.integrations.asyncio import AsyncioIntegration from sentry_sdk.integrations.sanic import SanicIntegration +from sentry_sdk.integrations.rust_tracing import RustTracingIntegration import requests from sanic import HTTPResponse, Sanic, response from sanic.log import logger from sanic_cors import CORS -import ark_resolver.base64url_check_digit as base64url_check_digit_py -from ark_resolver.ark_url import (ArkUrlException, ArkUrlFormatter, ArkUrlInfo, ArkUrlSettings) - import _rust -from sentry_sdk.integrations.rust_tracing import RustTracingIntegration +from ark_resolver import ark_url +import ark_resolver.check_digit as check_digit_py +import ark_resolver.health -from ark_resolver.health import health_bp ################################################################################################# # OpenTelemetry @@ -65,17 +64,19 @@ CORS(app) # Register health check route -app.blueprint(health_bp) +app.blueprint(ark_resolver.health.health_bp) @app.before_server_start async def init_sentry(_): sentry_dsn = os.environ.get("ARK_SENTRY_DSN", None) + sentry_debug = os.environ.get("ARK_SENTRY_DEBUG", "False") sentry_environment = os.environ.get("ARK_SENTRY_ENVIRONMENT", None) sentry_release = os.environ.get("ARK_SENTRY_RELEASE", None) if sentry_dsn: sentry_sdk.init( dsn=sentry_dsn, + debug=sentry_debug, environment=sentry_environment, release=sentry_release, # Add data like request headers and IP for users; @@ -108,7 +109,7 @@ async def init_sentry(_): logger.info("No SENTRY_DSN found in environment variables. Sentry will not be initialized.") -def get_config() -> str: +def get_safe_config() -> str: """ Returns the app's configuration """ @@ -127,20 +128,22 @@ def get_config() -> str: return safe_config_output.getvalue() +@tracer.start_as_current_span("config_get") @app.get("/config") -async def config_get(_) -> HTTPResponse: +async def safe_config_get(_) -> HTTPResponse: """ Returns the app's configuration """ - return response.text(get_config()) + return response.text(get_safe_config()) +@tracer.start_as_current_span("config_head") @app.head("/config") -async def config_head(_) -> HTTPResponse: +async def safe_config_head(_) -> HTTPResponse: """ Returns only the head of the config response """ - config_str = get_config() + config_str = get_safe_config() headers = { "Content-Length": str(len(config_str)), @@ -181,7 +184,6 @@ async def reload(req) -> HTTPResponse: else: return response.text("Unauthorized", status=401) - @tracer.start_as_current_span("redirect") @app.get('/') async def catch_all(_, path="") -> HTTPResponse: @@ -189,19 +191,21 @@ async def catch_all(_, path="") -> HTTPResponse: Catch all URL. Tries to redirect the given ARK ID. """ try: - redirect_url = ArkUrlInfo(settings=app.config.settings, ark_id=path).to_redirect_url() - except ArkUrlException as ex: - logger.info(ex.message, ex, exc_info=True) + redirect_url = ark_url.ArkUrlInfo(settings=app.config.settings, ark_id=path).to_redirect_url() + + except ark_url.ArkUrlException as ex: + logger.error(f"Invalid ARK ID: {path}", exc_info=ex) return response.text(body=ex.message, status=400) - except base64url_check_digit_py.CheckDigitException as ex: - logger.info(ex.message, ex, exc_info=True) + except check_digit_py.CheckDigitException as ex: + logger.error(f"Invalid ARK ID: {path}", exc_info=ex) return response.text(body=ex.message, status=400) - except KeyError: - logger.info("Invalid ARK ID") + except KeyError as ex: + logger.error(f"Invalid ARK ID: {path}", exc_info=ex) return response.text(body="Invalid ARK ID", status=400) + logger.info(f"Redirecting {path} to {redirect_url}") return response.redirect(redirect_url) @@ -235,7 +239,7 @@ def reload_config() -> None: ################################################################################################# # Loading of config and registry files. -def load_settings(config_path: str) -> ArkUrlSettings: +def load_settings(config_path: str) -> ark_url.ArkUrlSettings: """ Loads configuration from given path and returns an ArkUrlSettings. """ @@ -264,7 +268,7 @@ def load_settings(config_path: str) -> ArkUrlSettings: else: config.read_file(open(registry_path)) - settings = ArkUrlSettings(config) + settings = ark_url.ArkUrlSettings(config) return settings @@ -282,8 +286,10 @@ def main() -> None: parser.add_argument("-c", "--config", help="config file (default {})".format(default_config_path)) group = parser.add_mutually_exclusive_group() group.add_argument("-s", "--server", help="start server", action="store_true") - group.add_argument("-i", "--iri", help="print the converted ARK URL from a given DSP resource IRI (add -v and -d optionally)") - group.add_argument("-a", "--ark", help="print the converted DSP resource IRI (requires -r) or DSP URL from a given ARK ID") + group.add_argument("-i", "--iri", + help="print the converted ARK URL from a given DSP resource IRI (add -v and -d optionally)") + group.add_argument("-a", "--ark", + help="print the converted DSP resource IRI (requires -r) or DSP URL from a given ARK ID") parser.add_argument("-r", "--resource", help="generate resource IRI", action="store_true") parser.add_argument("-v", "--value", help="value UUID (has to be provided with -i)") parser.add_argument("-d", "--date", help="DSP ARK timestamp (has to be provided with -i)") @@ -303,20 +309,20 @@ def main() -> None: server(settings) elif args.iri: # prints the converted ARK URL from a given DSP resource IRI - print(ArkUrlFormatter(settings).resource_iri_to_ark_url(args.iri, args.value, args.date)) + print(ark_url.ArkUrlFormatter(settings).resource_iri_to_ark_url(args.iri, args.value, args.date)) elif args.ark: if args.resource: # prints the converted DSP resource IRI from a given ARK URL - print(ArkUrlInfo(settings, args.ark).to_resource_iri()) + print(ark_url.ArkUrlInfo(settings, args.ark).to_resource_iri()) else: # prints the converted DSP URL from a given ARK URL - print(ArkUrlInfo(settings, args.ark).to_redirect_url()) + print(ark_url.ArkUrlInfo(settings, args.ark).to_redirect_url()) else: parser.print_help() - except ArkUrlException as ex: + except ark_url.ArkUrlException as ex: print(ex.message) exit(1) - except base64url_check_digit_py.CheckDigitException as ex: + except check_digit_py.CheckDigitException as ex: print(ex.message) exit(1) diff --git a/python/ark_resolver/ark_url.py b/python/src/ark_resolver/ark_url.py similarity index 95% rename from python/ark_resolver/ark_url.py rename to python/src/ark_resolver/ark_url.py index df39f51..8b51042 100644 --- a/python/ark_resolver/ark_url.py +++ b/python/src/ark_resolver/ark_url.py @@ -9,7 +9,7 @@ from string import Template from urllib import parse -import ark_resolver.base64url_check_digit as base64url_check_digit_py +import ark_resolver.check_digit as check_digit_py ################################################################################################# @@ -44,9 +44,6 @@ def __init__(self, config): self.v0_ark_url_regex = re.compile( "^https?://" + self.top_config["ArkExternalHost"] + "/" + self.v0_ark_path_pattern + "$") - print(f"config: {self.config.__dict__}") - print(f"top_config: {self.top_config.__dict__}") - class ArkUrlException(Exception): """ @@ -126,7 +123,8 @@ def __init__(self, settings, ark_id): if not project_config.getboolean("AllowVersion0"): raise ArkUrlException(f"Invalid ARK ID (version 0 not allowed): {ark_id}") else: - raise ArkUrlException(f"Invalid ARK ID {ark_id}. The version of the ARK ID doesn't match the version defined in the settings.") + raise ArkUrlException( + f"Invalid ARK ID {ark_id}. The version of the ARK ID doesn't match the version defined in the settings.") self.template_dict = { "url_version": self.url_version, @@ -169,7 +167,8 @@ def to_resource_iri(self) -> str: # in case of an ARK URL version 0, the resource_id generated from the salsah ID has to be converted to a # base64 UUID version 5 generic_namespace_url = uuid.NAMESPACE_URL - dasch_uuid_ns = uuid.uuid5(generic_namespace_url, "https://dasch.swiss") # cace8b00-717e-50d5-bcb9-486f39d733a2 + dasch_uuid_ns = uuid.uuid5(generic_namespace_url, + "https://dasch.swiss") # cace8b00-717e-50d5-bcb9-486f39d733a2 resource_id = template_dict["resource_id"] dsp_iri = base64.urlsafe_b64encode(uuid.uuid5(dasch_uuid_ns, resource_id).bytes).decode("utf-8") # remove the padding ('==') from the end of the string @@ -261,7 +260,7 @@ def add_check_digit_and_escape(uuid) -> str: """ Adds a check digit to a Base64-encoded UUID, and escapes the result. """ - check_digit = base64url_check_digit_py.calculate_check_digit(uuid) + check_digit = check_digit_py.calculate_check_digit(uuid) uuid_with_check_digit = uuid + check_digit # Escape '-' as '=' in the resource ID and check digit, because '-' can be ignored in ARK URLs. @@ -275,7 +274,7 @@ def unescape_and_validate_uuid(ark_url, escaped_uuid) -> str: # '-' is escaped as '=' in the UUID and check digit, because '-' can be ignored in ARK URLs. unescaped_uuid = escaped_uuid.replace('=', '-') - if not base64url_check_digit_py.is_valid(unescaped_uuid): + if not check_digit_py.is_valid(unescaped_uuid): raise ArkUrlException(f"Invalid ARK ID: {ark_url}") return unescaped_uuid[0:-1] diff --git a/python/ark_resolver/base64url_check_digit.py b/python/src/ark_resolver/check_digit.py similarity index 100% rename from python/ark_resolver/base64url_check_digit.py rename to python/src/ark_resolver/check_digit.py diff --git a/python/test/__init__.py b/python/tests/__init__.py similarity index 100% rename from python/test/__init__.py rename to python/tests/__init__.py diff --git a/python/test/test_ark_url.py b/python/tests/test_ark_url.py similarity index 97% rename from python/test/test_ark_url.py rename to python/tests/test_ark_url.py index 16fca19..abb813f 100644 --- a/python/test/test_ark_url.py +++ b/python/tests/test_ark_url.py @@ -1,15 +1,15 @@ import os import pytest -from ark_resolver.ark import load_settings from ark_resolver.ark_url import ArkUrlFormatter, ArkUrlInfo, ArkUrlException +from ark_resolver import ark @pytest.fixture(scope="module") def settings(): """Loads settings.""" - config_path = "python/ark_resolver/ark-config.ini" - os.environ['ARK_REGISTRY'] = 'python/ark_resolver/ark-registry.ini' - return load_settings(config_path) + config_path = "python/src/ark_resolver/ark-config.ini" + os.environ['ARK_REGISTRY'] = 'python/src/ark_resolver/ark-registry.ini' + return ark.load_settings(config_path) def test_ark_url_formatter(settings): ark_url_formatter = ArkUrlFormatter(settings) diff --git a/python/test/test_base64.py b/python/tests/test_ckeck_digit.py similarity index 62% rename from python/test/test_base64.py rename to python/tests/test_ckeck_digit.py index 43224b1..3780512 100644 --- a/python/test/test_base64.py +++ b/python/tests/test_ckeck_digit.py @@ -1,33 +1,33 @@ -import ark_resolver.base64url_check_digit as base64url_check_digit_py +from ark_resolver import check_digit as ckeck_digit_py def test_base64url_check_digit(): correct_resource_id = "cmfk1DMHRBiR4-_6HXpEFA" # reject a string without a check digit - assert not base64url_check_digit_py.is_valid(correct_resource_id) + assert not ckeck_digit_py.is_valid(correct_resource_id) # calculate a check digit for a string and validate it correct_resource_id_check_digit = "n" - check_digit = base64url_check_digit_py.calculate_check_digit(correct_resource_id) + check_digit = ckeck_digit_py.calculate_check_digit(correct_resource_id) assert check_digit == correct_resource_id_check_digit correct_resource_id_with_correct_check_digit = correct_resource_id + check_digit - assert base64url_check_digit_py.is_valid(correct_resource_id_with_correct_check_digit) + assert ckeck_digit_py.is_valid(correct_resource_id_with_correct_check_digit) # reject a string with an incorrect check digit correct_resource_id_with_incorrect_check_digit = correct_resource_id + "m" - assert not base64url_check_digit_py.is_valid(correct_resource_id_with_incorrect_check_digit) + assert not ckeck_digit_py.is_valid(correct_resource_id_with_incorrect_check_digit) # reject a string with a missing character resource_id_with_missing_character = "cmfk1DMHRBiR4-6HXpEFA" resource_id_with_missing_character_and_correct_check_digit = resource_id_with_missing_character + correct_resource_id_check_digit - assert not base64url_check_digit_py.is_valid(resource_id_with_missing_character_and_correct_check_digit) + assert not ckeck_digit_py.is_valid(resource_id_with_missing_character_and_correct_check_digit) # reject a string with an incorrect character resource_id_with_incorrect_character = "cmfk1DMHRBir4-_6HXpEFA" resource_id_with_incorrect_character_and_correct_check_digit = resource_id_with_incorrect_character + correct_resource_id_check_digit - assert not base64url_check_digit_py.is_valid(resource_id_with_incorrect_character_and_correct_check_digit) + assert not ckeck_digit_py.is_valid(resource_id_with_incorrect_character_and_correct_check_digit) # reject a string with swapped characters resource_id_with_swapped_characters = "cmfk1DMHRBiR4_-6HXpEFA" resource_id_with_swapped_characters_and_correct_check_digit = resource_id_with_swapped_characters + correct_resource_id_check_digit - assert not base64url_check_digit_py.is_valid(resource_id_with_swapped_characters_and_correct_check_digit) + assert not ckeck_digit_py.is_valid(resource_id_with_swapped_characters_and_correct_check_digit)