Skip to content

Commit

Permalink
refactor: existing python code
Browse files Browse the repository at this point in the history
refactor: existing python code

refactor: existing python code

revert unintended change

feat: rust code

fix configuration loading

feat: fix settings (rust)

 fix configuration loading

feat: fix settings (rust)

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

 fix configuration loading

feat: fix settings (rust)

 fix configuration loading

feat: fix settings (rust)

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

refactor: existing python code

revert unintended change

feat: rust code

fix configuration loading

feat: fix settings (rust)

 fix configuration loading

feat: fix settings (rust)

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

 fix configuration loading

feat: fix settings (rust)

 fix configuration loading

feat: fix settings (rust)

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

refactor: existing python code

fix configuration loading

feat: fix settings (rust)

 fix configuration loading

feat: fix settings (rust)

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

 fix configuration loading

feat: fix settings (rust)

 fix configuration loading

feat: fix settings (rust)

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

refactor: existing python code

revert unintended change

feat: rust code

fix configuration loading

feat: fix settings (rust)

 fix configuration loading

feat: fix settings (rust)

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

 fix configuration loading

feat: fix settings (rust)

 fix configuration loading

feat: fix settings (rust)

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

feat: rust code debugging

refactor: existing python code

refactor: existing python code

refactor: existing python code

refactor: existing python code
  • Loading branch information
subotic committed Feb 24, 2025
1 parent e2d9d12 commit 14184b7
Show file tree
Hide file tree
Showing 10 changed files with 296 additions and 80 deletions.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ PhpResourceVersionRedirectUrl : http://$host/resources/$resource_int_id?citdate=
ProjectHost : meta.dasch.swiss

############################################################################
# anything test project
# anything tests project

[0001]

Expand Down
125 changes: 104 additions & 21 deletions python/ark_resolver/ark.py → python/src/ark_resolver/ark.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,41 @@
from asyncio import sleep
from io import StringIO

from opentelemetry import trace
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import (
BatchSpanProcessor,
ConsoleSpanExporter,
)

import sentry_sdk
from sentry_sdk.integrations.asyncio import AsyncioIntegration
from sentry_sdk.integrations.sanic import SanicIntegration
from sentry_sdk.integrations.rust_tracing import RustTracingIntegration

import requests
from sanic import HTTPResponse, Sanic, response
from sanic.log import logger
from sanic_cors import CORS

import ark_resolver.base64url_check_digit as base64url_check_digit_py
from ark_resolver.ark_url import (ArkUrlException, ArkUrlFormatter, ArkUrlInfo, ArkUrlSettings)
import _rust
from ark_resolver import ark_url
import ark_resolver.check_digit as check_digit_py
import ark_resolver.health


#################################################################################################
# OpenTelemetry

# Build a tracer provider whose spans are batched and written to the console.
provider = TracerProvider()
processor = BatchSpanProcessor(ConsoleSpanExporter())
provider.add_span_processor(processor)

# Sets the global default tracer provider
trace.set_tracer_provider(provider)

# Creates a tracer from the global tracer provider
# NOTE(review): "my.tracer.name" looks like a placeholder copied from example code -
# confirm whether a project-specific instrumentation name was intended.
tracer = trace.get_tracer("my.tracer.name")

#################################################################################################
# Server implementation.
Expand All @@ -35,8 +63,53 @@
app = Sanic('ark_resolver')
CORS(app)

# Register health check route
app.blueprint(ark_resolver.health.health_bp)


@app.before_server_start
async def init_sentry(_):
    """
    Initializes the Sentry SDK before the server starts.

    The configuration is read from the following environment variables:

    - ARK_SENTRY_DSN: the Sentry DSN. If it is unset or empty, Sentry is not initialized.
    - ARK_SENTRY_DEBUG: turns on SDK debug mode when set to "1", "true", "yes" or "on"
      (case-insensitive). Any other value, or no value, leaves debug mode off.
    - ARK_SENTRY_ENVIRONMENT: passed to the SDK as the environment (optional).
    - ARK_SENTRY_RELEASE: passed to the SDK as the release (optional).
    """
    sentry_dsn = os.environ.get("ARK_SENTRY_DSN")
    if not sentry_dsn:
        logger.info("No ARK_SENTRY_DSN found in environment variables. Sentry will not be initialized.")
        return

    # Environment variables are always strings, and any non-empty string (including "False")
    # is truthy. Parse the flag explicitly so that debug mode can actually be switched off.
    sentry_debug = os.environ.get("ARK_SENTRY_DEBUG", "False").strip().lower() in ("1", "true", "yes", "on")
    sentry_environment = os.environ.get("ARK_SENTRY_ENVIRONMENT")
    sentry_release = os.environ.get("ARK_SENTRY_RELEASE")

    sentry_sdk.init(
        dsn=sentry_dsn,
        debug=sentry_debug,
        environment=sentry_environment,
        release=sentry_release,
        # Add data like request headers and IP for users;
        # see https://docs.sentry.io/platforms/python/data-management/data-collected/ for more info
        send_default_pii=True,
        # Set traces_sample_rate to 1.0 to capture 100%
        # of transactions for tracing.
        traces_sample_rate=1.0,
        # Set profiles_sample_rate to 1.0 to profile 100%
        # of sampled transactions.
        # We recommend adjusting this value in production.
        profiles_sample_rate=1.0,
        instrumenter="otel",
        integrations=[
            AsyncioIntegration(),
            RustTracingIntegration(
                "_rust",
                _rust.initialize_tracing,
                include_tracing_fields=True,
            ),
            SanicIntegration(
                # Configure the Sanic integration so that we generate
                # transactions for all HTTP status codes, including 404
                unsampled_statuses=None,
            ),
        ],
    )
    logger.info("Sentry initialized.")


def get_config() -> str:
def get_safe_config() -> str:
"""
Returns the app's configuration
"""
Expand All @@ -55,20 +128,22 @@ def get_config() -> str:
return safe_config_output.getvalue()


@tracer.start_as_current_span("config_get")
@app.get("/config")
async def config_get(_) -> HTTPResponse:
async def safe_config_get(_) -> HTTPResponse:
"""
Returns the app's configuration
"""
return response.text(get_config())
return response.text(get_safe_config())


@tracer.start_as_current_span("config_head")
@app.head("/config")
async def config_head(_) -> HTTPResponse:
async def safe_config_head(_) -> HTTPResponse:
"""
Returns only the head of the config response
"""
config_str = get_config()
config_str = get_safe_config()

headers = {
"Content-Length": str(len(config_str)),
Expand All @@ -78,6 +153,7 @@ async def config_head(_) -> HTTPResponse:
return response.text("", headers=headers)


@tracer.start_as_current_span("reload")
@app.post("/reload")
async def reload(req) -> HTTPResponse:
"""
Expand Down Expand Up @@ -108,23 +184,28 @@ async def reload(req) -> HTTPResponse:
else:
return response.text("Unauthorized", status=401)


@tracer.start_as_current_span("redirect")
@app.get('/<path:path>')
async def catch_all(_, path="") -> HTTPResponse:
"""
Catch all URL. Tries to redirect the given ARK ID.
"""
try:
redirect_url = ArkUrlInfo(settings=app.config.settings, ark_url=path, path_only=True).to_redirect_url()
except ArkUrlException as ex:
redirect_url = ark_url.ArkUrlInfo(settings=app.config.settings, ark_id=path).to_redirect_url()

except ark_url.ArkUrlException as ex:
logger.error(f"Invalid ARK ID: {path}", exc_info=ex)
return response.text(body=ex.message, status=400)

except base64url_check_digit_py.CheckDigitException as ex:
except check_digit_py.CheckDigitException as ex:
logger.error(f"Invalid ARK ID: {path}", exc_info=ex)
return response.text(body=ex.message, status=400)

except KeyError:
except KeyError as ex:
logger.error(f"Invalid ARK ID: {path}", exc_info=ex)
return response.text(body="Invalid ARK ID", status=400)

logger.info(f"Redirecting {path} to {redirect_url}")
return response.redirect(redirect_url)


Expand Down Expand Up @@ -158,7 +239,7 @@ def reload_config() -> None:
#################################################################################################
# Loading of config and registry files.

def load_settings(config_path: str) -> ArkUrlSettings:
def load_settings(config_path: str) -> ark_url.ArkUrlSettings:
"""
Loads configuration from given path and returns an ArkUrlSettings.
"""
Expand Down Expand Up @@ -187,7 +268,7 @@ def load_settings(config_path: str) -> ArkUrlSettings:
else:
config.read_file(open(registry_path))

settings = ArkUrlSettings(config)
settings = ark_url.ArkUrlSettings(config)

return settings

Expand All @@ -205,8 +286,10 @@ def main() -> None:
parser.add_argument("-c", "--config", help="config file (default {})".format(default_config_path))
group = parser.add_mutually_exclusive_group()
group.add_argument("-s", "--server", help="start server", action="store_true")
group.add_argument("-i", "--iri", help="print the converted ARK URL from a given DSP resource IRI (add -v and -d optionally)")
group.add_argument("-a", "--ark", help="print the converted DSP resource IRI (requires -r) or DSP URL from a given ARK URL")
group.add_argument("-i", "--iri",
help="print the converted ARK URL from a given DSP resource IRI (add -v and -d optionally)")
group.add_argument("-a", "--ark",
help="print the converted DSP resource IRI (requires -r) or DSP URL from a given ARK ID")
parser.add_argument("-r", "--resource", help="generate resource IRI", action="store_true")
parser.add_argument("-v", "--value", help="value UUID (has to be provided with -i)")
parser.add_argument("-d", "--date", help="DSP ARK timestamp (has to be provided with -i)")
Expand All @@ -226,20 +309,20 @@ def main() -> None:
server(settings)
elif args.iri:
# prints the converted ARK URL from a given DSP resource IRI
print(ArkUrlFormatter(settings).resource_iri_to_ark_url(args.iri, args.value, args.date))
print(ark_url.ArkUrlFormatter(settings).resource_iri_to_ark_url(args.iri, args.value, args.date))
elif args.ark:
if args.resource:
# prints the converted DSP resource IRI from a given ARK URL
print(ArkUrlInfo(settings, args.ark).to_resource_iri())
print(ark_url.ArkUrlInfo(settings, args.ark).to_resource_iri())
else:
# prints the converted DSP URL from a given ARK URL
print(ArkUrlInfo(settings, args.ark).to_redirect_url())
print(ark_url.ArkUrlInfo(settings, args.ark).to_redirect_url())
else:
parser.print_help()
except ArkUrlException as ex:
except ark_url.ArkUrlException as ex:
print(ex.message)
exit(1)
except base64url_check_digit_py.CheckDigitException as ex:
except check_digit_py.CheckDigitException as ex:
print(ex.message)
exit(1)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
from string import Template
from urllib import parse

import ark_resolver.base64url_check_digit as base64url_check_digit_py
import ark_resolver.check_digit as check_digit_py


#################################################################################################
Expand Down Expand Up @@ -56,42 +56,27 @@ def __init__(self, message):

class ArkUrlInfo:
"""
Represents the information retrieved from a DSP ARK URL.
Represents the information retrieved from a DSP ARK ID.
"""

def __init__(self, settings, ark_url, path_only=False):
def __init__(self, settings, ark_id):
self.settings = settings

# Are we matching just the path part of the URL?
# TODO: path_only=True should be tested in unit tests
if path_only:
# Yes. Is it a version 1 ARK ID?
match = settings.ark_path_regex.match(ark_url)

if match:
# Yes.
self.url_version = int(match.group(1))
else:
# No. Is it a version 0 ARK ID?
match = settings.v0_ark_path_regex.match(ark_url)

if match is not None:
self.url_version = 0
match = settings.ark_path_regex.match(ark_id)

if match:
# Yes. Is it a version 1 ARK ID?
self.url_version = int(match.group(1))
else:
# We are matching a whole URL. Does it contain a version 1 ARK ID?
match = settings.ark_url_regex.match(ark_url)
# No. Is it a version 0 ARK ID?
match = settings.v0_ark_path_regex.match(ark_id)

# If NOT None!, then it is a version 0 ARK ID.
if match is not None:
# Yes.
self.url_version = int(match.group(1))
else:
# No. Does it contain a version 0 ARK ID?
match = settings.v0_ark_url_regex.match(ark_url)
self.url_version = 0

if match is not None:
self.url_version = 0
if match is None:
raise ArkUrlException(f"Invalid ARK ID: {ark_url}")
raise ArkUrlException(f"Invalid ARK ID: {ark_id}")

# Which version of ARK ID did we match?
if self.url_version == settings.dsp_ark_version:
Expand All @@ -101,15 +86,15 @@ def __init__(self, settings, ark_url, path_only=False):

if escaped_resource_id_with_check_digit is not None:
self.resource_id = unescape_and_validate_uuid(
ark_url=ark_url,
ark_url=ark_id,
escaped_uuid=escaped_resource_id_with_check_digit
)

escaped_value_id_with_check_digit = match.group(4)

if escaped_value_id_with_check_digit is not None:
self.value_id = unescape_and_validate_uuid(
ark_url=ark_url,
ark_url=ark_id,
escaped_uuid=escaped_value_id_with_check_digit
)
else:
Expand All @@ -136,9 +121,10 @@ def __init__(self, settings, ark_url, path_only=False):
project_config = self.settings.config[self.project_id]

if not project_config.getboolean("AllowVersion0"):
raise ArkUrlException(f"Invalid ARK ID (version 0 not allowed): {ark_url}")
raise ArkUrlException(f"Invalid ARK ID (version 0 not allowed): {ark_id}")
else:
raise ArkUrlException(f"Invalid ARK ID {ark_url}. The version of the ARK ID doesn't match the version defined in the settings.")
raise ArkUrlException(
f"Invalid ARK ID {ark_id}. The version of the ARK ID doesn't match the version defined in the settings.")

self.template_dict = {
"url_version": self.url_version,
Expand Down Expand Up @@ -181,7 +167,8 @@ def to_resource_iri(self) -> str:
# in case of an ARK URL version 0, the resource_id generated from the salsah ID has to be converted to a
# base64 UUID version 5
generic_namespace_url = uuid.NAMESPACE_URL
dasch_uuid_ns = uuid.uuid5(generic_namespace_url, "https://dasch.swiss") # cace8b00-717e-50d5-bcb9-486f39d733a2
dasch_uuid_ns = uuid.uuid5(generic_namespace_url,
"https://dasch.swiss") # cace8b00-717e-50d5-bcb9-486f39d733a2
resource_id = template_dict["resource_id"]
dsp_iri = base64.urlsafe_b64encode(uuid.uuid5(dasch_uuid_ns, resource_id).bytes).decode("utf-8")
# remove the padding ('==') from the end of the string
Expand Down Expand Up @@ -273,7 +260,7 @@ def add_check_digit_and_escape(uuid) -> str:
"""
Adds a check digit to a Base64-encoded UUID, and escapes the result.
"""
check_digit = base64url_check_digit_py.calculate_check_digit(uuid)
check_digit = check_digit_py.calculate_check_digit(uuid)
uuid_with_check_digit = uuid + check_digit

# Escape '-' as '=' in the resource ID and check digit, because '-' can be ignored in ARK URLs.
Expand All @@ -287,7 +274,7 @@ def unescape_and_validate_uuid(ark_url, escaped_uuid) -> str:
# '-' is escaped as '=' in the UUID and check digit, because '-' can be ignored in ARK URLs.
unescaped_uuid = escaped_uuid.replace('=', '-')

if not base64url_check_digit_py.is_valid(unescaped_uuid):
if not check_digit_py.is_valid(unescaped_uuid):
raise ArkUrlException(f"Invalid ARK ID: {ark_url}")

return unescaped_uuid[0:-1]
Expand Down
File renamed without changes.
15 changes: 0 additions & 15 deletions python/test/create_mock_settings.py

This file was deleted.

File renamed without changes.
Loading

0 comments on commit 14184b7

Please sign in to comment.