Skip to content

Commit

Permalink
Merge pull request #4 from arXiv/handle_identify
Browse files Browse the repository at this point in the history
Handle identify
  • Loading branch information
kyokukou authored Nov 18, 2024
2 parents 92efc95 + a864587 commit be0008e
Show file tree
Hide file tree
Showing 4 changed files with 79 additions and 9 deletions.
16 changes: 13 additions & 3 deletions oaipmh/data/oai_config.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
from datetime import datetime, timezone
from oaipmh.data.oai_properties import MetadataFormat

#TODO do we want to change this
EARLIEST_DATE=datetime(2007, 5, 23, 0, 0, tzinfo=timezone.utc)
#TODO test/ determine limits
RECORDS_LIMIT=2000
IDENTIFIERS_LIMIT=5000

SUPPORTED_METADATA_FORMATS={
"oai_dc":MetadataFormat(
Expand All @@ -25,4 +26,13 @@
schema="http://arxiv.org/OAI/arXivRaw.xsd",
namespace="http://arxiv.org/OAI/arXivRaw/"
),
}
}

#required definitions
REPOSITORY_NAME='arXiv'
BASE_URL='https://arxiv.org/oai'
PROTOCOL_VERSION='2.0'
EARLIEST_DATE=datetime(2007, 5, 23, 0, 0, tzinfo=timezone.utc) #TODO change?
DELETED_RECORD='persistent'
GRANULARITY='YYYY-MM-DD'
ADMIN_EMAIL='help@arxiv.org'
15 changes: 10 additions & 5 deletions oaipmh/requests/info_queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from flask import render_template

from oaipmh.data.oai_config import SUPPORTED_METADATA_FORMATS
from oaipmh.data import oai_config
from oaipmh.data.oai_errors import OAIBadArgument
from oaipmh.data.oai_properties import OAIParams, OAIVerbs
from oaipmh.serializers.output_formats import Response
Expand All @@ -15,9 +15,14 @@ def identify(params: Dict[str, str]) -> Response:
query_data: Dict[OAIParams, str]={OAIParams.VERB : OAIVerbs.IDENTIFY}
if set(params.keys()) != {OAIParams.VERB}:
raise OAIBadArgument(f"No other parameters allowed for {OAIVerbs.IDENTIFY}")

#TODO
return "<a>b</a>", 200, {}

response=render_template("identify.xml", #TODO look into links and url in logo description, make sure there is no conflict
response_date=datetime.now(timezone.utc),
query_params=query_data,
config_data=oai_config
)
headers={"Content-Type":"application/xml"}
return response, 200, headers

def list_metadata_formats(params: Dict[str, str]) -> Response:
"""used to retrieve the metadata formats available from a repository.
Expand All @@ -44,7 +49,7 @@ def list_metadata_formats(params: Dict[str, str]) -> Response:
response=render_template("metaformats.xml",
response_date=datetime.now(timezone.utc),
query_params=query_data,
formats=SUPPORTED_METADATA_FORMATS
formats=oai_config.SUPPORTED_METADATA_FORMATS
)
headers={"Content-Type":"application/xml"}
return response, 200, headers
Expand Down
41 changes: 41 additions & 0 deletions oaipmh/templates/identify.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
{% extends "base.xml" %}
{# Response template for the Identify verb.
   Context variables read here: query_params (passed to the request-element macro)
   and config_data (an object exposing REPOSITORY_NAME, BASE_URL, PROTOCOL_VERSION,
   ADMIN_EMAIL, EARLIEST_DATE, DELETED_RECORD and GRANULARITY).
   NOTE(review): `macros` is not imported in this file; presumably base.xml or the
   Jinja environment provides it - confirm. #}

{% block request_element %}
{{ macros.request_element(query_params) }}
{% endblock %}

{% block interior_xml %}
<Identify>
<repositoryName>{{config_data.REPOSITORY_NAME}}</repositoryName>
<baseURL>{{config_data.BASE_URL}}</baseURL>
<protocolVersion>{{config_data.PROTOCOL_VERSION}}</protocolVersion>
<adminEmail>{{config_data.ADMIN_EMAIL}}</adminEmail>
<earliestDatestamp>{{config_data.EARLIEST_DATE.strftime('%Y-%m-%d')}}</earliestDatestamp>
<deletedRecord>{{config_data.DELETED_RECORD}}</deletedRecord>
<granularity>{{config_data.GRANULARITY}}</granularity>
{{arXiv_desc()}}
</Identify>
{% endblock %}

{# arXiv_desc: emits the static <description> element (an eprints container holding the
   content, metadata, data and submission policy texts and URLs). It takes no arguments
   and reads no context variables. It is defined at the template's top level, after the
   interior_xml block that calls it. #}
{% macro arXiv_desc() %}
<description>
<eprints xmlns="http://www.openarchives.org/OAI/1.1/eprints" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/1.1/eprints http://www.openarchives.org/OAI/1.1/eprints.xsd">
<content>
<text>Author self-archived e-prints</text>
</content>
<metadataPolicy>
<text>Metadata harvesting permitted through OAI interface</text>
<URL>http://arxiv.org/help/oa/metadataPolicy</URL>
</metadataPolicy>
<dataPolicy>
<text>Full-content harvesting not permitted (except by special arrangement)</text>
<URL>http://arxiv.org/help/oa/dataPolicy</URL>
</dataPolicy>
<submissionPolicy>
<text>Author self-submission preferred, submissions screened for appropriateness.</text>
<URL>http://arxiv.org/help/submit</URL>
</submissionPolicy>
</eprints>
</description>
{% endmacro %}

16 changes: 15 additions & 1 deletion tests/test_identify.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,18 @@ def test_extra_params(test_client):
assert response.status_code == 200
text=response.get_data(as_text=True)
assert "<error code='badArgument'>" in text
assert "No other parameters allowed" in text
assert "No other parameters allowed" in text

def test_contents(test_client):
    """Check that a bare Identify request renders every configured element.

    Sends ``verb=Identify`` with no other parameters to ``/oai`` and verifies
    that the response body contains the repository-level elements filled in
    from ``oai_config`` as well as the static arXiv ``<description>`` block.
    """
    params = {OAIParams.VERB: OAIVerbs.IDENTIFY}
    response = test_client.get("/oai", query_string=params)
    assert response.status_code == 200
    text = response.get_data(as_text=True)

    expected_fragments = (
        # Elements rendered from oai_config values.
        "<repositoryName>arXiv</repositoryName>",
        "<baseURL>https://arxiv.org/oai</baseURL>",
        "<protocolVersion>2.0</protocolVersion>",
        "<adminEmail>help@arxiv.org</adminEmail>",
        "<earliestDatestamp>2007-05-23</earliestDatestamp>",
        "<deletedRecord>persistent</deletedRecord>",
        "<granularity>YYYY-MM-DD</granularity>",
        # Static description block emitted by the template macro.
        "<description>",
        "<text>Full-content harvesting not permitted (except by special arrangement)</text>",
        "<URL>http://arxiv.org/help/oa/metadataPolicy</URL>",
    )
    for fragment in expected_fragments:
        # Name the missing fragment so a failure points at the exact element.
        assert fragment in text, f"missing from Identify response: {fragment}"

0 comments on commit be0008e

Please sign in to comment.