Skip to content

Commit

Permalink
s
Browse files Browse the repository at this point in the history
  • Loading branch information
afuetterer committed Apr 17, 2024
1 parent f1f1ad9 commit 1da89d4
Show file tree
Hide file tree
Showing 21 changed files with 1,678 additions and 330 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ with Scythe("https://zenodo.org/oai2d") as scythe:
`oaipmh-scythe` is built with:

- [httpx](https://github.com/encode/httpx) for issuing HTTP requests
- [lxml](https://github.com/lxml/lxml) for parsing XML responses
- [xsdata](https://github.com/tefra/xsdata) for parsing XML responses

## Installation

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ ignore = [
minversion = "8.0"
addopts = ["-ra", "--showlocals", "--strict-markers", "--strict-config"]
filterwarnings = ["error"]
log_cli_level = "INFO"
# log_cli_level = "INFO"
xfail_strict = true
testpaths = "tests"

Expand Down
17 changes: 5 additions & 12 deletions src/oaipmh_scythe/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@
logger = logging.getLogger(__name__)

USER_AGENT: str = f"oaipmh-scythe/{__version__}"
OAI_NAMESPACE: str = "{http://www.openarchives.org/OAI/2.0/}"


class Scythe:
Expand Down Expand Up @@ -86,7 +85,6 @@ def __init__(
self.max_retries = max_retries
self.retry_status_codes = retry_status_codes or (503,)
self.default_retry_after = default_retry_after
self.oai_namespace = OAI_NAMESPACE
self.encoding = encoding
self.auth = auth
self.timeout = timeout
Expand Down Expand Up @@ -158,8 +156,9 @@ def harvest(self, query: dict[str, str]) -> Response:
logger.warning("HTTP %d! Retrying after %d seconds...", http_response.status_code, retry_after)
time.sleep(retry_after)
http_response = self._request(query)
http_response.raise_for_status()
return _build_response(http_response)
# http_response.raise_for_status()
metadata_prefix = query.get("metadataPrefix")
return _build_response(http_response, metadata_prefix)

def _request(self, query: dict[str, str]) -> httpx.Response:
"""Send an HTTP request to the OAI server using the configured HTTP method and given query parameters.
Expand Down Expand Up @@ -255,7 +254,6 @@ def list_identifiers(
cannotDisseminateFormat: If the specified metadata_prefix is not supported by the OAI server.
noRecordsMatch: If no records match the provided criteria.
noSetHierarchy: If set-based harvesting is requested but the OAI server does not support sets.
"""
_query = {
"verb": Verb.LIST_IDENTIFIERS.value,
Expand Down Expand Up @@ -287,7 +285,6 @@ def list_sets(self, resumption_token: str | None = None) -> Iterator[Response |
Raises:
badResumptionToken: If the provided resumption token is invalid or expired.
noSetHierarchy: If the OAI server does not support sets or has no set hierarchy available.
"""
_query = {
"verb": Verb.LIST_SETS.value,
Expand All @@ -308,10 +305,7 @@ def identify(self) -> Identify:
Returns:
Identify: An object encapsulating the server's identify response, which contains various pieces of information
about the OAI server.
"""
# return Identify(self.harvest(query))

query = {"verb": Verb.IDENTIFY.value}
response = self.harvest(query)
return response.parsed.identify
Expand All @@ -337,14 +331,14 @@ def get_record(self, identifier: str, metadata_prefix: str = "oai_dc") -> Respon
cannotDisseminateFormat: If the specified metadata_prefix is not supported by the OAI server
for the requested record.
idDoesNotExist: If the specified identifier does not correspond to any record in the OAI server.
"""
query = {
"verb": Verb.GET_RECORD.value,
"identifier": identifier,
"metadataPrefix": metadata_prefix,
}
return next(iter(self.iterator(self, query)))
response = self.harvest(query)
return response.parsed.get_record.record

def list_metadata_formats(self, identifier: str | None = None) -> Iterator[Response | MetadataFormat]:
"""Issue a ListMetadataFormats request to the OAI server.
Expand All @@ -367,7 +361,6 @@ def list_metadata_formats(self, identifier: str | None = None) -> Iterator[Respo
Raises:
idDoesNotExist: If the specified identifier does not correspond to any record in the OAI server.
noMetadataFormats: If there are no metadata formats available for the requested record or repository.
"""
_query = {
"verb": Verb.LIST_METADATA_FORMATS.value,
Expand Down
39 changes: 18 additions & 21 deletions src/oaipmh_scythe/iterator.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from __future__ import annotations

from abc import ABC, abstractmethod
from enum import StrEnum
from enum import Enum
from operator import attrgetter
from typing import TYPE_CHECKING

Expand All @@ -29,40 +29,41 @@
from collections.abc import Iterator

from oaipmh_scythe import Scythe
from oaipmh_scythe.models import Header, MetadataFormat, Record, ResumptionToken, Set
from oaipmh_scythe.models import Header, Item, MetadataFormat, Record, ResumptionToken, Set
from oaipmh_scythe.response import Response


class GetRecord(StrEnum):
class GetRecord(Enum):
attribute = "get_record"
element = "record"


class ListIdentifiers(StrEnum):
class ListIdentifiers(Enum):
attribute = "list_identifiers"
element = "header"


class ListRecords(StrEnum):
class ListRecords(Enum):
attribute = "list_records"
element = "record"


class ListSets(StrEnum):
class ListSets(Enum):
attribute = "list_sets"
element = "set"


class ListMetadataFormats(StrEnum):
attribute = "list_metadataformats"
element = "metadataformat"
class ListMetadataFormats(Enum):
attribute = "list_metadata_formats"
element = "metadata_format"


# Lookup table keyed by the OAI-PMH verb string (``Verb.<NAME>.value``).
# Each value is one of the enum classes defined above; the iterators read its
# ``attribute`` and ``element`` members (e.g. ``MAPPING[verb].attribute.value``)
# to pick the matching attribute of a parsed response and the items inside it.
MAPPING = {
Verb.LIST_IDENTIFIERS.value: ListIdentifiers,
Verb.GET_RECORD.value: GetRecord,
Verb.LIST_RECORDS.value: ListRecords,
Verb.LIST_SETS.value: ListSets,
Verb.LIST_METADATA_FORMATS.value: ListMetadataFormats,
}


Expand Down Expand Up @@ -112,11 +113,14 @@ def _get_resumption_token(self) -> ResumptionToken | None:
Returns:
A ResumptionToken instance if a token is found in the response, otherwise None.
"""
if self.response is not None:
if self.response is None:
return None
try:
attribute = MAPPING[self.verb].attribute.value
parsed_data = getattr(self.response.parsed, attribute)
return parsed_data.resumption_token
return None
except AttributeError:
return None

def _next_response(self) -> None:
"""Request the next batch of data from the OAI server using the current resumption token.
Expand All @@ -132,7 +136,7 @@ def _next_response(self) -> None:
self.resumption_token = self._get_resumption_token()


class OAIResponseIterator(BaseOAIIterator):
class ResponseIterator(BaseOAIIterator):
"""An iterator class for iterating over OAI responses obtained via the OAI-PMH protocol.
This iterator specifically handles the iteration of OAIResponse objects, allowing for seamless
Expand Down Expand Up @@ -176,7 +180,6 @@ def __init__(self, scythe: Scythe, query: dict[str, str], ignore_deleted: bool =
attribute = MAPPING[query["verb"]].attribute.value
element = MAPPING[query["verb"]].element.value
self.items_getter = attrgetter(f"{attribute}.{element}")
print(self.items_getter)
super().__init__(scythe, query, ignore_deleted)

def _next_response(self) -> None:
Expand All @@ -191,7 +194,7 @@ def _next_response(self) -> None:
else:
self._items = iter(())

def __iter__(self) -> Iterator[Header | Record | Set | MetadataFormat]:
def __iter__(self) -> Iterator[Item]:
"""Iterate over individual OAI items from the response.
Go through the items in the OAI-PMH response, applying any necessary mapping and handling
Expand All @@ -202,13 +205,7 @@ def __iter__(self) -> Iterator[Header | Record | Set | MetadataFormat]:
"""
while True:
for item in self._items:
status = None
if isinstance(item, Header):
status = item.status
elif isinstance(item, Record):
status = item.header.status

if self.ignore_deleted and status and status.DELETED:
if self.ignore_deleted and item.deleted:
continue
yield item
if self.resumption_token:
Expand Down
10 changes: 6 additions & 4 deletions src/oaipmh_scythe/models/.xsdata.xml
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,12 @@
<Substitution type="package" search="http://schemas.xmlsoap.org/wsdl/soap/" replace="soap"/>
<Substitution type="package" search="http://schemas.xmlsoap.org/wsdl/soap12/" replace="soap12"/>
<Substitution type="package" search="http://schemas.xmlsoap.org/soap/envelope/" replace="soapenv"/>

<!-- # TODO: replace "ElementType" with "Element" -->
<!-- <Substitution type="class" search="(.*)(T|t)ype$" replace="\1"/> -->
<!-- strip a trailing "Type" from generated class names, e.g. "ElementType" becomes "Element" -->
<Substitution type="class" search="(.*)Type$" replace="\1"/>
</Substitutions>
<Extensions/>
<Extensions>
<!-- add the HeaderMixin and RecordMixin classes to the generated Header and Record classes -->
<Extension type="class" class="Header" import="oaipmh_scythe.models.mixins.HeaderMixin" prepend="false" applyIfDerived="false"/>
<Extension type="class" class="Record" import="oaipmh_scythe.models.mixins.RecordMixin" prepend="false" applyIfDerived="false"/>
</Extensions>
</Config>
Loading

0 comments on commit 1da89d4

Please sign in to comment.