diff --git a/oxomoc/_version.py b/oxomoc/_version.py index 8c9cf07..ae24e91 100644 --- a/oxomoc/_version.py +++ b/oxomoc/_version.py @@ -1,5 +1,5 @@ # flake8: noqa -__version__ = '0.0.3-alpha' +__version__ = '0.0.4-alpha' def get_version(): diff --git a/oxomoc/ckpselective.py b/oxomoc/ckpselective.py index 009cafe..1ca89c8 100644 --- a/oxomoc/ckpselective.py +++ b/oxomoc/ckpselective.py @@ -22,11 +22,11 @@ def __init__(self, mongodb_uri="mongodb://localhost:27017/"): """ self.client = MongoClient(mongodb_uri) - def create(self, base_url: str, mongo_db: str, mongo_collection: str, metadataPrefix='oai_dc', force_http_get=True, days=30, max_tries=4): # noqa: E501 + def create(self, base_url: str, mongo_db: str, mongo_collection: str, metadataPrefix='oai_dc', force_http_get=True, days=10, max_tries=4): # noqa: E501 """ Method to create the checkpoint, this allows to save all the ids for records and sets in order to know what was downloaded. - All the checkpints are saved in the mongo collections + All the checkpoints are saved in the mongo collections Parameters: ---------- @@ -40,6 +40,10 @@ def create(self, base_url: str, mongo_db: str, mongo_collection: str, metadataPr metadata type for xml schema ex: dim, xoai, mods, oai_dc (default: oai_dc) force_http_get:bool force to use get instead post for requests + days:int + number of days for the selective checkpoint date range + max_tries:int + number of tries in case of failing the request """ client = Client(base_url, force_http_get=force_http_get) try: diff --git a/oxomoc/harvester.py b/oxomoc/harvester.py index 5cf9334..345e610 100644 --- a/oxomoc/harvester.py +++ b/oxomoc/harvester.py @@ -146,14 +146,20 @@ def process_endpoint(self, endpoint: str): metadataPrefix = self.endpoints[endpoint]["metadataPrefix"] selective = self.endpoints[endpoint]["checkpoint"]["selective"] checkpoint = self.endpoints[endpoint]["checkpoint"]["enabled"] + if selective: self.checkpoint[endpoint] = OxomocCheckPointSelective( 
self.mongodb_uri) else: self.checkpoint[endpoint] = OxomocCheckPoint(self.mongodb_uri) if checkpoint: - self.checkpoint[endpoint].create( - url, self.mongo_db, endpoint, metadataPrefix) + if selective: + days = self.endpoints[endpoint]["checkpoint"]["days"] + self.checkpoint[endpoint].create( + url, self.mongo_db, endpoint, metadataPrefix, days=days) + else: + self.checkpoint[endpoint].create( + url, self.mongo_db, endpoint, metadataPrefix) print(f"\n=== Processing {endpoint} from {url} ") if self.checkpoint[endpoint].exists_records(self.mongo_db, endpoint):