Skip to content

Commit

Permalink
Merge pull request #6 from omazapa/main
Browse files Browse the repository at this point in the history
updates
  • Loading branch information
omazapa authored Dec 3, 2022
2 parents 7a4b170 + 5ef202c commit 0a0589a
Show file tree
Hide file tree
Showing 3 changed files with 15 additions and 5 deletions.
2 changes: 1 addition & 1 deletion oxomoc/_version.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# flake8: noqa
__version__ = '0.0.3-alpha'
__version__ = '0.0.4-alpha'


def get_version():
Expand Down
8 changes: 6 additions & 2 deletions oxomoc/ckpselective.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ def __init__(self, mongodb_uri="mongodb://localhost:27017/"):
"""
self.client = MongoClient(mongodb_uri)

def create(self, base_url: str, mongo_db: str, mongo_collection: str, metadataPrefix='oai_dc', force_http_get=True, days=30, max_tries=4): # noqa: E501
def create(self, base_url: str, mongo_db: str, mongo_collection: str, metadataPrefix='oai_dc', force_http_get=True, days=10, max_tries=4): # noqa: E501
"""
Method to create the checkpoint; it saves all the ids for records and sets
in order to know what was downloaded.
All the checkpints are saved in the mongo collections
All the checkpoints are saved in the mongo collections
Parameters:
----------
Expand All @@ -40,6 +40,10 @@ def create(self, base_url: str, mongo_db: str, mongo_collection: str, metadataPr
metadata type for xml schema ex: dim, xoai, mods, oai_dc (default: oai_dc)
force_http_get:bool
force the use of GET instead of POST for requests
days:int
number of days for the selective checkpoint date range
max_tries:int
number of tries in case of failing the request
"""
client = Client(base_url, force_http_get=force_http_get)
try:
Expand Down
10 changes: 8 additions & 2 deletions oxomoc/harvester.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,14 +146,20 @@ def process_endpoint(self, endpoint: str):
metadataPrefix = self.endpoints[endpoint]["metadataPrefix"]
selective = self.endpoints[endpoint]["checkpoint"]["selective"]
checkpoint = self.endpoints[endpoint]["checkpoint"]["enabled"]

if selective:
self.checkpoint[endpoint] = OxomocCheckPointSelective(
self.mongodb_uri)
else:
self.checkpoint[endpoint] = OxomocCheckPoint(self.mongodb_uri)
if checkpoint:
self.checkpoint[endpoint].create(
url, self.mongo_db, endpoint, metadataPrefix)
if selective:
days = self.endpoints[endpoint]["checkpoint"]["days"]
self.checkpoint[endpoint].create(
url, self.mongo_db, endpoint, metadataPrefix,days)
else:
self.checkpoint[endpoint].create(
url, self.mongo_db, endpoint, metadataPrefix)

print(f"\n=== Processing {endpoint} from {url} ")
if self.checkpoint[endpoint].exists_records(self.mongo_db, endpoint):
Expand Down

0 comments on commit 0a0589a

Please sign in to comment.