Skip to content

Commit

Permalink
Merge pull request #336 from omazapa/main
Browse files Browse the repository at this point in the history
integrating entities and several fixes
  • Loading branch information
omazapa authored Oct 2, 2024
2 parents 06245a4 + 7417038 commit 0dfe52e
Show file tree
Hide file tree
Showing 20 changed files with 751 additions and 117 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,53 @@
from re import search


def get_units_affiations(db, author_db, affiliations):
"""
Method to get the units of an author in a register. ex: faculty, department and group.
Parameters:
----------
db : pymongo.database.Database
Database connection to colav database.
author_db : dict
record from person
affiliations : list
list of affiliations from the parse_minciencias_opendata method
Returns:
-------
list
list of units of an author (entries from using affiliations)
"""
institution_id = None
# verifiying univeristy
for j, aff in enumerate(affiliations):
aff_db = db["affiliations"].find_one(
{"_id": aff["id"]}, {"_id": 1, "types": 1})
if aff_db:
types = [i["type"] for i in aff_db["types"]]
if "group" in types or "department" in types or "faculty" in types:
aff_db = None
continue
if aff_db:
count = db["person"].count_documents(
{"_id": author_db["_id"], "affiliations.id": aff_db["_id"]})
if count > 0:
institution_id = aff_db["_id"]
break
units = []
for aff in author_db["affiliations"]:
if aff["id"] == institution_id:
continue
count = db["affiliations"].count_documents(
{"_id": aff["id"], "relations.id": institution_id})
if count > 0:
types = [i["type"] for i in aff["types"]]
if "department" in types or "faculty" in types:
units.append(aff)
return units


def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, verbose=0):
"""
Method to update a register in the kahi database from minciencias opendata database if it is found.
Expand Down Expand Up @@ -71,10 +118,7 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
group_id = minciencias_author["affiliations"][0]['external_ids'][0]['id']

affiliations_db = db["affiliations"].find_one(
{"external_ids.source": "scienti", "external_ids.id": group_id})
if not affiliations_db:
affiliations_db = db["affiliations"].find_one(
{"external_ids.id": group_id})
{"external_ids.id": group_id})

if affiliations_db:
for i, author in enumerate(colav_reg["authors"]):
Expand Down Expand Up @@ -178,6 +222,12 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
if "education" in types:
if relation["id"] not in affs:
author["affiliations"].append(relation)
aff_units = get_units_affiations(
db, author_db, author["affiliations"])
for aff_unit in aff_units:
if aff_unit not in author["affiliations"]:
author["affiliations"].append(aff_unit)

break

collection.update_one(
Expand Down Expand Up @@ -239,10 +289,8 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
if minciencias_author["affiliations"]:
group_id = minciencias_author["affiliations"][0]['external_ids'][0]['id']
affiliations_db = db["affiliations"].find_one(
{"external_ids.source": "scienti", "external_ids.id": group_id})
if not affiliations_db:
affiliations_db = db["affiliations"].find_one(
{"external_ids.id": group_id})
{"external_ids.id": group_id})
if affiliations_db:
if entry['authors'][0]['external_ids'][0]['id'] == ext['id']:
entry['authors'][0]["affiliations"].append(
{
Expand Down Expand Up @@ -271,8 +319,35 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
group_id = openadata_reg["cod_grupo_gr"]
rgroup = db["affiliations"].find_one({"external_ids.id": group_id})
if rgroup:
entry["groups"].append(
{"id": rgroup["_id"], "name": rgroup["names"][0]["name"]})
found = False
for group in entry["groups"]:
if group["id"] == rgroup["_id"]:
found = True
break
if not found:
entry["groups"].append(
{"id": rgroup["_id"], "name": rgroup["names"][0]["name"]})

# Adding group relation affiliation to the author affiliations
if author_db and rgroup["relations"]:
for author in entry["authors"]:
if author["id"] == author_db["_id"]:
affs = [aff["id"] for aff in author["affiliations"]]
for relation in rgroup["relations"]:
types = []
if "types" in relation.keys() and relation["types"]:
types = [rel["type"].lower()
for rel in relation["types"]]
if "education" in types:
if relation["id"] not in affs:
author["affiliations"].append(relation)
aff_units = get_units_affiations(
db, author_db, author["affiliations"])
for aff_unit in aff_units:
if aff_unit not in author["affiliations"]:
author["affiliations"].append(aff_unit)

break

# insert in mongo
collection.insert_one(entry)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,54 @@
from re import search


def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, verbose=0):
def get_units_affiations(db, author_db, affiliations):
"""
Method to get the units of an author in a register. ex: faculty, department and group.
Parameters:
----------
db : pymongo.database.Database
Database connection to colav database.
author_db : dict
record from person
affiliations : list
list of affiliations from the parse_minciencias_opendata method
Returns:
-------
list
list of units of an author (entries from using affiliations)
"""
institution_id = None
# verifiying univeristy
for j, aff in enumerate(affiliations):
aff_db = db["affiliations"].find_one(
{"_id": aff["id"]}, {"_id": 1, "types": 1})
if aff_db:
types = [i["type"] for i in aff_db["types"]]
if "group" in types or "department" in types or "faculty" in types:
aff_db = None
continue
if aff_db:
count = db["person"].count_documents(
{"_id": author_db["_id"], "affiliations.id": aff_db["_id"]})
if count > 0:
institution_id = aff_db["_id"]
break
units = []
for aff in author_db["affiliations"]:
if aff["id"] == institution_id:
continue
count = db["affiliations"].count_documents(
{"_id": aff["id"], "relations.id": institution_id})
if count > 0:
types = [i["type"] for i in aff["types"]]
if "department" in types or "faculty" in types:
units.append(aff)
return units


def process_one_update(openadata_reg, colav_reg, db, collection, empty_patent, verbose=0):
"""
Method to update a register in the kahi database from minciencias opendata database if it is found.
This means that the register is already on the kahi database and it is being updated with new information.
Expand All @@ -20,13 +67,13 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
Database where the colav collections are stored, used to search for authors and affiliations.
collection : pymongo.collection.Collection
Collection in the database where the register is stored (Collection of patents)
empty_work : dict
empty_patent : dict
Empty dictionary with the structure of a register in the database
verbose : int, optional
Verbosity level. The default is 0.
"""
entry = parse_minciencias_opendata(
openadata_reg, empty_event.copy(), verbose=verbose)
openadata_reg, empty_patent.copy(), verbose=verbose)
# updated
for upd in colav_reg["updated"]:
if upd["source"] == "minciencias":
Expand Down Expand Up @@ -69,10 +116,7 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
group_id = minciencias_author["affiliations"][0]['external_ids'][0]['id']

affiliations_db = db["affiliations"].find_one(
{"external_ids.source": "scienti", "external_ids.id": group_id})
if not affiliations_db:
affiliations_db = db["affiliations"].find_one(
{"external_ids.id": group_id})
{"external_ids.id": group_id})

if affiliations_db:
for i, author in enumerate(colav_reg["authors"]):
Expand Down Expand Up @@ -176,6 +220,12 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
if "education" in types:
if relation["id"] not in affs:
author["affiliations"].append(relation)
aff_units = get_units_affiations(
db, author_db, author["affiliations"])
for aff_unit in aff_units:
if aff_unit not in author["affiliations"]:
author["affiliations"].append(aff_unit)

break

collection.update_one(
Expand All @@ -191,7 +241,7 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
)


def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, verbose=0):
def process_one_insert(openadata_reg, db, collection, empty_patent, es_handler, verbose=0):
"""
Function to insert a new register in the database if it is not found in the colav(kahi patents) database.
This means that the register is not on the database and it is being inserted.
Expand All @@ -210,15 +260,15 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
Database where the colav collections are stored, used to search for authors and affiliations.
collection : pymongo.collection.Collection
Collection in the database where the register is stored (Collection of patents)
empty_work : dict
empty_patent : dict
Empty dictionary with the structure of a register in the database
es_handler : Similarity
Elasticsearch handler to insert the register in the elasticsearch index, Mohan's Similarity class.
verbose : int, optional
Verbosity level. The default is 0.
"""
# parse
entry = parse_minciencias_opendata(openadata_reg, empty_work.copy())
entry = parse_minciencias_opendata(openadata_reg, empty_patent.copy())
# search authors and affiliations in db
# authors
minciencias_author = ""
Expand All @@ -237,10 +287,8 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
if minciencias_author["affiliations"]:
group_id = minciencias_author["affiliations"][0]['external_ids'][0]['id']
affiliations_db = db["affiliations"].find_one(
{"external_ids.source": "scienti", "external_ids.id": group_id})
if not affiliations_db:
affiliations_db = db["affiliations"].find_one(
{"external_ids.id": group_id})
{"external_ids.id": group_id})
if affiliations_db:
if entry['authors'][0]['external_ids'][0]['id'] == ext['id']:
entry['authors'][0]["affiliations"].append(
{
Expand Down Expand Up @@ -269,8 +317,35 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
group_id = openadata_reg["cod_grupo_gr"]
rgroup = db["affiliations"].find_one({"external_ids.id": group_id})
if rgroup:
entry["groups"].append(
{"id": rgroup["_id"], "name": rgroup["names"][0]["name"]})
found = False
for group in entry["groups"]:
if group["id"] == rgroup["_id"]:
found = True
break
if not found:
entry["groups"].append(
{"id": rgroup["_id"], "name": rgroup["names"][0]["name"]})

# Adding group relation affiliation to the author affiliations
if author_db and rgroup["relations"]:
for author in entry["authors"]:
if author["id"] == author_db["_id"]:
affs = [aff["id"] for aff in author["affiliations"]]
for relation in rgroup["relations"]:
types = []
if "types" in relation.keys() and relation["types"]:
types = [rel["type"].lower()
for rel in relation["types"]]
if "education" in types:
if relation["id"] not in affs:
author["affiliations"].append(relation)
aff_units = get_units_affiations(
db, author_db, author["affiliations"])
for aff_unit in aff_units:
if aff_unit not in author["affiliations"]:
author["affiliations"].append(aff_unit)

break

# insert in mongo
collection.insert_one(entry)
Expand Down
Loading

0 comments on commit 0dfe52e

Please sign in to comment.