Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

integrating entities and several fixes #336

Merged
merged 32 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
6e06f5d
[kahi_scienti_works] integrating bibligraphic products to work entity
omazapa Sep 11, 2024
db821d0
[kahi_scienti_works] foxed code quality
omazapa Sep 11, 2024
ce1dba9
[kahi_scienti_works] fixed mongo error with empty keys creating the a…
omazapa Sep 11, 2024
22c98c2
Merge remote-tracking branch 'upstream/main'
omazapa Sep 12, 2024
a2b5f6b
[kahi_minciencias_opendata_works] fixed error in affiliations of the …
omazapa Sep 17, 2024
37ef39e
[Kahi_scienti_works] several fixes to process data in elastic search
omazapa Sep 17, 2024
13c43c0
[Kahi_scienti_works] fixed filter for bibligraphic products in find f…
omazapa Sep 18, 2024
2b555aa
[kahi_openalex_works] added units and subunits to the work
omazapa Sep 19, 2024
e2d26ca
[kahi_minciencias_opendata_works] removed edition
omazapa Sep 19, 2024
600c884
[kahi_minciencias_opendata_works_misc] renamed plugin and removed edi…
omazapa Sep 20, 2024
e6d7fbe
[kahi_scienti_works] added units and subunits in insert
omazapa Sep 26, 2024
e5d953e
[kahi_openalex_works] fixed units and subunits
omazapa Sep 26, 2024
75d1ee6
[kahi_openalex_works] added units and subunits to update and checked …
omazapa Sep 26, 2024
1121ea3
[kahi_openalex_works] fixed affiliation for faculty and departmen, it…
omazapa Sep 27, 2024
6479bf1
Merge branch 'colav:main' into main
omazapa Sep 27, 2024
b9b36f3
[kahi_scienti_works] implmented units/sub units/ groups in author in …
omazapa Sep 29, 2024
df8852d
[kahi_openalex_works] fixed group, faculty and deparment search in types
omazapa Sep 29, 2024
2796110
[kahi_ranking_udea_works] added support to add units and sub units
omazapa Sep 30, 2024
a783e90
[kahi_minciencias_opendata_works_misc] fixed import
omazapa Sep 30, 2024
76084be
[kahi_minciencias_opendata_works] added affiliation to author given t…
omazapa Sep 30, 2024
54cf506
[kahi_minciencias_opendata_works_misc] fixed affiliations
omazapa Sep 30, 2024
2e36eb0
[kahi_minciencias_opendata_events] fixed affiliations
omazapa Sep 30, 2024
da742d2
[kahi_minciencias_opendata_projects] fixed affiliations
omazapa Oct 1, 2024
df5240d
[kahi_minciencias_opendata_patents] fixed affiliations
omazapa Oct 1, 2024
d66ecea
[kahi_minciencias_opendata_works] added units and sub units
omazapa Oct 1, 2024
6993f82
[kahi_minciencias_opendata_works] added units and subunits in update …
omazapa Oct 1, 2024
c71cc88
[kahi_minciencias_opendata_works_misc] added units and subunits in af…
omazapa Oct 1, 2024
f7baed8
[kahi_minciencias_opendata_events] added units and subunits to affili…
omazapa Oct 1, 2024
d652658
[Kahi_minciencias_opendata_works] fixed doc
omazapa Oct 1, 2024
bfa1862
[kahi_minciencias_opendata_projects] implemented units and subunits i…
omazapa Oct 1, 2024
2121e0f
[kahi_minciencias_opendata_patents] implmented units and subunits in …
omazapa Oct 1, 2024
7417038
[kahi_minciencias_opendata_projects] fixed var name
omazapa Oct 1, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,53 @@
from re import search


def get_units_affiations(db, author_db, affiliations):
    """
    Method to get the units (faculty and department) of an author in a register.

    The institution is the first entry of `affiliations` that exists in the
    affiliations collection, is not itself a group/department/faculty, and is
    listed among the author's affiliations in the person collection. The units
    are the author's own department/faculty affiliations whose database record
    has a relation to that institution.

    Parameters:
    ----------
    db : pymongo.database.Database
        Database connection to colav database.
    author_db : dict
        record from person
    affiliations : list
        list of affiliations (dicts with an "id" key) already attached to the
        author in the register being processed

    Returns:
    -------
    list
        list of units of the author (entries taken from author_db["affiliations"]);
        empty when no institution could be determined.
    """
    non_institution_types = ("group", "department", "faculty")
    institution_id = None
    # verifying university: look for the first affiliation that is an
    # institution (not a unit) and that the author really belongs to
    for aff in affiliations:
        aff_db = db["affiliations"].find_one(
            {"_id": aff["id"]}, {"_id": 1, "types": 1})
        if not aff_db:
            continue
        types = [i["type"] for i in aff_db["types"]]
        if any(unit_type in types for unit_type in non_institution_types):
            continue
        count = db["person"].count_documents(
            {"_id": author_db["_id"], "affiliations.id": aff_db["_id"]})
        if count > 0:
            institution_id = aff_db["_id"]
            break

    # Without an institution there is nothing to relate the units to. Querying
    # {"relations.id": None} would also match affiliations with no relations
    # (a null filter matches missing fields), yielding unrelated units.
    if institution_id is None:
        return []

    units = []
    for aff in author_db["affiliations"]:
        if aff["id"] == institution_id:
            continue
        # keep only affiliations that are related to the institution found above
        count = db["affiliations"].count_documents(
            {"_id": aff["id"], "relations.id": institution_id})
        if count > 0:
            types = [i["type"] for i in aff["types"]]
            if "department" in types or "faculty" in types:
                units.append(aff)
    return units


def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, verbose=0):
"""
Method to update a register in the kahi database from minciencias opendata database if it is found.
Expand Down Expand Up @@ -71,10 +118,7 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
group_id = minciencias_author["affiliations"][0]['external_ids'][0]['id']

affiliations_db = db["affiliations"].find_one(
{"external_ids.source": "scienti", "external_ids.id": group_id})
if not affiliations_db:
affiliations_db = db["affiliations"].find_one(
{"external_ids.id": group_id})
{"external_ids.id": group_id})

if affiliations_db:
for i, author in enumerate(colav_reg["authors"]):
Expand Down Expand Up @@ -178,6 +222,12 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
if "education" in types:
if relation["id"] not in affs:
author["affiliations"].append(relation)
aff_units = get_units_affiations(
db, author_db, author["affiliations"])
for aff_unit in aff_units:
if aff_unit not in author["affiliations"]:
author["affiliations"].append(aff_unit)

break

collection.update_one(
Expand Down Expand Up @@ -239,10 +289,8 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
if minciencias_author["affiliations"]:
group_id = minciencias_author["affiliations"][0]['external_ids'][0]['id']
affiliations_db = db["affiliations"].find_one(
{"external_ids.source": "scienti", "external_ids.id": group_id})
if not affiliations_db:
affiliations_db = db["affiliations"].find_one(
{"external_ids.id": group_id})
{"external_ids.id": group_id})
if affiliations_db:
if entry['authors'][0]['external_ids'][0]['id'] == ext['id']:
entry['authors'][0]["affiliations"].append(
{
Expand Down Expand Up @@ -271,8 +319,35 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
group_id = openadata_reg["cod_grupo_gr"]
rgroup = db["affiliations"].find_one({"external_ids.id": group_id})
if rgroup:
entry["groups"].append(
{"id": rgroup["_id"], "name": rgroup["names"][0]["name"]})
found = False
for group in entry["groups"]:
if group["id"] == rgroup["_id"]:
found = True
break
if not found:
entry["groups"].append(
{"id": rgroup["_id"], "name": rgroup["names"][0]["name"]})

# Adding group relation affiliation to the author affiliations
if author_db and rgroup["relations"]:
for author in entry["authors"]:
if author["id"] == author_db["_id"]:
affs = [aff["id"] for aff in author["affiliations"]]
for relation in rgroup["relations"]:
types = []
if "types" in relation.keys() and relation["types"]:
types = [rel["type"].lower()
for rel in relation["types"]]
if "education" in types:
if relation["id"] not in affs:
author["affiliations"].append(relation)
aff_units = get_units_affiations(
db, author_db, author["affiliations"])
for aff_unit in aff_units:
if aff_unit not in author["affiliations"]:
author["affiliations"].append(aff_unit)

break

# insert in mongo
collection.insert_one(entry)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,54 @@
from re import search


def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, verbose=0):
def get_units_affiations(db, author_db, affiliations):
    """
    Method to get the units (faculty and department) of an author in a register.

    The institution is the first entry of `affiliations` that exists in the
    affiliations collection, is not itself a group/department/faculty, and is
    listed among the author's affiliations in the person collection. The units
    are the author's own department/faculty affiliations whose database record
    has a relation to that institution.

    Parameters:
    ----------
    db : pymongo.database.Database
        Database connection to colav database.
    author_db : dict
        record from person
    affiliations : list
        list of affiliations (dicts with an "id" key) already attached to the
        author in the register being processed

    Returns:
    -------
    list
        list of units of the author (entries taken from author_db["affiliations"]);
        empty when no institution could be determined.
    """
    non_institution_types = ("group", "department", "faculty")
    institution_id = None
    # verifying university: look for the first affiliation that is an
    # institution (not a unit) and that the author really belongs to
    for aff in affiliations:
        aff_db = db["affiliations"].find_one(
            {"_id": aff["id"]}, {"_id": 1, "types": 1})
        if not aff_db:
            continue
        types = [i["type"] for i in aff_db["types"]]
        if any(unit_type in types for unit_type in non_institution_types):
            continue
        count = db["person"].count_documents(
            {"_id": author_db["_id"], "affiliations.id": aff_db["_id"]})
        if count > 0:
            institution_id = aff_db["_id"]
            break

    # Without an institution there is nothing to relate the units to. Querying
    # {"relations.id": None} would also match affiliations with no relations
    # (a null filter matches missing fields), yielding unrelated units.
    if institution_id is None:
        return []

    units = []
    for aff in author_db["affiliations"]:
        if aff["id"] == institution_id:
            continue
        # keep only affiliations that are related to the institution found above
        count = db["affiliations"].count_documents(
            {"_id": aff["id"], "relations.id": institution_id})
        if count > 0:
            types = [i["type"] for i in aff["types"]]
            if "department" in types or "faculty" in types:
                units.append(aff)
    return units


def process_one_update(openadata_reg, colav_reg, db, collection, empty_patent, verbose=0):
"""
Method to update a register in the kahi database from minciencias opendata database if it is found.
This means that the register is already on the kahi database and it is being updated with new information.
Expand All @@ -20,13 +67,13 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
Database where the colav collections are stored, used to search for authors and affiliations.
collection : pymongo.collection.Collection
Collection in the database where the register is stored (Collection of patents)
empty_work : dict
empty_patent : dict
Empty dictionary with the structure of a register in the database
verbose : int, optional
Verbosity level. The default is 0.
"""
entry = parse_minciencias_opendata(
openadata_reg, empty_event.copy(), verbose=verbose)
openadata_reg, empty_patent.copy(), verbose=verbose)
# updated
for upd in colav_reg["updated"]:
if upd["source"] == "minciencias":
Expand Down Expand Up @@ -69,10 +116,7 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
group_id = minciencias_author["affiliations"][0]['external_ids'][0]['id']

affiliations_db = db["affiliations"].find_one(
{"external_ids.source": "scienti", "external_ids.id": group_id})
if not affiliations_db:
affiliations_db = db["affiliations"].find_one(
{"external_ids.id": group_id})
{"external_ids.id": group_id})

if affiliations_db:
for i, author in enumerate(colav_reg["authors"]):
Expand Down Expand Up @@ -176,6 +220,12 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
if "education" in types:
if relation["id"] not in affs:
author["affiliations"].append(relation)
aff_units = get_units_affiations(
db, author_db, author["affiliations"])
for aff_unit in aff_units:
if aff_unit not in author["affiliations"]:
author["affiliations"].append(aff_unit)

break

collection.update_one(
Expand All @@ -191,7 +241,7 @@ def process_one_update(openadata_reg, colav_reg, db, collection, empty_event, ve
)


def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, verbose=0):
def process_one_insert(openadata_reg, db, collection, empty_patent, es_handler, verbose=0):
"""
Function to insert a new register in the database if it is not found in the colav(kahi patents) database.
This means that the register is not on the database and it is being inserted.
Expand All @@ -210,15 +260,15 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
Database where the colav collections are stored, used to search for authors and affiliations.
collection : pymongo.collection.Collection
Collection in the database where the register is stored (Collection of patents)
empty_work : dict
empty_patent : dict
Empty dictionary with the structure of a register in the database
es_handler : Similarity
Elasticsearch handler to insert the register in the elasticsearch index, Mohan's Similarity class.
verbose : int, optional
Verbosity level. The default is 0.
"""
# parse
entry = parse_minciencias_opendata(openadata_reg, empty_work.copy())
entry = parse_minciencias_opendata(openadata_reg, empty_patent.copy())
# search authors and affiliations in db
# authors
minciencias_author = ""
Expand All @@ -237,10 +287,8 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
if minciencias_author["affiliations"]:
group_id = minciencias_author["affiliations"][0]['external_ids'][0]['id']
affiliations_db = db["affiliations"].find_one(
{"external_ids.source": "scienti", "external_ids.id": group_id})
if not affiliations_db:
affiliations_db = db["affiliations"].find_one(
{"external_ids.id": group_id})
{"external_ids.id": group_id})
if affiliations_db:
if entry['authors'][0]['external_ids'][0]['id'] == ext['id']:
entry['authors'][0]["affiliations"].append(
{
Expand Down Expand Up @@ -269,8 +317,35 @@ def process_one_insert(openadata_reg, db, collection, empty_work, es_handler, ve
group_id = openadata_reg["cod_grupo_gr"]
rgroup = db["affiliations"].find_one({"external_ids.id": group_id})
if rgroup:
entry["groups"].append(
{"id": rgroup["_id"], "name": rgroup["names"][0]["name"]})
found = False
for group in entry["groups"]:
if group["id"] == rgroup["_id"]:
found = True
break
if not found:
entry["groups"].append(
{"id": rgroup["_id"], "name": rgroup["names"][0]["name"]})

# Adding group relation affiliation to the author affiliations
if author_db and rgroup["relations"]:
for author in entry["authors"]:
if author["id"] == author_db["_id"]:
affs = [aff["id"] for aff in author["affiliations"]]
for relation in rgroup["relations"]:
types = []
if "types" in relation.keys() and relation["types"]:
types = [rel["type"].lower()
for rel in relation["types"]]
if "education" in types:
if relation["id"] not in affs:
author["affiliations"].append(relation)
aff_units = get_units_affiations(
db, author_db, author["affiliations"])
for aff_unit in aff_units:
if aff_unit not in author["affiliations"]:
author["affiliations"].append(aff_unit)

break

# insert in mongo
collection.insert_one(entry)
Expand Down
Loading
Loading