Skip to content

Commit

Permalink
Starting to fix references
Browse files Browse the repository at this point in the history
  • Loading branch information
Adafede committed Dec 19, 2023
1 parent 5c0eb59 commit 776c254
Show file tree
Hide file tree
Showing 3 changed files with 94 additions and 32 deletions.
22 changes: 13 additions & 9 deletions api/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,11 @@

from api.models import (CoupleResult, Item, ReferenceInfo, ReferenceResult,
StructureInfo, StructureResult, TaxonInfo, TaxonResult)
from api.queries import (get_matching_references_from_reference_in_item,
from api.queries import (
# get_matching_references_from_couple_in_item,
get_matching_references_from_reference_in_item,
get_matching_references_from_structure_in_item,
get_matching_references_from_taxon_in_item,
get_matching_structures_from_reference_in_item,
get_matching_structures_from_structure_in_item,
get_matching_structures_from_taxon_in_item,
Expand Down Expand Up @@ -183,9 +187,9 @@ async def search_references(item: Item) -> ReferenceResult:
)

# We want the set of all the references which have couples matching the query
matching_references_by_couple = get_matching_references_from_couple_in_item(
dm, item
)
# matching_references_by_couple = get_matching_references_from_couple_in_item(
# dm, item
# )

# We want the set of all the references which have structures matching the query
matching_references_by_structure = get_matching_references_from_structure_in_item(
Expand All @@ -200,18 +204,18 @@ async def search_references(item: Item) -> ReferenceResult:
matching_references_by_structure
& matching_references_by_taxon
& matching_references_by_reference
& matching_references_by_couple
# & matching_references_by_couple
if matching_references_by_structure
and matching_references_by_taxon
and matching_references_by_reference
and matching_references_by_couple
# and matching_references_by_couple
else matching_references_by_structure
or matching_references_by_taxon
or matching_references_by_reference
or matching_references_by_couple
# or matching_references_by_couple
)

items = list(dm.get_dict_of_sid_to_smiles(matching_structures).items())
items = list(dm.get_dict_of_rid_to_ref_doi(matching_references).items())

if item.limit == 0:
items = items
Expand All @@ -220,7 +224,7 @@ async def search_references(item: Item) -> ReferenceResult:

return ReferenceResult(
ids=matching_references,
references={rid: ReferenceInfo(name=value) for rid, value in items},
references={rid: ReferenceInfo(doi=value) for rid, value in items},
description="References matching the query",
count=len(matching_references),
)
Expand Down
96 changes: 76 additions & 20 deletions api/queries.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,28 @@
)


def get_matching_references_from_reference_in_item(
    dm: DataModel, item: Item
) -> set[int] | None:
    """Return the WIDs of the references selected by the query item.

    Args:
        dm: Data model; ``get_references()`` and
            ``get_references_with_doi()`` are the only members used here.
        item: Query item; only ``reference_wid`` and ``reference_doi``
            are read.

    Returns:
        ``None`` when the item carries no reference criterion at all (both
        fields are ``None``); otherwise a set of reference WIDs. A truthy
        ``reference_wid`` takes precedence over ``reference_doi``; a WID
        that the data model does not know yields an empty set.
    """
    if item.reference_wid is None and item.reference_doi is None:
        return None

    # This needs to be explained in the API doc
    if item.reference_wid:
        if item.reference_wid in dm.get_references():
            return {item.reference_wid}
        # An unknown WID matches nothing. Without this explicit return the
        # function would reach its final return with no result bound.
        return set()

    if item.reference_doi:
        found = dm.get_references_with_doi(item.reference_doi)
        # Check for None before wrapping: set(None) raises, and the result
        # of set(...) itself can never be None.
        if found is not None:
            return set(found)

    # Fallback: every known reference. Wrapping in set() honours the
    # declared return type.
    # NOTE(review): assumes get_references() is a mapping keyed by WID (or
    # another iterable of WIDs) — confirm against the data model.
    return set(dm.get_references())


def get_matching_structures_from_structure_in_item(
dm: DataModel, item: Item
) -> set[int]:
Expand Down Expand Up @@ -75,39 +97,58 @@ def get_matching_taxa_from_taxon_in_item(dm: DataModel, item: Item) -> set[int]
return taxa


def get_matching_references_from_reference_in_item(
# TODO WIP
# def get_matching_references_from_couple_in_item(dm: DataModel, item: Item) -> set[int]:
# # We need to get all the matching taxa
# taxa = get_matching_taxa_from_taxon_in_item(dm, item)
# # We need to get all the matching structures
# structures = get_matching_structures_from_structure_in_item(dm, item)

# if taxa is None:
# return None

# if structures is None:
# return None

# tax = set()
# for taxon in taxa:
# tax.update(dm.get_structures_of_taxon(taxon))

# stru = set()
# for structure in structures:
# stru.update(dm.get_references_containing_structure(structure))

# # TODO get couples and intersect

# return out


def get_matching_references_from_structure_in_item(
dm: DataModel, item: Item
) -> set[int] | None:
"""Returns the WID of matching references."""
if item.reference_wid is None and item.reference_doi is None:
) -> set[int]:
# We need to get all the matching structures
structures = get_matching_structures_from_structure_in_item(dm, item)

if structures is None:
return None
else:
# This needs to be explained in the API doc
if item.reference_wid:
if item.reference_wid in dm.get_refs():
return {item.reference_wid}
else:
if item.reference_doi:
references = set(dm.get_references_with_doi(item.reference_doi))
if references is None:
references = dm.get_refs()
else:
references = dm.get_refs()

return references
out = set()
for structure in structures:
out.update(dm.get_references_containing_structure(structure))

return out


def get_matching_structures_from_taxon_in_item(dm: DataModel, item: Item) -> set[int]:
def get_matching_references_from_taxon_in_item(dm: DataModel, item: Item) -> set[int]:
# We need to get all the matching taxa
taxa = get_matching_taxa_from_taxon_in_item(dm, item)

if taxa is None:
return None

# Set recursive=True to have all the structures from the parents too
out = set()
for taxon in taxa:
out.update(dm.get_structures_of_taxon(taxon))
out.update(dm.get_references_containing_taxon(taxon))

return out

Expand All @@ -128,6 +169,21 @@ def get_matching_structures_from_reference_in_item(
return out


def get_matching_structures_from_taxon_in_item(
    dm: DataModel, item: Item
) -> set[int] | None:
    """Return the structures attached to every taxon matched by the item.

    Args:
        dm: Data model; ``get_structures_of_taxon()`` is called once per
            matching taxon.
        item: Query item, forwarded unchanged to
            ``get_matching_taxa_from_taxon_in_item``.

    Returns:
        ``None`` when the taxon lookup itself returns ``None``; otherwise
        the union of the structures reported for each matching taxon
        (an empty set when no taxon matched).
    """
    # We need to get all the matching taxa
    taxa = get_matching_taxa_from_taxon_in_item(dm, item)
    if taxa is None:
        return None

    # NOTE(review): the original comment said "Set recursive=True to have
    # all the structures from the parents too", but no such argument is
    # passed below — confirm whether parent taxa should be included.
    out: set[int] = set()
    for taxon in taxa:
        out.update(dm.get_structures_of_taxon(taxon))

    return out


def get_matching_taxa_from_structure_in_item(dm: DataModel, item: Item) -> set[int]:
# We need to get all the matching structures
structures = get_matching_structures_from_structure_in_item(dm, item)
Expand Down
8 changes: 5 additions & 3 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def structure_get_tsv_from_scores(self, sids: list[int], scores) -> str:

### Biblionomy
@functools.lru_cache(maxsize=None)
def get_refs(self) -> dict[int, str]:
def get_references(self) -> dict[int, str]:
return self.db["reference_doi"]

def get_ref_doi_from_list_of_rids(self, rids: list[int]) -> list[str]:
Expand Down Expand Up @@ -334,6 +334,7 @@ def get_number_of_references_containing_couple(self, sid: int, tid: int) -> int:
return len(self.db["tc2r"][key])
return 0

# TODO not working, fix
def get_references_containing_structure(self, sid: int) -> list[int]:
references = [
rid
Expand All @@ -352,7 +353,8 @@ def get_number_of_references_containing_structure(self, sid: int) -> int:
]
return len(references)

def get_references_containing_taxa(self, tid: int) -> list[int]:
# TODO not working, fix
def get_references_containing_taxon(self, tid: int) -> list[int]:
references = [
rid
for (taxon_id, sid), rids in self.db["tc2r"].items()
Expand All @@ -361,7 +363,7 @@ def get_references_containing_taxa(self, tid: int) -> list[int]:
]
return references

def get_number_of_references_containing_taxa(self, tid: int) -> int:
def get_number_of_references_containing_taxon(self, tid: int) -> int:
references = [
rid
for (taxon_id, sid), rids in self.db["tc2r"].items()
Expand Down

0 comments on commit 776c254

Please sign in to comment.