Skip to content

Commit

Permalink
Allowing for more data incompleteness
Browse files Browse the repository at this point in the history
  • Loading branch information
Adafede committed Oct 31, 2023
1 parent ce9c76e commit bc43efd
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
3 changes: 2 additions & 1 deletion update/download_couples_referenced.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ def run(root: Path, retry: int = 3) -> None:
query = """
SELECT DISTINCT ?compound ?taxon ?reference WHERE {
?compound p:P703 ?statement;
wdt:P233 ?canonical_smiles.
# We require an InChIKey (P235) instead of a canonical SMILES (P233), because SMILES data is incomplete for some compounds.
wdt:P235 [].
?statement ps:P703 ?taxon;
(prov:wasDerivedFrom/pr:P248) ?reference.
}
Expand Down
10 changes: 9 additions & 1 deletion update/download_smiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,15 @@ def run(root: Path, retry: int = 3) -> None:
query = """
SELECT DISTINCT ?structure ?structure_smiles ?canonical_smiles WHERE {
?structure wdt:P703 ?taxon;
wdt:P233 ?canonical_smiles.
# Using the presence of an InChIKey (P235) to recognize chemicals.
# Alternatively, chemicals could be recognized by one of:
# P31 wd:Q113145171 `type of a chemical entity`
# P31 wd:Q59199015 `group of stereoisomers`
wdt:P235 [].
# Every structure with an isomeric SMILES (P2017) should also have a canonical SMILES (P233), but some entries are incomplete, so P233 is fetched as OPTIONAL.
OPTIONAL {
?structure wdt:P233 ?canonical_smiles.
}
OPTIONAL {
?structure wdt:P2017 ?structure_smiles.
}
Expand Down

0 comments on commit bc43efd

Please sign in to comment.