Skip to content

Commit

Permalink
Add a docker-compose and start fixing the API
Browse files Browse the repository at this point in the history
  • Loading branch information
bjonnh committed Dec 13, 2023
1 parent 70c4dd1 commit 5a452b8
Show file tree
Hide file tree
Showing 8 changed files with 97 additions and 22 deletions.
12 changes: 12 additions & 0 deletions Dockerfile.local
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Development image for the backend API (served by uvicorn on port 5000).
# The application code itself is not copied into the image.
# NOTE(review): it is presumably bind-mounted at /app at run time (the compose
# file mounts `.:/app`) - confirm before using this image standalone.
FROM docker.io/library/python:3.11-slim

# X11 client libraries.
# NOTE(review): presumably needed by a Python dependency that renders
# structures - confirm which package requires them.
# apt-get is used instead of apt because apt's command-line interface is not
# stable for scripted use; the package lists are removed in the same layer so
# they do not bloat the image.
RUN apt-get update \
    && apt-get install -y libxrender1 libxtst6 libxi6 \
    && rm -rf /var/lib/apt/lists/*

RUN pip install poetry

# No WORKDIR has been set yet, so the manifests land in the image root.
COPY poetry.lock pyproject.toml ./
# Install the dependencies into the image's interpreter rather than into a
# poetry-managed virtualenv.
RUN poetry config virtualenvs.create false \
    && poetry install --no-interaction --no-ansi

# Run as an unprivileged user; only /app/data is made writable for it.
RUN mkdir /app /app/data
RUN adduser --system --no-create-home nonroot
RUN chown -R nonroot /app/data
USER nonroot
WORKDIR /app
# --reload restarts the server when the code under /app changes.
CMD [ "uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5000"]
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,17 @@ The dataset is from the [LOTUS](https://lotus.nprod.net) initiative and [Wikidat

## Install & use

### The docker compose way (recommended)

```shell
docker-compose run -it backend python update.py
docker-compose up
```

The web server is then available on <http://localhost:3000> and the API on <http://localhost:5000>.

### The manual way

To run it yourself, the source is available at: <https://github.com/lotusnprod/lotus-search>:

- Install dependencies using poetry
Expand Down
42 changes: 24 additions & 18 deletions api.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,32 +36,32 @@ def get_matching_structures_from_structure_in_item(
dm: DataModel, item: Item
) -> set[int]:
"""Returns all_structures if the item do not filter by structure, else returns the WID of matching structures"""
if item.molecule is None and item.structure_wid is None:
if item.structure is None and item.structure_wid is None:
return all_structures
elif item.molecule and item.structure_wid:
elif item.structure and item.structure_wid:
raise HTTPException(
status_code=500,
detail=f"You cannot provide both 'molecule' and 'structure_wid'",
detail=f"You cannot provide both 'structure' and 'structure_wid'",
)
else:
# This needs to be explained in the API doc
if item.structure_wid:
if item.structure_wid in all_structures:
return {item.structure_wid}
else:
if item.molecule:
if item.structure:
if item.substructure_search:
try:
results = dm.structure_search_substructure(item.molecule)
results = dm.structure_search_substructure(item.structure)
structures = {_id for _id, _ in results}
except ValueError:
raise HTTPException(
status_code=500,
detail=f"The structure given is invalid: {item.molecule}",
detail=f"The structure given is invalid: {item.structure}",
)
else:
try:
results = dm.structure_search(item.molecule)
results = dm.structure_search(item.structure)
structures = {
_id
for _id, score in results
Expand All @@ -70,18 +70,18 @@ def get_matching_structures_from_structure_in_item(
except ValueError:
raise HTTPException(
status_code=500,
detail=f"The structure given is invalid: {item.molecule}",
detail=f"The structure given is invalid: {item.structure}",
)
else:
structures = all_structures

return structures


def get_matching_taxa_from_taxon_in_item(dm: DataModel, item: Item) -> set[int]:
"""Returns all_taxa if the item do not filter by taxon, else returns the WID of matching taxa"""
def get_matching_taxa_from_taxon_in_item(dm: DataModel, item: Item) -> set[int] | None:
"""Returns all_taxa if the item do not filter by taxon, else returns the WID of matching taxa or None if no taxa requested"""
if item.taxon_wid is None and item.taxon_name is None:
return all_taxa
return None
else:
# This needs to be explained in the API doc
if item.taxon_wid:
Expand All @@ -102,6 +102,9 @@ def get_matching_structures_from_taxon_in_item(dm: DataModel, item: Item) -> set
# We need to get all the matching taxa
taxa = get_matching_taxa_from_taxon_in_item(dm, item)

if taxa is None:
return None

# We could have a parameter "recursive" in the query to have all the structures from the parents too
out = set()
for taxon in taxa:
Expand Down Expand Up @@ -164,13 +167,18 @@ async def search_structures(item: Item) -> StructureResult:
matching_structures_by_structure = get_matching_structures_from_structure_in_item(
dm, item
)
# We want the set of all the molecules found in the given taxa
# We want the set of all the structures found in the given taxa
matching_structures_by_taxon = get_matching_structures_from_taxon_in_item(dm, item)

# We want the intersection of both (and we can do the same for the references later)
matching_structures = (
matching_structures_by_structure & matching_structures_by_taxon
)
# But if one of the sets is fully empty
if matching_structures_by_taxon is None:
matching_structures = matching_structures_by_structure
else:
matching_structures = (
matching_structures_by_structure & matching_structures_by_taxon
)

return StructureResult(
ids=matching_structures,
structures={
Expand All @@ -188,13 +196,11 @@ async def search_taxa(item: Item) -> TaxonResult:
# We want the set of all the taxa matching the query
matching_taxa_by_taxon = get_matching_taxa_from_taxon_in_item(dm, item)

# We want the set of all the taxa which have molecules matching the query
# We want the set of all the taxa which have structures matching the query
matching_taxa_by_structure = get_matching_taxa_from_structure_in_item(dm, item)

# We want the intersection of both (and we can do the same for the references later)
matching_taxa = matching_taxa_by_taxon & matching_taxa_by_structure
print(matching_taxa)
print(dm.get_dict_of_wid_to_taxon_name(matching_taxa))

return TaxonResult(
ids=matching_taxa,
Expand Down
9 changes: 8 additions & 1 deletion doc/example_queries.http
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,14 @@ accept: application/json
Content-Type: application/json

{
"molecule": "CC1C=C(C(=O)C2(C1CC3C4(C2C(=O)C(=C(C4CC(=O)O3)C)OC)C)C)OC"
"structure": "CC1C=C(C(=O)C2(C1CC3C4(C2C(=O)C(=C(C4CC(=O)O3)C)OC)C)C)OC"
}
###
POST http://127.0.0.1:5000/v1_0/structures
accept: application/json
Content-Type: application/json

{
}

###
Expand Down
24 changes: 24 additions & 0 deletions docker-compose.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Docker Compose file for a two-service system: the backend, built from
# Dockerfile.local in this directory, and the frontend, built from frontend/.

version: '3.8'
services:
  backend:
    build:
      context: .
      dockerfile: Dockerfile.local
    ports:
      - "5000:5000"
    volumes:
      # Bind-mount the repository so code changes are visible in the container.
      - .:/app
      # Named volume mounted over /app/data.
      # NOTE(review): presumably so generated data persists between runs and
      # stays out of the working tree - confirm.
      - backend_data:/app/data
  frontend:
    build: frontend/
    ports:
      - "3000:3000"
    volumes:
      - ./frontend/src:/app/src
    depends_on:
      - backend
volumes:
  backend_data:
14 changes: 14 additions & 0 deletions frontend/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Development image for the frontend; starts the project's `dev` script
# through pnpm.
FROM node:21.4
ENV PNPM_HOME="/pnpm"
ENV PATH="$PNPM_HOME:$PATH"
# corepack ships with Node and provides the pnpm shim.
RUN corepack enable
WORKDIR /app
# Copy the manifest and the lockfile together *before* installing, so pnpm
# resolves the locked versions and this layer is only rebuilt when one of
# the two files changes.
COPY package.json pnpm-lock.yaml ./
RUN pnpm install
COPY tsconfig.json .
COPY tsconfig.node.json .
COPY vite.config.ts .
COPY index.html .
# NOTE(review): src/ is not copied into the image; it is presumably mounted
# at /app/src at run time - confirm against the compose file.
CMD ["pnpm", "run", "dev"]
3 changes: 2 additions & 1 deletion frontend/vite.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,10 @@ export default defineConfig({
},
server: {
port: 3000,
host: "0.0.0.0",
proxy: {
'/api': {
target: 'http://localhost:5000',
target: 'http://backend:5000',
changeOrigin: true,
rewrite: (path) => path.replace(/^\/api/, ''),
}
Expand Down
4 changes: 2 additions & 2 deletions model.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

class Item(BaseModel):
structure_wid: int | None = None
molecule: str | None = None
structure: str | None = None
substructure_search: bool | None = None
similarity_level: float = 1.0
taxon_wid: int | None = None
Expand All @@ -27,7 +27,7 @@ class Item(BaseModel):
"examples": [
{
"structure_wid": "27151406",
"molecule": "C=C[C@@H]1[C@@H]2CCOC(=O)C2=CO[C@H]1O[C@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CO)O)O)O",
"structure": "C=C[C@@H]1[C@@H]2CCOC(=O)C2=CO[C@H]1O[C@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CO)O)O)O",
"substructure_search": True,
"similarity_level": 0.8,
"taxon_wid": 158572,
Expand Down

0 comments on commit 5a452b8

Please sign in to comment.