From 5a452b85deeff113994cdd2f79fc9d789ab5b21e Mon Sep 17 00:00:00 2001 From: Jonathan Bisson Date: Tue, 12 Dec 2023 21:56:19 -0600 Subject: [PATCH] Add a docker-compose and start fixing the API --- Dockerfile.local | 12 ++++++++++++ README.md | 11 +++++++++++ api.py | 42 +++++++++++++++++++++++----------------- doc/example_queries.http | 9 ++++++++- docker-compose.yml | 24 +++++++++++++++++++++++ frontend/Dockerfile | 14 ++++++++++++++ frontend/vite.config.ts | 3 ++- model.py | 4 ++-- 8 files changed, 97 insertions(+), 22 deletions(-) create mode 100644 Dockerfile.local create mode 100644 docker-compose.yml create mode 100644 frontend/Dockerfile diff --git a/Dockerfile.local b/Dockerfile.local new file mode 100644 index 0000000..12b70d4 --- /dev/null +++ b/Dockerfile.local @@ -0,0 +1,12 @@ +FROM docker.io/library/python:3.11-slim +RUN apt update && apt install -y libxrender1 libxtst6 libxi6 +RUN pip install poetry +COPY poetry.lock pyproject.toml ./ +RUN poetry config virtualenvs.create false \ + && poetry install --no-interaction --no-ansi +RUN mkdir /app /app/data +RUN adduser --system --no-create-home nonroot +RUN chown -R nonroot /app/data +USER nonroot +WORKDIR /app +CMD [ "uvicorn", "api:app", "--reload", "--host", "0.0.0.0", "--port", "5000"] diff --git a/README.md b/README.md index c159b1c..c117acf 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,17 @@ The dataset is from the [LOTUS](https://lotus.nprod.net) initiative and [Wikidat ## Install & use +### The docker compose way (recommended) + +```shell +docker-compose run -it backend python update.py +docker-compose up +``` + +The web server is then available on http://localhost:3000 and the API on http://localhost:5000.
+ +### The manual way + To run it yourself, the source is available at: : - Install dependencies using poetry diff --git a/api.py b/api.py index b5c0e87..bf8edb2 100644 --- a/api.py +++ b/api.py @@ -36,12 +36,12 @@ def get_matching_structures_from_structure_in_item( dm: DataModel, item: Item ) -> set[int]: """Returns all_structures if the item do not filter by structure, else returns the WID of matching structures""" - if item.molecule is None and item.structure_wid is None: + if item.structure is None and item.structure_wid is None: return all_structures - elif item.molecule and item.structure_wid: + elif item.structure and item.structure_wid: raise HTTPException( status_code=500, - detail=f"You cannot provide both 'molecule' and 'structure_wid'", + detail=f"You cannot provide both 'structure' and 'structure_wid'", ) else: # This needs to be explained in the API doc @@ -49,19 +49,19 @@ def get_matching_structures_from_structure_in_item( if item.structure_wid in all_structures: return {item.structure_wid} else: - if item.molecule: + if item.structure: if item.substructure_search: try: - results = dm.structure_search_substructure(item.molecule) + results = dm.structure_search_substructure(item.structure) structures = {_id for _id, _ in results} except ValueError: raise HTTPException( status_code=500, - detail=f"The structure given is invalid: {item.molecule}", + detail=f"The structure given is invalid: {item.structure}", ) else: try: - results = dm.structure_search(item.molecule) + results = dm.structure_search(item.structure) structures = { _id for _id, score in results @@ -70,7 +70,7 @@ def get_matching_structures_from_structure_in_item( except ValueError: raise HTTPException( status_code=500, - detail=f"The structure given is invalid: {item.molecule}", + detail=f"The structure given is invalid: {item.structure}", ) else: structures = all_structures @@ -78,10 +78,10 @@ def get_matching_structures_from_structure_in_item( return structures -def 
get_matching_taxa_from_taxon_in_item(dm: DataModel, item: Item) -> set[int]: - """Returns all_taxa if the item do not filter by taxon, else returns the WID of matching taxa""" +def get_matching_taxa_from_taxon_in_item(dm: DataModel, item: Item) -> set[int] | None: + """Returns all_taxa if the item do not filter by taxon, else returns the WID of matching taxa or None if no taxa requested""" if item.taxon_wid is None and item.taxon_name is None: - return all_taxa + return None else: # This needs to be explained in the API doc if item.taxon_wid: @@ -102,6 +102,9 @@ def get_matching_structures_from_taxon_in_item(dm: DataModel, item: Item) -> set # We need to get all the matching taxa taxa = get_matching_taxa_from_taxon_in_item(dm, item) + if taxa is None: + return None + # We could have a parameter "recursive" in the query to have all the structures from the parents too out = set() for taxon in taxa: @@ -164,13 +167,18 @@ async def search_structures(item: Item) -> StructureResult: matching_structures_by_structure = get_matching_structures_from_structure_in_item( dm, item ) - # We want the set of all the molecules found in the given taxa + # We want the set of all the structures found in the given taxa matching_structures_by_taxon = get_matching_structures_from_taxon_in_item(dm, item) # We want the intersection of both (and we can do the same for the references later) - matching_structures = ( - matching_structures_by_structure & matching_structures_by_taxon - ) + # But if one of the sets is fully empty + if matching_structures_by_taxon is None: + matching_structures = matching_structures_by_structure + else: + matching_structures = ( + matching_structures_by_structure & matching_structures_by_taxon + ) + return StructureResult( ids=matching_structures, structures={ @@ -188,13 +196,11 @@ async def search_taxa(item: Item) -> TaxonResult: # We want the set of all the taxa matching the query matching_taxa_by_taxon = get_matching_taxa_from_taxon_in_item(dm, item) - # We 
want the set of all the taxa which have molecules matching the query + # We want the set of all the taxa which have structures matching the query matching_taxa_by_structure = get_matching_taxa_from_structure_in_item(dm, item) # We want the intersection of both (and we can do the same for the references later) matching_taxa = matching_taxa_by_taxon & matching_taxa_by_structure - print(matching_taxa) - print(dm.get_dict_of_wid_to_taxon_name(matching_taxa)) return TaxonResult( ids=matching_taxa, diff --git a/doc/example_queries.http b/doc/example_queries.http index 259a3de..6d4f15f 100644 --- a/doc/example_queries.http +++ b/doc/example_queries.http @@ -9,7 +9,14 @@ accept: application/json Content-Type: application/json { - "molecule": "CC1C=C(C(=O)C2(C1CC3C4(C2C(=O)C(=C(C4CC(=O)O3)C)OC)C)C)OC" + "structure": "CC1C=C(C(=O)C2(C1CC3C4(C2C(=O)C(=C(C4CC(=O)O3)C)OC)C)C)OC" +} +### +POST http://127.0.0.1:5000/v1_0/structures +accept: application/json +Content-Type: application/json + +{ } ### diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..72f4ba8 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,24 @@ +# A docker-compose file for a two services system. One is the backend, with the Dockerfile in this directory +# the other one is inside frontend. + +version: '3.8' +services: + backend: + build: + context: . + dockerfile: Dockerfile.local + ports: + - "5000:5000" + volumes: + - .:/app + - backend_data:/app/data + frontend: + build: frontend/ + ports: + - "3000:3000" + volumes: + - ./frontend/src:/app/src + depends_on: + - backend +volumes: + backend_data: diff --git a/frontend/Dockerfile b/frontend/Dockerfile new file mode 100644 index 0000000..a3affca --- /dev/null +++ b/frontend/Dockerfile @@ -0,0 +1,14 @@ +FROM node:21.4 +ENV PNPM_HOME="/pnpm" +ENV PATH="$PNPM_HOME:$PATH" +RUN corepack enable +WORKDIR /app +COPY package.json . +RUN pnpm install +COPY tsconfig.json . +COPY tsconfig.node.json . +COPY vite.config.ts . 
+COPY package.json . +COPY pnpm-lock.yaml . +COPY index.html . +CMD ["pnpm", "run", "dev"] diff --git a/frontend/vite.config.ts b/frontend/vite.config.ts index d8e1272..5a44638 100644 --- a/frontend/vite.config.ts +++ b/frontend/vite.config.ts @@ -34,9 +34,10 @@ export default defineConfig({ }, server: { port: 3000, + host: "0.0.0.0", proxy: { '/api': { - target: 'http://localhost:5000', + target: 'http://backend:5000', changeOrigin: true, rewrite: (path) => path.replace(/^\/api/, ''), } diff --git a/model.py b/model.py index f33aca1..7b166af 100644 --- a/model.py +++ b/model.py @@ -17,7 +17,7 @@ class Item(BaseModel): structure_wid: int | None = None - molecule: str | None = None + structure: str | None = None substructure_search: bool | None = None similarity_level: float = 1.0 taxon_wid: int | None = None @@ -27,7 +27,7 @@ class Item(BaseModel): "examples": [ { "structure_wid": "27151406", - "molecule": "C=C[C@@H]1[C@@H]2CCOC(=O)C2=CO[C@H]1O[C@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CO)O)O)O", + "structure": "C=C[C@@H]1[C@@H]2CCOC(=O)C2=CO[C@H]1O[C@H]3[C@@H]([C@H]([C@@H]([C@H](O3)CO)O)O)O", "substructure_search": True, "similarity_level": 0.8, "taxon_wid": 158572,