Merge pull request #20 from colbyford/main
A Dockerfile to run EvoDiff locally in a container (with GPU support)
sarahalamdari authored Aug 9, 2024
2 parents 683b08d + aa4a7de commit 2378099
Showing 3 changed files with 69 additions and 4 deletions.
34 changes: 34 additions & 0 deletions Dockerfile
@@ -0,0 +1,34 @@
FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime

LABEL author="Colby T. Ford <colby@tuple.xyz>"

## Environment Settings
ENV DEBIAN_FRONTEND=noninteractive

## Install Basic Dependencies
RUN apt-get clean && \
apt-get update && \
apt-get -y install \
sudo \
git \
curl \
wget \
g++

## Pip Install EvoDiff (and other dependencies)
RUN pip install evodiff \
torch_geometric \
torch_scatter

## Clone EvoDiff GitHub Repo (to get scripts)
RUN git clone https://github.com/microsoft/evodiff

## Set Working Directory
WORKDIR /workspace/evodiff

## Get UniRef50 Data
RUN cd data && \
# wget https://evodiff.blob.core.windows.net/evodiff-data/uniref50.tar.gz && \
wget -O uniref50.tar.gz https://zenodo.org/records/6564798/files/uniref50.tar.gz?download=1 && \
tar -xzf uniref50.tar.gz && \
rm uniref50.tar.gz
33 changes: 32 additions & 1 deletion README.md
@@ -38,6 +38,7 @@ EvoDiff is described in this [preprint](https://www.biorxiv.org/content/10.1101/
- [Scaffolding functional motifs](#scaffolding-functional-motifs)
- [Analysis](#analysis-of-generations)
- [Downloading generated sequences](#downloading-generated-sequences)
- [Docker](#docker)

----

@@ -326,6 +327,36 @@ The CSV files containing generated data are organized as follows:
* `rmsd`: motifRMSD between predicted motif coordinates and crystal motif coordinates
* `model`: model type used for generations
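
To inspect these files programmatically, something like the following works (a quick sketch; the CSV filename here is hypothetical, so substitute the file produced by your own run):

```py
import pandas as pd

# Hypothetical filename; use the CSV written by your generation/analysis run
df = pd.read_csv("generated_samples.csv")

# Summarize motif RMSD per model type using the columns described above
print(df.groupby("model")["rmsd"].describe())
```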


## Docker

The Docker image for EvoDiff is hosted on Docker Hub at [https://hub.docker.com/r/cford38/evodiff](https://hub.docker.com/r/cford38/evodiff).

```sh
docker pull cford38/evodiff:latest
```

Alternatively, you can build the Docker image locally.

```sh
## Build Docker Image
docker build -t evodiff .
```

Then, run the Docker image locally with the following command.

```sh
## Run Docker Image (Bash Console)
docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --name evodiff --rm -it evodiff /bin/bash
```
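
Once inside the container, a quick sanity check (assuming the NVIDIA drivers and container toolkit are set up on the host) confirms that PyTorch can see the GPU:

```py
import torch

# Should print True if the --gpus flag took effect
print(torch.cuda.is_available())

# Name of the first visible device, e.g. your GPU model
print(torch.cuda.get_device_name(0))
```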

__Note:__ You may need to set your default Torch device to `cuda` in the Docker container so that EvoDiff executes on the GPU.

```py
import torch
torch.set_default_device('cuda:0')
```
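
For example, a minimal GPU generation run inside the container might look like this (a sketch based on the example notebook, assuming the `OA_DM_38M` checkpoint loader from `evodiff.pretrained`):

```py
import torch
from evodiff.pretrained import OA_DM_38M
from evodiff.generate import generate_oaardm

torch.set_default_device('cuda:0')

# Load the 38M-parameter order-agnostic autoregressive diffusion model
model, collater, tokenizer, scheme = OA_DM_38M()
model.to('cuda')

# Unconditionally generate one sequence of length 100 on the GPU
seq_len = 100
tokenized_sample, generated_sequence = generate_oaardm(
    model, tokenizer, seq_len, batch_size=1, device='cuda'
)
print("Generated sequence:", generated_sequence)
```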

## Contributing

This project welcomes contributions and suggestions. Most contributions require you to agree to a
@@ -346,4 +377,4 @@ This project may contain trademarks or logos for projects, products, or services. Authorized use of Microsoft
trademarks or logos is subject to and must follow
[Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general).
Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship.
-Any use of third party trademarks or logos is subject to those third-party's policies.
+Any use of third party trademarks or logos is subject to those third-party's policies.
6 changes: 3 additions & 3 deletions examples/evodiff.ipynb
@@ -179,7 +179,7 @@
"from evodiff.generate import generate_oaardm\n",

"seq_len = 100\n",
-"tokeinzed_sample, generated_sequence = generate_oaardm(model, tokenizer, seq_len, batch_size=1, device='cpu')\n",
+"tokenized_sample, generated_sequence = generate_oaardm(model, tokenizer, seq_len, batch_size=1, device='cpu')\n",
"print(\"Generated sequence:\", generated_sequence)"
]
},
@@ -275,7 +275,7 @@
"\n",
"seq_len = 100 \n",
"\n",
-"tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu')"
+"tokenized_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu')"
]
},
{
@@ -373,7 +373,7 @@
"selection_type='random' # or 'MaxHamming'; MSA subsampling scheme\n",
"\n",
"\n",
-"tokeinzed_sample, generated_sequence = generate_query_oadm_msa_simple(path_to_msa, model, tokenizer, n_sequences, seq_length, device='cpu', selection_type=selection_type)\n",
+"tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(path_to_msa, model, tokenizer, n_sequences, seq_length, device='cpu', selection_type=selection_type)\n",
" \n",
"\n",
"print(\"New sequence (no gaps, pad tokens)\", re.sub('[!-]', '', generated_sequence[0][0],))"
