diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..69ec9a5 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,34 @@ +FROM pytorch/pytorch:2.0.1-cuda11.7-cudnn8-runtime + +LABEL author="Colby T. Ford " + +## Environment Settings +ENV DEBIAN_FRONTEND=noninteractive + +## Install Basic Dependencies +RUN apt-get clean && \ + apt-get update && \ + apt-get -y install \ + sudo \ + git \ + curl \ + wget \ + g++ + +## Pip Install EvoDiff (and other dependencies) +RUN pip install evodiff \ + torch_geometric \ + torch_scatter + +## Clone EvoDiff GitHub Repo (to get scripts) +RUN git clone https://github.com/microsoft/evodiff + +## Set Working Directory +WORKDIR /workspace/evodiff + +## Get UniRef50 Data +RUN cd data && \ + # wget https://evodiff.blob.core.windows.net/evodiff-data/uniref50.tar.gz && \ + wget -O uniref50.tar.gz https://zenodo.org/records/6564798/files/uniref50.tar.gz?download=1 && \ + tar -xzf uniref50.tar.gz && \ + rm uniref50.tar.gz diff --git a/README.md b/README.md index 882f6c2..078baa2 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ EvoDiff is described in this [preprint](https://www.biorxiv.org/content/10.1101/ - [Scaffolding functional motifs](#scaffolding-functional-motifs) - [Analysis](#analysis-of-generations) - [Downloading generated sequences](#downloading-generated-sequences) +- [Docker](#docker) ---- @@ -326,6 +327,36 @@ The CSV files containing generated data are organized as follows: * `rmsd`: motifRMSD between predicted motif coordinates and crystal motif coordinates * `model`: model type used for generations + +## Docker + +The Docker image for EvoDiff is hosted on DockerHub at [https://hub.docker.com/r/cford38/evodiff](https://hub.docker.com/r/cford38/evodiff). + +```sh +docker pull cford38/evodiff:latest +``` + +Alternatively, you can build the Docker image locally. + +```sh +## Build Docker Image +docker build -t evodiff . +``` + +Then, run the Docker image locally with the following command. 
+ +```sh +## Run Docker Image (Bash Console) +docker run --gpus all --ipc=host --ulimit memlock=-1 --ulimit stack=67108864 --name evodiff --rm -it evodiff /bin/bash +``` + +__Note:__ You may need to set your default Torch device to `cuda` in the Docker container so that EvoDiff executes on the GPU. + +```py +import torch +torch.set_default_device('cuda:0') +``` + ## Contributing  This project welcomes contributions and suggestions. Most contributions require you to agree to a @@ -346,4 +377,4 @@ This project may contain trademarks or logos for projects, products, or services trademarks or logos are subject to and must follow [Microsoft's Trademark & Brand Guidelines](https://www.microsoft.com/en-us/legal/intellectualproperty/trademarks/usage/general). Use of Microsoft trademarks or logos in modified versions of this project must not cause confusion or imply Microsoft sponsorship. -Any use of third party trademarks or logos is subject to those third-party's policies. +Any use of third party trademarks or logos is subject to those third-party's policies.
\ No newline at end of file diff --git a/examples/evodiff.ipynb b/examples/evodiff.ipynb index b13af6a..6bba8aa 100644 --- a/examples/evodiff.ipynb +++ b/examples/evodiff.ipynb @@ -179,7 +179,7 @@ "from evodiff.generate import generate_oaardm\n", "\n", "seq_len = 100\n", - "tokeinzed_sample, generated_sequence = generate_oaardm(model, tokenizer, seq_len, batch_size=1, device='cpu')\n", + "tokenized_sample, generated_sequence = generate_oaardm(model, tokenizer, seq_len, batch_size=1, device='cpu')\n", "print(\"Generated sequence:\", generated_sequence)" ] }, @@ -275,7 +275,7 @@ "\n", "seq_len = 100 \n", "\n", - "tokeinzed_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu')" + "tokenized_sample, generated_sequence = generate_d3pm(model, tokenizer, Q, Q_bar, timestep, seq_len, batch_size=1, device='cpu')" ] }, { @@ -373,7 +373,7 @@ "selection_type='random' # or 'MaxHamming'; MSA subsampling scheme\n", "\n", "\n", - "tokeinzed_sample, generated_sequence = generate_query_oadm_msa_simple(path_to_msa, model, tokenizer, n_sequences, seq_length, device='cpu', selection_type=selection_type)\n", + "tokenized_sample, generated_sequence = generate_query_oadm_msa_simple(path_to_msa, model, tokenizer, n_sequences, seq_length, device='cpu', selection_type=selection_type)\n", " \n", "\n", "print(\"New sequence (no gaps, pad tokens)\", re.sub('[!-]', '', generated_sequence[0][0],))"