-
Notifications
You must be signed in to change notification settings - Fork 8
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #38 from AGI-Collective/docker
added docker server support
- Loading branch information
Showing
16 changed files
with
406 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
# Base environment image for Robin: CUDA 11.8 development toolchain,
# Python 3.10, pip, and an editable install of the Robin repository.
FROM nvidia/cuda:11.8.0-devel-ubuntu22.04

WORKDIR /app

# Kept as ENV (not ARG) so the values are inherited by images built FROM this
# one and by running containers.
ENV DEBIAN_FRONTEND="noninteractive" \
    HOST_NAME="docker"

# Base tooling. The apt lists are removed in the same layer that created them;
# deleting them in a later layer would not shrink the image.
RUN apt-get update \
    && apt-get install -y \
        git \
        git-lfs \
        software-properties-common \
        unzip \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Python 3.10
RUN add-apt-repository ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y \
        python3.10 \
        python3.10-dev \
        python3.10-distutils \
    && ln -s /usr/bin/python3.10 /usr/bin/python \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Install Pip
RUN wget https://bootstrap.pypa.io/get-pip.py \
    && python get-pip.py \
    && rm get-pip.py \
    && pip install --no-cache-dir --upgrade pip

# Install Robin (editable install from the frontier/dev branch).
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN git clone https://github.com/AGI-Collective/robin.git \
    && cd robin \
    && git checkout frontier/dev \
    && pip install --no-cache-dir -e ".[train]"

# Setup corrections
RUN pip uninstall -y bitsandbytes \
    && rm -rf /usr/local/lib/python3.10/site-packages/triton \
    && rm -rf /usr/local/lib/python3.10/site-packages/sklearn

RUN pip install --no-cache-dir flash-attn==2.3.3

RUN pip install --no-cache-dir torch==2.0.1+cu118 torchvision==0.15.2+cu118 --index-url https://download.pytorch.org/whl/cu118
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Build the base environment image from the Dockerfile in this directory.
docker build --tag robin_env .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Evaluation image layered on the locally built robin_env image.
FROM docker.io/library/robin_env

WORKDIR /app

# Directory for model checkouts plus the /export mount point.
RUN mkdir /app/downloaded_models /export

# Data setup: ADD unpacks local tar archives into the destination directory.
ADD scienceqa.tar.gz /app/playground/data/eval
ADD gqa.tar.gz /app/playground/data/eval

# Disabled build-time model download, kept for reference:
# RUN git lfs install \
# && cd /app/downloaded_models \
# && git clone https://huggingface.co/agi-collective/mistral-7b-oh-siglip-so400m-finetune-lora \
# && git clone https://huggingface.co/teknium/OpenHermes-2.5-Mistral-7B

# Launch script: copied in, made executable, and used as the entrypoint.
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Build the evaluation image from the Dockerfile in this directory.
docker build --tag robin_evals .
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
#!/bin/bash
# Entrypoint for the evaluation container.
#
# Usage: entrypoint.sh <model_repo> <base_repo>
#   <model_repo>  Hugging Face repository path (e.g. org/name) cloned as MODEL
#   <base_repo>   Hugging Face repository path (e.g. org/name) cloned as BASE
#
# Clones both repositories, switches the Robin checkout to the "eval" branch,
# runs the ScienceQA and GQA eval scripts, and writes their combined
# stdout/stderr to /export/<model name>/<benchmark>/results.log.
MODEL=$1
BASE=$2
version=""

if [ -z "$MODEL" ] || [ -z "$BASE" ]; then
    echo "Usage: $0 <model_repo> <base_repo>" >&2
    exit 1
fi

# download models
git lfs install
cd /app/downloaded_models
git clone "https://huggingface.co/$MODEL"
git clone "https://huggingface.co/$BASE"

# get updated code
cd /app/robin
git pull
git checkout eval

# setup variables: keep only the part after the last "/" (the repo name)
MODEL=${MODEL##*/}
BASE=${BASE##*/}

EXPORT_PATH=/export/$MODEL

MODEL=/app/downloaded_models/$MODEL
BASE=/app/downloaded_models/$BASE

# The log redirections below are opened by this shell before the eval scripts
# start, so the per-benchmark directories must already exist.
mkdir -p "$EXPORT_PATH/scienceqa" "$EXPORT_PATH/gqa"

# launch evals
cd /app/robin/scripts/v1_5/eval
echo "Launching SQA"
# $version is intentionally unquoted: while it is empty no third argument is passed.
./docker_sqa.sh "$MODEL" "$BASE" $version > "$EXPORT_PATH/scienceqa/results.log" 2>&1

echo "Launching GQA"
./docker_gqa.sh "$MODEL" "$BASE" $version > "$EXPORT_PATH/gqa/results.log" 2>&1

echo "Finished evals!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Start the evaluation container detached, with one GPU and the host directory
# /home/<current user>/evals mounted at /export.
# The two trailing arguments are passed to the image's entrypoint.
# (The original note "# -it" suggests swapping --detach for -it to run interactively.)
docker run \
    --rm \
    --detach \
    --gpus 1 \
    --shm-size 8G \
    --volume "/home/$(whoami)/evals:/export" \
    docker.io/library/robin_evals \
    agi-collective/mistral-7b-oh-siglip-so400m-finetune-lora \
    teknium/OpenHermes-2.5-Mistral-7B
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
# Fine-tuning image layered on the locally built robin_env image.
FROM docker.io/library/robin_env

WORKDIR /app

RUN mkdir /app/wandb_cache /app/downloaded_models /export

# ImageMagick provides the `mogrify` command invoked by the download script.
# Installed before the script is copied so that editing the script does not
# invalidate this layer; apt lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -y imagemagick \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

# Data setup
# ADD LLaVA-Finetune.tar.gz /app/
COPY data_download_parallel.py /app/
RUN python /app/data_download_parallel.py

# launch script
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Build the fine-tuning image from the Dockerfile in this directory.
docker build --tag robin_finetune .
85 changes: 85 additions & 0 deletions
85
scripts/robin_v2/docker/image_robin_finetune/data_download_parallel.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,85 @@ | ||
import multiprocessing | ||
import concurrent.futures | ||
import os | ||
import json | ||
import urllib.request as ureq | ||
|
||
# BASE_PATH = "images" | ||
BASE_PATH = "/app/LLaVA-Finetune" | ||
|
||
def download_dataset(folder_name, urls):
    """Download one dataset into ``BASE_PATH``.

    Args:
        folder_name: Sub-folder of ``BASE_PATH`` to fill. The special value
            ``"json"`` downloads ``llava_v1_5_mix665k.json`` directly into
            ``BASE_PATH`` and ignores ``urls``.
        urls: A single zip-archive URL or an iterable of such URLs. Each
            archive is downloaded with ``wget``, extracted with ``unzip``
            into ``BASE_PATH/folder_name`` and then deleted.

    Raises:
        subprocess.CalledProcessError: if ``wget`` fails, or ``unzip`` exits
            with a status above 1. Failing loudly prevents a silently
            incomplete dataset.
    """
    import subprocess  # local import: only this function needs it

    if folder_name == "json":
        subprocess.run(
            [
                "wget", "-q",
                "https://huggingface.co/datasets/liuhaotian/LLaVA-Instruct-150K/resolve/main/llava_v1_5_mix665k.json",
                "-P", BASE_PATH,
            ],
            check=True,
        )
        return

    if isinstance(urls, str):
        urls = [urls]

    target_dir = f"{BASE_PATH}/{folder_name}"
    for url in urls:
        name = url.split("/")[-1]
        os.makedirs(target_dir, exist_ok=True)
        subprocess.run(["wget", "-q", url, "-P", target_dir], check=True)

        archive = f"{target_dir}/{name}"
        unzip = subprocess.run(["unzip", "-q", archive, "-d", target_dir])
        # unzip exits with 1 for warnings even though extraction completed,
        # so only higher statuses are treated as failures.
        if unzip.returncode > 1:
            raise subprocess.CalledProcessError(unzip.returncode, unzip.args)

        # Remove only the archive that was just extracted.
        os.remove(archive)
|
||
def download_image(k, url, path):
    """Download a single image to ``<path>/images/<k><ext>``.

    Args:
        k: Key used as the file name stem.
        url: Image URL; the extension returned by ``os.path.splitext(url)``
            is reused for the output file.
        path: Directory that contains the ``images`` sub-directory.
    """
    ext = os.path.splitext(url)[1]
    # Build the whole name with one f-string: applying "%" formatting to an
    # f-string result raised whenever ``path`` itself contained a "%".
    outputFile = f'{path}/images/{k}{ext}'
    ureq.urlretrieve(url, outputFile)
|
||
def download_ocr_data():
    """Fetch the ``ocr_vqa`` metadata and images under ``BASE_PATH``.

    Downloads ``dataset.json``, retrieves every ``imageURL`` it lists through
    ``download_image`` using a pool of 100 worker processes, then runs
    ``mogrify -format jpg`` over the downloaded ``.gif`` and ``.png`` files.
    """
    ocr_root = f"{BASE_PATH}/ocr_vqa"
    os.makedirs(ocr_root, exist_ok=True)

    # Metadata file describing every image (key -> record with "imageURL").
    meta_url = "https://drive.usercontent.google.com/download?id=1r0tyZUwGCc4wIG4RkiglCGNL_nFJjR6Q&export=download&authuser=0&confirm=t&at=APZUnTW8fGOfgvS7p_RjJKw6sXyU:1707402060685"
    metadata_file = f"{ocr_root}/dataset.json"
    ureq.urlretrieve(meta_url, metadata_file)

    with open(metadata_file, 'r') as fp:
        data = json.load(fp)

    os.makedirs(f"{ocr_root}/images", exist_ok=True)

    workers = multiprocessing.Pool(100)

    # One (key, url, destination root) job per metadata entry, fetched in parallel.
    jobs = [(key, data[key]['imageURL'], ocr_root) for key in data.keys()]
    workers.starmap(download_image, jobs)

    # Shut the pool down and wait for every worker to exit.
    workers.close()
    workers.join()

    for source_ext in ("gif", "png"):
        os.system(f"mogrify -format jpg {ocr_root}/images/*.{source_ext}")
|
||
if __name__ == "__main__":
    # (folder under BASE_PATH, archive URL or tuple of archive URLs).
    # The "json" entry is special-cased inside download_dataset.
    datasets = [
        ("json", ""),
        # Explicit scheme, consistent with the other entries (wget would
        # otherwise fall back to http:// implicitly).
        ("coco", "http://images.cocodataset.org/zips/train2017.zip"),
        ("gqa", "https://downloads.cs.stanford.edu/nlp/data/gqa/images.zip"),
        ("textvqa", "https://dl.fbaipublicfiles.com/textvqa/images/train_val_images.zip"),
        ("vg", ("https://cs.stanford.edu/people/rak248/VG_100K_2/images.zip",
                "https://cs.stanford.edu/people/rak248/VG_100K_2/images2.zip")),
    ]

    os.makedirs(f"{BASE_PATH}", exist_ok=True)

    # Download the datasets in parallel, one worker task per entry.
    pool = multiprocessing.Pool()
    pool.starmap(download_dataset, datasets)

    # Close the pool and wait for all processes to finish
    pool.close()
    pool.join()

    # Download OCR Data
    download_ocr_data()
74 changes: 74 additions & 0 deletions
74
scripts/robin_v2/docker/image_robin_finetune/entrypoint.sh
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
#!/bin/bash
# Entrypoint for the fine-tuning container.
#
# Usage: entrypoint.sh <model_repo> <vision_repo> [gradient_accumulation_steps]
#   <model_repo>   Hugging Face repository path cloned as MODEL
#   <vision_repo>  Hugging Face repository path cloned as VISION
#   [gas]          gradient accumulation steps (default 1)
#
# Clones both repositories, updates the Robin checkout, runs the deepspeed
# fine-tuning job, and moves the resulting files to /export/<model>-<vision>/finetune/.
MODEL=$1
VISION=$2
GAS=${3:-1}

if [ -z "$MODEL" ] || [ -z "$VISION" ]; then
    echo "Usage: $0 <model_repo> <vision_repo> [gradient_accumulation_steps]" >&2
    exit 1
fi

# download models
git lfs install
cd /app/downloaded_models
git clone "https://huggingface.co/$MODEL"
git clone "https://huggingface.co/$VISION"

# get updated code: the pull has to run inside the Robin checkout;
# /app/downloaded_models is not a git repository.
cd /app/robin
git pull

# setup variables: keep only the part after the last "/" (the repo name)
MODEL=${MODEL##*/}
VISION=${VISION##*/}

CHECKPOINT_PATH=/app/checkpoints/$MODEL-$VISION
EXPORT_PATH=/export/$MODEL-$VISION

MODEL=/app/downloaded_models/$MODEL
VISION=/app/downloaded_models/$VISION

GPU_COUNT=$(nvidia-smi --list-gpus | wc -l)
if [ "$GPU_COUNT" -lt 1 ]; then
    # Avoids a division by zero in the batch-size computation below.
    echo "No GPUs detected by nvidia-smi; aborting." >&2
    exit 1
fi
# Per-device batch size such that GPUs x GAS x batch is 128 (integer division).
BATCH_SIZE=$(( 128 / GPU_COUNT / GAS ))

DATA_PATH=/app/LLaVA-Finetune

PRETRAIN=$EXPORT_PATH/pretrain

# launch training
cd /app/robin
deepspeed \
    robin/train/train_mem.py \
    --deepspeed ./scripts/zero2.json \
    --model_name_or_path "$MODEL" \
    --version v1 \
    --data_path "$DATA_PATH/llava_v1_5_mix665k.json" \
    --image_folder "$DATA_PATH" \
    --vision_tower "$VISION" \
    --finetune_ve True \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --pretrain_mm_mlp_adapter "$PRETRAIN/mm_projector.bin" \
    --group_by_modality_length True \
    --image_aspect_ratio pad \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --fp16 True \
    --output_dir "$CHECKPOINT_PATH/finetune" \
    --num_train_epochs 1 \
    --per_device_train_batch_size "$BATCH_SIZE" \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps "$GAS" \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 100 \
    --save_total_limit 2 \
    --learning_rate 2e-5 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 False \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb \
    --vision_lr 5e-5

# The destination must exist, otherwise mv fails when given several source files.
mkdir -p "$EXPORT_PATH/finetune"
mv "$CHECKPOINT_PATH"/finetune/*.* "$EXPORT_PATH/finetune/"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Start a detached fine-tuning container with two GPUs; the host directory
# /home/<current user>/checkpoints is mounted at /export.
# The three trailing values are forwarded to the image's entrypoint.
# (Original note: "# -it".)
docker run --rm -d \
    --gpus 2 \
    --shm-size 8G \
    -v /home/$(whoami)/checkpoints:/export \
    docker.io/library/robin_finetune \
    teknium/OpenHermes-2.5-Mistral-7B \
    facebook/metaclip-l14-fullcc2.5b \
    4
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Pre-training image layered on the locally built robin_env image.
FROM docker.io/library/robin_env

WORKDIR /app

RUN mkdir /app/wandb_cache /app/downloaded_models /export

# Data setup: clone the LLaVA-Pretrain dataset into /app, drop the git
# metadata, and unpack images.zip into LLaVA-Pretrain/images, all in one layer
# so the removed files never persist in the image.
#ADD LLaVA-Pretrain.tar.gz /app/
RUN git lfs install \
    && git clone https://huggingface.co/datasets/liuhaotian/LLaVA-Pretrain \
    && rm -rf LLaVA-Pretrain/.git* \
    && unzip -q LLaVA-Pretrain/images.zip -d LLaVA-Pretrain/images \
    && rm LLaVA-Pretrain/images.zip

# Launch script: copied in, made executable, and used as the entrypoint.
COPY entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
# Build the pre-training image from the Dockerfile in this directory.
docker build --tag robin_pretrain .
Oops, something went wrong.