Merge branch 'add_readme' into main
Hyoung-Kyu Song committed Mar 7, 2024
2 parents 9ee4536 + 92bf829 commit 66268e9
Showing 10 changed files with 109 additions and 19 deletions.
5 changes: 2 additions & 3 deletions .gitignore
```diff
@@ -19,8 +19,7 @@ __pycache__
 *.gz
 *.json
 
-results/
-temp/
-sample*
+results*
 
 app.sh
+data/lrs3_v0.4_txt/lrs3_v0.4/
```
2 changes: 2 additions & 0 deletions Dockerfile
```diff
@@ -5,3 +5,5 @@ RUN apt-get update
 RUN apt-get install ffmpeg libsm6 libxext6 tmux git -y
 
 WORKDIR /workspace
+COPY requirements.txt .
+RUN pip install --no-cache -r requirements.txt
```
73 changes: 72 additions & 1 deletion README.md
@@ -10,4 +10,75 @@ pinned: true
license: apache-2.0
---

# 28× Compressed Wav2Lip by Nota AI

Official codebase for [**Accelerating Speech-Driven Talking Face Generation with 28× Compressed Wav2Lip**](https://arxiv.org/abs/2304.00471).

- Presented at [ICCV'23 Demo](https://iccv2023.thecvf.com/demos-111.php) Track; [On-Device Intelligence Workshop](https://sites.google.com/g.harvard.edu/on-device-workshop-23/home) @ MLSys'23; [NVIDIA GTC 2023](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=52409&sort=relevance&sortDir=desc) Poster.


## Installation
#### Docker (recommended)
```bash
git clone https://github.com/Nota-NetsPresso/nota-wav2lip.git
cd nota-wav2lip
docker compose run --service-ports --name nota-compressed-wav2lip compressed-wav2lip bash
```

#### Conda
<details>
<summary>Click</summary>

```bash
git clone https://github.com/Nota-NetsPresso/nota-wav2lip.git
cd nota-wav2lip
apt-get update
apt-get install ffmpeg libsm6 libxext6 tmux git -y
conda create -n nota-wav2lip python=3.9
conda activate nota-wav2lip
pip install -r requirements.txt
```
</details>

## Gradio Demo
Use the script below to run the [nota-ai/compressed-wav2lip demo](https://huggingface.co/spaces/nota-ai/compressed-wav2lip) locally. The models and sample data are downloaded automatically.

```bash
bash app.sh
```
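The `app.sh` script added in this commit passes the checkpoint and sample URLs to `app.py` via environment variables. A minimal sketch of how a launcher might collect them, assuming only the variable names from `app.sh` (the `resolve_checkpoints` helper itself is hypothetical, not part of the repo):

```python
import os

# Variable names come from app.sh; everything else here is illustrative.
REQUIRED_VARS = ("LRS_ORIGINAL_URL", "LRS_COMPRESSED_URL", "LRS_INFERENCE_SAMPLE")

def resolve_checkpoints(env=os.environ):
    """Collect the checkpoint/sample URLs exported by app.sh, failing fast if one is missing."""
    missing = [name for name in REQUIRED_VARS if not env.get(name)]
    if missing:
        raise RuntimeError(f"app.sh should export: {', '.join(missing)}")
    return {name: env[name] for name in REQUIRED_VARS}
```

Failing fast with a named list of missing variables makes a half-configured launch easier to diagnose than a download error later on.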

## Inference
(1) Download the YouTube videos referenced in the LRS3-TED label text files and preprocess them.
- Download `lrs3_v0.4_txt.zip` from [this link](https://mmai.io/datasets/lip_reading/).
- Unzip the file so that the labels sit under `./data/lrs3_v0.4_txt/lrs3_v0.4/test`
- Run `bash download.sh`
- Run `bash preprocess.sh`
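The layout from the steps above can be sanity-checked before running `download.sh`. A small sketch, assuming only the `./data/lrs3_v0.4_txt/lrs3_v0.4/test` path stated above (the helper name and per-video-folder glob are illustrative):

```python
from pathlib import Path

# Path taken from the README steps; the surrounding helper is hypothetical.
EXPECTED_ROOT = Path("data/lrs3_v0.4_txt/lrs3_v0.4/test")

def check_lrs3_layout(base: Path = Path(".")) -> list:
    """Return the LRS3 label files (*.txt) under the expected test split, or raise if absent."""
    root = base / EXPECTED_ROOT
    if not root.is_dir():
        raise FileNotFoundError(f"Expected LRS3 labels under {root}; unzip lrs3_v0.4_txt.zip first.")
    # LRS3 groups labels as <video_id>/<clip_id>.txt, e.g. sxnlvwprfSc/00007.txt
    return sorted(root.glob("*/*.txt"))
```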

(2) Run the script to compare the original Wav2Lip with Nota's compressed version.

```bash
bash inference.sh
```
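`inference.sh` drives `inference.py` twice, switching only the `-m/--model` flag between `wav2lip` and `nota_wav2lip`. A stripped-down sketch of the relevant argument handling, assuming only the flags visible in this commit (`-a`, `-v`, `-m`, `-o`, `--device`); it is not the project's actual parser, and the long option names and defaults here are illustrative:

```python
import argparse

def build_parser() -> argparse.ArgumentParser:
    # Flags mirror the calls in inference.sh; help strings and defaults are illustrative.
    parser = argparse.ArgumentParser(description="Compare original vs. compressed Wav2Lip")
    parser.add_argument("-a", "--audio", required=True, help="Driving audio source")
    parser.add_argument("-v", "--video", required=True, help="Preprocessed face-video source")
    parser.add_argument("-m", "--model", choices=["wav2lip", "nota_wav2lip"],
                        default="nota_wav2lip",
                        help="Generator to run (default: nota_wav2lip)")
    parser.add_argument("-o", "--output-dir", default="result",
                        help="Directory for the generated talking-face video")
    parser.add_argument("--device", choices=["cpu", "cuda"], default="cpu")
    return parser
```

Using `choices` makes a mistyped model name fail at parse time rather than at checkpoint-loading time, which is the point of the help-string fix in the `inference.py` hunk below.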

## License
- All rights related to this repository and the compressed models are reserved by Nota Inc.
- The intended use is strictly limited to research and non-commercial projects.

## Contact
- For the compression code and assistance, please contact Nota AI (contact@nota.ai); these are provided as part of our business solutions.
- For Q&A about this repo, use this board: [Nota-NetsPresso/discussions](https://github.com/orgs/Nota-NetsPresso/discussions)

## Acknowledgment
- [NVIDIA Applied Research Accelerator Program](https://www.nvidia.com/en-us/industries/higher-education-research/applied-research-program/) for supporting this research.
- [Wav2Lip](https://github.com/Rudrabha/Wav2Lip) and [LRS3-TED](https://www.robots.ox.ac.uk/~vgg/data/lip_reading/) for facilitating the development of the original Wav2Lip.

## Citation
```bibtex
@article{kim2023unified,
title={A Unified Compression Framework for Efficient Speech-Driven Talking-Face Generation},
author={Kim, Bo-Kyeong and Kang, Jaemin and Seo, Daeun and Park, Hancheol and Choi, Shinkook and Song, Hyoung-Kyu and Kim, Hyungshin and Lim, Sungsu},
journal={MLSys Workshop on On-Device Intelligence (ODIW)},
year={2023},
url={https://arxiv.org/abs/2304.00471}
}
```
4 changes: 4 additions & 0 deletions app.sh
```diff
@@ -0,0 +1,4 @@
+export LRS_ORIGINAL_URL=https://netspresso-huggingface-demo-checkpoint.s3.us-east-2.amazonaws.com/compressed-wav2lip/lrs3-wav2lip.pth && \
+export LRS_COMPRESSED_URL=https://netspresso-huggingface-demo-checkpoint.s3.us-east-2.amazonaws.com/compressed-wav2lip/lrs3-nota-wav2lip.pth && \
+export LRS_INFERENCE_SAMPLE=https://netspresso-huggingface-demo-checkpoint.s3.us-east-2.amazonaws.com/data/compressed-wav2lip-inference/sample.tar.gz && \
+python app.py
```
Empty file added data/.gitkeep
11 changes: 3 additions & 8 deletions docker-compose.yml
```diff
@@ -1,16 +1,11 @@
 version: "3.9"
-
-# docker compose run --service-ports --name compressed-wav2lip-hksong compressed-wav2lip bash
-
 services:
   compressed-wav2lip:
-    image: compressed-wav2lip:dev
+    image: nota-compressed-wav2lip:dev
     build: ./
-    container_name: efficient-wav2lip-hksong
+    container_name: nota-compressed-wav2lip
     ipc: host
     ports:
       - "7860:7860"
     volumes:
-      - /data2/hksong/compressed-wav2lip:/workspace
-      - /data2/hksong/DATA:/DATA
-      - /data2/hksong/LOG:/LOG
+      - ./:/workspace
```
7 changes: 6 additions & 1 deletion download.sh
```diff
@@ -1,2 +1,7 @@
+# example for audio source
 python download.py\
-    -i 00003.txt
+    -i data/lrs3_v0.4_txt/lrs3_v0.4/test/sxnlvwprfSc/00007.txt
+
+# example for video source
+python download.py\
+    -i data/lrs3_v0.4_txt/lrs3_v0.4/test/Li4S1yyrsTI/00010.txt
```
2 changes: 1 addition & 1 deletion inference.py
```diff
@@ -47,7 +47,7 @@ def parse_args():
         '--model',
         choices=['wav2lip', 'nota_wav2lip'],
         default='nota_wav2lip',
-        help="Model for generating talking video. Defaults: wav2lip"
+        help="Model for generating talking video. Defaults: nota_wav2lip"
     )
 
     parser.add_argument(
```
17 changes: 13 additions & 4 deletions inference.sh
```diff
@@ -1,6 +1,15 @@
+# Original Wav2Lip
 python inference.py\
-    -a "sample/1673_orig.wav"\
-    -v "sample_video_lrs3/EV3OmxrowWE-00003"\
+    -a "sample_video_lrs3/sxnlvwprf_c-00007.wav"\
+    -v "sample_video_lrs3/Li4-1yyrsTI-00010"\
     -m "wav2lip"\
-    -o "result"\
-    --device cpu
+    -o "result_original"\
+    --device cpu
+
+# Nota's Wav2Lip (28× Compressed)
+python inference.py\
+    -a "sample_video_lrs3/sxnlvwprf_c-00007.wav"\
+    -v "sample_video_lrs3/Li4-1yyrsTI-00010"\
+    -m "nota_wav2lip"\
+    -o "result_nota"\
+    --device cpu
```
7 changes: 6 additions & 1 deletion preprocess.sh
```diff
@@ -1,2 +1,7 @@
+# example for audio source
 python preprocess.py\
-    -i sample_video_lrs3/EV3OmxrowWE-00003.mp4
+    -i sample_video_lrs3/sxnlvwprf_c-00007.mp4
+
+# example for video source
+python preprocess.py\
+    -i sample_video_lrs3/Li4-1yyrsTI-00010.mp4
```