From b9b802a32ec525864a90784b2c1fe1a9f67834b3 Mon Sep 17 00:00:00 2001
From: bknota <bokyeong.kim@nota.ai>
Date: Tue, 27 Feb 2024 01:20:01 +0000
Subject: [PATCH 1/5] add readme; modify commands

---
 .gitignore         |  5 ++--
 Dockerfile         |  2 +-
 README.md          | 75 +++++++++++++++++++++++++++++++++++++++++++++-
 app.py             |  2 +-
 app.sh             |  4 +++
 docker-compose.yml | 11 ++-----
 download.sh        |  9 +++++-
 inference.py       |  2 +-
 inference.sh       | 20 ++++++++++---
 preprocess.sh      |  7 ++++-
 requirements.txt   |  2 +-
 11 files changed, 117 insertions(+), 22 deletions(-)
 create mode 100644 app.sh
diff --git a/.gitignore b/.gitignore
index 67ac225..e56eeef 100644
--- a/.gitignore
+++ b/.gitignore
@@ -19,8 +19,7 @@ __pycache__
 *.gz
 *.json
 
-results/
+results*
 temp/
 sample*
-
-app.sh
\ No newline at end of file
+data/lrs3_v0.4_txt/lrs3_v0.4/
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 3cc3c5e..3d78479 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,5 +3,5 @@ FROM nvcr.io/nvidia/pytorch:22.03-py3
 ARG DEBIAN_FRONTEND=noninteractive
 RUN apt-get update
 RUN apt-get install ffmpeg libsm6 libxext6 tmux git -y
-
+RUN pip install --no-cache -r requirements.txt
 WORKDIR /workspace
diff --git a/README.md b/README.md
index 02a2a69..f9b4260 100644
--- a/README.md
+++ b/README.md
@@ -10,4 +10,77 @@ pinned: true
 license: apache-2.0
 ---
 
-# README here
\ No newline at end of file
+# 28× Compressed Wav2Lip by Nota AI
+
+Official codebase for [**Accelerating Speech-Driven Talking Face Generation with 28× Compressed Wav2Lip**](https://arxiv.org/abs/2304.00471).
+
+- Presented at [ICCV'23 Demo](https://iccv2023.thecvf.com/demos-111.php) Track; [On-Device Intelligence Workshop](https://sites.google.com/g.harvard.edu/on-device-workshop-23/home) @ MLSys'23; [NVIDIA GTC 2023](https://www.nvidia.com/en-us/on-demand/search/?facet.mimetype[]=event%20session&layout=list&page=1&q=52409&sort=relevance&sortDir=desc) Poster.
+
+
+## Installation
+#### Docker (recommended)
+```bash
+docker compose run --service-ports --name nota-compressed-wav2lip compressed-wav2lip bash
+```
+
+#### Conda
+<details>
+<summary>Click</summary>
+
+```bash
+conda create -n nota-wav2lip python=3.9
+conda activate nota-wav2lip
+git clone https://github.com/Nota-NetsPresso/nota-wav2lip.git
+cd nota-wav2lip
+pip install -r requirements.txt
+apt-get update
+apt-get install ffmpeg libsm6 libxext6 tmux git -y
+```
+</details>
+
+## Gradio Demo
+Use the script below to run the [nota-ai/compressed-wav2lip demo](https://huggingface.co/spaces/nota-ai/compressed-wav2lip). The models and sample data will be downloaded automatically.
+
+  ```bash
+  bash app.sh
+  ```
+
+## Inference
+(1) Download the YouTube videos listed in the LRS3-TED label text files and preprocess them.
+  - Download `lrs3_v0.4_txt.zip` from [this link](https://mmai.io/datasets/lip_reading/).
+  - Unzip the file so that it produces the following folder structure: `./data/lrs3_v0.4_txt/lrs3_v0.4/test`
+  - Run `bash download.sh`
+  - Run `bash preprocess.sh`
+
+(2) Run the script to compare the original Wav2Lip with Nota's compressed version.
+
+  ```bash
+  bash inference.sh
+  ```
+
+## License
+The purpose of this repository, along with its model weights, is strictly for research and non-commercial projects.
+
+## Contact
+- To obtain compression code and assistance, kindly contact Nota AI. These are provided as part of our business solutions (for business inquiries: contact@nota.ai).
+- For Q&A about this repo, use this board: [Nota-NetsPresso/discussions](https://github.com/orgs/Nota-NetsPresso/discussions)
+
+## Acknowledgment
+ - [NVIDIA Applied Research Accelerator Program](https://www.nvidia.com/en-us/industries/higher-education-research/applied-research-program/) for supporting this research.
+ - [Wav2Lip](https://github.com/Rudrabha/Wav2Lip) and [LRS3-TED](https://www.robots.ox.ac.uk/~vgg/data/lip_reading/) for facilitating the development of the original Wav2Lip.
+
+## Citation
+```bibtex
+@article{kim2023unified,
+      title={A Unified Compression Framework for Efficient Speech-Driven Talking-Face Generation}, 
+      author={Kim, Bo-Kyeong and Kang, Jaemin and Seo, Daeun and Park, Hancheol and Choi, Shinkook and Song, Hyoung-Kyu and Kim, Hyungshin and Lim, Sungsu},
+      journal={MLSys Workshop on On-Device Intelligence (ODIW)},
+      year={2023},
+      url={https://arxiv.org/abs/2304.00471}
+}
+```
+
+
+
+
+
diff --git a/app.py b/app.py
index 517f54e..1ecf536 100644
--- a/app.py
+++ b/app.py
@@ -102,4 +102,4 @@
 
         gr.Markdown(Path('docs/footer.md').read_text())
 
-    demo.queue().launch()
+    demo.queue().launch(share=True)
diff --git a/app.sh b/app.sh
new file mode 100644
index 0000000..be817bb
--- /dev/null
+++ b/app.sh
@@ -0,0 +1,4 @@
+export LRS_ORIGINAL_URL=https://netspresso-huggingface-demo-checkpoint.s3.us-east-2.amazonaws.com/compressed-wav2lip/lrs3-wav2lip.pth && \
+export LRS_COMPRESSED_URL=https://netspresso-huggingface-demo-checkpoint.s3.us-east-2.amazonaws.com/compressed-wav2lip/lrs3-nota-wav2lip.pth && \
+export LRS_INFERENCE_SAMPLE=https://netspresso-huggingface-demo-checkpoint.s3.us-east-2.amazonaws.com/data/compressed-wav2lip-inference/sample.tar.gz && \
+python app.py
\ No newline at end of file
diff --git a/docker-compose.yml b/docker-compose.yml
index 8935901..c0820a0 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -1,16 +1,11 @@
 version: "3.9"
-
-# docker compose run --service-ports --name compressed-wav2lip-hksong compressed-wav2lip bash
-
 services:
   compressed-wav2lip:
-    image: compressed-wav2lip:dev
+    image: nota-compressed-wav2lip:dev
     build: ./
-    container_name: efficient-wav2lip-hksong
+    container_name: nota-compressed-wav2lip
     ipc: host
     ports:
       - "7860:7860"
     volumes:
-      - /data2/hksong/compressed-wav2lip:/workspace
-      - /data2/hksong/DATA:/DATA
-      - /data2/hksong/LOG:/LOG
+      - ./:/workspace
\ No newline at end of file
diff --git a/download.sh b/download.sh
index ace8174..c282ce3 100644
--- a/download.sh
+++ b/download.sh
@@ -1,2 +1,9 @@
+# example for audio source
 python download.py\
-  -i 00003.txt
\ No newline at end of file
+  -i data/lrs3_v0.4_txt/lrs3_v0.4/test/sxnlvwprfSc/00007.txt
+
+# example for video source
+python download.py\
+  -i data/lrs3_v0.4_txt/lrs3_v0.4/test/Li4S1yyrsTI/00010.txt
+
+   
\ No newline at end of file
diff --git a/inference.py b/inference.py
index a2eaaf7..d204231 100644
--- a/inference.py
+++ b/inference.py
@@ -47,7 +47,7 @@ def parse_args():
         '--model',
         choices=['wav2lip', 'nota_wav2lip'],
         default='nota_wav2ilp',
-        help="Model for generating talking video. Defaults: wav2lip"
+        help="Model for generating talking video. Defaults: nota_wav2lip"
     )
 
     parser.add_argument(
diff --git a/inference.sh b/inference.sh
index d7c9ff0..f139104 100644
--- a/inference.sh
+++ b/inference.sh
@@ -1,6 +1,18 @@
+
+# Original Wav2Lip
 python inference.py\
-  -a "sample/1673_orig.wav"\
-  -v "sample_video_lrs3/EV3OmxrowWE-00003"\
+  -a "sample_video_lrs3/sxnlvwprf_c-00007.wav"\
+  -v "sample_video_lrs3/Li4-1yyrsTI-00010"\
+  -m "wav2lip"\
+  -o "result_original"\
+  --device cpu
+
+# Nota's Wav2Lip (28× Compressed)
+python inference.py\
+  -a "sample_video_lrs3/sxnlvwprf_c-00007.wav"\
+  -v "sample_video_lrs3/Li4-1yyrsTI-00010"\
   -m "nota_wav2lip"\
-  -o "result"\
-  --device cpu
\ No newline at end of file
+  -o "result_nota"\
+  --device cpu
+
+ 
\ No newline at end of file
diff --git a/preprocess.sh b/preprocess.sh
index e9c9d41..df4a52f 100644
--- a/preprocess.sh
+++ b/preprocess.sh
@@ -1,2 +1,7 @@
+# example for audio source
 python preprocess.py\
-  -i sample_video_lrs3/EV3OmxrowWE-00003.mp4
\ No newline at end of file
+  -i sample_video_lrs3/sxnlvwprf_c-00007.mp4
+
+# example for video source
+python preprocess.py\
+  -i sample_video_lrs3/Li4-1yyrsTI-00010.mp4
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 2c73619..5dcd1bf 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,4 @@ tqdm==4.63.0
 lws==1.2.7
 omegaconf==2.3.0
 yt-dlp==2022.6.22
-loguru==0.7.2
\ No newline at end of file
+loguru==0.7.2

From 41ccc330d43eddf2fed0f30e81ccf3aee4758c6d Mon Sep 17 00:00:00 2001
From: bknota <bokyeong.kim@nota.ai>
Date: Tue, 27 Feb 2024 16:09:28 +0000
Subject: [PATCH 2/5] revise documentation

---
 Dockerfile       |  4 +++-
 README.md        | 20 +++++++++-----------
 download.py      |  2 --
 download.sh      |  4 +---
 inference.sh     |  5 +----
 requirements.txt |  2 +-
 6 files changed, 15 insertions(+), 22 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index 3d78479..4fc7fdd 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -3,5 +3,7 @@ FROM nvcr.io/nvidia/pytorch:22.03-py3
 ARG DEBIAN_FRONTEND=noninteractive
 RUN apt-get update
 RUN apt-get install ffmpeg libsm6 libxext6 tmux git -y
-RUN pip install --no-cache -r requirements.txt
+
 WORKDIR /workspace
+COPY requirements.txt .
+RUN pip install --no-cache -r requirements.txt
\ No newline at end of file
diff --git a/README.md b/README.md
index f9b4260..3952366 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,8 @@ Official codebase for [**Accelerating Speech-Driven Talking Face Generation with
 ## Installation
 #### Docker (recommended)
 ```bash
+git clone https://github.com/Nota-NetsPresso/nota-wav2lip.git
+cd nota-wav2lip
 docker compose run --service-ports --name nota-compressed-wav2lip compressed-wav2lip bash
 ```
 
@@ -28,13 +30,13 @@ docker compose run --service-ports --name nota-compressed-wav2lip compressed-wav
 <summary>Click</summary>
 
 ```bash
-conda create -n nota-wav2lip python=3.9
-conda activate nota-wav2lip
 git clone https://github.com/Nota-NetsPresso/nota-wav2lip.git
 cd nota-wav2lip
-pip install -r requirements.txt
 apt-get update
 apt-get install ffmpeg libsm6 libxext6 tmux git -y
+conda create -n nota-wav2lip python=3.9
+conda activate nota-wav2lip
+pip install -r requirements.txt
 ```
 </details>
 
@@ -59,10 +61,11 @@ Use the below script to run the [nota-ai/compressed-wav2lip demo](https://huggin
   ```
 
 ## License
-The purpose of this repository, along with its model weights, is strictly for research and non-commercial projects.
+- All rights related to this repository and the compressed models are reserved by Nota Inc.
+- The intended use is strictly limited to research and non-commercial projects.
 
 ## Contact
-- To obtain compression code and assistance, kindly contact Nota AI. These are provided as part of our business solutions (for business inquiries: contact@nota.ai).
+- To obtain compression code and assistance, kindly contact Nota AI (contact@nota.ai). These are provided as part of our business solutions.
 - For Q&A about this repo, use this board: [Nota-NetsPresso/discussions](https://github.com/orgs/Nota-NetsPresso/discussions)
 
 ## Acknowledgment
@@ -78,9 +81,4 @@ The purpose of this repository, along with its model weights, is strictly for re
       year={2023},
       url={https://arxiv.org/abs/2304.00471}
 }
-```
-
-
-
-
-
+```
\ No newline at end of file
diff --git a/download.py b/download.py
index 536638d..ee4d663 100644
--- a/download.py
+++ b/download.py
@@ -1,8 +1,6 @@
 import argparse
-
 from nota_wav2lip.preprocess import get_cropped_face_from_lrs3_label
 
-
 def parse_args():
 
     parser = argparse.ArgumentParser(description="NotaWav2Lip: Get LRS3 video sample with the label text file")
diff --git a/download.sh b/download.sh
index c282ce3..4671365 100644
--- a/download.sh
+++ b/download.sh
@@ -4,6 +4,4 @@ python download.py\
 
 # example for video source
 python download.py\
-  -i data/lrs3_v0.4_txt/lrs3_v0.4/test/Li4S1yyrsTI/00010.txt
-
-   
\ No newline at end of file
+  -i data/lrs3_v0.4_txt/lrs3_v0.4/test/Li4S1yyrsTI/00010.txt   
\ No newline at end of file
diff --git a/inference.sh b/inference.sh
index f139104..64cf045 100644
--- a/inference.sh
+++ b/inference.sh
@@ -1,4 +1,3 @@
-
 # Original Wav2Lip
 python inference.py\
   -a "sample_video_lrs3/sxnlvwprf_c-00007.wav"\
@@ -13,6 +12,4 @@ python inference.py\
   -v "sample_video_lrs3/Li4-1yyrsTI-00010"\
   -m "nota_wav2lip"\
   -o "result_nota"\
-  --device cpu
-
- 
\ No newline at end of file
+  --device cpu 
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 5dcd1bf..2c73619 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,4 @@ tqdm==4.63.0
 lws==1.2.7
 omegaconf==2.3.0
 yt-dlp==2022.6.22
-loguru==0.7.2
+loguru==0.7.2
\ No newline at end of file

From a08156ea07497853ac4a4643b9b59a2e80c909d4 Mon Sep 17 00:00:00 2001
From: bknota <bokyeong.kim@nota.ai>
Date: Tue, 27 Feb 2024 16:13:46 +0000
Subject: [PATCH 3/5] add empty data folder (placeholder for lrs3_v0.4)

---
 data/.gitkeep | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 data/.gitkeep

diff --git a/data/.gitkeep b/data/.gitkeep
new file mode 100644
index 0000000..e69de29

From c86362282aab4521d08e11c7cfbfaeaa4d8cda28 Mon Sep 17 00:00:00 2001
From: Hyoung-Kyu Song <hyoungkyu.song@nota.ai>
Date: Thu, 7 Mar 2024 18:23:16 +0900
Subject: [PATCH 4/5] ruff fix

---
 download.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/download.py b/download.py
index ee4d663..536638d 100644
--- a/download.py
+++ b/download.py
@@ -1,6 +1,8 @@
 import argparse
+
 from nota_wav2lip.preprocess import get_cropped_face_from_lrs3_label
 
+
 def parse_args():
 
     parser = argparse.ArgumentParser(description="NotaWav2Lip: Get LRS3 video sample with the label text file")

From 92bf82926c764f5e105f6fa3ecda36215896ddce Mon Sep 17 00:00:00 2001
From: Hyoung-Kyu Song <hyoungkyu.song@nota.ai>
Date: Thu, 7 Mar 2024 18:24:06 +0900
Subject: [PATCH 5/5] disable share server

---
 app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app.py b/app.py
index 1ecf536..517f54e 100644
--- a/app.py
+++ b/app.py
@@ -102,4 +102,4 @@
 
         gr.Markdown(Path('docs/footer.md').read_text())
 
-    demo.queue().launch(share=True)
+    demo.queue().launch()