From c52e1f62c843c46e0c36e05359aa27777db3d9e2 Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Tue, 17 Sep 2024 19:01:53 +0000
Subject: [PATCH 01/12] Update versions

---
 setup.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index f909592a..014373dc 100644
--- a/setup.py
+++ b/setup.py
@@ -6,11 +6,11 @@
 from setuptools import find_packages, setup
 
 install_requires = [
-    'mosaicml==0.20.1', 'mosaicml-streaming==0.7.4', 'hydra-core>=1.2', 'hydra-colorlog>=1.1.0',
-    'diffusers[torch]==0.26.3', 'transformers[torch]==4.38.2', 'huggingface_hub==0.21.2', 'wandb==0.16.3',
-    'xformers==0.0.23.post1', 'triton==2.1.0', 'torchmetrics[image]==1.3.1', 'lpips==0.1.4', 'clean-fid==0.1.35',
-    'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33', 'gradio==4.19.2',
-    'datasets==2.19.2', 'peft==0.12.0'
+    'mosaicml==0.24.1', 'mosaicml-streaming==0.8.1', 'hydra-core>=1.2', 'hydra-colorlog>=1.1.0',
+    'diffusers[torch]==0.30.3', 'transformers[torch]==4.44.2', 'huggingface_hub==0.25.0', 'wandb==0.18.1',
+    'xformers==0.0.27post2', 'triton==2.1.0', 'torchmetrics[image]==1.4.2', 'lpips==0.1.4', 'clean-fid==0.1.35',
+    'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33', 'gradio==4.44.0',
+    'datasets==2.19.2', 'peft==0.12.0', 'sentencepeice',
 ]
 
 extras_require = {}

From 15e4a4d2cff5ebaa8c7c7f0fcf634f100195c16f Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Wed, 18 Sep 2024 17:17:36 +0000
Subject: [PATCH 02/12] Add mlflow and pynvml; relax huggingface-hub and wandb pins

---
 setup.py | 25 ++++++++++++++++++++-----
 1 file changed, 20 insertions(+), 5 deletions(-)

diff --git a/setup.py b/setup.py
index 014373dc..ffed638a 100644
--- a/setup.py
+++ b/setup.py
@@ -6,11 +6,26 @@
 from setuptools import find_packages, setup
 
 install_requires = [
-    'mosaicml==0.24.1', 'mosaicml-streaming==0.8.1', 'hydra-core>=1.2', 'hydra-colorlog>=1.1.0',
-    'diffusers[torch]==0.30.3', 'transformers[torch]==4.44.2', 'huggingface_hub==0.25.0', 'wandb==0.18.1',
-    'xformers==0.0.27post2', 'triton==2.1.0', 'torchmetrics[image]==1.4.2', 'lpips==0.1.4', 'clean-fid==0.1.35',
-    'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33', 'gradio==4.44.0',
-    'datasets==2.19.2', 'peft==0.12.0', 'sentencepeice',
+    'mosaicml==0.24.1',
+    'mosaicml-streaming==0.8.1',
+    'hydra-core>=1.2',
+    'hydra-colorlog>=1.1.0',
+    'diffusers[torch]==0.30.3',
+    'transformers[torch]==4.44.2',
+    'huggingface-hub[hf_transfer]>=0.23.2',
+    'wandb>=0.18.1',
+    'xformers==0.0.27post2',
+    'triton==2.1.0',
+    'torchmetrics[image]==1.4.2',
+    'lpips==0.1.4',
+    'clean-fid==0.1.35',
+    'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33',
+    'gradio==4.44.0',
+    'datasets==2.19.2',
+    'peft==0.12.0',
+    'sentencepeice',
+    'mlflow',
+    'pynvml',
 ]
 
 extras_require = {}

From a6ced94ff2656a3f0d864ff32e2a2e5fa32da8b0 Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Wed, 18 Sep 2024 17:23:13 +0000
Subject: [PATCH 03/12] Fix sentencepiece typo in install_requires

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index ffed638a..8ba95ce9 100644
--- a/setup.py
+++ b/setup.py
@@ -23,7 +23,7 @@
     'gradio==4.44.0',
     'datasets==2.19.2',
     'peft==0.12.0',
-    'sentencepeice',
+    'sentencepiece',
     'mlflow',
     'pynvml',
 ]

From fd7cffda94dd3f7e4d49b2380c0f7c6c3f38115b Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Wed, 18 Sep 2024 17:30:31 +0000
Subject: [PATCH 04/12] Relax torchmetrics pin to >=1.4.0.post0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 8ba95ce9..d0b12d51 100644
--- a/setup.py
+++ b/setup.py
@@ -16,7 +16,7 @@
     'wandb>=0.18.1',
     'xformers==0.0.27post2',
     'triton==2.1.0',
-    'torchmetrics[image]==1.4.2',
+    'torchmetrics[image]>=1.4.0.post0',
     'lpips==0.1.4',
     'clean-fid==0.1.35',
     'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33',

From 75bfd308ae491f5695a5e61dcfe0ea516ee51270 Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Wed, 18 Sep 2024 17:36:02 +0000
Subject: [PATCH 05/12] Relax triton pin from ==2.1.0 to >=2.1.0

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index d0b12d51..4c85f8ad 100644
--- a/setup.py
+++ b/setup.py
@@ -15,7 +15,7 @@
     'huggingface-hub[hf_transfer]>=0.23.2',
     'wandb>=0.18.1',
     'xformers==0.0.27post2',
-    'triton==2.1.0',
+    'triton>=2.1.0',
     'torchmetrics[image]>=1.4.0.post0',
     'lpips==0.1.4',
     'clean-fid==0.1.35',

From cc90fff6e22e6c0846ca2540efd64075bc8629db Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Thu, 3 Oct 2024 22:17:20 +0000
Subject: [PATCH 06/12] New xformers

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 4c85f8ad..18be958d 100644
--- a/setup.py
+++ b/setup.py
@@ -14,7 +14,7 @@
     'transformers[torch]==4.44.2',
     'huggingface-hub[hf_transfer]>=0.23.2',
     'wandb>=0.18.1',
-    'xformers==0.0.27post2',
+    'xformers==0.0.28.post1',
     'triton>=2.1.0',
     'torchmetrics[image]>=1.4.0.post0',
     'lpips==0.1.4',

From 6cb12eed5eee9f3dd6055e70e237460c78aaccb6 Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Thu, 3 Oct 2024 22:26:25 +0000
Subject: [PATCH 07/12] Automatically add per-device batch size as a streaming
 kwarg

---
 diffusion/train.py | 24 ++++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

diff --git a/diffusion/train.py b/diffusion/train.py
index becff0f1..c8674cbe 100644
--- a/diffusion/train.py
+++ b/diffusion/train.py
@@ -103,19 +103,22 @@ def train(config: DictConfig) -> None:
     else:
         optimizer = hydra.utils.instantiate(config.optimizer, params=model.parameters())
 
-    # Load train dataset. Currently this expects to load according to the datasetHparam method.
-    # This means adding external datasets is currently not super easy. Will refactor or check for
-    # upstream composer changes that could make this easier.
+    # Load train dataset. Need to ensure that the per-device batch size is added as a streaming kwarg
+    per_device_train_batch_size = config.dataset.train_batch_size // dist.get_world_size()
+    if hasattr(config.dataset.train_dataset, 'streaming_kwargs'):
+        config.dataset.train_dataset.streaming_kwargs['batch_size'] = per_device_train_batch_size
+    else:
+        config.dataset.train_dataset.streaming_kwargs = {'batch_size': per_device_train_batch_size}
     if tokenizer:
         train_dataloader: Union[Iterable, DataSpec, Dict[str, Any]] = hydra.utils.instantiate(
             config.dataset.train_dataset,
             tokenizer=tokenizer,
-            batch_size=config.dataset.train_batch_size // dist.get_world_size(),
+            batch_size=per_device_train_batch_size,
         )
     else:
         train_dataloader: Union[Iterable, DataSpec, Dict[str, Any]] = hydra.utils.instantiate(
             config.dataset.train_dataset,
-            batch_size=config.dataset.train_batch_size // dist.get_world_size(),
+            batch_size=per_device_train_batch_size,
         )
     # Need to sleep for a bit to avoid dataloader crash
     time.sleep(10)
@@ -148,13 +151,18 @@ def train(config: DictConfig) -> None:
         eval_set = evaluators
 
     else:
+        # Need to ensure that the per-device batch size is added as a streaming kwarg
+        per_device_eval_batch_size = config.dataset.eval_batch_size // dist.get_world_size()
+        if hasattr(config.dataset.eval_dataset, 'streaming_kwargs'):
+            config.dataset.eval_dataset.streaming_kwargs['batch_size'] = per_device_eval_batch_size
+        else:
+            config.dataset.eval_dataset.streaming_kwargs = {'batch_size': per_device_eval_batch_size}
         if tokenizer:
             eval_set = hydra.utils.instantiate(config.dataset.eval_dataset,
                                                tokenizer=model.tokenizer,
-                                               batch_size=config.dataset.eval_batch_size // dist.get_world_size())
+                                               batch_size=per_device_eval_batch_size)
         else:
-            eval_set = hydra.utils.instantiate(config.dataset.eval_dataset,
-                                               batch_size=config.dataset.eval_batch_size // dist.get_world_size())
+            eval_set = hydra.utils.instantiate(config.dataset.eval_dataset, batch_size=per_device_eval_batch_size)
 
         # Need to sleep for a bit to avoid dataloader crash
         time.sleep(10)

From 755e5cccdb59a77e6ce05c0bb6f7d6a05d0c8006 Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Fri, 4 Oct 2024 00:23:12 +0000
Subject: [PATCH 08/12] Bump composer

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 18be958d..13a17da9 100644
--- a/setup.py
+++ b/setup.py
@@ -6,7 +6,7 @@
 from setuptools import find_packages, setup
 
 install_requires = [
-    'mosaicml==0.24.1',
+    'mosaicml==0.25.0',
     'mosaicml-streaming==0.8.1',
     'hydra-core>=1.2',
     'hydra-colorlog>=1.1.0',

From 192dc176f76e45a95acb2ef3c38330d264437879 Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Fri, 4 Oct 2024 04:38:42 +0000
Subject: [PATCH 09/12] Update streaming

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 13a17da9..3f9f42a5 100644
--- a/setup.py
+++ b/setup.py
@@ -7,7 +7,7 @@
 
 install_requires = [
     'mosaicml==0.25.0',
-    'mosaicml-streaming==0.8.1',
+    'mosaicml-streaming==0.9.0',
     'hydra-core>=1.2',
     'hydra-colorlog>=1.1.0',
     'diffusers[torch]==0.30.3',

From a004a9d133492c27fb4671b823a49987d8479e91 Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Fri, 4 Oct 2024 05:51:11 +0000
Subject: [PATCH 10/12] Fix huggingface warning

---
 diffusion/models/models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diffusion/models/models.py b/diffusion/models/models.py
index 33b02f8d..82c1480f 100644
--- a/diffusion/models/models.py
+++ b/diffusion/models/models.py
@@ -125,7 +125,7 @@ def stable_diffusion_2(
     precision = torch.float16 if encode_latents_in_fp16 else None
     # Make the text encoder
     text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder='text_encoder', torch_dtype=precision)
-    tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder='tokenizer')
+    tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder='tokenizer', clean_up_tokenization_spaces=True)
 
     # Make the autoencoder
     if autoencoder_path is None:

From 1e2c20ca3e6601ee2a64d8498e8ed957dfff493e Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Fri, 4 Oct 2024 06:05:09 +0000
Subject: [PATCH 11/12] Update to new torch autocast

---
 diffusion/models/precomputed_text_latent_diffusion.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diffusion/models/precomputed_text_latent_diffusion.py b/diffusion/models/precomputed_text_latent_diffusion.py
index d1ee9136..31acdb2f 100644
--- a/diffusion/models/precomputed_text_latent_diffusion.py
+++ b/diffusion/models/precomputed_text_latent_diffusion.py
@@ -189,7 +189,7 @@ def set_rng_generator(self, rng_generator: torch.Generator):
         self.rng_generator = rng_generator
 
     def encode_images(self, inputs, dtype=torch.bfloat16):
-        with torch.amp.autocast('cuda', enabled=False):
+        with torch.autocast(device_type='cuda', enabled=False):
             latents = self.vae.encode(inputs.to(dtype))['latent_dist'].sample().data
         latents = (latents - self.latent_mean) / self.latent_std  # scale latents
         return latents

From 37bbd4c61b151cfa204ae6c21ac794bfc6cbd814 Mon Sep 17 00:00:00 2001
From: Cory Stephenson <cory.stephenson@databricks.com>
Date: Fri, 4 Oct 2024 16:38:30 +0000
Subject: [PATCH 12/12] Update workflows

---
 .github/workflows/code-quality.yaml | 2 +-
 .github/workflows/docker.yaml       | 6 ++++++
 .github/workflows/pr-cpu.yaml       | 8 ++++----
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml
index 261edd5e..16e2a125 100644
--- a/.github/workflows/code-quality.yaml
+++ b/.github/workflows/code-quality.yaml
@@ -24,8 +24,8 @@ jobs:
     strategy:
       matrix:
         python_version:
-          - "3.9"
           - "3.10"
+          - "3.11"
         pip_deps:
           - "[dev]"
     steps:
diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 164b5ccb..20dd7c6e 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -29,6 +29,12 @@ jobs:
         - name: "2.4.0_cu124_aws"
           base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
           dep_groups: "[all]"
+        - name: "2.4.1_cu124"
+          base_image: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04
+          dep_groups: "[all]"
+        - name: "2.4.1_cu124_aws"
+          base_image: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04-aws
+          dep_groups: "[all]"
     steps:
 
     - name: Checkout
diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml
index b43ce0ae..b5f8cc2e 100644
--- a/.github/workflows/pr-cpu.yaml
+++ b/.github/workflows/pr-cpu.yaml
@@ -19,12 +19,12 @@ jobs:
     strategy:
       matrix:
         include:
-          - name: 'cpu-3.9-1.12'
-            container: mosaicml/pytorch:1.12.1_cpu-python3.9-ubuntu20.04
+          - name: 'cpu-3.10-2.1'
+            container: mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04
             markers: 'not gpu'
             pytest_command: 'coverage run -m pytest'
-          - name: 'cpu-3.10-1.13'
-            container: mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04
+          - name: 'cpu-3.11-2.4'
+            container: mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04
             markers: 'not gpu'
             pytest_command: 'coverage run -m pytest'
     name: ${{ matrix.name }}