mosaicml · coryMosaicML · Sep 17, 2024 · Sep 18, 2024 · Sep 18, 2024 · Sep 18, 2024
diff --git a/.github/workflows/code-quality.yaml b/.github/workflows/code-quality.yaml
@@ -24,8 +24,8 @@ jobs:
     strategy:
       matrix:
         python_version:
-          - "3.9"
           - "3.10"
+          - "3.11"
         pip_deps:
           - "[dev]"
     steps:

diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
@@ -29,6 +29,12 @@ jobs:
         - name: "2.4.0_cu124_aws"
           base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
           dep_groups: "[all]"
+        - name: "2.4.1_cu124"
+          base_image: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04
+          dep_groups: "[all]"
+        - name: "2.4.1_cu124_aws"
+          base_image: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04-aws
+          dep_groups: "[all]"
     steps:
 
     - name: Checkout

diff --git a/.github/workflows/pr-cpu.yaml b/.github/workflows/pr-cpu.yaml
@@ -19,12 +19,12 @@ jobs:
     strategy:
       matrix:
         include:
-          - name: 'cpu-3.9-1.12'
-            container: mosaicml/pytorch:1.12.1_cpu-python3.9-ubuntu20.04
+          - name: 'cpu-3.10-2.1'
+            container: mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04
             markers: 'not gpu'
             pytest_command: 'coverage run -m pytest'
-          - name: 'cpu-3.10-1.13'
-            container: mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04
+          - name: 'cpu-3.11-2.4'
+            container: mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04
             markers: 'not gpu'
             pytest_command: 'coverage run -m pytest'
     name: ${{ matrix.name }}

diff --git a/diffusion/models/models.py b/diffusion/models/models.py
@@ -125,7 +125,7 @@ def stable_diffusion_2(
     precision = torch.float16 if encode_latents_in_fp16 else None
     # Make the text encoder
     text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder='text_encoder', torch_dtype=precision)
-    tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder='tokenizer')
+    tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder='tokenizer', clean_up_tokenization_spaces=True)
 
     # Make the autoencoder
     if autoencoder_path is None:

diff --git a/diffusion/models/precomputed_text_latent_diffusion.py b/diffusion/models/precomputed_text_latent_diffusion.py
@@ -189,7 +189,7 @@ def set_rng_generator(self, rng_generator: torch.Generator):
         self.rng_generator = rng_generator
 
     def encode_images(self, inputs, dtype=torch.bfloat16):
-        with torch.amp.autocast('cuda', enabled=False):
+        with torch.autocast(device_type='cuda', enabled=False):  # autocast off; dtype set via inputs.to(dtype)
             latents = self.vae.encode(inputs.to(dtype))['latent_dist'].sample().data
         latents = (latents - self.latent_mean) / self.latent_std  # scale latents
         return latents

diff --git a/diffusion/train.py b/diffusion/train.py
@@ -103,19 +103,22 @@ def train(config: DictConfig) -> None:
     else:
         optimizer = hydra.utils.instantiate(config.optimizer, params=model.parameters())
 
-    # Load train dataset. Currently this expects to load according to the datasetHparam method.
-    # This means adding external datasets is currently not super easy. Will refactor or check for
-    # upstream composer changes that could make this easier.
+    # Load train dataset. streaming_kwargs['batch_size'] is forced to the per-device batch size first.
+    per_device_train_batch_size = config.dataset.train_batch_size // dist.get_world_size()
+    if hasattr(config.dataset.train_dataset, 'streaming_kwargs'):
+        config.dataset.train_dataset.streaming_kwargs['batch_size'] = per_device_train_batch_size
+    else:
+        config.dataset.train_dataset.streaming_kwargs = {'batch_size': per_device_train_batch_size}
     if tokenizer:
         train_dataloader: Union[Iterable, DataSpec, Dict[str, Any]] = hydra.utils.instantiate(
             config.dataset.train_dataset,
             tokenizer=tokenizer,
-            batch_size=config.dataset.train_batch_size // dist.get_world_size(),
+            batch_size=per_device_train_batch_size,
         )
     else:
         train_dataloader: Union[Iterable, DataSpec, Dict[str, Any]] = hydra.utils.instantiate(
             config.dataset.train_dataset,
-            batch_size=config.dataset.train_batch_size // dist.get_world_size(),
+            batch_size=per_device_train_batch_size,
         )
     # Need to sleep for a bit to avoid dataloader crash
     time.sleep(10)
@@ -148,13 +151,18 @@ def train(config: DictConfig) -> None:
         eval_set = evaluators
 
     else:
+        # streaming_kwargs['batch_size'] is forced to the per-device eval batch size before instantiation
+        per_device_eval_batch_size = config.dataset.eval_batch_size // dist.get_world_size()
+        if hasattr(config.dataset.eval_dataset, 'streaming_kwargs'):
+            config.dataset.eval_dataset.streaming_kwargs['batch_size'] = per_device_eval_batch_size
+        else:
+            config.dataset.eval_dataset.streaming_kwargs = {'batch_size': per_device_eval_batch_size}
         if tokenizer:
             eval_set = hydra.utils.instantiate(config.dataset.eval_dataset,
                                                tokenizer=model.tokenizer,
-                                               batch_size=config.dataset.eval_batch_size // dist.get_world_size())
+                                               batch_size=per_device_eval_batch_size)
         else:
-            eval_set = hydra.utils.instantiate(config.dataset.eval_dataset,
-                                               batch_size=config.dataset.eval_batch_size // dist.get_world_size())
+            eval_set = hydra.utils.instantiate(config.dataset.eval_dataset, batch_size=per_device_eval_batch_size)
 
         # Need to sleep for a bit to avoid dataloader crash
         time.sleep(10)

diff --git a/setup.py b/setup.py
@@ -6,11 +6,26 @@
 from setuptools import find_packages, setup
 
 install_requires = [
-    'mosaicml==0.20.1', 'mosaicml-streaming==0.7.4', 'hydra-core>=1.2', 'hydra-colorlog>=1.1.0',
-    'diffusers[torch]==0.26.3', 'transformers[torch]==4.38.2', 'huggingface_hub==0.21.2', 'wandb==0.16.3',
-    'xformers==0.0.23.post1', 'triton==2.1.0', 'torchmetrics[image]==1.3.1', 'lpips==0.1.4', 'clean-fid==0.1.35',
-    'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33', 'gradio==4.19.2',
-    'datasets==2.19.2', 'peft==0.12.0'
+    'mosaicml==0.25.0',
+    'mosaicml-streaming==0.9.0',
+    'hydra-core>=1.2',
+    'hydra-colorlog>=1.1.0',
+    'diffusers[torch]==0.30.3',
+    'transformers[torch]==4.44.2',
+    'huggingface-hub[hf_transfer]>=0.23.2',
+    'wandb>=0.18.1',
+    'xformers==0.0.28.post1',
+    'triton>=2.1.0',
+    'torchmetrics[image]>=1.4.0.post0',
+    'lpips==0.1.4',
+    'clean-fid==0.1.35',
+    'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33',  # pinned to a commit
+    'gradio==4.44.0',
+    'datasets==2.19.2',
+    'peft==0.12.0',
+    'sentencepiece',  # NOTE(review): no version specifier on this or the next two entries; confirm intended
+    'mlflow',
+    'pynvml',
 ]
 
 extras_require = {}