Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Update dependencies for use with torch 2.4.1 and composer 0.25.0 #175

Closed
wants to merge 12 commits into from
2 changes: 1 addition & 1 deletion .github/workflows/code-quality.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
strategy:
matrix:
python_version:
- "3.9"
- "3.10"
- "3.11"
pip_deps:
- "[dev]"
steps:
Expand Down
6 changes: 6 additions & 0 deletions .github/workflows/docker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ jobs:
- name: "2.4.0_cu124_aws"
base_image: mosaicml/pytorch:2.4.0_cu124-python3.11-ubuntu20.04-aws
dep_groups: "[all]"
- name: "2.4.1_cu124"
base_image: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04
dep_groups: "[all]"
- name: "2.4.1_cu124_aws"
base_image: mosaicml/pytorch:2.4.1_cu124-python3.11-ubuntu20.04-aws
dep_groups: "[all]"
steps:

- name: Checkout
Expand Down
8 changes: 4 additions & 4 deletions .github/workflows/pr-cpu.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,12 @@ jobs:
strategy:
matrix:
include:
- name: 'cpu-3.9-1.12'
container: mosaicml/pytorch:1.12.1_cpu-python3.9-ubuntu20.04
- name: 'cpu-3.10-2.1'
container: mosaicml/pytorch:2.1.2_cpu-python3.10-ubuntu20.04
markers: 'not gpu'
pytest_command: 'coverage run -m pytest'
- name: 'cpu-3.10-1.13'
container: mosaicml/pytorch:1.13.1_cpu-python3.10-ubuntu20.04
- name: 'cpu-3.11-2.4'
container: mosaicml/pytorch:2.4.1_cpu-python3.11-ubuntu20.04
markers: 'not gpu'
pytest_command: 'coverage run -m pytest'
name: ${{ matrix.name }}
Expand Down
2 changes: 1 addition & 1 deletion diffusion/models/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def stable_diffusion_2(
precision = torch.float16 if encode_latents_in_fp16 else None
# Make the text encoder
text_encoder = CLIPTextModel.from_pretrained(model_name, subfolder='text_encoder', torch_dtype=precision)
tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder='tokenizer')
tokenizer = CLIPTokenizer.from_pretrained(model_name, subfolder='tokenizer', clean_up_tokenization_spaces=True)

# Make the autoencoder
if autoencoder_path is None:
Expand Down
2 changes: 1 addition & 1 deletion diffusion/models/precomputed_text_latent_diffusion.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,7 @@ def set_rng_generator(self, rng_generator: torch.Generator):
self.rng_generator = rng_generator

def encode_images(self, inputs, dtype=torch.bfloat16):
with torch.amp.autocast('cuda', enabled=False):
with torch.autocast(device_type='cuda', enabled=False):
latents = self.vae.encode(inputs.to(dtype))['latent_dist'].sample().data
latents = (latents - self.latent_mean) / self.latent_std # scale latents
return latents
Expand Down
24 changes: 16 additions & 8 deletions diffusion/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,19 +103,22 @@ def train(config: DictConfig) -> None:
else:
optimizer = hydra.utils.instantiate(config.optimizer, params=model.parameters())

# Load train dataset. Currently this expects to load according to the datasetHparam method.
# This means adding external datasets is currently not super easy. Will refactor or check for
# upstream composer changes that could make this easier.
# Load train dataset. Need to ensure that the per-device batch size is added as a streaming kwarg
per_device_train_batch_size = config.dataset.train_batch_size // dist.get_world_size()
if hasattr(config.dataset.train_dataset, 'streaming_kwargs'):
config.dataset.train_dataset.streaming_kwargs['batch_size'] = per_device_train_batch_size
else:
config.dataset.train_dataset.streaming_kwargs = {'batch_size': per_device_train_batch_size}
if tokenizer:
train_dataloader: Union[Iterable, DataSpec, Dict[str, Any]] = hydra.utils.instantiate(
config.dataset.train_dataset,
tokenizer=tokenizer,
batch_size=config.dataset.train_batch_size // dist.get_world_size(),
batch_size=per_device_train_batch_size,
)
else:
train_dataloader: Union[Iterable, DataSpec, Dict[str, Any]] = hydra.utils.instantiate(
config.dataset.train_dataset,
batch_size=config.dataset.train_batch_size // dist.get_world_size(),
batch_size=per_device_train_batch_size,
)
# Need to sleep for a bit to avoid dataloader crash
time.sleep(10)
Expand Down Expand Up @@ -148,13 +151,18 @@ def train(config: DictConfig) -> None:
eval_set = evaluators

else:
# Need to ensure that the per-device batch size is added as a streaming kwarg
per_device_eval_batch_size = config.dataset.eval_batch_size // dist.get_world_size()
if hasattr(config.dataset.eval_dataset, 'streaming_kwargs'):
config.dataset.eval_dataset.streaming_kwargs['batch_size'] = per_device_eval_batch_size
else:
config.dataset.eval_dataset.streaming_kwargs = {'batch_size': per_device_eval_batch_size}
if tokenizer:
eval_set = hydra.utils.instantiate(config.dataset.eval_dataset,
tokenizer=model.tokenizer,
batch_size=config.dataset.eval_batch_size // dist.get_world_size())
batch_size=per_device_eval_batch_size)
else:
eval_set = hydra.utils.instantiate(config.dataset.eval_dataset,
batch_size=config.dataset.eval_batch_size // dist.get_world_size())
eval_set = hydra.utils.instantiate(config.dataset.eval_dataset, batch_size=per_device_eval_batch_size)

# Need to sleep for a bit to avoid dataloader crash
time.sleep(10)
Expand Down
25 changes: 20 additions & 5 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,26 @@
from setuptools import find_packages, setup

install_requires = [
'mosaicml==0.20.1', 'mosaicml-streaming==0.7.4', 'hydra-core>=1.2', 'hydra-colorlog>=1.1.0',
'diffusers[torch]==0.26.3', 'transformers[torch]==4.38.2', 'huggingface_hub==0.21.2', 'wandb==0.16.3',
'xformers==0.0.23.post1', 'triton==2.1.0', 'torchmetrics[image]==1.3.1', 'lpips==0.1.4', 'clean-fid==0.1.35',
'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33', 'gradio==4.19.2',
'datasets==2.19.2', 'peft==0.12.0'
'mosaicml==0.25.0',
'mosaicml-streaming==0.9.0',
'hydra-core>=1.2',
'hydra-colorlog>=1.1.0',
'diffusers[torch]==0.30.3',
'transformers[torch]==4.44.2',
'huggingface-hub[hf_transfer]>=0.23.2',
'wandb>=0.18.1',
'xformers==0.0.28.post1',
'triton>=2.1.0',
'torchmetrics[image]>=1.4.0.post0',
'lpips==0.1.4',
'clean-fid==0.1.35',
'clip@git+https://github.com/openai/CLIP.git@a1d071733d7111c9c014f024669f959182114e33',
'gradio==4.44.0',
'datasets==2.19.2',
'peft==0.12.0',
'sentencepiece',
'mlflow',
'pynvml',
]

extras_require = {}
Expand Down
Loading