1 change: 1 addition & 0 deletions CHANGELOG.md
@@ -39,6 +39,7 @@ to communicate reliably.
 - Agent-level hyper-parameter to enable quantisation for self-hosted models.
 - Using Liger kernel in DPO training for increased speed and lower memory usage.
 - Now logging prompt and completion token lengths for self-hosted models.
+- Enabled optimiser and parameter offloading for DPO training.


 ### Changed
12 changes: 12 additions & 0 deletions nip/language_model_server/config.py
@@ -75,6 +75,18 @@ class Settings(
     against the template directory: ``nip/language_model_server/templates/``.
     """

+    offload_optimizer: bool = False
+    """Whether to offload the optimizer state to CPU memory when training.
+
+    This can reduce GPU memory usage, but may slow down training.
+    """
+
+    offload_parameters: bool = False
+    """Whether to offload the model parameters to CPU memory when training.
+
+    This can reduce GPU memory usage, but may slow down training.
+    """
+
     parent_script_cwd: CliSuppress[str | None] = None
     """Path to the working directory of the script which called this process.

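For context, the two new fields live on the server's `Settings` model (which appears to be a pydantic-settings class, judging by `CliSuppress`). A minimal sketch of enabling them, assuming the remaining fields have usable defaults and that the default field-name-to-environment-variable mapping applies (both assumptions, not verified against the repository):

```python
from nip.language_model_server.config import Settings

# Programmatic: pass the new flags directly; every other field keeps its default.
settings = Settings(offload_optimizer=True, offload_parameters=True)
assert settings.offload_optimizer and settings.offload_parameters

# Via the environment (assumes pydantic-settings' default naming and no prefix):
#   OFFLOAD_OPTIMIZER=true OFFLOAD_PARAMETERS=true <launch the server as usual>
```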
@@ -2,17 +2,17 @@ compute_environment: LOCAL_MACHINE
 debug: false
 deepspeed_config:
   gradient_accumulation_steps: 1
-  offload_optimizer_device: none
-  offload_param_device: none
+  offload_optimizer_device: '{{ offload_optimizer_device }}'
+  offload_param_device: '{{ offload_param_device }}'
   zero3_init_flag: false
   zero_stage: 2
-mixed_precision: {{ mixed_precision }}
-distributed_type: DEEPSPEED
+mixed_precision: '{{ mixed_precision }}'
+distributed_type: '{{ distributed_type }}'
 downcast_bf16: 'no'
 enable_cpu_affinity: false
 machine_rank: 0
 main_training_function: main
-mixed_precision: {{ mixed_precision }}
+mixed_precision: '{{ mixed_precision }}'
 num_machines: 1
 num_processes: {{ num_gpus }}
 rdzv_backend: static
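To make the template changes above concrete, here is a minimal rendering sketch (assuming Jinja2, with a trimmed inline copy of the accelerate config template rather than the real file) showing how the handler's values fill the new placeholders:

```python
from jinja2 import Template

# Trimmed, hypothetical fragment of the accelerate config template above.
template = Template(
    "deepspeed_config:\n"
    "  offload_optimizer_device: '{{ offload_optimizer_device }}'\n"
    "  offload_param_device: '{{ offload_param_device }}'\n"
    "distributed_type: '{{ distributed_type }}'\n"
    "mixed_precision: '{{ mixed_precision }}'\n"
    "num_processes: {{ num_gpus }}\n"
)

# Single GPU with optimizer offloading enabled and parameter offloading disabled.
print(
    template.render(
        num_gpus=1,
        mixed_precision="bf16",
        distributed_type="DEEPSPEED",
        offload_optimizer_device="cpu",
        offload_param_device="none",
    )
)
```

The quoting around the placeholders is presumably what keeps a value such as NO from being read back as a YAML boolean rather than the literal string 'NO' when the rendered config is parsed.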
25 changes: 24 additions & 1 deletion nip/language_model_server/trainer_handler.py
@@ -448,9 +448,32 @@ def _get_accelerate_config_path(self) -> Optional[Path]:
         else:
             mixed_precision = "fp16"

+        # For multi-GPU training, or when offloading the optimizer or parameters,
+        # use DeepSpeed as the distributed type. Otherwise we don't use distributed
+        # training, because it slows training down.
+        if (
+            num_gpus > 1
+            or self.settings.offload_optimizer
+            or self.settings.offload_parameters
+        ):
+            distributed_type = "DEEPSPEED"
+        else:
+            distributed_type = "NO"
+
+        offload_optimizer_device = "cpu" if self.settings.offload_optimizer else "none"
+        offload_param_device = "cpu" if self.settings.offload_parameters else "none"
+
         rendered_path = self.temporary_directory_path.joinpath("accelerate_config.yaml")
         with open(rendered_path, "w") as f:
-            f.write(template.render(num_gpus=num_gpus, mixed_precision=mixed_precision))
+            f.write(
+                template.render(
+                    num_gpus=num_gpus,
+                    mixed_precision=mixed_precision,
+                    distributed_type=distributed_type,
+                    offload_optimizer_device=offload_optimizer_device,
+                    offload_param_device=offload_param_device,
+                )
+            )

         return rendered_path
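Finally, a small self-contained restatement of the selection logic added above, together with the values each combination of settings produces. This is illustrative only (the helper name is made up), not repository code:

```python
def choose_accelerate_options(
    num_gpus: int, offload_optimizer: bool, offload_parameters: bool
) -> dict[str, str]:
    """Mirror the distributed-type and offload-device selection shown in the diff."""
    if num_gpus > 1 or offload_optimizer or offload_parameters:
        distributed_type = "DEEPSPEED"
    else:
        distributed_type = "NO"
    return {
        "distributed_type": distributed_type,
        "offload_optimizer_device": "cpu" if offload_optimizer else "none",
        "offload_param_device": "cpu" if offload_parameters else "none",
    }


# A single GPU with no offloading skips distributed training entirely.
assert choose_accelerate_options(1, False, False)["distributed_type"] == "NO"

# Enabling either offload flag forces DeepSpeed, even on one GPU, because the
# CPU offloading is implemented through DeepSpeed.
assert choose_accelerate_options(1, True, False) == {
    "distributed_type": "DEEPSPEED",
    "offload_optimizer_device": "cpu",
    "offload_param_device": "none",
}

# Multiple GPUs always use DeepSpeed, regardless of offloading.
assert choose_accelerate_options(2, False, False)["distributed_type"] == "DEEPSPEED"
```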