
Auto feature selection for deepspeed-chat training stage 1 #605

Closed · wants to merge 12 commits
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
@@ -17,6 +17,7 @@ repos:
rev: v0.32.0
hooks:
- id: yapf
additional_dependencies: [toml]
files: ^applications/DeepSpeed-Chat/.+

- repo: https://github.com/pycqa/flake8
@@ -29,6 +29,7 @@
from utils.ds_utils import get_train_ds_config
from utils.module.lora import convert_linear_layer_to_lora, convert_lora_to_linear_layer, only_optimize_lora_parameters
from utils.model.model_utils import create_hf_model
from utils.feature_selection import feature_selection


def parse_args():
@@ -144,10 +145,11 @@ def parse_args():
parser.add_argument('--offload',
action='store_true',
help='Enable ZeRO Offload techniques.')

parser.add_argument(
'--zero_stage',
- type=int,
- default=0,
+ type=str,
+ default="auto",
help='ZeRO optimization stage for Actor model (and clones).')
## LoRA for efficient training setting
parser.add_argument("--lora_dim",
@@ -187,6 +189,9 @@ def main():

args.global_rank = torch.distributed.get_rank()

args = feature_selection(args=args, model_class=AutoModelForCausalLM)
print(f'done with feature selection, args: {args}')

ds_config = get_train_ds_config(offload=args.offload,
stage=args.zero_stage)
ds_config[
@@ -195,6 +200,8 @@
'train_batch_size'] = args.per_device_train_batch_size * torch.distributed.get_world_size(
) * args.gradient_accumulation_steps

ds_config['wall_clock_breakdown'] = False

# If passed along, set the training seed now.
set_random_seed(args.seed)

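The hunk above derives the global DeepSpeed batch size from the per-device batch size, the number of ranks, and the gradient-accumulation steps, while the added wall_clock_breakdown = False line turns off DeepSpeed's per-step timing breakdown in the logs. A quick worked example of the batch-size arithmetic, with illustrative numbers that are not taken from the PR:

# Illustrative values only (not from this PR): 4 samples per GPU, 8 GPUs, 2 accumulation steps.
per_device_train_batch_size = 4
world_size = 8
gradient_accumulation_steps = 2
train_batch_size = per_device_train_batch_size * world_size * gradient_accumulation_steps
assert train_batch_size == 64  # effective global batch size that DeepSpeed validates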
@@ -216,6 +223,9 @@
if args.only_optimize_lora:
model = only_optimize_lora_parameters(model)

deepspeed.runtime.utils.see_memory_usage('**** post-model creation ****',
force=True)

# Prepare the data
train_phase = 1
train_dataset, eval_dataset = create_prompt_dataset(
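The utils/feature_selection.py module imported at the top of this file is not shown in this diff, so the following is only a minimal sketch, assuming its job at this call site is to resolve the new --zero_stage "auto" default into a concrete integer stage before the DeepSpeed config is built. The function body, thresholds, and heuristic below are illustrative assumptions, not the PR's actual implementation:

import torch

def feature_selection(args, model_class):
    # model_class is unused in this sketch; the real helper presumably inspects the
    # model (e.g. its parameter count) when choosing settings automatically.
    if args.zero_stage != "auto":
        args.zero_stage = int(args.zero_stage)
        return args
    # Assumed heuristic: pick a more aggressive ZeRO stage as per-GPU memory shrinks.
    total_mem_gib = torch.cuda.get_device_properties(0).total_memory / 2**30
    if total_mem_gib >= 80:
        args.zero_stage = 0   # plenty of memory: no partitioning
    elif total_mem_gib >= 40:
        args.zero_stage = 2   # partition optimizer states and gradients
    else:
        args.zero_stage = 3   # also partition the parameters
    return args

However the real module decides, main() only needs args.zero_stage to be an int by the time get_train_ds_config(offload=args.offload, stage=args.zero_stage) runs.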
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
Empty file.
3 changes: 2 additions & 1 deletion applications/DeepSpeed-Chat/training/utils/ds_utils.py
@@ -36,7 +36,8 @@ def get_train_ds_config(offload,
"zero_optimization": zero_opt_dict,
"fp16": {
"enabled": True,
"loss_scale_window": 100
"loss_scale_window": 100,
"initial_scale_power": 8
},
"gradient_clipping": 1.0,
"prescale_gradients": False,
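For context on the fp16 change above: loss_scale_window is the number of consecutive overflow-free steps DeepSpeed waits before raising the dynamic loss scale, and initial_scale_power sets the starting scale to 2**initial_scale_power, so the new value of 8 starts training at a loss scale of 256 rather than DeepSpeed's much larger default power of 16. The resulting fp16 block of the generated config is, in effect:

fp16_config = {
    "enabled": True,
    "loss_scale_window": 100,   # raise the scale again after 100 overflow-free steps
    "initial_scale_power": 8,   # initial dynamic loss scale of 2**8 = 256
}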