diff --git a/projects/fine_tuning/testing/config.yaml b/projects/fine_tuning/testing/config.yaml
index 1a67c277e6..785bb84615 100644
--- a/projects/fine_tuning/testing/config.yaml
+++ b/projects/fine_tuning/testing/config.yaml
@@ -99,6 +99,7 @@ ci_presets:
     tests.fine_tuning.test_settings.hyper_parameters.gradient_accumulation_steps: 1
     tests.fine_tuning.test_settings.hyper_parameters.per_device_train_batch_size: 4
+    tests.fine_tuning.test_settings.hyper_parameters.log_level: "debug"
     tests.fine_tuning.test_settings.hyper_parameters.peft_method: "none"
     tests.fine_tuning.test_settings.hyper_parameters.max_seq_length: 1024
     tests.fine_tuning.test_settings.hyper_parameters.use_flash_attn: true
diff --git a/projects/fine_tuning/toolbox/fine_tuning_run_fine_tuning_job/files/entrypoint/run_fms.sh b/projects/fine_tuning/toolbox/fine_tuning_run_fine_tuning_job/files/entrypoint/run_fms.sh
index 62882f1a8c..ebae00030c 100644
--- a/projects/fine_tuning/toolbox/fine_tuning_run_fine_tuning_job/files/entrypoint/run_fms.sh
+++ b/projects/fine_tuning/toolbox/fine_tuning_run_fine_tuning_job/files/entrypoint/run_fms.sh
@@ -16,13 +16,7 @@ if [[ $WORLD_SIZE == 1 ]]; then
     else
         echo "Running with a $NUM_GPUS GPUs"
     fi
-    time python -m torch.distributed.run \
-        --node_rank "$RANK" \
-        --nnodes "$WORLD_SIZE" \
-        --nproc_per_node "$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l) " \
-        --master_addr "$MASTER_ADDR" \
-        --master_port "$MASTER_PORT" \
-        launch_training.py
+    time python /app/accelerate_launch.py
     exit 0
 fi
echo "Running on $WORLD_SIZE machines with $NUM_GPUS GPUs each."
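
For context, below is a minimal sketch of the single-node launch that the removed `torch.distributed.run` block performed, rewritten with Hugging Face Accelerate's CLI. It assumes `/app/accelerate_launch.py` is an Accelerate-based launcher shipped in the training image that derives its topology from the same `RANK`/`WORLD_SIZE`/`MASTER_ADDR`/`MASTER_PORT` environment variables; the explicit flags and the `launch_training.py` path shown here are illustrative assumptions, not what the wrapper actually passes.

```bash
# Illustrative only: an explicit single-node `accelerate launch` call that mirrors
# the removed torch.distributed.run invocation (WORLD_SIZE == 1 branch).
# The real /app/accelerate_launch.py builds its own arguments; these are assumptions.
set -euo pipefail

# One worker process per visible GPU, same count the old invocation computed.
NUM_GPUS=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | wc -l)

time accelerate launch \
    --num_machines 1 \
    --machine_rank "${RANK:-0}" \
    --num_processes "$NUM_GPUS" \
    --main_process_ip "$MASTER_ADDR" \
    --main_process_port "$MASTER_PORT" \
    launch_training.py  # stand-in for the actual fine-tuning entrypoint
```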