3 files changed: +4 −3 lines

File 1:

@@ -51,6 +51,7 @@
   # batch / data settings
   "train_micro_batch_size_per_gpu": 4,
   "data-impl": "mmap",
+  "gradient_accumulation_steps": 1,

   # activation checkpointing
   "checkpoint-activations": true,
File 2:

@@ -1,5 +1,6 @@
 # Suggested data paths when using GPT-NeoX locally
 {
+  "global_num_gpus": 1,
   "data-path": "data/enwik8/enwik8_text_document",

   # or for weighted datasets:
@@ -24,7 +25,7 @@

   "tensorboard-dir": "tensorboard",
   "log-dir": "logs",
-  "use_wandb": True,
+  "use_wandb": False,
   "wandb_host": "https://api.wandb.ai",
   "wandb_project": "neox"
 }
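The edited config files are YAML, and YAML 1.1 resolves the Python-style literals `True`/`False` to booleans, which is why flipping the default to `False` disables W&B logging without any parser changes. A minimal sketch of that behavior, assuming PyYAML (this is illustrative, not GPT-NeoX's actual config loader):

```python
import yaml  # PyYAML; assumed to be installed

# YAML 1.1 resolves Python-style True/False to booleans, so the config
# flip above changes the parsed value without any code changes.
snippet = '{ "use_wandb": False, "global_num_gpus": 1 }'
cfg = yaml.safe_load(snippet)
assert cfg["use_wandb"] is False
assert cfg["global_num_gpus"] == 1
```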
File 3:

@@ -941,8 +941,7 @@ class NeoXArgsTraining(NeoXArgsTemplate):
     """

     gas: int = None
-    """gradient_accumulation_steps"""  # TODO this is a duplicate, remove?
-
+    gradient_accumulation_steps: int = 1
     clip_grad: float = None
     """
     Gradient clipping based on global L2 norm.
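The Python change promotes `gradient_accumulation_steps` to a first-class typed field with a default of 1, replacing the docstring that only marked `gas` as its duplicate. A minimal sketch (a stand-in, not the repo's actual class) of the dataclass pattern involved, plus one plausible way the leftover alias could be reconciled:

```python
from dataclasses import dataclass
from typing import Optional

@dataclass
class TrainingArgsSketch:
    """Illustrative stand-in for NeoXArgsTraining, not the real class."""

    gas: Optional[int] = None  # legacy alias the TODO flags as a duplicate
    gradient_accumulation_steps: int = 1
    clip_grad: Optional[float] = None

    def __post_init__(self):
        # Assumed reconciliation, not NeoX's actual logic: a config that
        # still sets the legacy alias overrides the canonical field.
        if self.gas is not None:
            self.gradient_accumulation_steps = self.gas

# Usage: an old config that only sets `gas` still lands in the new field.
args = TrainingArgsSketch(gas=8)
assert args.gradient_accumulation_steps == 8
```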