-
Notifications
You must be signed in to change notification settings - Fork 1
/
tuning_config.json
42 lines (38 loc) · 1.13 KB
/
tuning_config.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
{
"paths" : {
"main_path" : "data",
"train_path" : "%main_path%/eltec_ud_ner.jsonl",
"dev_path" : "%main_path%/dev_SrpKor4Tagging.jsonl",
"tokenizer_path" :"%main_path%/tokenizer.json",
"model_folder" : "saved/tune-saved",
"pretrained" : "C:\\Users\\mihailo\\OneDrive - Faculty of Mining and Geology\\resursi\\bert modeli\\bertovic-base"
},
"model-options": {
"model_type" : "roberta-base",
"tuning_task": "token-classification",
"token_col": "token",
"label_col": "ner"
},
"training-options": {
"num_train_epochs" : 2,
"per_device_train_batch_size" : 2,
"per_device_eval_batch_size" : 2,
"learning_rate" : 0.00005,
"weight_decay" : 0,
"warmup_steps" : 2000,
"save_total_limit" : 1,
"load_best_model_at_end" : true,
"overwrite_output_dir" : true,
"evaluation_strategy" : "epoch",
"output_dir" : "saved/tune-saved",
"save_strategy" : "epoch"
},
"tokenizer_training": {
"path" : "C:/gpt2/tokenizer/",
"size": 49152,
"freq": 2,
"special_tokens": ["<s>", "<pad>", "</s>", "<unk>", "<mask>"],
"unk_token": "<UNK>",
"type": "BPE"
}
}