-
Notifications
You must be signed in to change notification settings - Fork 0
/
params.yaml
63 lines (63 loc) · 1.32 KB
/
params.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# Settings grouped under `ogt_protein_classifier_data_prep`.
# NOTE(review): presumably read by the classifier data-preparation stage;
# confirm against the pipeline definition.
ogt_protein_classifier_data_prep:
  # Splitting strategy: 'random' (random splitting) or 'taxid' (by taxid).
  split_type: taxid
  # Two-element list. NOTE(review): looks like [lower, upper] bounds of a
  # window; confirm units and inclusivity with the consumer.
  ogt_window:
    - 30.0
    - 60.0
  max_protein_len: 250
  min_protein_len: 50
  train_test_frac: 0.1
  # Deliberately kept as a list of single-key mappings (not one mapping),
  # so the parsed shape handed to the consumer is unchanged.
  deduplication:
    - do: true  # canonical lowercase boolean
    - jaccard: 0.6
    - kgram: 3
    - num_perm: 100
  min_balance: 0.5
  max_upsampling: 0.0
  data_batch_size: 200
  # NOTE(review): `dev_` switches are presumably development aids; confirm.
  dev_keep_columns: true
  dev_sample_init_data: false
# Settings grouped under `ogt_protein_classifier_train_evaluate`.
# NOTE(review): presumably read by the classifier train/evaluate stage;
# confirm against the pipeline definition.
ogt_protein_classifier_train_evaluate:
  model: protbert
  protocol: finetune
  dropout: 0.0
  batch_size: 32
  epochs: 2
  # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `5e-5` as the string '5e-5' rather than a float, while
  # `5.0e-5` is a float under both YAML 1.1 and 1.2.
  lr: 5.0e-5
  lr_scheduler: linear
  n_save_per_epoch: 10
  grad_checkpointing: true
  grad_accum: 25
  fp16: true
  push: true
  # Explicit null. NOTE(review): confirm how the consumer tells null apart
  # from false for this key.
  dev_subsample_data: null
# Settings grouped under `ogt_protein_regressor_data_prep`.
# NOTE(review): presumably read by the regressor data-preparation stage;
# confirm against the pipeline definition.
ogt_protein_regressor_data_prep:
  split_type: taxid
  max_protein_len: 250
  min_protein_len: 50
  train_test_frac: 0.1
  # List of single-key mappings; the first entry is the on/off switch.
  deduplication:
    - do: true
    - jaccard: 0.6
    - kgram: 3
    - num_perm: 100
  # Same list-of-single-key-mappings layout as `deduplication`.
  balancing:
    - do: true
    # String value, not a number. NOTE(review): presumably asks the consumer
    # to choose the bin size itself; confirm.
    - max_bin_size: 'auto'
    - num_bins: 20
    - min_total_data_kept: 150000
  data_batch_size: 200
  # NOTE(review): `dev_` switches are presumably development aids; confirm.
  dev_keep_columns: true
  dev_sample_init_data: false
# Settings grouped under `ogt_protein_regressor_train_evaluate`.
# NOTE(review): presumably read by the regressor train/evaluate stage;
# confirm against the pipeline definition.
ogt_protein_regressor_train_evaluate:
  model: protbert
  protocol: finetune
  dropout: 0.0
  batch_size: 32
  epochs: 1
  # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML)
  # resolve a bare `1e-5` as the string '1e-5' rather than a float, while
  # `1.0e-5` is a float under both YAML 1.1 and 1.2.
  lr: 1.0e-5
  lr_scheduler: linear
  n_save_per_epoch: 10
  grad_checkpointing: true
  grad_accum: 25
  fp16: true
  # Boolean false. NOTE(review): confirm the type the consumer expects for
  # this key.
  dev_subsample_data: false