# default_settings.yaml
# CONST CONFIG
# Fragment settings of the model. `frag_types` and `max_frag_charge` are nested
# under `model` so that they do not collide with the keys of the same name that
# are listed again in the `library` section.
model:
  frag_types:
    - b
    - y
    - b_modloss
    - y_modloss
  max_frag_charge: 2
# Base folder; other settings refer to it through the `{PEPTDEEP_HOME}`
# placeholder. `~` refers to the user folder (e.g. C:/Users/username).
PEPTDEEP_HOME: '~/peptdeep'
local_model_zip_name: 'pretrained_models.zip'
local_hla_model_zip_name: 'hla_model.zip'

# overwritable config
# URLs whose file names match the two zip names above.
model_url: 'https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/pretrained_models.zip'
hla_model_url: 'https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/hla_model.zip'

task_workflow:
  - library
# presumably the values accepted in `task_workflow` -- confirm with the consumer
task_choices:
  - train
  - library
  # - rescore

thread_num: 16
MAX_THREADS: 60
# Compute-device selection. The three keys below are children of
# `torch_device`; without the indentation they would become unrelated
# top-level keys and `torch_device` itself would be null.
torch_device:
  device_type: gpu
  # presumably the values accepted for `device_type` -- confirm with the consumer
  device_type_choices:
    - get_available
    - gpu
    - mps
    - cpu
  device_ids: []
# Logging verbosity; `log_level_choices` lists the level names from least to
# most severe.
log_level: info
log_level_choices:
  - debug
  - info
  - warning
  - error
  - critical
# Settings grouped under `common`. `user_defined_modifications` is an empty
# mapping by default; the commented example shows the expected entry layout.
common:
  modloss_importance_level: 1.0
  user_defined_modifications: {}
  # For example,
  # user_defined_modifications:
  #   "Dimethyl2@Any_N-term":
  #     composition: "H(2)2H(2)C(2)"
  #     modloss_composition: "H(0)"  # can be missing if no modloss
  #   "Dimethyl2@K":
  #     composition: "H(2)2H(2)C(2)"
  #   "Dimethyl6@Any_N-term":
  #     composition: "2H(4)13C(2)"
  #   "Dimethyl6@K":
  #     composition: "2H(4)13C(2)"
# MS1 / MS2 matching tolerances, nested under `peak_matching`.
# NOTE(review): `*_ppm: True` presumably means the matching `*_tol_value` is
# given in ppm -- confirm with the consumer.
peak_matching:
  ms2_ppm: True
  ms2_tol_value: 20.0
  ms1_ppm: True
  ms1_tol_value: 20.0
# Model-manager settings.
# `predict` and `transfer` both define `batch_size_ms2`, `batch_size_rt_ccs`
# and `verbose`, so they have to be separate nested mappings; flattening them
# produces duplicate keys.
# NOTE(review): the nesting was reconstructed from a copy that had lost its
# indentation. In particular, the keys from `psm_type` to
# `psm_modification_mapping` are placed under `transfer` -- confirm against the
# code that reads these settings.
model_mgr:
  default_nce: 30.0
  default_instrument: Lumos
  mask_modloss: True
  model_type: generic
  model_choices:
    - generic
    - phos
    - hla  # same as generic
    - digly
  external_ms2_model: ''
  external_rt_model: ''
  external_ccs_model: ''
  charge_model_type: seq
  charge_model_choices:
    - seq
    - modseq
  charge_model_file: ''
  charge_prob_cutoff: 0.3
  use_predicted_charge_in_speclib: True  # if True, it ignores min/max_precursor_charge in `library`
  # Maps an instrument name (key) to the instrument group used for it (value).
  instrument_group:
    ThermoTOF: ThermoTOF
    Astral: Lumos
    Lumos: Lumos
    QE: QE
    timsTOF: timsTOF
    SciexTOF: SciexTOF
    Fusion: Lumos
    Eclipse: Lumos
    Velos: Lumos  # not important
    Elite: Lumos  # not important
    OrbitrapTribrid: Lumos
    ThermoTribrid: Lumos
    QE+: QE
    QEHF: QE
    QEHFX: QE
    Exploris: QE
    Exploris480: QE
  predict:
    batch_size_ms2: 512
    batch_size_rt_ccs: 1024
    batch_size_charge: 1024
    verbose: True
    multiprocessing: True
  transfer:
    model_output_folder: "{PEPTDEEP_HOME}/refined_models"
    epoch_ms2: 20
    warmup_epoch_ms2: 10
    batch_size_ms2: 512
    lr_ms2: 0.0001
    epoch_rt_ccs: 40
    warmup_epoch_rt_ccs: 10
    batch_size_rt_ccs: 1024
    lr_rt_ccs: 0.0001
    verbose: False
    grid_nce_search: False
    grid_nce_first: 15.0
    grid_nce_last: 45.0
    grid_nce_step: 3.0
    grid_instrument: ['Lumos']
    psm_type: alphapept
    psm_type_choices:
      - alphapept
      - pfind
      - maxquant
      - diann
      - speclib_tsv
      - msfragger_pepxml
      - spectronaut_report
    dda_psm_types:  # otherwise DIA
      - alphapept
      - pfind
      - maxquant
      - msfragger_pepxml
    psm_files: []
    ms_file_type: alphapept_hdf
    ms_file_type_choices:
      - alphapept_hdf
      - thermo_raw
      - mgf
      - mzml
    ms_files: []
    psm_num_to_train_ms2: 100000000
    psm_num_per_mod_to_train_ms2: 50
    psm_num_to_test_ms2: 0
    psm_num_to_train_rt_ccs: 100000000
    psm_num_per_mod_to_train_rt_ccs: 50
    psm_num_to_test_rt_ccs: 0
    top_n_mods_to_train: 10
    psm_modification_mapping: {}  # alphabase modifications to modifications of other engine PSMs
    # Example (note that `X(UniMod:id)` format can directly be recognized by alphabase),
    # psm_modification_mapping:
    #   Dimethyl@Any_N-term:
    #     - _(Dimethyl-n-0)
    #     - _(Dimethyl)
    #   Dimethyl:2H(2)@K:
    #     - K(Dimethyl-K-2)
    #   ...
# Disabled (commented-out) `percolator` section, kept for reference.
# percolator:
#   require_model_tuning: True
#   raw_num_to_tune: 8
#   require_raw_specific_tuning: True
#   raw_specific_ms2_tuning: False
#   psm_num_per_raw_to_tune: 200
#   epoch_per_raw_to_tune: 5
#   multiprocessing: True
#   top_k_frags_to_calc_spc: 10
#   calibrate_frag_mass_error: False
#   max_perc_train_sample: 1000000
#   min_perc_train_sample: 100
#   percolator_backend: sklearn
#   percolator_backend_choices:
#     - sklearn
#     - pytorch
#   percolator_model: linear
#   percolator_model_choices:
#     pytorch_as_backend:
#       - linear # not fully tested, performance may be unstable
#       - mlp # not implemented yet
#     sklearn_as_backend:
#       - linear # logistic regression
#       - random_forest
#   lr_percolator_torch_model: 0.1 # learning rate, only used when percolator_backend==pytorch
#   percolator_iter_num: 5 # percolator iteration number
#   cv_fold: 1
#   fdr: 0.01
#   fdr_level: psm
#   fdr_level_choices:
#     - psm
#     - precursor
#     - peptide
#     - sequence
#   use_fdr_for_each_raw: False
#   frag_types: ['b_z1','b_z2','y_z1','y_z2']
#   input_files:
#     psm_type: alphapept
#     psm_type_choices:
#       - alphapept
#       - pfind
#     psm_files: []
#     ms_file_type: alphapept_hdf
#     ms_file_type_choices:
#       - alphapept_hdf
#       - thermo_raw # if alpharaw is installed
#       - mgf
#       - mzml
#     ms_files: []
#   output_folder: "{PEPTDEEP_HOME}/rescore"
# Library settings.
# `max_frag_charge` and `frag_types` are nested here so they stay distinct from
# the keys of the same name in the `model` section.
# NOTE(review): the nesting was reconstructed from a copy that had lost its
# indentation. The extent of the `fasta` and `output_tsv` sub-mappings is
# inferred -- confirm against the code that reads these settings.
library:
  infile_type: fasta
  infile_type_choices:
    - fasta
    - sequence_table
    - peptide_table  # sequence with mods and mod_sites
    - precursor_table  # peptide with charge state
    - all_other_psm_reader_types  # see psm_type_choices in model_mgr section
  infiles:
    - xxx.fasta
  fasta:
    protease: 'trypsin'
    # Single-quoted so the backslash and regex metacharacters stay literal.
    protease_choices:
      - 'trypsin'
      - '([KR])'
      - 'trypsin_not_P'
      - '([KR](?=[^P]))'
      - 'lys-c'
      - 'K'
      - 'lys-n'
      - '\w(?=K)'
      - 'chymotrypsin'
      - 'asp-n'
      - 'glu-c'
    max_miss_cleave: 2
    add_contaminants: False
  fix_mods:
    - Carbamidomethyl@C
  var_mods:
    - Acetyl@Protein_N-term
    - Oxidation@M
  special_mods: []  # normally for Phospho or GlyGly@K
  special_mods_cannot_modify_pep_n_term: False
  special_mods_cannot_modify_pep_c_term: False
  labeling_channels: {}
  # For example,
  # labeling_channels:
  #   0: ['Dimethyl@Any_N-term','Dimethyl@K']
  #   4: ['Dimethyl:2H(2)@Any_N-term','Dimethyl:2H(2)@K']
  #   8: [...]
  min_var_mod_num: 0
  max_var_mod_num: 2
  min_special_mod_num: 0
  max_special_mod_num: 1
  min_precursor_charge: 2
  max_precursor_charge: 4
  min_peptide_len: 7
  max_peptide_len: 35
  min_precursor_mz: 200.0
  max_precursor_mz: 2000.0
  # A plain `None` is loaded by YAML as the string "None", not as null.
  decoy: None
  decoy_choices:
    - None
    - pseudo_reverse
    - diann
  max_frag_charge: 2
  frag_types:
    - b
    - y
  rt_to_irt: False
  irt_library: xxx/library.tsv
  irt_library_type: speclib_tsv
  generate_precursor_isotope: False
  output_folder: "{PEPTDEEP_HOME}/spec_libs"
  output_tsv:
    enabled: False
    min_fragment_mz: 200.0
    max_fragment_mz: 2000.0
    min_relative_intensity: 0.001
    # NOTE(review): the key is spelled `higest` (sic); left unchanged because
    # consumers presumably look it up by this exact name.
    keep_higest_k_peaks: 12
    translate_batch_size: 100000
    translate_mod_to_unimod_id: False