# peptdeep/constants/default_settings.yaml

# Constant configuration
model:
  # Fragment types listed for the model (b/y ions plus their modloss variants).
  frag_types:
    - b
    - y
    - b_modloss
    - y_modloss
  max_frag_charge: 2

# Base folder; it is referenced as `{PEPTDEEP_HOME}` by folder settings in this file.
PEPTDEEP_HOME: "~/peptdeep" # ~ refers to the user's home folder (e.g. C:/Users/username)

# File names of the model zip archives (same file names as at the end of
# `model_url` and `hla_model_url`).
local_model_zip_name: "pretrained_models.zip"
local_hla_model_zip_name: "hla_model.zip"

# Overwritable configuration
# URLs of the model zip archives; the trailing file names match
# `local_model_zip_name` and `local_hla_model_zip_name`.
model_url: "https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/pretrained_models.zip"
hla_model_url: "https://github.com/MannLabs/alphapeptdeep/releases/download/pre-trained-models/hla_model.zip"

# Selected task(s); the available options are listed in `task_choices`.
task_workflow:
  - library
task_choices:
  - train
  - library
  # - rescore
thread_num: 16
# NOTE(review): presumably an upper bound for `thread_num` -- confirm against the consumer.
MAX_THREADS: 60
# Torch device selection.
torch_device:
  # Selected device type; the available options are listed in `device_type_choices`.
  device_type: gpu
  device_type_choices:
    - get_available
    - gpu
    - mps
    - cpu
  # Empty by default.
  # NOTE(review): presumably an empty list means "no explicit device ids" -- confirm.
  device_ids: []

# Selected log level; the available options are listed in `log_level_choices`.
log_level: info
log_level_choices:
  - debug
  - info
  - warning
  - error
  - critical

# NOTE(review): presumably settings shared by all tasks -- confirm against the consumer.
common:
  modloss_importance_level: 1.0
  # Empty mapping by default; entries are keyed by "<mod name>@<site>".
  user_defined_modifications: {}
  # Example:
  # user_defined_modifications:
  #   "Dimethyl2@Any_N-term":
  #     composition: "H(2)2H(2)C(2)"
  #     modloss_composition: "H(0)" # can be omitted if there is no modloss
  #   "Dimethyl2@K":
  #     composition: "H(2)2H(2)C(2)"
  #   "Dimethyl6@Any_N-term":
  #     composition: "2H(4)13C(2)"
  #   "Dimethyl6@K":
  #     composition: "2H(4)13C(2)"

# Peak-matching tolerances for MS2 and MS1.
# NOTE(review): presumably `*_ppm: true` means the matching `*_tol_value` is in ppm -- confirm.
peak_matching:
  ms2_ppm: true
  ms2_tol_value: 20.0
  ms1_ppm: true
  ms1_tol_value: 20.0

# Model manager settings: model selection, prediction (`predict`) and
# transfer learning (`transfer`).
model_mgr:
  default_nce: 30.0
  default_instrument: Lumos
  mask_modloss: true
  # Selected model type; the available options are listed in `model_choices`.
  model_type: generic
  model_choices:
    - generic
    - phos
    - hla  # same as generic
    - digly
  # Empty strings by default.
  external_ms2_model: ""
  external_rt_model: ""
  external_ccs_model: ""
  # Selected charge model type; options are listed in `charge_model_choices`.
  charge_model_type: seq
  charge_model_choices:
    - seq
    - modseq
  charge_model_file: ""
  charge_prob_cutoff: 0.3
  # If true, min/max_precursor_charge in `library` are ignored.
  use_predicted_charge_in_speclib: true
  # Maps an instrument name (key) to an instrument group (value).
  instrument_group:
    ThermoTOF: ThermoTOF
    Astral: Lumos
    Lumos: Lumos
    QE: QE
    timsTOF: timsTOF
    SciexTOF: SciexTOF
    Fusion: Lumos
    Eclipse: Lumos
    Velos: Lumos  # not important
    Elite: Lumos  # not important
    OrbitrapTribrid: Lumos
    ThermoTribrid: Lumos
    QE+: QE
    QEHF: QE
    QEHFX: QE
    Exploris: QE
    Exploris480: QE
  predict:
    batch_size_ms2: 512
    batch_size_rt_ccs: 1024
    batch_size_charge: 1024
    verbose: true
    multiprocessing: true
  transfer:
    model_output_folder: "{PEPTDEEP_HOME}/refined_models"
    epoch_ms2: 20
    warmup_epoch_ms2: 10
    batch_size_ms2: 512
    lr_ms2: 0.0001
    epoch_rt_ccs: 40
    warmup_epoch_rt_ccs: 10
    batch_size_rt_ccs: 1024
    lr_rt_ccs: 0.0001
    verbose: false
    grid_nce_search: false
    grid_nce_first: 15.0
    grid_nce_last: 45.0
    grid_nce_step: 3.0
    grid_instrument:
      - Lumos
    # Selected PSM type; the available options are listed in `psm_type_choices`.
    psm_type: alphapept
    psm_type_choices:
      - alphapept
      - pfind
      - maxquant
      - diann
      - speclib_tsv
      - msfragger_pepxml
      - spectronaut_report
    # PSM types treated as DDA; all other types are treated as DIA.
    dda_psm_types:
      - alphapept
      - pfind
      - maxquant
      - msfragger_pepxml
    psm_files: []
    # Selected MS file type; options are listed in `ms_file_type_choices`.
    ms_file_type: alphapept_hdf
    ms_file_type_choices:
      - alphapept_hdf
      - thermo_raw
      - mgf
      - mzml
    ms_files: []
    psm_num_to_train_ms2: 100000000
    psm_num_per_mod_to_train_ms2: 50
    psm_num_to_test_ms2: 0
    psm_num_to_train_rt_ccs: 100000000
    psm_num_per_mod_to_train_rt_ccs: 50
    psm_num_to_test_rt_ccs: 0
    top_n_mods_to_train: 10
    # Maps alphabase modification names (keys) to the modification strings
    # used in PSMs of other search engines (values).
    psm_modification_mapping: {}
    # Example (note that the `X(UniMod:id)` format is recognized by alphabase directly):
    # psm_modification_mapping:
    #   Dimethyl@Any_N-term:
    #     - _(Dimethyl-n-0)
    #     - _(Dimethyl)
    #   Dimethyl:2H(2)@K:
    #     - K(Dimethyl-K-2)
    #   ...
# percolator:
#   require_model_tuning: True
#   raw_num_to_tune: 8

#   require_raw_specific_tuning: True
#   raw_specific_ms2_tuning: False
#   psm_num_per_raw_to_tune: 200
#   epoch_per_raw_to_tune: 5

#   multiprocessing: True

#   top_k_frags_to_calc_spc: 10
#   calibrate_frag_mass_error: False
#   max_perc_train_sample: 1000000
#   min_perc_train_sample: 100

#   percolator_backend: sklearn
#   percolator_backend_choices:
#     - sklearn
#     - pytorch
#   percolator_model: linear
#   percolator_model_choices:
#     pytorch_as_backend:
#       - linear # not fully tested, performance may be unstable
#       - mlp # not implemented yet
#     sklearn_as_backend:
#       - linear # logistic regression
#       - random_forest
#   lr_percolator_torch_model: 0.1 # learning rate, only used when percolator_backend==pytorch
#   percolator_iter_num: 5 # percolator iteration number
#   cv_fold: 1
#   fdr: 0.01
#   fdr_level: psm
#   fdr_level_choices:
#     - psm
#     - precursor
#     - peptide
#     - sequence
#   use_fdr_for_each_raw: False
#   frag_types: ['b_z1','b_z2','y_z1','y_z2']
#   input_files:
#     psm_type: alphapept
#     psm_type_choices:
#       - alphapept
#       - pfind
#     psm_files: []
#     ms_file_type: alphapept_hdf
#     ms_file_type_choices:
#       - alphapept_hdf
#       - thermo_raw # if alpharaw is installed
#       - mgf
#       - mzml
#     ms_files: []
#   output_folder: "{PEPTDEEP_HOME}/rescore"
# Spectral library settings: input files, digestion, modifications,
# precursor/peptide filters and output.
library:
  # Selected input type; the available options are listed in `infile_type_choices`.
  infile_type: fasta
  infile_type_choices:
    - fasta
    - sequence_table
    - peptide_table  # sequence with mods and mod_sites
    - precursor_table  # peptide with charge state
    - all_other_psm_reader_types  # see psm_type_choices in model_mgr section
  # Placeholder input path.
  infiles:
    - xxx.fasta
  fasta:
    protease: 'trypsin'
    # The choices mix protease names with regular expressions.
    protease_choices:
      - 'trypsin'
      - '([KR])'
      - 'trypsin_not_P'
      - '([KR](?=[^P]))'
      - 'lys-c'
      - 'K'
      - 'lys-n'
      - '\w(?=K)'
      - 'chymotrypsin'
      - 'asp-n'
      - 'glu-c'
    max_miss_cleave: 2
    add_contaminants: false
  fix_mods:
    - Carbamidomethyl@C
  var_mods:
    - Acetyl@Protein_N-term
    - Oxidation@M
  special_mods: []  # normally for Phospho or GlyGly@K
  special_mods_cannot_modify_pep_n_term: false
  special_mods_cannot_modify_pep_c_term: false
  # Empty mapping by default.
  labeling_channels: {}
  # Example:
  # labeling_channels:
  #   0: ['Dimethyl@Any_N-term','Dimethyl@K']
  #   4: ['Dimethyl:2H(2)@Any_N-term','Dimethyl:2H(2)@K']
  #   8: [...]
  min_var_mod_num: 0
  max_var_mod_num: 2
  min_special_mod_num: 0
  max_special_mod_num: 1
  min_precursor_charge: 2
  max_precursor_charge: 4
  min_peptide_len: 7
  max_peptide_len: 35
  min_precursor_mz: 200.0
  max_precursor_mz: 2000.0
  # The value is the string 'None', not YAML null.
  decoy: 'None'
  decoy_choices:
    - 'None'
    - pseudo_reverse
    - diann
  max_frag_charge: 2
  frag_types:
    - b
    - y
  rt_to_irt: false
  # Placeholder path.
  irt_library: xxx/library.tsv
  irt_library_type: speclib_tsv
  generate_precursor_isotope: false
  output_folder: "{PEPTDEEP_HOME}/spec_libs"
  output_tsv:
    enabled: false
    min_fragment_mz: 200.0
    max_fragment_mz: 2000.0
    min_relative_intensity: 0.001
    # NOTE(review): the key is spelled `higest` (sic); keep it unchanged unless
    # every reader of this key is updated as well.
    keep_higest_k_peaks: 12
    translate_batch_size: 100000
    translate_mod_to_unimod_id: false