diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index 21fb8d75..00000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,6 +0,0 @@ -repos: -- repo: https://github.com/psf/black - rev: stable - hooks: - - id: black - language_version: python3.7 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 13d0a31f..00000000 --- a/.travis.yml +++ /dev/null @@ -1,21 +0,0 @@ -dist: xenial -language: python -python: - - "3.7" - -install: - - sudo apt-get update - - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh - - bash miniconda.sh -b -p $HOME/miniconda - - export PATH="$HOME/miniconda/bin:$PATH" - - hash -r - - conda config --set always_yes yes --set changeps1 no - - conda update -q conda - - conda config --add channels bioconda - - conda install conda-build conda-verify cython numpy - - conda info -a - - conda build . --numpy 1.15 --python 3.7 -c bioconda - - conda install ms2pip --use-local -c bioconda - -script: - - ms2pip -h diff --git a/README.md b/README.md index cfe0a6e7..f189fb6f 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ [![PyPI - Python Version](https://img.shields.io/pypi/pyversions/ms2pip?style=flat-square)](https://pypi.org/project/ms2pip/) [![PyPI](https://img.shields.io/pypi/v/ms2pip?style=flat-square)](https://pypi.org/project/ms2pip/) [![PyPI - Wheel](https://img.shields.io/pypi/wheel/ms2pip?style=flat-square)](https://pypi.org/project/ms2pip/) -[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/compomics/ms2pip_c/Tests)](https://github.com/compomics/ms2pip_c/actions) +[![GitHub Workflow Status](https://img.shields.io/github/workflow/status/compomics/ms2pip_c/Tests?style=flat-square)](https://github.com/compomics/ms2pip_c/actions) [![GitHub issues](https://img.shields.io/github/issues/compomics/ms2pip_c?style=flat-square)](https://github.com/compomics/ms2pip_c/issues) 
[![GitHub](https://img.shields.io/github/license/compomics/ms2pip_c.svg?style=flat-square)](https://www.apache.org/licenses/LICENSE-2.0) diff --git a/fasta2speclib/fasta2speclib.py b/fasta2speclib/fasta2speclib.py index b9a3cb85..be85812c 100644 --- a/fasta2speclib/fasta2speclib.py +++ b/fasta2speclib/fasta2speclib.py @@ -5,13 +5,13 @@ - Remove peptide redundancy - Add all variations of variable modifications (max 7 PTMs/peptide) - Add variations on charge state -- Run peptides through MS2PIP -- Write to MSP, MGF or HDF file +- Predict spectra with MS2PIP +- Write to various output file formats """ __author__ = "Ralf Gabriels" -__copyright__ = "CompOmics 2018" +__copyright__ = "CompOmics" __credits__ = ["Ralf Gabriels", "Sven Degroeve", "Lennart Martens"] __license__ = "Apache License, Version 2.0" __email__ = "Ralf.Gabriels@ugent.be" @@ -39,13 +39,26 @@ def ArgParse(): - parser = argparse.ArgumentParser(description='Create an MS2PIP-predicted spectral library, starting from a fasta file.') - parser.add_argument('fasta_filename', action='store', - help='Path to the fasta file containing protein sequences') - parser.add_argument('-o', dest='output_filename', action='store', - help='Name for output file(s) (if not given, derived from input file)') - parser.add_argument('-c', dest='config_filename', action='store', - help='Name of configuration json file (default: fasta2speclib_config.json)') + parser = argparse.ArgumentParser( + description="Create an MS2PIP-predicted spectral library, starting from a fasta file." 
+ ) + parser.add_argument( + "fasta_filename", + action="store", + help="Path to the fasta file containing protein sequences", + ) + parser.add_argument( + "-o", + dest="output_filename", + action="store", + help="Name for output file(s) (if not given, derived from input file)", + ) + parser.add_argument( + "-c", + dest="config_filename", + action="store", + help="Name of configuration json file (default: fasta2speclib_config.json)", + ) args = parser.parse_args() return args @@ -55,22 +68,23 @@ def get_params(): args = ArgParse() if not args.config_filename: - config_filename = 'fasta2speclib_config.json' + config_filename = "fasta2speclib_config.json" else: config_filename = args.config_filename - with open(config_filename, 'rt') as config_file: + with open(config_filename, "rt") as config_file: params = json.load(config_file) - params.update({ - 'fasta_filename': args.fasta_filename, - 'log_level': logging.INFO, - }) + params.update( + {"fasta_filename": args.fasta_filename, "log_level": logging.INFO,} + ) if args.output_filename: - params['output_filename'] = args.output_filename + params["output_filename"] = args.output_filename else: - params['output_filename'] = '_'.join(params['fasta_filename'].split('\\')[-1].split('.')[:-1]) + params["output_filename"] = "_".join( + params["fasta_filename"].split("\\")[-1].split(".")[:-1] + ) return params @@ -78,22 +92,26 @@ def get_params(): def prot_to_peprec(protein): params = get_params() # Calculate longest and shortest possible peptide with given max_pepmass - max_pepmass_min_len = int(params['max_pepmass'] / 186.08 + 2) - max_pepmass_max_len = int(params['max_pepmass'] / 57.02 + 2) - tmp = pd.DataFrame(columns=['spec_id', 'peptide', 'modifications', 'charge']) + max_pepmass_min_len = int(params["max_pepmass"] / 186.08 + 2) + max_pepmass_max_len = int(params["max_pepmass"] / 57.02 + 2) + tmp = pd.DataFrame(columns=["spec_id", "peptide", "modifications", "charge"]) pep_count = 0 - for peptide in 
cleave(str(protein.seq), expasy_rules['trypsin'], params['missed_cleavages']): - if False not in [aa not in peptide for aa in ['B', 'J', 'O', 'U', 'X', 'Z']]: - if params['min_peplen'] <= len(peptide) <= max_pepmass_max_len: + for peptide in cleave( + str(protein.seq), expasy_rules["trypsin"], params["missed_cleavages"] + ): + if False not in [aa not in peptide for aa in ["B", "J", "O", "U", "X", "Z"]]: + if params["min_peplen"] <= len(peptide) <= max_pepmass_max_len: # Skip peptide if it's mass is larger than allowed # Only calculate if longer than shortest possible peptide with max_pepmass if len(peptide) > max_pepmass_min_len: - if mass.calculate_mass(sequence=peptide) > params['max_pepmass']: + if mass.calculate_mass(sequence=peptide) > params["max_pepmass"]: continue pep_count += 1 row = { - 'spec_id': '{}_{:03d}'.format(protein.id, pep_count), - 'peptide': peptide, 'modifications': '-', 'charge': np.nan + "spec_id": "{}_{:03d}".format(protein.id, pep_count), + "peptide": peptide, + "modifications": "-", + "charge": np.nan, } tmp = tmp.append(row, ignore_index=True) return tmp @@ -101,14 +119,14 @@ def prot_to_peprec(protein): def get_protein_list(df): peptide_to_prot = {} - for pi, pep in zip(df['spec_id'], df['peptide']): - pi = '_'.join(pi.split('_')[0:2]) + for pi, pep in zip(df["spec_id"], df["peptide"]): + pi = "_".join(pi.split("_")[0:2]) if pep in peptide_to_prot.keys(): peptide_to_prot[pep].append(pi) else: peptide_to_prot[pep] = [pi] - df['protein_list'] = [list(set(peptide_to_prot[pep])) for pep in df['peptide']] - df = df[~df.duplicated(['peptide', 'charge', 'modifications'])] + df["protein_list"] = [list(set(peptide_to_prot[pep])) for pep in df["peptide"]] + df = df[~df.duplicated(["peptide", "charge", "modifications"])] return df @@ -121,78 +139,101 @@ def add_mods(tup): mod_versions = [dict()] # First add all fixed modifications - for mod in params['modifications']: - if mod['fixed']: - if not mod['n_term'] and mod['amino_acid']: - 
mod_versions[0].update({i:mod['name'] for i, aa in enumerate(row['peptide']) if aa == mod['amino_acid']}) - elif mod['n_term']: - if mod['amino_acid']: - if row['peptide'][0] == mod['amino_acid']: - mod_versions[0]['N'] = mod['name'] + for mod in params["modifications"]: + if mod["fixed"]: + if not mod["n_term"] and mod["amino_acid"]: + mod_versions[0].update( + { + i: mod["name"] + for i, aa in enumerate(row["peptide"]) + if aa == mod["amino_acid"] + } + ) + elif mod["n_term"]: + if mod["amino_acid"]: + if row["peptide"][0] == mod["amino_acid"]: + mod_versions[0]["N"] = mod["name"] else: - mod_versions[0]['N'] = mod['name'] + mod_versions[0]["N"] = mod["name"] # Continue with variable modifications - for mod in params['modifications']: - if mod['fixed']: + for mod in params["modifications"]: + if mod["fixed"]: continue - # List all positions with specific amino acid, to avoid combinatorial explotion, limit to 4 positions - all_pos = [i for i, aa in enumerate(row['peptide']) if aa == mod['amino_acid']] + # List all positions with specific amino acid, to avoid combinatorial explotion, + # limit to 4 positions + all_pos = [i for i, aa in enumerate(row["peptide"]) if aa == mod["amino_acid"]] if len(all_pos) > 4: all_pos = all_pos[:4] for version in mod_versions: # For non-position-specific mods: - if not mod['n_term']: + if not mod["n_term"]: pos = [p for p in all_pos if p not in version.keys()] - combos = [x for l in range(1, len(pos) + 1) for x in combinations(pos, l)] + combos = [ + x for l in range(1, len(pos) + 1) for x in combinations(pos, l) + ] for combo in combos: new_version = version.copy() for pos in combo: - new_version[pos] = mod['name'] + new_version[pos] = mod["name"] mod_versions.append(new_version) # For N-term mods and N-term is not yet modified: - elif mod['n_term'] and 'N' not in version.keys(): + elif mod["n_term"] and "N" not in version.keys(): # N-term with specific first AA: - if mod['amino_acid']: - if row['peptide'][0] == mod['amino_acid']: 
+ if mod["amino_acid"]: + if row["peptide"][0] == mod["amino_acid"]: new_version = version.copy() - new_version['N'] = mod['name'] + new_version["N"] = mod["name"] mod_versions.append(new_version) # N-term without specific first AA: else: new_version = version.copy() - new_version['N'] = mod['name'] + new_version["N"] = mod["name"] mod_versions.append(new_version) df_out = pd.DataFrame(columns=row.index) - df_out['modifications'] = ['|'.join('{}|{}'.format(0, value) if key == 'N' - else '{}|{}'.format(key + 1, value) for key, value - in version.items()) for version in mod_versions] - df_out['modifications'] = ['-' if not mods else mods for mods in df_out['modifications']] - df_out['spec_id'] = ['{}_{:03d}'.format(row['spec_id'], i) for i in range(len(mod_versions))] - df_out['charge'] = row['charge'] - df_out['peptide'] = row['peptide'] - if 'protein_list' in row.index: - df_out['protein_list'] = str(row['protein_list']) + df_out["modifications"] = [ + "|".join( + "{}|{}".format(0, value) if key == "N" else "{}|{}".format(key + 1, value) + for key, value in version.items() + ) + for version in mod_versions + ] + df_out["modifications"] = [ + "-" if not mods else mods for mods in df_out["modifications"] + ] + df_out["spec_id"] = [ + "{}_{:03d}".format(row["spec_id"], i) for i in range(len(mod_versions)) + ] + df_out["charge"] = row["charge"] + df_out["peptide"] = row["peptide"] + if "protein_list" in row.index: + df_out["protein_list"] = str(row["protein_list"]) return df_out def add_charges(df_in): params = get_params() df_out = pd.DataFrame(columns=df_in.columns) - for charge in params['charges']: + for charge in params["charges"]: tmp = df_in.copy() - tmp['spec_id'] = tmp['spec_id'] + '_{}'.format(charge) - tmp['charge'] = charge + tmp["spec_id"] = tmp["spec_id"] + "_{}".format(charge) + tmp["charge"] = charge df_out = df_out.append(tmp, ignore_index=True) - df_out.sort_values(['spec_id', 'charge'], inplace=True) + df_out.sort_values(["spec_id", "charge"], 
inplace=True) df_out.reset_index(drop=True, inplace=True) return df_out -def create_decoy_peprec(peprec, spec_id_prefix='decoy_', keep_cterm_aa=True, remove_redundancy=True, move_mods=True): +def create_decoy_peprec( + peprec, + spec_id_prefix="decoy_", + keep_cterm_aa=True, + remove_redundancy=True, + move_mods=True, +): """ Create decoy peptides by reversing the sequences in a PEPREC DataFrame. @@ -207,31 +248,40 @@ def create_decoy_peprec(peprec, spec_id_prefix='decoy_', keep_cterm_aa=True, rem """ def move_mods(row): - mods = row['modifications'] + mods = row["modifications"] if type(mods) == str: - if not mods == '-': - mods = mods.split('|') - mods = sorted(zip([int(p) if (p == '-1' or p == '0') - else len(row['peptide']) - int(p) - for p in mods[::2] - ], mods[1::2])) - mods = '|'.join(['|'.join([str(x) for x in mod]) for mod in mods]) - row['modifications'] = mods + if not mods == "-": + mods = mods.split("|") + mods = sorted( + zip( + [ + int(p) + if (p == "-1" or p == "0") + else len(row["peptide"]) - int(p) + for p in mods[::2] + ], + mods[1::2], + ) + ) + mods = "|".join(["|".join([str(x) for x in mod]) for mod in mods]) + row["modifications"] = mods return row peprec_decoy = peprec.copy() - peprec_decoy['spec_id'] = spec_id_prefix + peprec_decoy['spec_id'].astype(str) + peprec_decoy["spec_id"] = spec_id_prefix + peprec_decoy["spec_id"].astype(str) if keep_cterm_aa: - peprec_decoy['peptide'] = peprec_decoy['peptide'].apply(lambda pep: pep[-2::-1] + pep[-1]) + peprec_decoy["peptide"] = peprec_decoy["peptide"].apply( + lambda pep: pep[-2::-1] + pep[-1] + ) else: - peprec_decoy['peptide'] = peprec_decoy['peptide'].apply(lambda pep: pep[-1::-1]) + peprec_decoy["peptide"] = peprec_decoy["peptide"].apply(lambda pep: pep[-1::-1]) if remove_redundancy: - peprec_decoy = peprec_decoy[~peprec_decoy['peptide'].isin(peprec['peptide'])] + peprec_decoy = peprec_decoy[~peprec_decoy["peptide"].isin(peprec["peptide"])] - if 'protein_list' in peprec_decoy.columns: - 
peprec_decoy['protein_list'] = 'decoy' + if "protein_list" in peprec_decoy.columns: + peprec_decoy["protein_list"] = "decoy" if move_mods: peprec_decoy = peprec_decoy.apply(move_mods, axis=1) @@ -240,142 +290,160 @@ def move_mods(row): def remove_from_peprec_filter(peprec_pred, peprec_filter): - peprec_pred_comb = peprec_pred['modifications'] + peprec_pred['peptide'] + peprec_pred['charge'].astype(str) - peprec_filter_comb = peprec_filter['modifications'] + peprec_filter['peptide'] + peprec_filter['charge'].astype(str) + peprec_pred_comb = ( + peprec_pred["modifications"] + + peprec_pred["peptide"] + + peprec_pred["charge"].astype(str) + ) + peprec_filter_comb = ( + peprec_filter["modifications"] + + peprec_filter["peptide"] + + peprec_filter["charge"].astype(str) + ) return peprec_pred[~peprec_pred_comb.isin(peprec_filter_comb)].copy() def run_batches(peprec, decoy=False): params = get_params() if decoy: - params['output_filename'] += '_decoy' + params["output_filename"] += "_decoy" ms2pip_params = { - 'model': params['ms2pip_model'], - 'frag_error': 0.02, + "model": params["ms2pip_model"], + "frag_error": 0.02, # Modify fasta2speclib modifications dict to MS2PIP params PTMs entry - 'ptm': ['{},{},opt,{}'.format(mods['name'], mods['mass_shift'], mods['amino_acid']) - if not mods['n_term'] - else '{},{},opt,N-term'.format(mods['name'], mods['mass_shift']) - for mods in params['modifications']], - 'sptm': [], - 'gptm': [], + "ptm": [ + "{},{},opt,{}".format(mods["name"], mods["mass_shift"], mods["amino_acid"]) + if not mods["n_term"] + else "{},{},opt,N-term".format(mods["name"], mods["mass_shift"]) + for mods in params["modifications"] + ], + "sptm": [], + "gptm": [], } # Split up into batches to save memory: - b_size = params['batch_size'] + b_size = params["batch_size"] b_count = 0 num_b_counts = ceil(len(peprec) / b_size) for i in range(0, len(peprec), b_size): if i + b_size < len(peprec): - peprec_batch = peprec[i:i + b_size] + peprec_batch = peprec[i : i + 
b_size] else: peprec_batch = peprec[i:] b_count += 1 - logging.info("Predicting batch %d of %d, containing %d unmodified peptides", b_count, num_b_counts, len(peprec_batch)) + logging.info( + "Predicting batch %d of %d, containing %d unmodified peptides", + b_count, + num_b_counts, + len(peprec_batch), + ) logging.debug("Adding all modification combinations") peprec_mods = pd.DataFrame(columns=peprec_batch.columns) - with Pool(params['num_cpu']) as p: - peprec_mods = peprec_mods.append(p.map(add_mods, peprec_batch.iterrows()), ignore_index=True) + with Pool(params["num_cpu"]) as p: + peprec_mods = peprec_mods.append( + p.map(add_mods, peprec_batch.iterrows()), ignore_index=True + ) peprec_batch = peprec_mods - if type(params['elude_model_file']) == str: + if type(params["elude_model_file"]) == str: logging.debug("Adding ELUDE predicted retention times") - peprec_batch['rt'] = get_elude_predictions( + peprec_batch["rt"] = get_elude_predictions( peprec_batch, - params['elude_model_file'], - unimod_mapping={mod['name']: mod['unimod_accession'] for mod in params['modifications']} + params["elude_model_file"], + unimod_mapping={ + mod["name"]: mod["unimod_accession"] + for mod in params["modifications"] + }, ) - if type(params['rt_predictions_file']) == str: + if type(params["rt_predictions_file"]) == str: logging.info("Adding RT predictions from file") - rt_df = pd.read_csv(params['rt_predictions_file']) - for col in ['peptide', 'modifications', 'rt']: - assert col in rt_df.columns, "RT file should contain a `%s` column" % col - peprec_batch = peprec_batch.merge(rt_df, on=['peptide', 'modifications'], how='left') - assert not peprec_batch['rt'].isna().any(), "Not all required peptide-modification combinations could be found in RT file" + rt_df = pd.read_csv(params["rt_predictions_file"]) + for col in ["peptide", "modifications", "rt"]: + assert col in rt_df.columns, ( + "RT file should contain a `%s` column" % col + ) + peprec_batch = peprec_batch.merge( + rt_df, 
on=["peptide", "modifications"], how="left" + ) + assert ( + not peprec_batch["rt"].isna().any() + ), "Not all required peptide-modification combinations could be found in RT file" - logging.debug("Adding charge states %s", str(params['charges'])) + logging.debug("Adding charge states %s", str(params["charges"])) peprec_batch = add_charges(peprec_batch) - if type(params['peprec_filter']) == str: + if type(params["peprec_filter"]) == str: logging.debug("Removing peptides present in peprec filter") - peprec_filter = pd.read_csv(params['peprec_filter'], sep=' ') + peprec_filter = pd.read_csv(params["peprec_filter"], sep=" ") peprec_batch = remove_from_peprec_filter(peprec_batch, peprec_filter) # Write ptm/charge-extended peprec from this batch to H5 file: peprec_batch.astype(str).to_hdf( - '{}_expanded.peprec.hdf'.format(params['output_filename']), key='table', - format='table', complevel=3, complib='zlib', mode='a' + "{}_expanded.peprec.hdf".format(params["output_filename"]), + key="table", + format="table", + complevel=3, + complib="zlib", + mode="a", ) logging.info("Running MS2PIPc for %d peptides", len(peprec_batch)) - all_preds = run(peprec_batch, num_cpu=params['num_cpu'], output_filename=params['output_filename'], - params=ms2pip_params, return_results=True) + all_preds = run( + peprec_batch, + num_cpu=params["num_cpu"], + output_filename=params["output_filename"], + params=ms2pip_params, + return_results=True, + ) if b_count == 1: - write_mode = 'w' + write_mode = "w" append = False else: - write_mode = 'a' + write_mode = "a" append = True - if 'hdf' in params['output_filetype']: - logging.info("Writing predictions to %s_predictions.hdf", params['output_filename']) + if "hdf" in params["output_filetype"]: + logging.info( + "Writing predictions to %s_predictions.hdf", params["output_filename"] + ) all_preds.astype(str).to_hdf( - '{}_predictions.hdf'.format(params['output_filename']), - key='table', format='table', complevel=3, complib='zlib', - mode=write_mode, 
append=append, min_itemsize=50 + "{}_predictions.hdf".format(params["output_filename"]), + key="table", + format="table", + complevel=3, + complib="zlib", + mode=write_mode, + append=append, + min_itemsize=50, ) - if 'msp' in params['output_filetype']: - """ - logging.info("Writing MSP file with unmodified peptides") - write_msp( - all_preds, - peprec_batch[peprec_batch['modifications'] == '-'], - output_filename="{}_unmodified".format(params['output_filename']), - write_mode=write_mode, - ) - """ + spec_out = spectrum_output.SpectrumOutput( + all_preds, + peprec_batch, + ms2pip_params, + output_filename="{}".format(params["output_filename"]), + write_mode=write_mode, + ) + if "msp" in params["output_filetype"]: logging.info("Writing MSP file") - spectrum_output.write_msp( - all_preds, - peprec_batch, - output_filename="{}".format(params['output_filename']), - write_mode=write_mode, - ) + spec_out.write_msp() - if 'mgf' in params['output_filetype']: + if "mgf" in params["output_filetype"]: logging.info("Writing MGF file") - spectrum_output.write_mgf( - all_preds, - peprec=peprec_batch, - output_filename="{}".format(params['output_filename']), - write_mode=write_mode - ) + spec_out.write_mgf() - if 'bibliospec' in params['output_filetype']: + if "bibliospec" in params["output_filetype"]: logging.info("Writing BiblioSpec SSL and MS2 files") - spectrum_output.write_bibliospec( - all_preds, - peprec_batch, - ms2pip_params, - output_filename="{}".format(params['output_filename']), - write_mode=write_mode - ) + spec_out.write_bibliospec() - if 'spectronaut' in params['output_filetype']: + if "spectronaut" in params["output_filetype"]: logging.info("Writing Spectronaut CSV file") - spectrum_output.write_spectronaut( - all_preds, - peprec_batch, - ms2pip_params, - output_filename="{}".format(params['output_filename']), - write_mode=write_mode - ) + spec_out.write_spectronaut() del all_preds del peprec_batch @@ -384,15 +452,18 @@ def run_batches(peprec, decoy=False): def 
main(): params = get_params() logging.basicConfig( - format='%(asctime)s - %(levelname)s - %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - level=params['log_level'] + format="%(asctime)s - %(levelname)s - %(message)s", + datefmt="%Y-%m-%d %H:%M:%S", + level=params["log_level"], ) - peprec = pd.DataFrame(columns=['spec_id', 'peptide', 'modifications', 'charge']) + peprec = pd.DataFrame(columns=["spec_id", "peptide", "modifications", "charge"]) logging.info("Cleaving proteins, adding peptides to peprec") - with Pool(params['num_cpu']) as p: - peprec = peprec.append(p.map(prot_to_peprec, SeqIO.parse(params['fasta_filename'], "fasta")), ignore_index=True) + with Pool(params["num_cpu"]) as p: + peprec = peprec.append( + p.map(prot_to_peprec, SeqIO.parse(params["fasta_filename"], "fasta")), + ignore_index=True, + ) logging.info("Removing peptide redundancy, adding protein list to peptides") peprec = get_protein_list(peprec) @@ -401,14 +472,22 @@ def main(): save_peprec = False if save_peprec: - logging.info("Saving non-expanded PEPREC to %s.peprec.hdf", params['output_filename']) - peprec_nonmod['protein_list'] = ['/'.join(prot) for prot in peprec_nonmod['protein_list']] + logging.info( + "Saving non-expanded PEPREC to %s.peprec.hdf", params["output_filename"] + ) + peprec_nonmod["protein_list"] = [ + "/".join(prot) for prot in peprec_nonmod["protein_list"] + ] peprec_nonmod.astype(str).to_hdf( - '{}_nonexpanded.peprec.hdf'.format(params['output_filename']), key='table', - format='table', complevel=3, complib='zlib', mode='w' + "{}_nonexpanded.peprec.hdf".format(params["output_filename"]), + key="table", + format="table", + complevel=3, + complib="zlib", + mode="w", ) - if not params['decoy']: + if not params["decoy"]: del peprec_nonmod run_batches(peprec, decoy=False) @@ -416,7 +495,7 @@ def main(): # For testing # peprec_nonmod = pd.read_hdf('data/uniprot_proteome_yeast_head_nonexpanded.peprec.hdf', key='table') - if params['decoy']: + if params["decoy"]: 
logging.info("Reversing sequences for decoy peptides") peprec_decoy = create_decoy_peprec(peprec_nonmod, move_mods=False) del peprec_nonmod diff --git a/ms2pip/modifications.py b/ms2pip/modifications.py new file mode 100644 index 00000000..c342c11e --- /dev/null +++ b/ms2pip/modifications.py @@ -0,0 +1,54 @@ +""" +MS2PIP / PEPREC modification handling +""" + +import functools + + +class Modifications: + def __init__(self): + """ + MS2PIP / PEPREC modification handling + """ + self.modifications = dict() + + def add_from_ms2pip_modstrings(self, modstrings): + """ + Add modifications from MS2PIP modstring list + + Parameters + ---------- + modstrings: list(str) + List of MS2PIP modstrings + + Example + ------- + >>> ms2pip_ptms = [ + ... "Oxidation,15.994915,opt,M", + ... "Acetyl,42.010565,opt,N-term", + ... ] + ... mods = Modifications() + ... mods.add_from_ms2pip_modstrings(ms2pip_ptms) + """ + + for mod in modstrings: + mod = mod.split(",") + self.modifications[mod[0]] = { + "mass_shift": float(mod[1]), + "amino_acid": mod[3], + } + + self.get_mass_shifts.cache_clear() + + @functools.lru_cache() + def get_mass_shifts(self): + """ + Return modification name -> mass shift mapping. 
+ """ + mass_shifts = {name: mod["mass_shift"] for name, mod in self.modifications.items()} + return mass_shifts + + + + + diff --git a/ms2pip/ms2pipC.py b/ms2pip/ms2pipC.py index d8bbea11..7d7f73f8 100644 --- a/ms2pip/ms2pipC.py +++ b/ms2pip/ms2pipC.py @@ -18,7 +18,7 @@ from ms2pip.feature_names import get_feature_names_new from ms2pip.cython_modules import ms2pip_pyx -logger = logging.getLogger('ms2pip') +logger = logging.getLogger("ms2pip") # Supported output formats SUPPORTED_OUT_FORMATS = ["csv", "mgf", "msp", "bibliospec", "spectronaut"] @@ -853,19 +853,22 @@ def argument_parser(): class MS2PIP: - def __init__(self, pep_file, - spec_file=None, - vector_file=None, - config_file=None, - num_cpu=1, - use_billiard=False, - params=None, - output_filename=None, - datasetname=None, - return_results=True, - limit=None, - compute_correlations=False, - tableau=False): + def __init__( + self, + pep_file, + spec_file=None, + vector_file=None, + config_file=None, + num_cpu=1, + use_billiard=False, + params=None, + output_filename=None, + datasetname=None, + return_results=True, + limit=None, + compute_correlations=False, + tableau=False, + ): self.pep_file = pep_file self.spec_file = spec_file self.vector_file = vector_file @@ -894,7 +897,9 @@ def __init__(self, pep_file, # Validate requested output formats if "out" in self.params: - self.out_formats = [o.lower().strip() for o in self.params["out"].split(",")] + self.out_formats = [ + o.lower().strip() for o in self.params["out"].split(",") + ] for o in self.out_formats: if o not in SUPPORTED_OUT_FORMATS: raise UnknownOutputFormatError(o) @@ -907,18 +912,25 @@ def __init__(self, pep_file, # Validate requested model if self.model in MODELS.keys(): - logger.info("using {} models".format(self.model)) + logger.info("using %s models", self.model) else: raise UnknownFragmentationMethodError(self.model) if output_filename is None and not return_results: - self.output_filename = "{}_{}".format(".".join(pep_file.split(".")[:-1]), 
self.model) + self.output_filename = "{}_{}".format( + ".".join(pep_file.split(".")[:-1]), self.model + ) else: self.output_filename = output_filename - logger.debug("starting workers (use_billiard=%r, num_cpu=%d) ...", use_billiard, self.num_cpu) + logger.debug( + "starting workers (use_billiard=%r, num_cpu=%d) ...", + use_billiard, + self.num_cpu, + ) if use_billiard: import billiard + self.myPool = billiard.Pool(self.num_cpu) else: self.myPool = multiprocessing.Pool(self.num_cpu) @@ -928,7 +940,9 @@ def run(self): # PTMs are loaded the same as in Omega # This allows me to use the same C init() function in bot ms2ip and Omega - (self.modfile, self.modfile2, self.PTMmap) = generate_modifications_file(self.params, MASSES, A_MAP) + (self.modfile, self.modfile2, self.PTMmap) = generate_modifications_file( + self.params, MASSES, A_MAP + ) self._read_peptide_information() @@ -942,7 +956,9 @@ def run(self): all_preds = self._predict_spec(results) logger.info("writing file %s_pred_and_emp.csv...", self.output_filename) - all_preds.to_csv("{}_pred_and_emp.csv".format(self.output_filename), index=False) + all_preds.to_csv( + "{}_pred_and_emp.csv".format(self.output_filename), index=False + ) if self.compute_correlations: logger.info("computing correlations") @@ -950,7 +966,10 @@ def run(self): correlations.to_csv( "{}_correlations.csv".format(self.output_filename), index=True ) - logger.info("median correlations: %f", correlations.groupby("ion")["pearsonr"].median()) + logger.info( + "median correlations: %f", + correlations.groupby("ion")["pearsonr"].median(), + ) self._remove_amino_accid_masses() else: results = self._process_peptides() @@ -1011,7 +1030,7 @@ def _read_peptide_information(self): logger.info( "Removed {} unsupported peptide sequences (< 3, > 99 \ amino acids, or containing B, J, O, U, X or Z).", - num_pep_filtered + num_pep_filtered, ) if len(data) == 0: @@ -1026,18 +1045,13 @@ def _execute_in_pool(self, titles, func, args): tmp = split_titles[i] 
results.append( self.myPool.apply_async( - func, - args=( - i, - self.data[self.data.spec_id.isin(tmp)], - *args - ), + func, args=(i, self.data[self.data.spec_id.isin(tmp)], *args), ) ) # """ self.myPool.close() self.myPool.join() - sys.stdout.write('\n') + sys.stdout.write("\n") return results def _process_spectra(self): @@ -1048,16 +1062,21 @@ def _process_spectra(self): """ logger.info("scanning spectrum file...") titles = scan_spectrum_file(self.spec_file) - return self._execute_in_pool(titles, process_spectra, ( - self.spec_file, - self.vector_file, - self.afile, - self.modfile, - self.modfile2, - self.PTMmap, - self.model, - self.fragerror, - self.tableau)) + return self._execute_in_pool( + titles, + process_spectra, + ( + self.spec_file, + self.vector_file, + self.afile, + self.modfile, + self.modfile2, + self.PTMmap, + self.model, + self.fragerror, + self.tableau, + ), + ) def _write_vector_file(self, results): all_results = [] @@ -1086,7 +1105,7 @@ def _write_vector_file(self, results): else: # "table" is a tag used to read back the .h5 all_results.to_hdf(self.vector_file, "table") - + return all_results def _predict_spec(self, results): @@ -1142,37 +1161,37 @@ def _predict_spec(self, results): def _process_peptides(self): logger.info("scanning peptide file...") titles = self.data.spec_id.tolist() - return self._execute_in_pool(titles, process_peptides, ( - self.afile, - self.modfile, - self.modfile2, - self.PTMmap, - self.model)) + return self._execute_in_pool( + titles, + process_peptides, + (self.afile, self.modfile, self.modfile2, self.PTMmap, self.model), + ) def _write_predictions(self, all_preds): + spec_out = spectrum_output.SpectrumOutput( + all_preds, self.data, self.params, output_filename=self.output_filename, + ) + if "mgf" in self.out_formats: logger.info("writing MGF file %s_predictions.mgf...", self.output_filename) - spectrum_output.write_mgf( - all_preds, peprec=self.data, output_filename=self.output_filename - ) + spec_out.write_mgf() 
if "msp" in self.out_formats: logger.info("writing MSP file %s_predictions.msp...", self.output_filename) - spectrum_output.write_msp( - all_preds, self.data, output_filename=self.output_filename - ) + spectrum_output.write_msp() if "bibliospec" in self.out_formats: - logger.info("writing SSL/MS2 files...") - spectrum_output.write_bibliospec( - all_preds, self.data, self.params, output_filename=self.output_filename + logger.info( + "writing SSL/MS2 files %s_predictions.ssl/.ms2...", self.output_filename ) + spectrum_output.write_bibliospec() if "spectronaut" in self.out_formats: - logger.info("writing Spectronaut CSV files...") - spectrum_output.write_spectronaut( - all_preds, self.data, self.params, output_filename=self.output_filename + logger.info( + "writing SSL/MS2 files %s_predictions_spectronaut.csv...", + self.output_filename, ) + spectrum_output.write_spectronaut() if "csv" in self.out_formats: logger.info("writing CSV %s_predictions.csv...", self.output_filename) @@ -1196,16 +1215,18 @@ def run( compute_correlations=False, tableau=False, ): - return MS2PIP(pep_file, - spec_file=spec_file, - vector_file=vector_file, - config_file=config_file, - num_cpu=num_cpu, - use_billiard=use_billiard, - params=params, - output_filename=output_filename, - datasetname=datasetname, - return_results=return_results, - limit=limit, - compute_correlations=compute_correlations, - tableau=tableau).run() + return MS2PIP( + pep_file, + spec_file=spec_file, + vector_file=vector_file, + config_file=config_file, + num_cpu=num_cpu, + use_billiard=use_billiard, + params=params, + output_filename=output_filename, + datasetname=datasetname, + return_results=return_results, + limit=limit, + compute_correlations=compute_correlations, + tableau=tableau, + ).run() diff --git a/ms2pip/ms2pip_tools/spectrum_output.py b/ms2pip/ms2pip_tools/spectrum_output.py index 90915d45..69450ed3 100644 --- a/ms2pip/ms2pip_tools/spectrum_output.py +++ b/ms2pip/ms2pip_tools/spectrum_output.py @@ -2,493 
+2,670 @@ Write spectrum files from MS2PIP predictions. """ - -__author__ = "Ralf Gabriels" -__credits__ = ["Ralf Gabriels", "Sven Degroeve", "Lennart Martens"] -__license__ = "Apache License, Version 2.0" -__version__ = "0.2" -__email__ = "Ralf.Gabriels@ugent.be" - - -# Native libraries -from time import localtime, strftime +# Standard library from ast import literal_eval -from operator import itemgetter from io import StringIO +from operator import itemgetter +from time import localtime, strftime +import os # Third party libraries -import pandas as pd from pyteomics import mass -try: - from tqdm import tqdm -except ImportError: - use_tqdm = False -else: - use_tqdm = True + +# Project imports +from ms2pip.modifications import Modifications PROTON_MASS = 1.007825032070059 -def write_msp(all_preds_in, peprec_in, output_filename='MS2PIP_Predictions', - write_mode='wt+', unlog=True, return_stringbuffer=False): - """ - Write MS2PIP predictions to MSP spectral library file. - """ - - def write(msp_output): - if use_tqdm & len(spec_ids) > 100000: - spec_ids_iterator = tqdm(spec_ids) - else: - spec_ids_iterator = spec_ids - for spec_id in spec_ids_iterator: - out = [] - preds = preds_to_slice[spec_id] - peprec_sel = peprec_to_slice[spec_id] - - preds = sorted(preds, key=itemgetter(mz_index)) - - sequence = peprec_sel[peptide_index] - charge = peprec_sel[charge_index] - mods = peprec_sel[modifications_index] - numpeaks = len(preds) - - # Calculate mass from fragment ions - max_ionnumber = max([row[ionnumber_index] for row in preds]) - mass_b = [row[mz_index] for row in preds if row[ion_index] == 'B' and row[ionnumber_index] == 1][0] - mass_y = [row[mz_index] for row in preds if row[ion_index] == 'Y' and row[ionnumber_index] == max_ionnumber][0] - pepmass = mass_b + mass_y - 2 * PROTON_MASS - - out.append('Name: {}/{}\n'.format(sequence, charge)) - out.append('MW: {}\n'.format(pepmass)) - out.append('Comment: ') - - if mods == '-': - out.append("Mods=0 ") - else: - mods = 
mods.split('|') - mods = [(int(mods[i]), mods[i + 1]) for i in range(0, len(mods), 2)] - # Turn MS2PIP mod indexes into actual list indexes (eg 0 for first AA) - mods = [(x, y) if x == 0 else (x - 1, y) for (x, y) in mods] - mods = [(str(x), sequence[x], y) for (x, y) in mods] - out.append("Mods={}/{} ".format(len(mods), '/'.join([','.join(list(x)) for x in mods]))) - - out.append("Parent={} ".format((pepmass + charge * PROTON_MASS) / charge)) - - if add_protein: - try: - out.append('Protein="{}" '.format('/'.join(literal_eval(peprec_sel[protein_list_index])))) - except ValueError: - out.append('Protein="{}" '.format(peprec_sel[protein_list_index])) - - if add_rt: - out.append('RTINSECONDS={} '.format(peprec_sel[rt_index])) - - out.append('MS2PIP_ID="{}"'.format(spec_id)) - - out.append('\nNum peaks: {}\n'.format(numpeaks)) - - lines = list(zip( - [row[mz_index] for row in preds], - [row[prediction_index] for row in preds], - [row[ion_index] for row in preds], - [row[ionnumber_index] for row in preds] - )) - out.append(''.join(['{:.4f}\t{}\t"{}{}"\n'.format(*l) for l in lines])) - out.append('\n') - - out_string = "".join(out) - - msp_output.write(out_string) - - - all_preds = all_preds_in.copy() - peprec = peprec_in.copy() - all_preds.reset_index(drop=True, inplace=True) - # If not already normalized, normalize spectra - if unlog: - if not (all_preds['prediction'].min() == 0 and all_preds['prediction'].max() == 10000): - all_preds['prediction'] = ((2**all_preds['prediction']) - 0.001).clip(lower=0) - all_preds['prediction'] = all_preds.groupby(['spec_id'])['prediction'].apply(lambda x: (x / x.max()) * 10000) - all_preds['prediction'] = all_preds['prediction'].astype(int) - - # Check if protein list and rt are present in peprec - add_protein = 'protein_list' in peprec.columns - add_rt = 'rt' in peprec.columns - - # Convert RT from min to sec - if add_rt: - peprec['rt'] = peprec['rt'] * 60 - - # Split titles (according to MS2PIPc) - spec_ids = 
all_preds['spec_id'].unique().tolist() - - preds_col_names = list(all_preds.columns) - preds_to_slice = {} - preds_list = all_preds.values.tolist() - - preds_spec_id_index = preds_col_names.index('spec_id') - mz_index = preds_col_names.index('mz') - prediction_index = preds_col_names.index('prediction') - ion_index = preds_col_names.index('ion') - ionnumber_index = preds_col_names.index('ionnumber') - - for row in preds_list: - spec_id = row[preds_spec_id_index] - if spec_id in preds_to_slice.keys(): - preds_to_slice[spec_id].append(row) - else: - preds_to_slice[spec_id] = [row] - - peprec_col_names = list(peprec.columns) - peprec_to_slice = {} - peprec_list = peprec.values.tolist() - - spec_id_index = peprec_col_names.index('spec_id') - peptide_index = peprec_col_names.index('peptide') - charge_index = peprec_col_names.index('charge') - modifications_index = peprec_col_names.index('modifications') - if add_protein: - protein_list_index = peprec_col_names.index('protein_list') - if add_rt: - rt_index = peprec_col_names.index('rt') - - for row in peprec_list: - peprec_to_slice[row[spec_id_index]] = row - - # Write to file or stringbuffer - if return_stringbuffer: - msp_output = StringIO() - write(msp_output) - return msp_output - else: - with open("{}_predictions.msp".format(output_filename), write_mode) as msp_output: - write(msp_output) - - -def dfs_to_dicts(all_preds, peprec=None, rt_to_seconds=True): - """ - Create easy to access dict from all_preds and peprec dataframes - """ - if type(peprec) == pd.DataFrame: - peprec_to_dict = peprec.copy() - - rt_present = 'rt' in peprec_to_dict.columns - if rt_present and rt_to_seconds: - peprec_to_dict['rt'] = peprec_to_dict['rt'] * 60 - - peprec_to_dict.index = peprec_to_dict['spec_id'] - peprec_to_dict.drop('spec_id', axis=1, inplace=True) - peprec_dict = peprec_to_dict.to_dict(orient='index') - del peprec_to_dict - else: - rt_present = False - peprec_dict = None - - preds_dict = {} - preds_list = all_preds[['spec_id', 
'charge', 'ion', 'mz', 'prediction']].values.tolist() - - for row in preds_list: - spec_id = row[0] - if spec_id in preds_dict.keys(): - if row[2] in preds_dict[spec_id]['peaks']: - preds_dict[spec_id]['peaks'][row[2]].append(tuple(row[3:])) - else: - preds_dict[spec_id]['peaks'][row[2]] = [tuple(row[3:])] - else: - preds_dict[spec_id] = { - 'charge': row[1], - 'peaks': {row[2]: [tuple(row[3:])]} - } - return peprec_dict, preds_dict, rt_present - - -def get_precursor_mz_pyteomics(peptide, modifications, charge, mass_shifts): - """ - Calculate precursor mass and mz for given peptide and modification list, - using Pyteomics. - - peptide: stripped peptide sequence - modifications: MS2PIP-style formatted modifications list (e.g. - `0|Acetyl|2|Oxidation`) - mass_shifts: dictionary with `modification_name -> mass_shift` pairs - - Returns: tuple(prec_mass, prec_mz) - - Note: This method does not use the build-in Pyteomics modification handling, as - that would require a known atomic composition of the modification. - """ - charge = int(charge) - unmodified_mass = mass.fast_mass(peptide) - mods_massses = sum([mass_shifts[mod] for mod in modifications.split('|')[1::2]]) - prec_mass = unmodified_mass + mods_massses - prec_mz = (prec_mass + charge * PROTON_MASS) / charge - return prec_mass, prec_mz - - -def write_mgf(all_preds_in, output_filename="MS2PIP", unlog=True, write_mode='w+', return_stringbuffer=False, peprec=None): - """ - Write MS2PIP predictions to MGF spectrum file. 
- """ - all_preds = all_preds_in.copy() - if unlog: - all_preds['prediction'] = ((2**all_preds['prediction']) - 0.001).clip(lower=0) - all_preds.reset_index(inplace=True) - all_preds['prediction'] = all_preds.groupby(['spec_id'])['prediction'].apply(lambda x: x / x.sum()) - - def write(all_preds, mgf_output, peprec=None): - out = [] - - peprec_dict, preds_dict, rt_present = dfs_to_dicts(all_preds, peprec=peprec, rt_to_seconds=True) - - # Write MGF - if peprec_dict: - spec_id_list = peprec_dict.keys() - else: - spec_id_list = list(all_preds['spec_id'].unique()) - - for spec_id in sorted(spec_id_list): - out.append('BEGIN IONS') - charge = preds_dict[spec_id]['charge'] - pepmass = preds_dict[spec_id]['peaks']['B'][0][0] + preds_dict[spec_id]['peaks']['Y'][-1][0] - 2 * PROTON_MASS - peaks = [item for sublist in preds_dict[spec_id]['peaks'].values() for item in sublist] - peaks = sorted(peaks, key=itemgetter(0)) - - if peprec_dict: - seq = peprec_dict[spec_id]['peptide'] - mods = peprec_dict[spec_id]['modifications'] - if rt_present: - rt = peprec_dict[spec_id]['rt'] - if mods == '-': - mods_out = '0' - else: - # Write MSP style PTM string - mods = mods.split('|') - mods = [(int(mods[i]), mods[i + 1]) for i in range(0, len(mods), 2)] - # Turn MS2PIP mod indexes into actual list indexes (eg 0 for first AA) - mods = [(x, y) if x == 0 else (x - 1, y) for (x, y) in mods] - mods = [(str(x), seq[x], y) for (x, y) in mods] - mods_out = '{}/{}'.format(len(mods), '/'.join([','.join(list(x)) for x in mods])) - out.append('TITLE={} {} {}'.format(spec_id, seq, mods_out)) - else: - out.append('TITLE={}'.format(spec_id)) - - out.append('PEPMASS={}'.format((pepmass + (charge * PROTON_MASS)) / charge)) - out.append('CHARGE={}+'.format(charge)) - if rt_present: - out.append('RTINSECONDS={}'.format(rt)) - out.append('\n'.join([' '.join(['{:.8f}'.format(p) for p in peak]) for peak in peaks])) - out.append('END IONS\n') - - mgf_output.write('\n'.join(out)) - - if return_stringbuffer: - 
mgf_output = StringIO() - write(all_preds, mgf_output, peprec=peprec) - return mgf_output - else: - with open("{}_predictions.mgf".format(output_filename), write_mode) as mgf_output: - write(all_preds, mgf_output, peprec=peprec) - - del all_preds - - -def build_ssl_modified_sequence(seq, mods, ssl_mods): - """ - Build BiblioSpec SSL modified sequence string. - - Arguments: - seq - peptide sequence - mods - MS2PIP-formatted modifications - ssl_mods - dict of name: mass shift strings - - create ssl_mods from MS2PIP params with: - `ssl_mods = \ - {ptm.split(',')[0]:\ - "{:+.1f}".format(round(float(ptm.split(',')[1]),1))\ - for ptm in params['ptm']}` - """ - pep = list(seq) - for loc, name in zip(mods.split('|')[::2], mods.split('|')[1::2]): - # C-term mod - if loc == '-1': - pep[-1] = pep[-1] + '[{}]'.format(ssl_mods[name]) - # N-term mod - elif loc == '0': - pep[0] = pep[0] + '[{}]'.format(ssl_mods[name]) - # Normal mod - else: - pep[int(loc) - 1] = pep[int(loc) - 1] + '[{}]'.format(ssl_mods[name]) - return ''.join(pep) - - -def write_bibliospec(all_preds_in, peprec_in, params, output_filename="MS2PIP", unlog=True, write_mode='w+', return_stringbuffer=False): - """ - Write MS2PIP predictions to BiblioSpec SSL and MS2 spectral library files - (For example for use in Skyline). - - Note: - - In contrast to write_mgf and write_msp, here a peprec is required. - - Peaks are normalized the same way as in MSP files: base-peak normalized and max peak equals 10 000 - - write_mode: start new file ('w+') or append to existing ('a+) - """ - - def get_last_scannr(ssl_filename): - """ - Return scan number of last line in a Bibliospec SSL file. 
- """ - with open(ssl_filename, 'rt') as ssl: - for line in ssl: - last_line = line - last_scannr = int(last_line.split('\t')[1]) - return last_scannr - - - def write(all_preds, peprec, params, ssl_output, ms2_output, start_scannr=0, output_filename="MS2PIP"): - ms2_out = [] - ssl_out = [] - - # Prepare ssl_mods and mass shifts - ssl_mods = {ptm.split(',')[0]: "{:+.1f}".format(round(float(ptm.split(',')[1]), 1)) for ptm in params['ptm']} - mass_shifts = {ptm.split(',')[0]: float(ptm.split(',')[1]) for ptm in params['ptm']} - - # Replace spec_id with integer, starting from last scan in existing SSL file - peprec.index = range(start_scannr, start_scannr + len(peprec)) - scannum_dict = {v: k for k, v in peprec['spec_id'].to_dict().items()} - peprec['spec_id'] = peprec.index - all_preds['spec_id'] = all_preds['spec_id'].map(scannum_dict) - - peprec_dict, preds_dict, rt_present = dfs_to_dicts(all_preds, peprec=peprec, rt_to_seconds=True) - - for spec_id in sorted(preds_dict.keys()): - seq = peprec_dict[spec_id]['peptide'] - mods = peprec_dict[spec_id]['modifications'] - charge = preds_dict[spec_id]['charge'] - prec_mass, prec_mz = get_precursor_mz_pyteomics(seq, mods, charge, mass_shifts) - peaks = [item for sublist in preds_dict[spec_id]['peaks'].values() for item in sublist] - peaks = sorted(peaks, key=itemgetter(0)) - - if mods != '-' and mods != '': - mod_seq = build_ssl_modified_sequence(seq, mods, ssl_mods) - else: - mod_seq = seq - - rt = peprec_dict[spec_id]['rt'] if rt_present else '' - - ssl_out.append('\t'.join([output_filename.split('/')[-1] + '_predictions.ms2', str(spec_id), str(charge), mod_seq, '', '', str(rt)])) - ms2_out.append("S\t{}\t{}".format(spec_id, prec_mz)) - ms2_out.append("Z\t{}\t{}".format(int(charge), prec_mass)) - ms2_out.append("D\tseq\t{}".format(seq)) - - ms2_out.append("D\tmodified seq\t{}".format(mod_seq)) - ms2_out.append('\n'.join(['\t'.join(['{:.8f}'.format(p) for p in peak]) for peak in peaks])) - - 
ssl_output.write('\n'.join(ssl_out)) - ms2_output.write('\n'.join(ms2_out)) - - - all_preds = all_preds_in.copy() - peprec = peprec_in.copy() - if unlog: - all_preds['prediction'] = ((2**all_preds['prediction']) - 0.001).clip(lower=0) - all_preds.reset_index(inplace=True) - all_preds['prediction'] = all_preds.groupby(['spec_id'])['prediction'].apply(lambda x: (x / x.max()) * 10000) - - if return_stringbuffer: - ssl_output = StringIO() - ms2_output = StringIO() - else: - ssl_output = open("{}_predictions.ssl".format(output_filename), write_mode) - ms2_output = open("{}_predictions.ms2".format(output_filename), write_mode) - - # If a new file is written, write headers - if 'w' in write_mode: - start_scannr = 0 - ssl_header = ['file', 'scan', 'charge', 'sequence', 'score-type', 'score', 'retention-time' '\n'] - ssl_output.write('\t'.join(ssl_header)) - ms2_output.write("H\tCreationDate\t{}\n".format(strftime("%Y-%m-%d %H:%M:%S", localtime()))) - ms2_output.write("H\tExtractor\tMS2PIP Predictions\n") - else: - # Get last scan number of ssl file, to continue indexing from there - # because Bibliospec speclib scan numbers can only be integers - start_scannr = get_last_scannr("{}_predictions.ssl".format(output_filename)) + 1 - ssl_output.write('\n') - ms2_output.write('\n') - - write(all_preds, peprec, params, ssl_output, ms2_output, start_scannr=start_scannr, output_filename=output_filename) - - if return_stringbuffer: - return ssl_output, ms2_output - - -def write_spectronaut(all_preds_in, peprec_in, params, output_filename="MS2PIP", - unlog=True, write_mode='w+', return_stringbuffer=False): - """ - Write to Spectronaut library import format. 
- - Reference: https://biognosys.com/media.ashx/spectronautmanual.pdf - """ - - def write(all_preds, peprec, output_file, header=True): - # Prepare peptide-level data - # Modified sequence - ssl_mods = {ptm.split(',')[0]: "{:+.1f}".format(round(float(ptm.split(',')[1]), 1)) for ptm in params['ptm']} - apply_build_modseq = lambda row: build_ssl_modified_sequence(row['peptide'], row['modifications'], ssl_mods) - peprec['ModifiedPeptide'] = peprec.apply(apply_build_modseq, axis=1) - peprec['ModifiedPeptide'] = '_' + peprec['ModifiedPeptide'] + '_' - - # Precursor mz - mass_shifts = {ptm.split(',')[0]: float(ptm.split(',')[1]) for ptm in params['ptm']} - apply_get_mz = lambda row: get_precursor_mz_pyteomics(row['peptide'], row['modifications'], row['charge'], mass_shifts)[1] - peprec['PrecursorMz'] = peprec.apply(apply_get_mz, axis=1) - - # Additional columns - peprec['FragmentLossType'] = 'noloss' - - # Retention time - rt_cols = [] - if 'rt' in peprec.columns: - rt_cols = ['iRT'] - peprec['iRT'] = peprec['rt'] - - # Rename columns and merge with predictions - peprec = peprec.rename(columns={'charge': 'PrecursorCharge', 'peptide': 'StrippedPeptide'}) - peptide_cols = ['ModifiedPeptide', 'StrippedPeptide', 'PrecursorCharge', 'PrecursorMz'] + rt_cols + ['FragmentLossType'] - spectronaut_df = peprec[peptide_cols + ['spec_id']] - spectronaut_df = all_preds.merge(spectronaut_df, on='spec_id') - - # Fragment columns - spectronaut_df['FragmentCharge'] = spectronaut_df['ion'].str.contains('2').map({True: 2, False: 1}) - spectronaut_df['FragmentType'] = spectronaut_df['ion'].str[0].str.lower() - col_mapping = {'mz': 'FragmentMz', 'prediction': 'RelativeIntensity', 'ionnumber': 'FragmentNumber'} - spectronaut_df = spectronaut_df.rename(columns=col_mapping) - - # Sort columns - fragment_cols = ['FragmentCharge', 'FragmentMz', 'RelativeIntensity', 'FragmentType', 'FragmentNumber'] - spectronaut_df = spectronaut_df[peptide_cols + fragment_cols] - - # Write - 
spectronaut_df.to_csv(output_file, index=False, header=header) - - - # Undo log transformation and TIC-normalize - peprec = peprec_in.copy() - all_preds = all_preds_in.copy() - if unlog: - all_preds['prediction'] = ((2**all_preds['prediction']) - 0.001).clip(lower=0) - all_preds.reset_index(inplace=True) - all_preds['prediction'] = all_preds.groupby(['spec_id'])['prediction'].apply(lambda x: x / x.sum()) - - if return_stringbuffer: - output_file = StringIO() - write(all_preds, peprec, output_file) - return output_file - else: - f_name = "{}_predictions_spectronaut.csv".format(output_filename) - if 'w' in write_mode: - header = True - elif 'a' in write_mode: - header = False - with open(f_name, write_mode) as output_file: - write(all_preds, peprec, output_file, header=header) +class InvalidWriteModeError(ValueError): + pass + + +# Writer decorator +def writer(**kwargs): + def deco(write_function): + def wrapper(self): + return self._write_general(write_function, **kwargs) + return wrapper + return deco + + + +class SpectrumOutput: + """ + Write MS2PIP predictions to various output formats. + + Parameters + ---------- + all_preds: pd.DataFrame + MS2PIP predictions + peprec: pd.DataFrame + PEPREC with peptide information + params: dict + MS2PIP parameters + output_filename: str, optional + path and name for output files, will be suffixed with `_predictions` and the + relevant file extension (default: ms2pip_predictions) + write_mode: str, optional + write mode to use: "wt+" to start a new file, "at" to append to an + existing file (default: "wt+") + return_stringbuffer: bool, optional + If True, files are written to a StringIO object, which the write function + returns. If False, files are written to a file on disk. + is_log_space: bool, optional + Set to true if predicted intensities in `all_preds` are in log-space. In that + case, intensities will first be transformed to "normal"-space.
+ + Methods + ------- + write_msp() + Write predictions to MSP file + write_mgf() + Write predictions to MGF file + write_bibliospec() + Write predictions to Bibliospec SSL/MS2 files (also for Skyline) + write_spectronaut() + Write predictions to Spectronaut CSV file + + Example + ------- + >>> so = ms2pip.ms2pip_tools.spectrum_output.SpectrumOutput( + all_preds, + peprec, + params + ) + >>> so.write_msp() + >>> so.write_spectronaut() + + """ + + def __init__( + self, + all_preds, + peprec, + params, + output_filename="ms2pip_predictions", + write_mode="wt+", + return_stringbuffer=False, + is_log_space=True, + ): + self.all_preds = all_preds + self.peprec = peprec + self.params = params + self.output_filename = output_filename + self.write_mode = write_mode + self.return_stringbuffer = return_stringbuffer + self.is_log_space = is_log_space + + self.peprec_dict = None + self.preds_dict = None + self.normalization = None + self.mass_shifts = None + self.ssl_modification_mapping = None + + self.has_rt = "rt" in self.peprec.columns + self.has_protein_list = "protein_list" in self.peprec.columns + + mods = Modifications() + mods.add_from_ms2pip_modstrings(params["ptm"]) + self.mass_shifts = mods.get_mass_shifts() + + if self.write_mode not in ["wt+", "wt", "at", "w", "a"]: + raise InvalidWriteModeError(self.write_mode) + + if "a" in self.write_mode and self.return_stringbuffer: + raise InvalidWriteModeError(self.write_mode) + + def _generate_peprec_dict(self, rt_to_seconds=True): + """ + Create easy to access dict from the peprec dataframe + """ + peprec_tmp = self.peprec.copy() + + if self.has_rt and rt_to_seconds: + peprec_tmp["rt"] = peprec_tmp["rt"] * 60 + + peprec_tmp.index = peprec_tmp["spec_id"] + peprec_tmp.drop("spec_id", axis=1, inplace=True) + + self.peprec_dict = peprec_tmp.to_dict(orient="index") + + def _generate_preds_dict(self): + """ + Create easy to access dict from the all_preds dataframe + """ + self.preds_dict = {} + preds_list =
self.all_preds[ + ["spec_id", "charge", "ion", "ionnumber", "mz", "prediction"] + ].values.tolist() + + for row in preds_list: + spec_id = row[0] + if spec_id in self.preds_dict.keys(): + if row[2] in self.preds_dict[spec_id]["peaks"]: + self.preds_dict[spec_id]["peaks"][row[2]].append(tuple(row[3:])) + else: + self.preds_dict[spec_id]["peaks"][row[2]] = [tuple(row[3:])] + else: + self.preds_dict[spec_id] = { + "charge": row[1], + "peaks": {row[2]: [tuple(row[3:])]}, + } + + def _get_precursor_mz(self, peptide, modifications, charge): + """ + Calculate precursor mass and mz for given peptide and modification list, + using Pyteomics. + + Note: This method does not use the built-in Pyteomics modification handling, as + that would require a known atomic composition of the modification. + + Parameters + ---------- + peptide: str + stripped peptide sequence + + modifications: str + MS2PIP-style formatted modifications list (e.g. `0|Acetyl|2|Oxidation`) + + charge: int + precursor charge + + Returns + ------- + prec_mass, prec_mz: tuple(float, float) + """ + + charge = int(charge) + unmodified_mass = mass.fast_mass(peptide) + mods_massses = sum( + [self.mass_shifts[mod] for mod in modifications.split("|")[1::2]] + ) + prec_mass = unmodified_mass + mods_massses + prec_mz = (prec_mass + charge * PROTON_MASS) / charge + return prec_mass, prec_mz + + def _normalize_spectra(self, method="basepeak_10000"): + """ + Normalize spectra + """ + if self.is_log_space: + self.all_preds["prediction"] = ( + (2 ** self.all_preds["prediction"]) - 0.001 + ).clip(lower=0) + self.is_log_space = False + + if method == "basepeak_10000": + if self.normalization == "basepeak_10000": + pass + elif self.normalization == "basepeak_1": + self.all_preds["prediction"] *= 10000 + self.all_preds["prediction"] = self.all_preds["prediction"] + else: + self.all_preds["prediction"] = self.all_preds.groupby(["spec_id"])[ + "prediction" + ].apply(lambda x: (x / x.max()) * 10000) + self.all_preds["prediction"]
= self.all_preds["prediction"] + self.normalization = "basepeak_10000" + + elif method == "basepeak_1": + if self.normalization == "basepeak_1": + pass + elif self.normalization == "basepeak_10000": + self.all_preds["prediction"] /= 10000 + else: + self.all_preds["prediction"] = self.all_preds.groupby(["spec_id"])[ + "prediction" + ].apply(lambda x: (x / x.max())) + self.normalization = "basepeak_1" + + elif method == "tic" and not self.normalization == "tic": + self.all_preds["prediction"] = self.all_preds.groupby(["spec_id"])[ + "prediction" + ].apply(lambda x: x / x.sum()) + self.normalization = "tic" + + def _get_peak_string( + self, + peak_dict, + sep="\t", + include_zero=False, + include_annotations=True, + intensity_type=float, + ): + """ + Get MGF/MSP-like peaklist string + """ + all_peaks = [] + for ion_type, peaks in peak_dict.items(): + for peak in peaks: + if not include_zero and peak[2] == 0: + continue + if include_annotations: + all_peaks.append( + ( + peak[1], + f'{peak[1]:.6f}{sep}{intensity_type(peak[2])}{sep}"{ion_type.lower()}{peak[0]}"', + ) + ) + else: + all_peaks.append((peak[1], f"{peak[1]:.6f}{sep}{peak[2]}")) + + all_peaks = sorted(all_peaks, key=itemgetter(0)) + peak_string = "\n".join([peak[1] for peak in all_peaks]) + + return peak_string + + def _get_msp_modifications(self, sequence, modifications): + """ + Format modifications in MSP-style, e.g. 
"1/0,E,Glu->pyro-Glu" + """ + + if isinstance(modifications, str): + if modifications == "-": + msp_modifications = "0" + else: + mods = modifications.split("|") + mods = [(int(mods[i]), mods[i + 1]) for i in range(0, len(mods), 2)] + mods = [(x, y) if x == 0 else (x - 1, y) for (x, y) in mods] + mods = [(str(x), sequence[x], y) for (x, y) in mods] + msp_modifications = "/".join([",".join(list(x)) for x in mods]) + msp_modifications = f"{len(mods)}/{msp_modifications}" + else: + msp_modifications = "0" + + return msp_modifications + + def _parse_protein_string(self, protein_list): + """ + Parse protein string from list, list string literal, or string. + """ + if isinstance(protein_list, list): + protein_string = "/".join(protein_list) + elif isinstance(protein_list, str): + try: + protein_string = "/".join(literal_eval(protein_list)) + except ValueError: + protein_string = protein_list + else: + protein_string = "" + return protein_string + + def _get_last_ssl_scannr(self): + """ + Return scan number of last line in a Bibliospec SSL file. + """ + ssl_filename = "{}_predictions.ssl".format(self.output_filename) + with open(ssl_filename, "rt") as ssl: + for line in ssl: + last_line = line + last_scannr = int(last_line.split("\t")[1]) + return last_scannr + + def _generate_ssl_modification_mapping(self): + """ + Make modification name -> ssl modification name mapping. + """ + self.ssl_modification_mapping = { + ptm.split(",")[0]: "{:+.1f}".format(round(float(ptm.split(",")[1]), 1)) + for ptm in self.params["ptm"] + } + + def _get_ssl_modified_sequence(self, sequence, modifications): + """ + Build BiblioSpec SSL modified sequence string. 
+ """ + pep = list(sequence) + + for loc, name in zip( + modifications.split("|")[::2], modifications.split("|")[1::2] + ): + # C-term mod + if loc == "-1": + pep[-1] = pep[-1] + "[{}]".format(self.ssl_modification_mapping[name]) + # N-term mod + elif loc == "0": + pep[0] = pep[0] + "[{}]".format(self.ssl_modification_mapping[name]) + # Normal mod + else: + pep[int(loc) - 1] = pep[int(loc) - 1] + "[{}]".format( + self.ssl_modification_mapping[name] + ) + return "".join(pep) + + @writer( + file_suffix="_predictions.msp", + normalization_method="basepeak_10000", + requires_dicts=True, + requires_ssl_modifications=False, + ) + def write_msp(self, file_object): + """ + Construct MSP string and write to file_object. + """ + + for spec_id in sorted(self.peprec_dict.keys()): + seq = self.peprec_dict[spec_id]["peptide"] + mods = self.peprec_dict[spec_id]["modifications"] + charge = self.peprec_dict[spec_id]["charge"] + prec_mass, prec_mz = self._get_precursor_mz(seq, mods, charge) + msp_modifications = self._get_msp_modifications(seq, mods) + num_peaks = sum( + [ + len(peaklist) + for _, peaklist in self.preds_dict[spec_id]["peaks"].items() + ] + ) + + comment_line = f" Mods={msp_modifications} Parent={prec_mz}" + + if self.has_protein_list: + protein_list = self.peprec_dict[spec_id]["protein_list"] + protein_string = self._parse_protein_string(protein_list) + comment_line += f' Protein="{protein_string}"' + + if self.has_rt: + rt = self.peprec_dict[spec_id]["rt"] + comment_line += f" RTINSECONDS={rt}" + + comment_line += f' MS2PIP_ID="{spec_id}"' + + out = [ + f"Name: {seq}/{charge}", + f"MW: {prec_mass}", + f"Comment:{comment_line}", + f"Num peaks: {num_peaks}", + self._get_peak_string( + self.preds_dict[spec_id]["peaks"], + sep="\t", + include_annotations=True, + intensity_type=int, + ), + ] + + file_object.writelines([line + "\n" for line in out] + ["\n"]) + + @writer( + file_suffix="_predictions.mgf", + normalization_method="basepeak_10000", + requires_dicts=True, + 
requires_ssl_modifications=False, + ) + def write_mgf(self, file_object): + """ + Construct MGF string and write to file_object + """ + for spec_id in sorted(self.peprec_dict.keys()): + seq = self.peprec_dict[spec_id]["peptide"] + mods = self.peprec_dict[spec_id]["modifications"] + charge = self.peprec_dict[spec_id]["charge"] + prec_mass, prec_mz = self._get_precursor_mz(seq, mods, charge) + msp_modifications = self._get_msp_modifications(seq, mods) + + if self.has_protein_list: + protein_list = self.peprec_dict[spec_id]["protein_list"] + protein_string = self._parse_protein_string(protein_list) + else: + protein_string = "" + + out = [ + "BEGIN IONS", + f"TITLE={spec_id} {seq}/{charge} {msp_modifications} {protein_string}", + f"PEPMASS={prec_mz}", + f"CHARGE={charge}+", + ] + + if self.has_rt: + rt = self.peprec_dict[spec_id]["rt"] + out.append(f"RTINSECONDS={rt}") + + out.append( + self._get_peak_string( + self.preds_dict[spec_id]["peaks"], + sep=" ", + include_annotations=False, + ) + ) + out.append("END IONS\n") + file_object.writelines([line + "\n" for line in out]) + + @writer( + file_suffix="_predictions_spectronaut.csv", + normalization_method="tic", + requires_dicts=False, + requires_ssl_modifications=True, + ) + def write_spectronaut(self, file_obj): + """ + Construct spectronaut DataFrame and write to file_object. 
+ """ + if "w" in self.write_mode: + header = True + elif "a" in self.write_mode: + header = False + else: + raise InvalidWriteModeError(self.write_mode) + + spectronaut_peprec = self.peprec.copy() + + # ModifiedPeptide and PrecursorMz columns + spectronaut_peprec["ModifiedPeptide"] = spectronaut_peprec.apply( + lambda row: self._get_ssl_modified_sequence( + row["peptide"], row["modifications"] + ), + axis=1, + ) + spectronaut_peprec["PrecursorMz"] = spectronaut_peprec.apply( + lambda row: self._get_precursor_mz( + row["peptide"], row["modifications"], row["charge"] + )[1], + axis=1, + ) + spectronaut_peprec["ModifiedPeptide"] = ( + "_" + spectronaut_peprec["ModifiedPeptide"] + "_" + ) + + # Additional columns + spectronaut_peprec["FragmentLossType"] = "noloss" + + # Retention time + if "rt" in spectronaut_peprec.columns: + rt_cols = ["iRT"] + spectronaut_peprec["iRT"] = spectronaut_peprec["rt"] + else: + rt_cols = [] + + # ProteinId + if self.has_protein_list: + spectronaut_peprec["ProteinId"] = spectronaut_peprec["protein_list"].apply( + self._parse_protein_string + ) + else: + spectronaut_peprec["ProteinId"] = spectronaut_peprec["spec_id"] + + # Rename columns and merge with predictions + spectronaut_peprec = spectronaut_peprec.rename( + columns={"charge": "PrecursorCharge", "peptide": "StrippedPeptide"} + ) + peptide_cols = ( + [ + "ModifiedPeptide", + "StrippedPeptide", + "PrecursorCharge", + "PrecursorMz", + "ProteinId", + ] + + rt_cols + + ["FragmentLossType"] + ) + spectronaut_df = spectronaut_peprec[peptide_cols + ["spec_id"]] + spectronaut_df = self.all_preds.merge(spectronaut_df, on="spec_id") + + # Fragment columns + spectronaut_df["FragmentCharge"] = ( + spectronaut_df["ion"].str.contains("2").map({True: 2, False: 1}) + ) + spectronaut_df["FragmentType"] = spectronaut_df["ion"].str[0].str.lower() + + # Rename and sort columns + spectronaut_df = spectronaut_df.rename( + columns={ + "mz": "FragmentMz", + "prediction": "RelativeIntensity", + "ionnumber": 
"FragmentNumber", + } + ) + fragment_cols = [ + "FragmentCharge", + "FragmentMz", + "RelativeIntensity", + "FragmentType", + "FragmentNumber", + ] + spectronaut_df = spectronaut_df[peptide_cols + fragment_cols] + + spectronaut_df.to_csv(file_obj, index=False, header=header) + + def _write_bibliospec_core(self, file_obj_ssl, file_obj_ms2, start_scannr=0): + """ + Construct Bibliospec SSL/MS2 strings and write to file_objects. + """ + + for i, spec_id in enumerate(sorted(self.preds_dict.keys())): + scannr = i + start_scannr + seq = self.peprec_dict[spec_id]["peptide"] + mods = self.peprec_dict[spec_id]["modifications"] + charge = self.peprec_dict[spec_id]["charge"] + prec_mass, prec_mz = self._get_precursor_mz(seq, mods, charge) + ms2_filename = os.path.basename(self.output_filename) + "_predictions.ms2" + + peaks = self._get_peak_string( + self.preds_dict[spec_id]["peaks"], sep="\t", include_annotations=False, + ) + + if isinstance(mods, str) and mods != "-" and mods != "": + mod_seq = self._get_ssl_modified_sequence(seq, mods) + else: + mod_seq = seq + + rt = self.peprec_dict[spec_id]["rt"] if self.has_rt else "" + + # TODO: implement csv instead of manual writing + file_obj_ssl.write( + "\t".join( + [ms2_filename, str(scannr), str(charge), mod_seq, "", "", str(rt)] + ) + + "\n" + ) + file_obj_ms2.write( + "\n".join( + [ + f"S\t{scannr}\t{prec_mz}", + f"Z\t{charge}\t{prec_mass}", + f"D\tseq\t{seq}", + f"D\tmodified seq\t{mod_seq}", + peaks, + ] + ) + + "\n" + ) + + def _write_general( + self, + write_function, + file_suffix, + normalization_method, + requires_dicts, + requires_ssl_modifications, + ): + """ + General write function to call core write functions. + + Note: Does not work for write_bibliospec function. 
+ """ + + # Normalize if necessary and make dicts + if not self.normalization == normalization_method: + self._normalize_spectra(method=normalization_method) + if requires_dicts: + self._generate_preds_dict() + elif requires_dicts and not self.preds_dict: + self._generate_preds_dict() + if requires_dicts and not self.peprec_dict: + self._generate_peprec_dict() + + if requires_ssl_modifications and not self.ssl_modification_mapping: + self._generate_ssl_modification_mapping() + + # Write to file or stringbuffer + if self.return_stringbuffer: + file_object = StringIO() + else: + f_name = self.output_filename + file_suffix + file_object = open(f_name, self.write_mode) + + write_function(self, file_object) + + return file_object + + def write_bibliospec(self): + """ + Write MS2PIP predictions to BiblioSpec SSL and MS2 spectral library files + (For example for use in Skyline). + """ + + if not self.ssl_modification_mapping: + self._generate_ssl_modification_mapping() + + # Normalize if necessary and make dicts + if not self.normalization == "basepeak_10000": + self._normalize_spectra(method="basepeak_10000") + self._generate_preds_dict() + elif not self.preds_dict: + self._generate_preds_dict() + if not self.peprec_dict: + self._generate_peprec_dict() + + if self.return_stringbuffer: + file_obj_ssl = StringIO() + file_obj_ms2 = StringIO() + else: + file_obj_ssl = open( + "{}_predictions.ssl".format(self.output_filename), self.write_mode + ) + file_obj_ms2 = open( + "{}_predictions.ms2".format(self.output_filename), self.write_mode + ) + + # If a new file is written, write headers + if "w" in self.write_mode: + start_scannr = 0 + ssl_header = [ + "file", + "scan", + "charge", + "sequence", + "score-type", + "score", + "retention-time", + "\n", + ] + file_obj_ssl.write("\t".join(ssl_header)) + file_obj_ms2.write( + "H\tCreationDate\t{}\n".format( + strftime("%Y-%m-%d %H:%M:%S", localtime()) + ) + ) + file_obj_ms2.write("H\tExtractor\tMS2PIP predictions\n") + else: + # Get 
last scan number of ssl file, to continue indexing from there + # because Bibliospec speclib scan numbers can only be integers + start_scannr = self._get_last_ssl_scannr() + 1 + + self._write_bibliospec_core( + file_obj_ssl, file_obj_ms2, start_scannr=start_scannr + ) + + return file_obj_ssl, file_obj_ms2 diff --git a/ms2pip_tools/spectrum_output.py b/ms2pip_tools/spectrum_output.py deleted file mode 100644 index 119bde36..00000000 --- a/ms2pip_tools/spectrum_output.py +++ /dev/null @@ -1,246 +0,0 @@ -""" -Write spectrum files from MS2PIP predictions. -""" - - -__author__ = "Ralf Gabriels" -__credits__ = ["Ralf Gabriels", "Sven Degroeve", "Lennart Martens"] -__license__ = "Apache License, Version 2.0" -__version__ = "0.1" -__email__ = "Ralf.Gabriels@ugent.be" - - -# Native libraries -from ast import literal_eval -from operator import itemgetter -from io import StringIO - -# Third party libraries -import pandas as pd -try: - from tqdm import tqdm -except: - use_tqdm = False -else: - use_tqdm = True - -def write_msp(all_preds_in, peprec_in, output_filename, write_mode='wt+'): - """ - Write MS2PIP predictions to MSP spectral library file. 
- """ - - all_preds = all_preds_in.copy() - peprec = peprec_in.copy() - all_preds.reset_index(drop=True, inplace=True) - # If not already normalized, normalize spectra - if not (all_preds['prediction'].min() == 0 and all_preds['prediction'].max() == 10000): - all_preds['prediction'] = ((2**all_preds['prediction']) - 0.001).clip(lower=0) - all_preds['prediction'] = all_preds.groupby(['spec_id'])['prediction'].apply(lambda x: (x / x.max()) * 10000) - all_preds['prediction'] = all_preds['prediction'].astype(int) - - # Check if protein list and rt are present in peprec - add_protein = 'protein_list' in peprec.columns - add_rt = 'rt' in peprec.columns - - # Convert RT from min to sec - if add_rt: - peprec['rt'] = peprec['rt'] * 60 - - # Split titles (according to MS2PIPc) - spec_ids = peprec['spec_id'].tolist() - - preds_col_names = list(all_preds.columns) - preds_to_slice = {} - preds_list = all_preds.values.tolist() - - preds_spec_id_index = preds_col_names.index('spec_id') - mz_index = preds_col_names.index('mz') - prediction_index = preds_col_names.index('prediction') - ion_index = preds_col_names.index('ion') - ionnumber_index = preds_col_names.index('ionnumber') - - for row in preds_list: - spec_id = row[preds_spec_id_index] - if spec_id in preds_to_slice.keys(): - preds_to_slice[spec_id].append(row) - else: - preds_to_slice[spec_id] = [row] - - peprec_col_names = list(peprec.columns) - peprec_to_slice = {} - peprec_list = peprec.values.tolist() - - spec_id_index = peprec_col_names.index('spec_id') - peptide_index = peprec_col_names.index('peptide') - charge_index = peprec_col_names.index('charge') - modifications_index = peprec_col_names.index('modifications') - if add_protein: - protein_list_index = peprec_col_names.index('protein_list') - if add_rt: - rt_index = peprec_col_names.index('rt') - - for row in peprec_list: - peprec_to_slice[row[spec_id_index]] = row - - with open("{}_predictions.msp".format(output_filename), write_mode) as f: - if use_tqdm & 
len(spec_ids) > 100000: - spec_ids_iterator = tqdm(spec_ids) - else: - spec_ids_iterator = spec_ids - for spec_id in spec_ids_iterator: - out = [] - preds = preds_to_slice[spec_id] - peprec_sel = peprec_to_slice[spec_id] - - preds = sorted(preds, key=itemgetter(mz_index)) - - sequence = peprec_sel[peptide_index] - charge = peprec_sel[charge_index] - mods = peprec_sel[modifications_index] - numpeaks = len(preds) - - # Calculate mass from fragment ions - mass_b = [row[mz_index] for row in preds if row[ion_index] == 'B' and row[ionnumber_index] == 1][0] - mass_y = [row[mz_index] for row in preds if row[ion_index] == 'Y' and row[ionnumber_index] == numpeaks / 2][0] - pepmass = mass_b + mass_y - 2 * 1.007236 - - out.append('Name: {}/{}\n'.format(sequence, charge)) - out.append('MW: {}\n'.format(pepmass)) - out.append('Comment: ') - - if mods == '-': - out.append("Mods=0 ") - else: - mods = mods.split('|') - mods = [(int(mods[i]), mods[i + 1]) for i in range(0, len(mods), 2)] - # Turn MS2PIP mod indexes into actual list indexes (eg 0 for first AA) - mods = [(x, y) if x == 0 else (x - 1, y) for (x, y) in mods] - mods = [(str(x), sequence[x], y) for (x, y) in mods] - out.append("Mods={}/{} ".format(len(mods), '/'.join([','.join(list(x)) for x in mods]))) - - out.append("Parent={} ".format((pepmass + charge * 1.007236) / charge)) - - if add_protein: - try: - out.append('Protein="{}" '.format('/'.join(literal_eval(peprec_sel[protein_list_index])))) - except ValueError: - out.append('Protein="{}" '.format(peprec_sel[protein_list_index])) - - if add_rt: - out.append('RTINSECONDS={} '.format(peprec_sel[rt_index])) - - out.append('MS2PIP_ID="{}"'.format(spec_id)) - - out.append('\nCharge: {}\n'.format(charge)) - - if add_rt: - out.append('RetentionTimeMins: {}\n'.format(peprec_sel[rt_index] / 60)) - - out.append('Num peaks: {}\n'.format(numpeaks)) - - lines = list(zip( - [row[mz_index] for row in preds], - [row[prediction_index] for row in preds], - [row[ion_index] for row in 
preds], - [row[ionnumber_index] for row in preds] - )) - out.append(''.join(['{:.4f}\t{}\t"{}{}"\n'.format(*l) for l in lines])) - out.append('\n') - - out_string = "".join(out) - - f.write(out_string) - - -def write_mgf(all_preds_in, output_filename="MS2PIP", unlog=True, write_mode='w+', return_stringbuffer=False, peprec=None): - """ - Write MS2PIP predictions to MGF spectrum file. - """ - all_preds = all_preds_in.copy() - if unlog: - all_preds['prediction'] = ((2**all_preds['prediction']) - 0.001).clip(lower=0) - all_preds.reset_index(inplace=True) - all_preds['prediction'] = all_preds.groupby(['spec_id'])['prediction'].apply(lambda x: x / x.sum()) - - def write(all_preds, mgf_output, peprec=None): - out = [] - - # Create easy to access dict from all_preds and peprec dataframe - if type(peprec) == pd.DataFrame: - peprec_to_dict = peprec.copy() - - rt_present = 'rt' in peprec_to_dict.columns - if rt_present: - peprec_to_dict['rt'] = peprec_to_dict['rt'] * 60 - - peprec_to_dict.index = peprec_to_dict['spec_id'] - peprec_to_dict.drop('spec_id', axis=1, inplace=True) - peprec_dict = peprec_to_dict.to_dict(orient='index') - del peprec_to_dict - spec_id_list = list(peprec['spec_id']) - - else: - rt_present = False - spec_id_list = list(all_preds['spec_id'].unique()) - - preds_dict = {} - preds_list = all_preds[['spec_id', 'charge', 'ion', 'mz', 'prediction']].values.tolist() - - for row in preds_list: - spec_id = row[0] - if spec_id in preds_dict.keys(): - if row[2] in preds_dict[spec_id]['peaks']: - preds_dict[spec_id]['peaks'][row[2]].append(tuple(row[3:])) - else: - preds_dict[spec_id]['peaks'][row[2]] = [tuple(row[3:])] - else: - preds_dict[spec_id] = { - 'charge': row[1], - 'peaks': {row[2]: [tuple(row[3:])]} - } - - # Write MGF - for spec_id in spec_id_list: - out.append('BEGIN IONS') - charge = preds_dict[spec_id]['charge'] - pepmass = preds_dict[spec_id]['peaks']['B'][0][0] + preds_dict[spec_id]['peaks']['Y'][-1][0] - 2 * 1.007236 - peaks = [item for sublist in 
preds_dict[spec_id]['peaks'].values() for item in sublist] - peaks = sorted(peaks, key=itemgetter(0)) - - if type(peprec) == pd.DataFrame: - seq = peprec_dict[spec_id]['peptide'] - mods = peprec_dict[spec_id]['modifications'] - if rt_present: - rt = peprec_dict[spec_id]['rt'] - if mods == '-': - mods_out = '0' - else: - # Write MSP style PTM string - mods = mods.split('|') - mods = [(int(mods[i]), mods[i + 1]) for i in range(0, len(mods), 2)] - # Turn MS2PIP mod indexes into actual list indexes (eg 0 for first AA) - mods = [(x, y) if x == 0 else (x - 1, y) for (x, y) in mods] - mods = [(str(x), seq[x], y) for (x, y) in mods] - mods_out = '{}/{}'.format(len(mods), '/'.join([','.join(list(x)) for x in mods])) - out.append('TITLE={} {} {}'.format(spec_id, seq, mods_out)) - else: - out.append('TITLE={}'.format(spec_id)) - - out.append('PEPMASS={}'.format((pepmass + (charge * 1.007825032)) / charge)) - out.append('CHARGE={}+'.format(charge)) - if rt_present: - out.append('RTINSECONDS={}'.format(rt)) - out.append('\n'.join([' '.join(['{:.8f}'.format(p) for p in peak]) for peak in peaks])) - out.append('END IONS\n') - - mgf_output.write('\n'.join(out)) - - if return_stringbuffer: - mgf_output = StringIO() - write(all_preds, mgf_output, peprec=peprec) - return mgf_output - else: - with open("{}_predictions.mgf".format(output_filename), write_mode) as mgf_output: - write(all_preds, mgf_output, peprec=peprec) - - del all_preds diff --git a/setup.py b/setup.py index 4e2ac396..b8fa3971 100644 --- a/setup.py +++ b/setup.py @@ -3,7 +3,7 @@ NAME = "ms2pip" LICENSE = "apache-2.0" DESCRIPTION = "MS²PIP: MS² Peak Intensity Prediction" -AUTHOR = "Sven Degroeve, Ralf Gabriels, Ana Sílvia C. Silva" +AUTHOR = "Sven Degroeve, Ralf Gabriels, Kevin Velghe, Ana Sílvia C. 
Silva" AUTHOR_EMAIL = "sven.degroeve@vib-ugent.be" URL = "https://www.github.com/compomics/ms2pip_c" PROJECT_URLS = { @@ -34,7 +34,7 @@ INSTALL_REQUIRES = [ "biopython>=1.74,<2", "numpy>=1.16,<2", - "pandas>=0.24,<1", + "pandas>=0.24,<2", "pyteomics>=3.5,<5", "scipy>=1.2,<2", "tqdm>=4,<5", diff --git a/tests/test_data/crap_2015-01-30.fasta b/tests/test_data/crap_2015-01-30.fasta new file mode 100644 index 00000000..b037149b --- /dev/null +++ b/tests/test_data/crap_2015-01-30.fasta @@ -0,0 +1,945 @@ +>sp|ALBU_BOVIN| +MKWVTFISLLLLFSSAYSRGVFRRDTHKSEIAHRFKDLGEEHFKGLVLIA +FSQYLQQCPFDEHVKLVNELTEFAKTCVADESHAGCEKSLHTLFGDELCK +VASLRETYGDMADCCEKQEPERNECFLSHKDDSPDLPKLKPDPNTLCDEF +KADEKKFWGKYLYEIARRHPYFYAPELLYYANKYNGVFQECCQAEDKGAC +LLPKIETMREKVLASSARQRLRCASIQKFGERALKAWSVARLSQKFPKAE +FVEVTKLVTDLTKVHKECCHGDLLECADDRADLAKYICDNQDTISSKLKE +CCDKPLLEKSHCIAEVEKDAIPENLPPLTADFAEDKDVCKNYQEAKDAFL +GSFLYEYSRRHPEYAVSVLLRLAKEYEATLEECCAKDDPHACYSTVFDKL +KHLVDEPQNLIKQNCDQFEKLGEYGFQNALIVRYTRKVPQVSTPTLVEVS +RSLGKVGTRCCTKPESERMPCTEDYLSLILNRLCVLHEKTPVSEKVTKCC +TESLVNRRPCFSALTPDETYVPKAFDEKLFTFHADICTLPDTEKQIKKQT +ALVELLKHKPKATEEQLKTVMENFVAFVDKCCAADDKEACFAVEGPKLVV +STQTALA +>sp|AMYS_HUMAN| +MKLFWLLFTIGFCWAQYSSNTQQGRTSIVHLFEWRWVDIALECERYLAPK +GFGGVQVSPPNENVAIHNPFRPWWERYQPVSYKLCTRSGNEDEFRNMVTR +CNNVGVRIYVDAVINHMCGNAVSAGTSSTCGSYFNPGSRDFPAVPYSGWD +FNDGKCKTGSGDIENYNDATQVRDCRLSGLLDLALGKDYVRSKIAEYMNH +LIDIGVAGFRIDASKHMWPGDIKAILDKLHNLNSNWFPEGSKPFIYQEVI +DLGGEPIKSSDYFGNGRVTEFKYGAKLGTVIRKWNGEKMSYLKNWGEGWG +FMPSDRALVFVDNHDNQRGHGAGGASILTFWDARLYKMAVGFMLAHPYGF +TRVMSSYRWPRYFENGKDVNDWVGPPNDNGVTKEVTINPDTTCGNDWVCE +HRWRQIRNMVNFRNVVDGQPFTNWYDNGSNQVAFGRGNRGFIVFNNDDWT +FSLTLQTGLPAGTYCDVISGDKINGNCTGIKIYVSDDGKAHFSISNSAED +PFIAIHAESKL +>sp|CAS1_BOVIN| +MKLLILTCLVAVALARPKHPIKHQGLPQEVLNENLLRFFVAPFPEVFGKE +KVNELSKDIGSESTEDQAMEDIKQMEAESISSSEEIVPNSVEQKHIQKED +VPSERYLGYLEQLLRLKKYKVPQLEIVPNSAEERLHSMKEGIHAQQKEPM +IGVNQELAYFYPELFRQFYQLDAYPSGAWYYVPLGTQYTDAPSFSDIPNP +IGSENSEKTTMPLW +>sp|CAS2_BOVIN| 
+MKFFIFTCLLAVALAKNTMEHVSSSEESIISQETYKQEKNMAINPSKENL +CSTFCKEVVRNANEEEYSIGSSSEESAEVATEEVKITVDDKHYQKALNEI +NQFYQKFPQYLQYLYQGPIVLNPWDQVKRNAVPITPTLNREQLSTSEENS +KKTVDMESTEVFTKKTKLTEEEKNRLNFLKKISQRYQKFALPQYLKTVYQ +HQKAMKPWIQPKTKVIPYVRYL +>sp|CASB_BOVIN| +MKVLILACLVALALARELEELNVPGEIVESLSSSEESITRINKKIEKFQS +EEQQQTEDELQDKIHPFAQTQSLVYPFPGPIPNSLPQNIPPLTQTPVVVP +PFLQPEVMGVSKVKEAMAPKHKEMPFPKYPVEPFTESQSLTLTDVENLHL +PLPLLQSWMHQPHQPLPPTVMFPPQSVLSLSQSKVLPVPQKAVPYPQRDM +PIQAFLLYQEPVLGPVRGPFPIIV +>sp|CASK_BOVIN| +MMKSFFLVVTILALTLPFLGAQEQNQEQPIRCEKDERFFSDKIAKYIPIQ +YVLSRYPSYGLNYYQQKPVALINNQFLPYPYYAKPAAVRSPAQILQWQVL +SNTVPAKSCQAQPTTMARHPHPHLSFMAIPPKKNQDKTEIPTINTIASGE +PTSTPTTEAVESTVATLEDSPEVIESPPEINTVQVTSTAV +>sp|CTRA_BOVIN| +CGVPAIQPVLSGLSRIVNGEEAVPGSWPWQVSLQDKTGFHFCGGSLINEN +WVVTAAHCGVTTSDVVVAGEFDQGSSSEKIQKLKIAKVFKNSKYNSLTIN +NDITLLKLSTAASFSQTVSAVCLPSASDDFAAGTTCVTTGWGLTRYTNAN +TPDRLQQASLPLLSNTNCKKYWGTKIKDAMICAGASGVSSCMGDSGGPLV +CKKNGAWTLVGIVSWGSSTCSTSTPGVYARVTALVNWVQQTLAAN +>sp|CTRB_BOVIN| +CGVPAIQPVLSGLARIVNGEDAVPGSWPWQVSLQDSTGFHFCGGSLISED +WVVTAAHCGVTTSDVVVAGEFDQGLETEDTQVLKIGKVFKNPKFSILTVR +NDITLLKLATPAQFSETVSAVCLPSADEDFPAGMLCATTGWGKTKYNALK +TPDKLQQATLPIVSNTDCRKYWGSRVTDVMICAGASGVSSCMGDSGGPLV +CQKNGAWTLAGIVSWGSSTCSTSTPAVYARVTALMPWVQETLAAN +>sp|K1C10_HUMAN| +MSVRYSSSKHYSSSRSGGGGGGGGCGGGGGVSSLRISSSKGSLGGGFSSG +GFSGGSFSRGSSGGGCFGGSSGGYGGLGGFGGGSFHGSYGSSSFGGSYGG +SFGGGNFGGGSFGGGSFGGGGFGGGGFGGGFGGGFGGDGGLLSGNEKVTM +QNLNDRLASYLDKVRALEESNYELEGKIKEWYEKHGNSHQGEPRDYSKYY +KTIDDLKNQILNLTTDNANILLQIDNARLAADDFRLKYENEVALRQSVEA +DINGLRRVLDELTLTKADLEMQIESLTEELAYLKKNHEEEMKDLRNVSTG +DVNVEMNAAPGVDLTQLLNNMRSQYEQLAEQNRKDAEAWFNEKSKELTTE +IDNNIEQISSYKSEITELRRNVQALEIELQSQLALKQSLEASLAETEGRY +CVQLSQIQAQISALEEQLQQIRAETECQNTEYQQLLDIKIRLENEIQTYR +SLLEGEGSSGGGGRGGGSFGGGYGGGSSGGGSSGGGYGGGHGGSSGGGYG +GGSSGGGSSGGGYGGGSSSGGHGGGSSSGGHGGSSSGGYGGGSSGGGGGG +YGGGSSGGGSSSGGGYGGGSSSGGHKSSSSGSVGESSSKGPRY +>sp|K1C15_SHEEP| +MATTLLQTSSSTFGGSSTRGGSLLAGGGGFGGGSLYGGGGSRTISASSAR +FVSSGSAGGYGGGFGGGAGSGYGGGFGGGFGGGFGSGFGDFGGGDGGLLS 
+GNEKITMQNLNDRLASYLEKVRALEEANADLEVKIRDWYQRQSPTSPERD +YSPYFKTTDELRDKILAAAIDNSRVILEIDNARLAADDFRLKYENEMALR +QSVEADINGLRRVLDELTLTKTDLEMQIESLNEELAYLKKNHEEEMKEFS +NQLAGQVNVEMDAAPGVDLTRVLSEMREQYEAMAEKNRRDAEAWFFSKTE +ELNKEVASNTEMIQTSKSEITDLRRTIQGLEIELQSQLSMKAGLESTLAE +TDGRYAAQLQQIQGLISSIEAQLSELRSEMEAQNQEYKMLLDIKTRLEQE +IATYHSLLEGQDARMAGIGTGEASLGGGGGGKVRINVEESVDGKVVSSRK +REI +>sp|K1C9_HUMAN| +MSCRQFSSSYLSRSGGGGGGGLGSGGSIRSSYSRFSSSGGRGGGGRFSSS +SGYGGGSSRVCGRGGGGSFGYSYGGGSGGGFSASSLGGGFGGGSRGFGGA +SGGGYSSSGGFGGGFGGGSGGGFGGGYGSGFGGLGGFGGGAGGGDGGILT +ANEKSTMQELNSRLASYLDKVQALEEANNDLENKIQDWYDKKGPAAIQKN +YSPYYNTIDDLKDQIVDLTVGNNKTLLDIDNTRMTLDDFRIKFEMEQNLR +QGVDADINGLRQVLDNLTMEKSDLEMQYETLQEELMALKKNHKEEMSQLT +GQNSGDVNVEINVAPGKDLTKTLNDMRQEYEQLIAKNRKDIENQYETQIT +QIEHEVSSSGQEVQSSAKEVTQLRHGVQELEIELQSQLSKKAALEKSLED +TKNRYCGQLQMIQEQISNLEAQITDVRQEIECQNQEYSLLLSIKMRLEKE +IETYHNLLEGGQEDFESSGAGKIGLGGRGGSGGSYGRGSRGGSGGSYGGG +GSGGGYGGGSGSRGGSGGSYGGGSGSGGGSGGGYGGGSGGGHSGGSGGGH +SGGSGGNYGGGSGSGGGSGGGYGGGSGSRGGSGGSHGGGSGFGGESGGSY +GGGEEASGSGGGYGGGSGKSSHS +>sp|K1H1_HUMAN| +MPYNFCLPSLSCRTSCSSRPCVPPSCHSCTLPGACNIPANVSNCNWFCEG +SFNGSEKETMQFLNDRLASYLEKVRQLERDNAELENLIRERSQQQEPLLC +PSYQSYFKTIEELQQKILCTKSENARLVVQIDNAKLAADDFRTKYQTELS +LRHVVESDINGLRRILDELTLCKSDLEAQVESLKEELLCLKSNHEQEVNT +LRCQLGDRLNVEVDAAPTVDLNRVLNETRSQYEALVETNRREVEQWFTTQ +TEELNKQVVSSSEQLQSYQAEIIELRRTVNALEIELQAQHNLRDSLENTL +TESEARYSSQLSQVQSLITNVESQLAEIRSDLERQNQEYQVLLDVRARLE +CEINTYRSLLESEDCNLPSNPCATTNACSKPIGPCLSNPCTSCVPPAPCT +PCAPRPRCGPCNSFVR +>sp|K1H2_HUMAN| +MTSSCCVTNNLQASLKSCPRPASVCSSGVNCRPELCLGYVCQPMACLPSV +CLPTTFRPASCLSKTYLSSSCQAASGISGSMGPGSWYSEGAFNGNEKETM +QFLNDRLASYLTRVRQLEQENAELESRIQEASHSQVLTMTPDYQSHFRTI +DQLQQKILCTKAENARMVVNIDNAKLAADDFRAKYEAELAMRQLVEADIN +GLRRILDDLTLCKADLEAQVESLKEELMCLKKNHEEEVGSLRCQLGDRLN +IEVDAAPPVDLTRVLEEMRCQYEAMVEANRRDVEEWFNMQMEELNQQVAT +SSEQLQNYQSDIIDLRRTVNTLEIELQAQHSLRDSLENTLTESEARYSSQ +LAQMQCMITNVEAQLAEIRAELERQNQEYQVLLDVRARLEGEINTYRSLL +ESEDCKLPCNPCSTPSCTTCVPSPCVTRTVCVPRTVGMPCSPCPQGRY +>sp|K1H4_HUMAN| 
+MSYSCCLPSLGCRTSCSSRPCVPPSCHGYTLPGACNIPANVSNCNWFCEG +SFNGSEKETMQFLNDRLASYLEKVRQLERDNAELEKLIQERSQQQEPLLC +PSYQSYFKTIEELQQKILCAKAENARLVVNIDNAKLASDDFRSKYQTEQS +LRLLVESDINSIRRILDELTLCKSDLESQVESLREELICLKKNHEEEVNT +LRSPLGDRLNVEVDTAPTVDLNQVLNETRSQYEALVEINRREVEQWFATQ +TEELNKQVVSSSEQLQSCQAEIIELRRTVNALEIELQAQHNLRDSLENTL +TESEAHYSSQLSQVQSLITNVESQLAEIRCDLERQNQEYQVLLDVRARLE +CEINTYRSLLESEDCKLPCNPCATTNASGNSCGPCGTSQKGCCN +>sp|K1H5_HUMAN| +MYSSSPCKLPSLSPVARSFSACSVGLGRSSYRATSCLPALCLPAGGFATS +YSGGGGWFGEGILTGNEKETMQSLNDRLAGYLEKVRHVEQENASLESRIR +EWCEQQVPYMCPDYQSYFRTIEELQKKTLCSKAENARLVVEIDNAKLAAD +DFRTKYETEVSLRQLVESDINGLRRILDDLTLCKSDLEAQVESLKEELLC +LKKNHEEEVNSLRCQLGDRLNVEVDAAPPVDLNRVLEEMRCQYETLVENN +RRDAEDWLDTQSEELNQQVVSSSEQLQSCQAEIIELRRTVNALEIELQAQ +HSMRDALESTLAETEARYSSQLAQMQCMITNVEAQLAEIRADLERQNQEY +QVLLDVRARLECEINTYRGLLESEDSKLPCNPCAPDYSPSKSCLPCLPAA +SCGPSAARTNCSPRPICVPCPGGRF +>sp|K1H6_HUMAN| +MATQTCTPTFSTGSIKGLCGTAGGISRVSSIRSVGSCRVPSLAGAAGYIS +SARSGLSGLGSCLPGSYLSSECHTSGFVGSGGWFCEGSFNGSEKETMQFL +NDRLANYLEKVRQLERENAELESRIQEWYEFQIPYICPDYQSYFKTIEDF +QQKILLTKSENARLVLQIDNAKLAADDFRTKYETELSLRQLVEADINGLR +RILDELTLCKADLEAQVESLKEELMCLKKNHEEEVSVLRCQLGDRLNVEV +DAAPPVDLNKILEDMRCQYEALVENNRRDVEAWFNTQTEELNQQVVSSSE +QLQCCQTEIIELRRTVNALEIELQAQHSMRNSLESTLAETEARYSSQLAQ +MQCLISNVEAQLSEIRCDLERQNQEYQVLLDVKARLEGEIATYRHLLEGE +DCKLPPQPCATACKPVIRVPSVPPVPCVPSVPCTPAPQVGTQIRTITEEI +RDGKVISSREHVQSRPL +>sp|K1H7_HUMAN| +MTSFYSTSSCPLGCTMAPGARNVFVSPIDVGCQPVAEANAASMCLLANVA +HANRVRVGSTPLGRPSLCLPPTSHTACPLPGTCHIPGNIGICGAYGKNTL +NGHEKETMKFLNDRLANYLEKVRQLEQENAELETTLLERSKCHESTVCPD +YQSYFRTIEELQQKILCSKAENARLIVQIDNAKLAADDFRIKLESERSLH +QLVEADKCGTQKLLDDATLAKADLEAQQESLKEEQLSLKSNHEQEVKILR +SQLGEKFRIELDIEPTIDLNRVLGEMRAQYEAMVETNHQDVEQWFQAQSE +GISLQAMSCSEELQCCQSEILELRCTVNALEVERQAQHTLKDCLQNSLCE +AEDRYGTELAQMQSLISNLEEQLSEIRADLERQNQEYQVLLDVKARLENE +IATYRNLLESEDCKLPCNPCSTPASCTSCPSCGPVTGGSPSGHGASMGR +>sp|K1H8_HUMAN| +MTSSYSSSSCPLGCTMAPGARNVSVSPIDIGCQPGAEANIAPMCLLANVA +HANRVRVGSTPLGRPSLCLPPTCHTACPLPGTCHIPGNIGICGAYGENTL 
+NGHEKETMQFLNDRLANYLEKVRQLEQENAELEATLLERSKCHESTVCPD +YQSYFHTIEELQQKILCSKAENARLIVQIDNAKLAADDFRIKLESERSLR +QLVEADKCGTQKLLDDATLAKADLEAQQESLKEEQLSLKSNHEQEVKILR +SQLGEKLRIELDIEPTIDLNRVLGEMRAQYEAMLETNRQDVEQWFQAQSE +GISLQDMSCSEELQCCQSEILELRCTVNALEVERQAQHTLKDCLQNSLCE +AEDRFGTELAQMQSLISNVEEQLSEIRADLERQNQEYQVLLDVKTRLENE +IATYRNLLESEDCKLPCNPCSTPPSCVTAPCAPRPSCGPCTTCGPTCGAS +TTGSRF +>sp|K1HA_HUMAN| +MSYSCGLPSLSCRTSCSSRPCVPPSCHGCTLPGACNIPANVSNCNWFCEG +SFNGSEKETMQFLNDRLASYLEKVRQLERDNAELENLIRERSQQQEPLVC +ASYQSYFKTIEELQQKILCSKSENARLVVQIDNAKLASDDFRTKYETELS +LRHVVESDINGLRRILDELTLCRSDLEAQVESLKEELLCLKQNHEQEVNT +LRCQLGDRLNVEVDAAPTVDLNQVLNETRSQYEALVETNRREVEQWFATQ +TEELNKQVVSSSEQLQSYQAEIIELRRTVNALEIELQAQHNLRDSLENTL +TESEARYSSQLSQVQRLITNVESQLAEIRSDLERQNQEYQVLLDVRARLE +CEINTYRSLLESEDCKLPSNPCATTNACDKSTGPCISNPCGLRARCGPCN +TFGY +>sp|K1HB_HUMAN| +MPYNFCLPSLSCRTSCSSRPCVPPSCHGYTLPGACNIPANVSNCNWFCEG +SFNGSEKETMQFLNDRLASYLEKVRQLERDNAELENLIRERSQQQEPLLC +PSYQSYFKTIEELQQKILCSKSENARLVVQIDNAKLAADDFRTKYQTEQS +LRQLVESDINSLRRILDELTLCRSDLEAQMESLKEELLSLKQNHEQEVNT +LRCQLGDRLNVEVDAAPAVDLNQVLNETRNQYEALVETNRREVEQWFATQ +TEELNKQVVSSSEQLQSYQAEIIELRRTVNALEIELQAQHNLRYSLENTL +TESEARYSSQLSQVQSLITNVESQLAEIRSDLERQNQEYQVLLDVRARLE +CEINTYRSLLESEDCKLPSNPCATTNACEKPIGSCVTNPCGPRSRCGPCN +TFGY +>sp|K1M1_SHEEP| +SFNFCLPNLSFRSSCSSRPCVPSSCCGTTLPGACNIPANVGSCNWFCEGS +FDGNEKETMQFLNDRLASYLEKVRQLERENAELESRILERSQQQEPLVCP +NYQSYFRTIEELQQKILCAKSENARLVVQIDNAKLAADDFRTKYETELGL +RQLVESDINGLRRILDELTLCKSDLEAQVESLKEELICLKSNHEEEVNTL +RSQLGDRLNVEVDAAPTVDLNRVLNETRAQYEALVETNRRDVEEWYIRQT +EELNKQVVSSSEQLQSCQTEIIELRRTVNALEVELQAQHNLRDSLENTLT +ETEARYSCQLNQVQSLISNVESQLAEIRGDLERQNQEYQVLLDVRARLEC +EINTYRGLLDSEDCKLPCNPCATTNACGKTITPCISSPCAPAAPCTPCVP +RSRCGPCNSYVR +>sp|K1M2_SHEEP| +SFNFCLPNLSFRSSCSSRPCVPSSCCGTTLPGACNIPASVGSCNWFCEGS +FNGNEKETMQFLNDRLASYLEKVRQLERENAELERRILERSQQQEPLVCP +NYQSYFRTIEELQQKILCGKSENARLVVQIDNAKLASDDFRTKYETEVSL +RQLVEADLNGLRRILDELTLCKSDLEARVESLKEELICLKQNHEQEVNTL +RSQLGDRLNVEVDAAPTVDLNHVLNETRAQYEALVETNRRDVEEWYIRQT 
+EELNKQVVSSSEQLQSCQAEIIELRRTVNALEVELQAQHNLRDSLENTLT +ETEARYSCQLNQVQSLIVSVESQLAEIRSDLERQNQEYQVLLDVRARLEC +EINTYRGLLDSEDCKLPCNPCATTNTCGKPIGPCISNPCVSRTRCGPCNT +FVH +>sp|K22E_HUMAN| +MSCQISCKSRGRGGGGGGFRGFSSGSAVVSGGSRRSTSSFSCLSRHGGGG +GGFGGGGFGSRSLVGLGGTKSISISVAGGGGGFGAAGGFGGRGGGFGGGS +GFGGGSGFGGGSGFSGGGFGGGGFGGGRFGGFGGPGGVGGLGGPGGFGPG +GYPGGIHEVSVNQSLLQPLNVKVDPEIQNVKAQEREQIKTLNNKFASFID +KVRFLEQQNQVLQTKWELLQQMNVGTRPINLEPIFQGYIDSLKRYLDGLT +AERTSQNSELNNMQDLVEDYKKKYEDEINKRTAAENDFVTLKKDVDNAYM +IKVELQSKVDLLNQEIEFLKVLYDAEISQIHQSVTDTNVILSMDNSRNLD +LDSIIAEVKAQYEEIAQRSKEEAEALYHSKYEELQVTVGRHGDSLKEIKI +EISELNRVIQRLQGEIAHVKKQCKNVQDAIADAEQRGEHALKDARNKLND +LEEALQQAKEDLARLLRDYQELMNVKLALDVEIATYRKLLEGEECRMSGD +LSSNVTVSVTSSTISSNVASKAAFGGSGGRGSSSGGGYSSGSSSYGSGGR +QSGSRGGSGGGGSISGGGYGSGGGSGGRYGSGGGSKGGSISGGGYGSGGG +KHSSGGGSRGGSSSGGGYGSGGGGSSSVKGSSGEAFGSSVTFSFR +>sp|K2C1_HUMAN| +SRQFSSRSGYRSGGGFSSGSAGIINYQRRTTSSSTRRSGGGGGRFSSCGG +GGGSFGAGGGFGSRSLVNLGGSKSISISVARGGGRGSGFGGGYGGGGFGG +GGFGGGGFGGGGIGGGGFGGFGSGGGGFGGGGFGGGGYGGGYGPVCPPGG +IQEVTINQSLLQPLNVEIDPEIQKVKSREREQIKSLNNQFASFIDKVRFL +EQQNQVLQTKWELLQQVDTSTRTHNLEPYFESFINNLRRRVDQLKSDQSR +LDSELKNMQDMVEDYRNKYEDEINKRTNAENEFVTIKKDVDGAYMTKVDL +QAKLDNLQQEIDFLTALYQAELSQMQTQISETNVILSMDNNRSLDLDSII +AEVKAQNEDIAQKSKAEAESLYQSKYEELQITAGRHGDSVRNSKIEISEL +NRVIQRLRSEIDNVKKQISNLQQSISDAEQRGENALKDAKNKLNDLEDAL +QQAKEDLARLLRDYQELMNTKLALDLEIATYRTLLEGEESRMSGECAPNV +SVSVSTSHTTISGGGSRGGGGGGYGSGGSSYGSGGGSYGSGGGGGGGRGS +YGSGGSSYGSGGGSYGSGGGGGGHGSYGSGSSSGGYRGGSGGGGGGSSGG +RGSGGGSSGGSIGGRGSSSGGVKSSGGSSSVRFVSTTYSGVTR +>sp|K2M1_SHEEP| +QNRQCCESNLEPLFSGYIETLRREAECAEADSGRLSSELNSLQEVLEGYK +RRYEEEVALRATAENEFVALKKDVDCAYLRKSDLEANVEALIQETDFLRR +LYEEEIRVL +>sp|K2M2_SHEEP| +CGFSTVGSGFGSRAFSCVSACGPRPGRCCITAAPYRGISCYRGLTGGFGS +RSVCGGFRAGSCGRSFGYRSGGVCGPSPPCITTVSVNESLLTPLNLEIDP +NAQCVKQEEKEQIKCLNNRFAAFIDKVRFLEQQNKLLETKLQFFQNRQCC +ESNLEPLFEGYIETLRREAECVEADSGRLSSELNHVQEVLEGYKKKYEQE +VALRATAENEFVALKKDVDCAYVRKSDLEANSEALIQEIDFLRRLYQEEI +RVLQANISDTSVIVKMDNSRDLNMDCIVAEIKAQYDDIASRSRAEAESWY 
+RSKCEEIKATVIRHGETLRRTKEEINELNRVIQRLTAEVENAKCQNSKLE +AAVTQAEQQGEVALNDARCKLAGLEEALQKAKQDMACLLKEYQEVMNSKL +GLDIEIATYRRLLEGEEQRLCEGVGAVNVCVSSSRGGVVCGDLCVSGSRP +VTGSVCSAPCSGNLAVSTGLCAPCGQLNTTCGGGSCSLGRC +>sp|K2M3_SHEEP| +SCRSYRISPGYSVTRTFSSCSAVAPKTGSRCCISAAPYRGVSCYRGLTGF +GSRSVSALGSCGPRIAVSGFRAGSCGRSFGYRSGGVGGLSPSCITTVSVN +ESLLTPLNLEIDPNAQCVKHQEKEQIKNLNSRFAAFIDKVRFLEQQNKLL +ETKWQFYQNQRCCESNLEPLFNGYIETLRREAEHVEADSGRLASELDHVQ +EVLEGYKKKYEEEVALRATAENEFVVLKKDVDCAYLRKSDLEANVEALVE +ESNFLKRLYDEEIQILNAHISDTSVIVKMDNSRDLNMDCVVAEIKAQYDD +IASRSRAEAESWYRSKCEEMKATVIRHGETLRRTKEEINELNRVIQRLTA +EIENAKCQRTKLEAAVAEAEQQGEAALNDARSKLAGLEEALQKAKQDMAC +LLKEYQEVMNSKLGLDIEIATYRRLLEGEEQRLCEGVGSVNVCVSSRGGV +ACGGLTYSSTAGRQIASGPVATGGSITVLAPDSCQPRASSFSCGSSRSVR +FA +>sp|KRA33_SHEEP| +ACCARLCCSVPTSPATTICSSDKFCRCGVCLPSTCPHTVWLLQPTCCCDN +RPPPYHVPQPSVPTCFLLNSSQPTPGLESINLTTYTQSSCEPCIPSCC +>sp|KRA34_SHEEP| +ACCARLCCSVPTSPATTICSSDKFCRCGVCLPSTCPHTVWFLQPTCCCDN +RPPPCHIPQPSVPTCFLLNSSQPTPGLESINLTTYTQPSCEPCIPSCC +>sp|KRA3A_SHEEP| +TGSCCGPTFSSLSCGGGCLQPCCYRDPCCCRPVSSTQTVSRPVTFVSRCT +RPICEPCRRPVCCDPCSLQEGCCRPITCCPTSCQAVVCRPCCWATTCCQP +VSVQCPCCRPTSCPSAPRTTCRTFRTSPCC +>sp|KRA3_SHEEP| +TGSCCGPTFSSLSCGGGCLQPRYYRDPCCCRPVSCQTVSRPVTFVPRCTR +PICEPCRRPVCCDPCSLQEGCCRPITCCPTSCQAVVCRPCCWATTCCQPV +SVQCPCCRPTSCQPAPCSRTTCRTFRTSPCC +>sp|KRA61_SHEEP| +CGYYGNYYGGLGCGSYSYGGLGCGYGSCYGSGFRRLGCGYGCGYGYGSRS +LCGSGYGYGSRSLCGSGYGCGSGYGSGFGYYY +>sp|KRB2A_SHEEP| +ACCSTSFCGFPICSTGGTCGSSPCQPTCCQTSCCQPTSIQTSCCQPISIQ +TSCCQPTSIQTSCCQPTCLQTSGCETGCGIGGSIGYGQVGSSGAVSSRTR +WCRPDCRVEGTSLPPCCVVSCTPPSCCQLYYAQASCCRPSYCGQSCCRPA +CCCQPTCIEPICEPSCCEPTC +>sp|KRB2B_SHEEP| +ACCSTSFCGFPICSSVGTCGSSCGQPTCSQTSCCQPTSIQTSCCQPISIQ +TSCCQPTCLQTSGCETGCGIGGSIGYDQVGSSGAVSSRTRWCRPDCRVEG +TSLPPCCVVSCTSPSCCQLYYAQASCCRPSYCGQSCCRPACCCQPTCIEP +VCEPTC +>sp|KRB2C_SHEEP| +ACCSTSFCGFPICSTAGTCGSSCCRSTCSQTSCCQPTSIQTSCCQPTCLQ +TSGCETGCGIGGSTGYGQVGSSGAVSSRTRWCRPDCRVEGTSLPPCCVVS +CTSPSCCQLYYAQASCCRPSYCGQSCCRPACCCQPTCTEPVCEPTCSQPI +C +>sp|KRB2D_SHEEP| 
+ACCSTSFCGFPTCSTGGTCGSNFCQPTCCQTSCCQPTSIQTSCCQPTSIQ +TSCCQPTSIQTSCCQPISIQTSCCQPTCLQTSGCETGCGIGGSIGYGQVG +SSGAVSSRTKWCRPDCRVEGTSLPPCCVVSCTSPSCCQLYYAQASCCRPS +YCGQSCCRPACCCQPTCIEPVCEPTCCEPTC +>sp|KRHB1_HUMAN| +MTCGSGFGGRAFSCISACGPRPGRCCITAAPYRGISCYRGLTGGFGSHSV +CGGFRAGSCGRSFGYRSGGVCGPSPPCITTVSVNESLLTPLNLEIDPNAQ +CVKQEEKEQIKSLNSRFAAFIDKVRFLEQQNKLLETKLQFYQNRECCQSN +LEPLFEGYIETLRREAECVEADSGRLASELNHVQEVLEGYKKKYEEEVSL +RATAENEFVALKKDVDCAYLRKSDLEANVEALIQEIDFLRRLYEEEIRIL +QSHISDTSVVVKLDNSRDLNMDCIIAEIKAQYDDIVTRSRAEAESWYRSK +CEEMKATVIRHGETLRRTKEEINELNRMIQRLTAEVENAKCQNSKLEAAV +AQSEQQGEAALSDARCKLAELEGALQKAKQDMACLIREYQEVMNSKLGLD +IEIATYRRLLEGEEQRLCEGIGAVNVCVSSSRGGVVCGDLCVSGSRPVTG +SVCSAPCNGNVAVSTGLCAPCGQLNTTCGGGSCGVGSCGISSLGVGSCGS +SCRKC +>sp|KRHB2_HUMAN| +MSYHSFQPGSRCGSQSFSSYSAVMPRMVTHYAVSKGPCRPGGGRGLRALG +CLGSRSLCNVGFGRPRVASRCGGTLPGFGYRLGATCGPSACITPVTINES +LLVPLALEIDPTVQRVKRDEKEQIKCLNNRFASFINKVRFLEQKNKLLET +KWNFMQQQRCCQTNIEPIFEGYISALRRQLDCVSGDRVRLESELCSLQAA +LEGYKKKYEEELSLRPCVENEFVALKKDVDTAFLMKADLETNAEALVQEI +DFLKSLYEEEICLLQSQISETSVIVKMDNSRELDVDGIIAEIKAQYDDIA +SRSKAEAEAWYQCRYEELRVTAGNHCDNLRNRKNEILEMNKLIQRLQQET +ENVKAQRCKLEGAIAEAEQQGEAALNDAKCKLAGLEEALQKAKQDMACLL +KEYQEVMNSKLGLDIEIATYRRLLEGEEHRLCEGIGPVNISVSSSKGAFL +YEPCGVSMPVLSTGVLRSNGGCSIVGTGELYVPCEPQGLLSCGSGRKSSM +TLGAGGSSPSHKH +>sp|KRHB3_HUMAN| +MTCGFNSIGCGFRPGNFSCVSACGPRPSRCCITAAPYRGISCYRGLTGGF +GSHSVCGGFRAGSCGRDFGYRSGGVCGPSPPCITTVSVNESLLTPLNLEI +DPNAQCVKQEEKEQIKSLNSRFAAFIDKVRFLEQQNKLLETKLQFYQNCE +CCQSNLEPLFAGYIETLRREAECVEADSGRLASELNHVQEVLEGYKKKYE +EEVALRATAENEFVALKKDVDCAYLRKSDLEANVEALIQEIDFLRRLYEE +EIRILQSHISDTSVVVKLDNSRDLNMDCMVAEIKAQYDDIATRSRAEAES +WYRSKCEEMKATVIRHGETLRRTKEEINELNRMIQRLTAEVENAKCQNSK +LEAAVAQSEQQGEAALSDARCKLAELEGALQKAKQDMACLIREYQEVMNS +KLGLDIEIATYRRLLEGEEQRLCEGVEAVNVCVSSSRGGVVCGDLCVSGS +RPVTGSVCSAPCNGNLVVSTGLCKPCGQLNTTCGGGSCGQGRY +>sp|KRHB4_HUMAN| +MSCRSYRVSSGHRVGNFSSCSAMTPQNLNRFRANSVSCWSGPGFRGLGSF +GSRSVITFGSYSPRIAAVGSRPIHCGVRFGAGCGMGFGDGRGVGLGPRAD +SCVGLGFGAGSGIGYGFGGPGFGYRVGGVGVPAAPSITAVTVNKSLLTPL 
+NLEIDPNAQRVKKDEKEQIKTLNNKFASFIDKVRFLEQQNKLLETKWSFL +QEQKCIRSNLEPLFESYITNLRRQLEVLVSDQARLQAERNHLQDVLEGFK +KKYEEEVVCRANAENEFVALKKDVDAAFMNKSDLEANVDTLTQEIDFLKT +LYMEEIQLLQSHISETSVIVKMDNSRDLNLDGIIAEVKAQYEEVARRSRA +DAEAWYQTKYEEMQVTAGQHCDNLRNIRNEINELTRLIQRLKAEIEHAKA +QRAKLEAAVAEAEQQGEATLSDAKCKLADLECALQQAKQDMARQLREYQE +LMNAKLGLDIEIATYRRLLEGEESRLCEGVGPVNISVSSSRGGLVCGPEP +LVAGSTLSRGGVTFSGSSSVCATSGVLASCGPSLGGARVAPATGDLLSTG +TRSGSMLISEACVPSVPCPLPTQGGFSSCSGGRSSSVRFVSTTTSCRTKY + +>sp|KRHB5_HUMAN| +MSCRSYRISSGCGVTRNFSSCSAVAPKTGNRCCISAAPYRGVSCYRGLTG +FGSRSLCNLGSCGPRIAVGGFRAGSCGRSFGYRSGGVCGPSPPCITTVSV +NESLLTPLNLEIDPNAQCVKQEEKEQIKSLNSRFAAFIDKVRFLEQQNKL +LETKWQFYQNQRCCESNLEPLFSGYIETLRREAECVEADSGRLASELNHV +QEVLEGYKKKYEEEVALRATAENEFVVLKKDVDCAYLRKSDLEANVEALV +EESSFLRRLYEEEIRVLQAHISDTSVIVKMDNSRDLNMDCIIAEIKAQYD +DVASRSRAEAESWYRSKCEEMKATVIRHGETLRRTKEEINELNRMIQRLT +AEIENAKCQRAKLEAAVAEAEQQGEAALSDARCKLAELEGALQKAKQDMA +CLLKEYQEVMNSKLGLDIEIATYRRLLEGEEHRLCEGVGSVNVCVSSSRG +GVSCGGLSYSTTPGRQITSGPSAIGGSITVVAPDSCAPCQPRSSSFSCGS +SRSVRFA +>sp|KRHB6_HUMAN| +MTCGSYCGGRAFSCISACGPRPGRCCITAAPYRGISCYRGLTGGFGSHSV +CGGFRAGSCGRSFGYRSGGVCGPSPPCITTVSVNESLLTPLNLEIDPNAQ +CVKQEEKEQIKSLNSRFAAFIDKVRFLEQQNKLLETKLQFYQNRECCQSN +LEPLFEGYIETLRREAECVEADSGRLASELNHVQEVLEGYKKKYEEEVSL +RATAENEFVALKKDVDCAYLRKSDLEANVEALIQEIDFLRRLYEEEIRVL +QSHISDTSVVVKLDNSRDLNMDCIIAEIKAQYDDIVTRSRAEAESWYRSK +CEEMKATVIRHGETLRRTKEEINELNRMIQRLTAEVENAKCQNSKLEAAV +AQSEQQGEAALSDARCKLAELEGALQKAKQDMACLIREYQEVMNSKLGLD +IEIATYRRLLEGEEQRLCEGVGSVNVCVSSSRGGVVCGDLCASTTAPVVS +TRVSSVPSNSNVVVGTTNACAPSARVGVCGGSCKRC +>sp|KRUC_SHEEP| +MGCSGCSGGCGSSCGGCGSRCGGCSSSCCVPVCCCKPVCCCVPACSCSSC +GKGGCGSSCGGSKGGCGSCGGSKGGCGSCGGCGSSCCKPVCCCVPACSCS +SCGKGGCGSCGGSKGGCGSCGGSKGGCGSCGGCGSGCGSSCCVPVCCCVP +ACSCSSCGKGGCGSCGCSQSSCCVPVCCQRKI +>sp|LALBA_BOVIN| +MMSFVSLLLVGILFHATQAEQLTKCEVFRELKDLKGYGGVSLPEWVCTTF +HTSGYDTQAIVQNNDSTEYGLFQINNKIWCKDDQNPHSSNICNISCDKFL +DDDLTDDIMCVKKILDKVGINYWLAHKALCSEKLDQWLCEKL +>sp|LYSC_LYSEN| +GVSGSCNIDVVCPEGNGHRDVIRSVAAYSKQGTMWCTGSLVNNSANDKKM 
+YFLTANHCGMTTAAIASSMVVYWNYQNSTCRAPGSSSSGANGDGSLAQSQ +TGAVVRATNAASDFTLLELNTAANPAYNLFWAGWDRRDQNFAGATAIHHP +NVAEKRISHSTVATEISGYNGATGTSHLHVFWQASGGVTEPGSSGSPIYS +PEKRVLGQLHGGPSSCSATGADRSDYYGRVFTSWTGGGTSATRLSDWLDA +AGTGAQFIDGLDSTGTPPV +>sp|PEPA_BOVIN| +MSVVKIPLVKKKSLRQNLIENGKLKEFMRTHKYNLGSKYIREAATLVSEQ +PLQNYLDTEYFGTIGIGTPAQDFTVIFDTGSSNLWVPSIYCSSEACTNHN +RFNPQDSSTYEATSETLSITYGTGSMTGILGYDTVQVGGISDTNQIFGLS +ETEPGSFLYYAPFDGILGLAYPSISSSGATPVFDNIWDQGLVSQDLFSVY +LSSNEESGSVVIFGDIDSSYYSGSLNWVPVSVEGYWQITVDSITMNGESI +ACSDGCQAIVDTGTSLLAGPTTAISNIQSYIGASEDSSGEVVISCSSIDS +LPDIVFTINGVQYPVPPSAYILQSNGICSSGFEGMDISTSSGDLWILGDV +FIRQYFTVFDRGNNQIGLAPVA +>sp|PEPA_PIG| +MKWLLLLSLVVLSECLVKVPLVRKKSLRQNLIKNGKLKDFLKTHKHNPAS +KYFPEAAALIGDEPLENYLDTEYFGTIGIGTPAQDFTVIFDTGSSNLWVP +SVYCSSLACSDHNQFNPDDSSTFEATSQELSITYGTGSMTGILGYDTVQV +GGISDTNQIFGLSETEPGSFLYYAPFDGILGLAYPSISASGATPVFDNLW +DQGLVSQDLFSVYLSSNDDSGSVVLLGGIDSSYYTGSLNWVPVSVEGYWQ +ITLDSITMDGETIACSGGCQAIVDTGTSLLTGPTSAIAINIQSDIGASEN +SDGEMVISCSSIDSLPDIVFTINGVQYPLSPSAYILQDDDSCTSGFEGMD +VPTSSGELWILGDVFIRQYYTVFDRANNKVGLAPVA +>sp|PEPB_PIG| +MERIILRKGKSIREAMEEQGVLEKFLKNRPKIDPAAKYHFNNDAVAYEPF +TNYLDSFYFGEISIGTP +>sp|PEPC_PIG| +SVIKVPLKKLKSIRQAMKEKGLLEEFLKTHKYDPAQRYRIGDISVALEPM +AYLEAAYFGEISIGTPPQNFLVL +>sp|SSPA_STAAU| +MKGKFLKVSSLFVATLTTATLVSSPAANALSSKAMDNHPQQTQSSKQQTP +KIQKGGNLKPLEQREHANVILPNNDRHQITDTTNGHYAPVTYIQVEAPTG +TFIASGVVVGKDTLLTNKHVVDATHGDPHALKAFPSAINQDNYPNGGFTA +EQITKYSGEGDLAIVKFSPNEQNKHIGEVVKPATMSNNAETQVNQNITVT +GYPGDKPVATMWESKGKITYLKGEAMQYDLSTTGGNSGSPVFNEKNEVIG +IHWGGVPNEFNGAVFINENVRNFLKQNIEDIHFANDDQPNNPDNPDNPNN +PDNPNNPDEPNNPDNPNNPDNPDNGDNNNSDNPDAA +>sp|TRY1_BOVIN| +FIFLALLGAAVAFPVDDDDKIVGGYTCGANTVPYQVSLNSGYHFCGGSLI +NSQWVVSAAHCYKSGIQVRLGEDNINVVEGNEQFISASKSIVHPSYNSNT +LNNDIMLIKLKSAASLNSRVASISLPTSCASAGTQCLISGWGNTKSSGTS +YPDVLKCLKAPILSDSSCKSAYPGQITSNMFCAGYLEGGKDSCQGDSGGP +VVCSGKLQGIVSWGSGCAQKNKPGVYTKVCNYVSWIKQTIASN +>sp|TRY2_BOVIN| +MHPLLILAFVGAAVAFPSDDDDKIVGGYTCAENSVPYQVSLNAGYHFCGG +SLINDQWVVSAAHCYQYHIQVRLGEYNIDVLEGGEQFIDASKIIRHPKYS 
+SWTLDNDILLIKLSTPAVINARVSTLLLPSACASAGTECLISGWGNTLSS +GVNYPDLLQCLVAPLLSHADCEASYPGQITNNMICAGFLEGGKDSCQGDS +GGPVACNGQLQGIVSWGYGCAQKGKPGVYTKVCNYVDWIQETIAANS +>sp|TRYP_PIG| +FPTDDDDKIVGGYTCAANSIPYQVSLNSGSHFCGGSLINSQWVVSAAHCY +KSRIQVRLGEHNIDVLEGNEQFINAAKIITHPNFNGNTLDNDIMLIKLSS +PATLNSRVATVSLPRSCAAAGTECLISGWGNTKSSGSSYPSLLQCLKAPV +LSDSSCKSSYPGQITGNMICVGFLEGGKDSCQGDSGGPVVCNGQLQGIVS +WGYGCAQKNKPGVYTKVCNYVNWIQQTIAAN +>sp|ALBU_HUMAN| +MKWVTFISLLFLFSSAYSRGVFRRDAHKSEVAHRFKDLGEENFKALVLIA +FAQYLQQCPFEDHVKLVNEVTEFAKTCVADESAENCDKSLHTLFGDKLCT +VATLRETYGEMADCCAKQEPERNECFLQHKDDNPNLPRLVRPEVDVMCTA +FHDNEETFLKKYLYEIARRHPYFYAPELLFFAKRYKAAFTECCQAADKAA +CLLPKLDELRDEGKASSAKQRLKCASLQKFGERAFKAWAVARLSQRFPKA +EFAEVSKLVTDLTKVHTECCHGDLLECADDRADLAKYICENQDSISSKLK +ECCEKPLLEKSHCIAEVENDEMPADLPSLAADFVESKDVCKNYAEAKDVF +LGMFLYEYARRHPDYSVVLLLRLAKTYETTLEKCCAAADPHECYAKVFDE +FKPLVEEPQNLIKQNCELFEQLGEYKFQNALLVRYTKKVPQVSTPTLVEV +SRNLGKVGSKCCKHPEAKRMPCAEDYLSVVLNQLCVLHEKTPVSDRVTKC +CTESLVNRRPCFSALEVDETYVPKEFNAETFTFHADICTLSEKERQIKKQ +TALVELVKHKPKATKEQLKAVMDDFAAFVEKCCKADDKETCFAEEGKKLV +AASQAALGL +>sp|ANT3_HUMAN| +MYSNVIGTVTSGKRKVYLLSLLLIGFWDCVTCHGSPVDICTAKPRDIPMN +PMCIYRSPEKKATEDEGSEQKIPEATNRRVWELSKANSRFATTFYQHLAD +SKNDNDNIFLSPLSISTAFAMTKLGACNDTLQQLMEVFKFDTISEKTSDQ +IHFFFAKLNCRLYRKANKSSKLVSANRLFGDKSLTFNETYQDISELVYGA +KLQPLDFKENAEQSRAAINKWVSNKTEGRITDVIPSEAINELTVLVLVNT +IYFKGLWKSKFSPENTRKELFYKADGESCSASMMYQEGKFRYRRVAEGTQ +VLELPFKGDDITMVLILPKPEKSLAKVEKELTPEVLQEWLDELEEMMLVV +HMPRFRIEDGFSLKEQLQDMGLVDLFSPEKSKLPGIVAEGRDDLYVSDAF +HKAFLEVNEEGSEAAASTAVVIAGRSLNPNRVTFKANRPFLVFIREVPLN +TIIFMGRVANPCVK +>sp|ANXA5_HUMAN| +AQVLRGTVTDFPGFDERADAETLRKAMKGLGTDEESILTLLTSRSNAQRQ +EISAAFKTLFGRDLLDDLKSELTGKFEKLIVALMKPSRLYDAYELKHALK +GAGTNEKVLTEIIASRTPEELRAIKQVYEEEYGSSLEDDVVGDTSGYYQR +MLVVLLQANRDPDAGIDEAQVEQDAQALFQAGELKWGTDEEKFITIFGTR +SVSHLRKVFDKYMTISGFQIEETIDRETSGNLEQLLLAVVKSIRSIPAYL +AETLYYAMKGAGTDDHTLIRVMVSRSEIDLFNIRKEFRKNFATSLYSMIK +GDTSGDYKKALLLLCGEDD +>sp|B2MG_HUMAN| +MSRSVALAVLALLSLSGLEAIQRTPKIQVYSRHPAENGKSNFLNCYVSGF 
+HPSDIEVDLLKNGERIEKVEHSDLSFSKDWSFYLLYYTEFTPTEKDEYAC +RVNHVTLSQPKIVKWDRDM +>sp|BID_HUMAN| +MDCEVNNGSSLRDECITNLLVFGFLQSCSDNSFRRELDALGHELPVLAPQ +WEGYDELQTDGNRSSHSRLGRIEADSESQEDIIRNIARHLAQVGDSMDRS +IPPGLVNGLALQLRNTSRSEEDRNRDLATALEQLLQAYPRDMEKEKTMLV +LALLLAKKVASHTPSLLRDVFHTTVNFINQNLRTYVRSLARNGMD +>sp|CAH1_HUMAN| +ASPDWGYDDKNGPEQWSKLYPIANGNNQSPVDIKTSETKHDTSLKPISVS +YNPATAKEIINVGHSFHVNFEDNDNRSVLKGGPFSDSYRLFQFHFHWGST +NEHGSEHTVDGVKYSAELHVAHWNSAKYSSLAEAASKADGLAVIGVLMKV +GEANPKLQKVLDALQAIKTKGKRAPFTNFDPSTLLPSSLDFWTYPGSLTH +PPLYESVTWIICKESISVSSEQLAQFRSLLSNVEGDNAVPMQHNNRPTQP +LKGRTVRASF +>sp|CAH2_HUMAN| +SHHWGYGKHNGPEHWHKDFPIAKGERQSPVDIDTHTAKYDPSLKPLSVSY +DQATSLRILNNGHAFNVEFDDSQDKAVLKGGPLDGTYRLIQFHFHWGSLD +GQGSEHTVDKKKYAAELHLVHWNTKYGDFGKAVQQPDGLAVLGIFLKVGS +AKPGLQKVVDVLDSIKTKGKSADFTNFDPRGLLPESLDYWTYPGSLTTPP +LLECVTWIVLKEPISVSSEQVLKFRKLNFNGEGEPEELMVDNWRPAQPLK +NRQIKASFK +>sp|CATA_HUMAN| +ADSRDPASDQMQHWKEQRAAQKADVLTTGAGNPVGDKLNVITVGPRGPLL +VQDVVFTDEMAHFDRERIPERVVHAKGAGAFGYFEVTHDITKYSKAKVFE +HIGKKTPIAVRFSTVAGESGSADTVRDPRGFAVKFYTEDGNWDLVGNNTP +IFFIRDPILFPSFIHSQKRNPQTHLKDPDMVWDFWSLRPESLHQVSFLFS +DRGIPDGHRHMNGYGSHTFKLVNANGEAVYCKFHYKTDQGIKNLSVEDAA +RLSQEDPDYGIRDLFNAIATGKYPSWTFYIQVMTFNQAETFPFNPFDLTK +VWPHKDYPLIPVGKLVLNRNPVNYFAEVEQIAFDPSNMPPGIEASPDKML +QGRLFAYPDTHRHRLGPNYLHIPVNCPYRARVANYQRDGPMCMQDNQGGA +PNYYPNSFGAPEQQPSALEHSIQYSGEVRRFNTANDDNVTQVRAFYVNVL +NEEQRKRLCENIAGHLKDAQIFIQKKAVKNFTEVHPDYGSHIQALLDKYN +AEKPKNAIHTFVQSGSHLAAREKANL +>sp|CATD_HUMAN| +MQPSSLLPLALCLLAAPASALVRIPLHKFTSIRRTMSEVGGSVEDLIAKG +PVSKYSQAVPAVTEGPIPEVLKNYMDAQYYGEIGIGTPPQCFTVVFDTGS +SNLWVPSIHCKLLDIACWIHHKYNSDKSSTYVKNGTSFDIHYGSGSLSGY +LSQDTVSVPCQSASSASALGGVKVERQVFGEATKQPGITFIAAKFDGILG +MAYPRISVNNVLPVFDNLMQQKLVDQNIFSFYLSRDPDAQPGGELMLGGT +DSKYYKGSLSYLNVTRKAYWQVHLDQVEVASGLTLCKEGCEAIVDTGTSL +MVGPVDEVRELQKAIGAVPLIQGEYMIPCEKVSTLPAITLKLGGKGYKLS +PEDYTLKVSQAGKTLCLSGFMGMDIPPPSGPLWILGDVFIGRYYTVFDRD +NNRVGFAEAARL +>sp|CATG_HUMAN| +MQPLLLLLAFLLPTGAEAGEIIGGRESRPHSRPYMAYLQIQSPAGQSRCG 
+GFLVREDFVLTAAHCWGSNINVTLGAHNIQRRENTQQHITARRAIRHPQY +NQRTIQNDIMLLQLSRRVRRNRNVNPVALPRAQEGLRPGTLCTVAGWGRV +SMRRGTDTLREVQLRVQRDRQCLRIFGSYDPRRQICVGDRRERKAAFKGD +SGGPLLCNNVAHGIVSYGKSSGVPPEVFTRVSSFLPWIRTTMRSFKLLDQ +METPL +>sp|CO5_HUMAN| +MGLLGILCFLIFLGKTWGQEQTYVISAPKIFRVGASENIVIQVYGYTEAF +DATISIKSYPDKKFSYSSGHVHLSSENKFQNSAILTIQPKQLPGGQNPVS +YVYLEVVSKHFSKSKRMPITYDNGFLFIHTDKPVYTPDQSVKVRVYSLND +DLKPAKRETVLTFIDPEGSEVDMVEEIDHIGIISFPDFKIPSNPRYGMWT +IKAKYKEDFSTTGTAYFEVKEYVLPHFSVSIEPEYNFIGYKNFKNFEITI +KARYFYNKVVTEADVYITFGIREDLKDDQKEMMQTAMQNTMLINGIAQVT +FDSETAVKELSYYSLEDLNNKYLYIAVTVIESTGGFSEEAEIPGIKYVLS +PYKLNLVATPLFLKPGIPYPIKVQVKDSLDQLVGGVPVILNAQTIDVNQE +TSDLDPSKSVTRVDDGVASFVLNLPSGVTVLEFNVKTDAPDLPEENQARE +GYRAIAYSSLSQSYLYIDWTDNHKALLVGEHLNIIVTPKSPYIDKITHYN +YLILSKGKIIHFGTREKFSDASYQSINIPVTQNMVPSSRLLVYYIVTGEQ +TAELVSDSVWLNIEEKCGNQLQVHLSPDADAYSPGQTVSLNMATGMDSWV +ALAAVDSAVYGVQRGAKKPLERVFQFLEKSDLGCGAGGGLNNANVFHLAG +LTFLTNANADDSQENDEPCKEILRPRRTLQKKIEEIAAKYKHSVVKKCCY +DGACVNNDETCEQRAARISLGPRCIKAFTECCVVASQLRANISHKDMQLG +RLHMKTLLPVSKPEIRSYFPESWLWEVHLVPRRKQLQFALPDSLTTWEIQ +GIGISNTGICVADTVKAKVFKDVFLEMNIPYSVVRGEQIQLKGTVYNYRT +SGMQFCVKMSAVEGICTSESPVIDHQGTKSSKCVRQKVEGSSSHLVTFTV +LPLEIGLHNINFSLETWFGKEILVKTLRVVPEGVKRESYSGVTLDPRGIY +GTISRRKEFPYRIPLDLVPKTEIKRILSVKGLLVGEILSAVLSQEGINIL +THLPKGSAEAELMSVVPVFYVFHYLETGNHWNIFHSDPLIEKQKLKKKLK +EGMLSIMSYRNADYSYSVWKGGSASTWLTAFALRVLGQVNKYVEQNQNSI +CNSLLWLVENYQLDNGSFKENSQYQPIKLQGTLPVEARENSLYLTAFTVI +GIRKAFDICPLVKIDTALIKADNFLLENTLPAQSTFTLAISAYALSLGDK +THPQFRSIVSALKREALVKGNPPIYRFWKDNLQHKDSSVPNTGTARMVET +TAYALLTSLNLKDINYVNPVIKWLSEEQRYGGGFYSTQDTINAIEGLTEY +SLLVKQLRLSMDIDVSYKHKGALHNYKMTDKNFLGRPVEVLLNDDLIVST +GFGSGLATVHVTTVVHKTSTSEEVCSFYLKIDTQDIEASHYRGYGNSDYK +RIVACASYKPSREESSSGSSHAVMDISLPTGISANEEDLKALVEGVDQLF +TDYQIKDGHVILQLNSIPSSDFLCVRFRIFELFEVGFLSPATFTVYEYHR +PDKQCTMFYSTSNIKIQKVCEGAACKCVEADCGQMQEELDLTISAETRKQ +TACKPEIAYAYKVSITSITVENVFVKYKATLLDIYKTGEAVAEKDSEITF +IKKVTCTNAELVKGRQYLIMGKEALQIKYNFSFRYIYPLDSLTWIEYWPR +DTTCSSCQAFLANLDEFAEDIFLNGC +>sp|CRP_HUMAN| 
+MEKLLCFLVLTSLSHAFGQTDMSRKAFVFPKESDTSYVSLKAPLTKPLKA +FTVCLHFYTELSSTRGYSIFSYATKRQDNEILIFWSKDIGYSFTVGGSEI +LFEVPEVTVAPVHICTSWESASGIVEFWVDGKPRVRKSLKKGYTVGAEAS +IILGQEQDSFGGNFEGSQSLVGDIGNVNMWDFVLSPDEINTIYLGGPFSP +NVLNWRALKYEVQGEVFTKPQLWP +>sp|CYB5_HUMAN| +AEQSDEAVKYYTLEEIQKHNHSKSTWLILHHKVYDLTKFLEEHPGGEEVL +REQAGGDATENFEDVGHSTDAREMSKTFIIGELHPDDRPKLNKPPETLIT +TIDSSSSWWTNWVIPAISAVAVALMYRLYMAED +>sp|CYC_HUMAN| +GDVEKGKKIFIMKCSQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGYSYTA +ANKNKGIIWGEDTLMEYLENPKKYIPGTKMIFVGIKKKEERADLIAYLKK +ATNE +>sp|EGF_HUMAN| +MLLTLIILLPVVSKFSFVSLSAPQHWSCPEGTLAGNGNSTCVGPAPFLIF +SHGNSIFRIDTEGTNYEQLVVDAGVSVIMDFHYNEKRIYWVDLERQLLQR +VFLNGSRQERVCNIEKNVSGMAINWINEEVIWSNQQEGIITVTDMKGNNS +HILLSALKYPANVAVDPVERFIFWSSEVAGSLYRADLDGVGVKALLETSE +KITAVSLDVLDKRLFWIQYNREGSNSLICSCDYDGGSVHISKHPTQHNLF +AMSLFGDRIFYSTWKMKTIWIANKHTGKDMVRINLHSSFVPLGELKVVHP +LAQPKAEDDTWEPEQKLCKLRKGNCSSTVCGQDLQSHLCMCAEGYALSRD +RKYCEDVNECAFWNHGCTLGCKNTPGSYYCTCPVGFVLLPDGKRCHQLVS +CPRNVSECSHDCVLTSEGPLCFCPEGSVLERDGKTCSGCSSPDNGGCSQL +CVPLSPVSWECDCFPGYDLQLDEKSCAASGPQPFLLFANSQDIRHMHFDG +TDYGTLLSQQMGMVYALDHDPVENKIYFAHTALKWIERANMDGSQRERLI +EEGVDVPEGLAVDWIGRRFYWTDRGKSLIGRSDLNGKRSKIITKENISQP +RGIAVHPMAKRLFWTDTGINPRIESSSLQGLGRLVIASSDLIWPSGITID +FLTDKLYWCDAKQSVIEMANLDGSKRRRLTQNDVGHPFAVAVFEDYVWFS +DWAMPSVIRVNKRTGKDRVRLQGSMLKPSSLVVVHPLAKPGADPCLYQNG +GCEHICKKRLGTAWCSCREGFMKASDGKTCLALDGHQLLAGGEVDLKNQV +TPLDILSKTRVSEDNITESQHMLVAEIMVSDQDDCAPVGCSMYARCISEG +EDATCQCLKGFAGDGKLCSDIDECEMGVPVCPPASSKCINTEGGYVCRCS +EGYQGDGIHCLDIDECQLGVHSCGENASCTNTEGGYTCMCAGRLSEPGLI +CPDSTPPPHLREDDHHYSVRNSDSECPLSHDGYCLHDGVCMYIEALDKYA +CNCVVGYIGERCQYRDLKWWELRHAGHGQQQKVIVVAVCVVVLVMLLLLS +LWGAHYYRTQKLLSKNPKNPYEESSRDVRSRRPADTEDGMSSCPQPWFVV +IKEHQDLKNGGQPVAGEDGQAADGSMQPTSWRQEPQLCGMGTEQGCWIPV +SSDKGSCPQVMERSFHMPSYGTQTLEGGVEKPHSLLSANPLWQQRALDPP +HQMELTQ +>sp|FABPH_HUMAN| +VDAFLGTWKLVDSKNFDDYMKSLGVGFATRQVASMTKPTTIIEKNGDILT +LKTHSTFKNTEISFKLGVEFDETTADDRKVKSIVTLDGGKLVHLQKWDGQ +ETTLVRELIDGKLILTLTHGTAVCTRTYEKEA +>sp|GELS_HUMAN| 
+MAPHRPAPALLCALSLALCALSLPVRAATASRGASQAGAPQGRVPEARPN +SMVVEHPEFLKAGKEPGLQIWRVEKFDLVPVPTNLYGDFFTGDAYVILKT +VQLRNGNLQYDLHYWLGNECSQDESGAAAIFTVQLDDYLNGRAVQHREVQ +GFESATFLGYFKSGLKYKKGGVASGFKHVVPNEVVVQRLFQVKGRRVVRA +TEVPVSWESFNNGDCFILDLGNNIHQWCGSNSNRYERLKATQVSKGIRDN +ERSGRARVHVSEEGTEPEAMLQVLGPKPALPAGTEDTAKEDAANRKLAKL +YKVSNGAGTMSVSLVADENPFAQGALKSEDCFILDHGKDGKIFVWKGKQA +NTEERKAALKTASDFITKMDYPKQTQVSVLPEGGETPLFKQFFKNWRDPD +QTDGLGLSYLSSHIANVERVPFDAATLHTSTAMAAQHGMDDDGTGQKQIW +RIEGSNKVPVDPATYGQFYGGDSYIILYNYRHGGRQGQIIYNWQGAQSTQ +DEVAASAILTAQLDEELGGTPVQSRVVQGKEPAHLMSLFGGKPMIIYKGG +TSREGGQTAPASTRLFQVRANSAGATRAVEVLPKAGALNSNDAFVLKTPS +AAYLWVGTGASEAEKTGAQELLRVLRAQPVQVAEGSEPDGFWEALGGKAA +YRTSPRLKDKKMDAHPPRLFACSNKIGRFVIEEVPGELMQEDLATDDVML +LDTWDQVFVWVGKDSQEEEKTEALTSAKRYIETDPANRDRRTPITVVKQG +FEPPSFVGWFLGWDDDYWSVDPLDRAMAELAA +>sp|GSTA1_HUMAN| +AEKPKLHYFNARGRMESTRWLLAAAGVEFEEKFIKSAEDLDKLRNDGYLM +FQQVPMVEIDGMKLVQTRAILNYIASKYNLYGKDIKERALIDMYIEGIAD +LGEMILLLPVCPPEEKDAKLALIKEKIKNRYFPAFEKVLKSHGQDYLVGN +KLSRADIHLVELLYYVEELDSSLISSFPLLKALKTRISNLPTVKKFLQPG +SPRKPPMDEKSLEEARKIFRF +>sp|GSTP1_HUMAN| +PPYTVVYFPVRGRCAALRMLLADQGQSWKEEVVTVETWQEGSLKASCLYG +QLPKFQDGDLTLYQSNTILRHLGRTLGLYGKDQQEAALVDMVNDGVEDLR +CKYISLIYTNYEAGKDDYVKALPGQLKPFETLLSQNQGGKTFIVGDQISF +ADYNLLDLLLIHEVLAPGCLDAFPLLSAYVGRLSARPKLKAFLASPEYVN +LPINGNGKQ +>sp|HBA_HUMAN| +VLSPADKTNVKAAWGKVGAHAGEYGAEALERMFLSFPTTKTYFPHFDLSH +GSAQVKGHGKKVADALTNAVAHVDDMPNALSALSDLHAHKLRVDPVNFKL +LSHCLLVTLAAHLPAEFTPAVHASLDKFLASVSTVLTSKYR +>sp|HBB_HUMAN| +VHLTPEEKSAVTALWGKVNVDEVGGEALGRLLVVYPWTQRFFESFGDLST +PDAVMGNPKVKAHGKKVLGAFSDGLAHLDNLKGTFATLSELHCDKLHVDP +ENFRLLGNVLVCVLAHHFGKEFTPPVQAAYQKVVAGVANALAHKYH +>sp|IGF2_HUMAN| +MGIPMGKSMLVLLTFLAFASCCIAAYRPSETLCGGELVDTLQFVCGDRGF +YFSRPASRVSRRSRGIVEECCFRSCDLALLETYCATPAKSERDVSTPPTV +LPDNFPRYPVGKFFQYDTWKQSTQRLRRGLPALLRARRGHVLAKELEAFR +EAKRHRPLIALPTQDPAHGGAPPEMASNRK +>sp|IL8_HUMAN| +MTSKLAVALLAAFLISAALCEGAVLPRSAKELRCQCIKTYSKPFHPKFIK +ELRVIESGPHCANTEIIVKLSDGRELCLDPKENWVQRVVEKFLKRAENS +>sp|KCRM_HUMAN| 
+MPFGNTHNKFKLNYKPEEEYPDLSKHNNHMAKVLTLELYKKLRDKETPSG +FTVDDVIQTGVDNPGHPFIMTVGCVAGDEESYEVFKELFDPIISDRHGGY +KPTDKHKTDLNHENLKGGDDLDPNYVLSSRVRTGRSIKGYTLPPHCSRGE +RRAVEKLSVEALNSLTGEFKGKYYPLKSMTEKEQQQLIDDHFLFDKPVSP +LLLASGMARDWPDARGIWHNDNKSFLVWVNEEDHLRVISMEKGGNMKEVF +RRFCVGLQKIEEIFKKAGHPFMWNQHLGYVLTCPSNLGTGLRGGVHVKLA +HLSKHPKFEEILTRLRLQKRGTGGVDTAAVGSVFDVSNADRLGSSEVEQV +QLVVDGVKLMVEMEKKLEKGQSIDDMIPAQK +>sp|LALBA_HUMAN| +MRFFVPLFLVGILFPAILAKQFTKCELSQLLKDIDGYGGIALPELICTMF +HTSGYDTQAIVENNESTEYGLFQISNKLWCKSSQVPQSRNICDISCDKFL +DDDITDDIMCAKKILDIKGIDYWLAHKALCTEKLEQWLCEKL +>sp|LEP_HUMAN| +MHWGTLCGFLWLWPYLFYVQAVPIQKVQDDTKTLIKTIVTRINDISHTQS +VSSKQKVTGLDFIPGLHPILTLSKMDQTLAVYQQILTSMPSRNVIQISND +LENLRDLLHVLAFSKSCHLPWASGLETLDSLGGVLEASGYSTEVVALSRL +QGSLQDMLWQLDLSPGC +>sp|LYSC_HUMAN| +MKALIVLGLVLLSVTVQGKVFERCELARTLKRLGMDGYRGISLANWMCLA +KWESGYNTRATNYNAGDRSTDYGIFQINSRYWCNDGKTPGAVNACHLSCS +ALLQDNIADAVACAKRVVRDPQGIRAWVAWRNRCQNRDVRQYVQGCGV +>sp|MYG_HUMAN| +GLSDGEWQLVLNVWGKVEADIPGHGQEVLIRLFKGHPETLEKFDKFKHLK +SEDEMKASEDLKKHGATVLTALGGILKKKGHHEAEIKPLAQSHATKHKIP +VKYLEFISECIIQVLQSKHPGDFGADAQGAMNKALELFRKDMASNYKELG +FQG +>sp|NEDD8_HUMAN| +MLIKVKTLTGKEIEIDIEPTDKVERIKERVEEKEGIPPQQQRLIYSGKQM +NDEKTAADYKILGGSVLHLVLALRGGGGLRQ +>sp|NQO1_HUMAN| +MVGRRALIVLAHSERTSFNYAMKEAAAAALKKKGWEVVESDLYAMNFNPI +ISRKDITGKLKDPANFQYPAESVLAYKEGHLSPDIVAEQKKLEAADLVIF +QFPLQWFGVPAILKGWFERVFIGEFAYTYAAMYDKGPFRSKKAVLSITTG +GSGSMYSLQGIHGDMNVILWPIQSGILHFCGFQVLEPQLTYSIGHTPADA +RIQILEGWKKRLENIWDETPLYFAPSSLFDLNFQAGFLMKKEVQDEEKNK +KFGLSVGHHLGKSIPTDNQIKARK +>sp|NQO2_HUMAN| +AGKKVLIVYAHQEPKSFNGSLKNVAVDELSRQGCTVTVSDLYAMNFEPRA +TDKDITGTLSNPEVFNYGVETHEAYKQRSLASDITDEQKKVREADLVIFQ +FPLYWFSVPAILKGWMDRVLCQGFAFDIPGFYDSGLLQGKLALLSVTTGG +TAEMYTKTGVNGDSRYFLWPLQHGTLHFCGFKVLAPQISFAPEIASEEER +KGMVAAWSQRLQTIWKEEPIPCTAHWHFGQ +>sp|PDGFB_HUMAN| +MNRCWALFLSLCCYLRLVSAEGDPIPEELYEMLSDHSIRSFDDLQRLLHG +DPGEEDGAELDLNMTRSHSGGELESLARGRRSLGSLTIAEPAMIAECKTR +TEVFEISRRLIDRTNANFLVWPPCVEVQRCSGCCNNRNVQCRPTQVQLRP +VQVRKIEIVRKKPIFKKATVTLEDHLACKCETVAAARPVTRSPGGSQEQR 
+AKTPQTRVTIRTVRVRRPPKGKHRKFKHTHDKTALKETLGA +>sp|PPIA_HUMAN| +VNPTVFFDIAVDGEPLGRVSFELFADKVPKTAENFRALSTGEKGFGYKGS +CFHRIIPGFMCQGGDFTRHNGTGGKSIYGEKFEDENFILKHTGPGILSMA +NAGPNTNGSQFFICTAKTEWLDGKHVVFGKVKEGMNIVEAMERFGSRNGK +TSKKITIADCGQLE +>sp|PRDX1_HUMAN| +MSSGNAKIGHPAPNFKATAVMPDGQFKDISLSDYKGKYVVFFFYPLDFTF +VCPTEIIAFSDRAEEFKKLNCQVIGASVDSHFCHLAWVNTPKKQGGLGPM +NIPLVSDPKRTIAQDYGVLKADEGISFRGLFIIDDKGILRQITVNDLPVG +RSVDETLRLVQAFQFTDKHGEVCPAGWKPGSDTIKPDVQKSKEYFSKQK +>sp|RASH_HUMAN| +MTEYKLVVVGAGGVGKSALTIQLIQNHFVDEYDPTIEDSYRKQVVIDGET +CLLDILDTAGQEEYSAMRDQYMRTGEGFLCVFAINNTKSFEDIHQYREQI +KRVKDSDDVPMVLVGNKCDLAARTVESRQAQDLARSYGIPYIETSAKTRQ +GVEDAFYTLVREIRQHKLRKLNPPDESGPGCMSCKCVLS +>sp|RETBP_HUMAN| +MKWVWALLLLAALGSGRAERDCRVSSFRVKENFDKARFSGTWYAMAKKDP +EGLFLQDNIVAEFSVDETGQMSATAKGRVRLLNNWDVCADMVGTFTDTED +PAKFKMKYWGVASFLQKGNDDHWIVDTDYDTYAVQYSCRLLNLDGTCADS +YSFVFSRDPNGLPPEAQKIVRQRQEELCLARQYRLIVHNGYCDGRSERNL +L +>sp|SODC_HUMAN| +ATKAVCVLKGDGPVQGIINFEQKESNGPVKVWGSIKGLTEGLHGFHVHEF +GDNTAGCTSAGPHFNPLSRKHGGPKDEERHVGDLGNVTADKDGVADVSIE +DSVISLSGDHCIIGRTLVVHEKADDLGKGGNEESTKTGNAGSRLACGVIG +IAQ +>sp|SUMO1_HUMAN| +MSDQEAKPSTEDLGDKKEGEYIKLKVIGQDSSEIHFKVKMTTHLKKLKES +YCQRQGVPMNSLRFLFEGQRIADNHTPKELGMEEEDVIEVYQEQTGGHST +V +>sp|SYH_HUMAN| +MAERAALEELVKLQGERVRGLKQQKASAELIEEEVAKLLKLKAQLGPDES +KQKFVLKTPKGTRDYSPRQMAVREKVFDVIIRCFKRHGAEVIDTPVFELK +ETLMGKYGEDSKLIYDLKDQGGELLSLRYDLTVPFARYLAMNKLTNIKRY +HIAKVYRRDNPAMTRGRYREFYQCDFDIAGNFDPMIPDAECLKIMCEILS +SLQIGDFLVKVNDRRILDGMFAICGVSDSKFRTICSSVDKLDKVSWEEVK +NEMVGEKGLAPEVADRIGDYVQQHGGVSLVEQLLQDPKLSQNKQALEGLG +DLKLLFEYLTLFGIDDKISFDLSLARGLDYYTGVIYEAVLLQTPAQAGEE +PLGVGSVAAGGRYDGLVGMFDPKGRKVPCVGLSIGVERIFSIVEQRLEAL +EEKIRTTETQVLVASAQKKLLEERLKLVSELWDAGIKAELLYKKNPKLLN +QLQYCEEAGIPLVAIIGEQELKDGVIKLRSVTSREEVDVRREDLVEEIKR +RTGQPLCIC +>sp|TAU_HUMAN| +AEPRQEFEVMEDHAGTYGLGDRKDQGGYTMHQDQEGDTDAGLKESPLQTP +TEDGSEEPGSETSDAKSTPTAEDVTAPLVDEGAPGKQAAAQPHTEIPEGT +TAEEAGIGDTPSLEDEAAGHVTQEPESGKVVQEGFLREPGPPGLSHQLMS +GMPGAPLLPEGPREATRQPSGTGPEDTEGGRHAPELLKHQLLGDLHQEGP 
+PLKGAGGKERPGSKEEVDEDRDVDESSPQDSPPSKASPAQDGRPPQTAAR +EATSIPGFPAEGAIPLPVDFLSKVSTEIPASEPDGPSVGRAKGQDAPLEF +TFHVEITPNVQKEQAHSEEHLGRAAFPGAPGEGPEARGPSLGEDTKEADL +PEPSEKQPAAAPRGKPVSRVPQLKARMVSKSKDGTGSDDKKAKTSTRSSA +KTLKNRPCLSPKLPTPGSSDPLIQPSSPAVCPEPPSSPKHVSSVTSRTGS +SGAKEMKLKGADGKTKIATPRGAAPPGQKGQANATRIPAKTPPAPKTPPS +SGEPPKSGDRSGYSSPGSPGTPGSRSRTPSLPTPPTREPKKVAVVRTPPK +SPSSAKSRLQTAPVPMPDLKNVKSKIGSTENLKHQPGGGKVQIINKKLDL +SNVQSKCGSKDNIKHVPGGGSVQIVYKPVDLSKVTSKCGSLGNIHHKPGG +GQVEVKSEKLDFKDRVQSKIGSLDNITHVPGGGNKKIETHKLTFRENAKA +KTDHGAEIVYKSPVVSGDTSPRHLSNVSSTGSIDMVDSPQLATLADEVSA +SLAKQGL +>sp|THIO_HUMAN| +VKQIESKTAFQEALDAAGDKLVVVDFSATWCGPCKMIKPFFHSLSEKYSN +VIFLEVDVDDCQDVASECEVKCMPTFQFFKKGQKVGEFSGANKEKLEATI +NELV +>sp|TNFA_HUMAN| +MSTESMIRDVELAEEALPKKTGGPQGSRRCLFLSLFSFLIVAGATTLFCL +LHFGVIGPQREEFPRDLSLISPLAQAVRSSSRTPSDKPVAHVVANPQAEG +QLQWLNRRANALLANGVELRDNQLVVPSEGLYLIYSQVLFKGQGCPSTHV +LLTHTISRIAVSYQTKVNLLSAIKSPCQRETPEGAEAKPWYEPIYLGGVF +QLEKGDRLSAEINRPDYLDFAESGQVYFGIIAL +>sp|TRFE_HUMAN| +MRLAVGALLVCAVLGLCLAVPDKTVRWCAVSEHEATKCQSFRDHMKSVIP +SDGPSVACVKKASYLDCIRAIAANEADAVTLDAGLVYDAYLAPNNLKPVV +AEFYGSKEDPQTFYYAVAVVKKDSGFQMNQLRGKKSCHTGLGRSAGWNIP +IGLLYCDLPEPRKPLEKAVANFFSGSCAPCADGTDFPQLCQLCPGCGCST +LNQYFGYSGAFKCLKDGAGDVAFVKHSTIFENLANKADRDQYELLCLDNT +RKPVDEYKDCHLAQVPSHTVVARSMGGKEDLIWELLNQAQEHFGKDKSKE +FQLFSSPHGKDLLFKDSAHGFLKVPPRMDAKMYLGYEYVTAIRNLREGTC +PEAPTDECKPVKWCALSHHERLKCDEWSVNSVGKIECVSAETTEDCIAKI +MNGEADAMSLDGGFVYIAGKCGLVPVLAENYNKSDNCEDTPEAGYFAVAV +VKKSASDLTWDNLKGKKSCHTAVGRTAGWNIPMGLLYNKINHCRFDEFFS +EGCAPGSKKDSSLCKLCMGSGLNLCEPNNKEGYYGYTGAFRCLVEKGDVA +FVKHQTVPQNTGGKNPDPWAKNLNEKDYELLCLDGTRKPVEEYANCHLAR +APNHAVVTRKDKEACVHKILRQQQHLFGSNVTDCSGNFCLFRSETKDLLF +RDDTVCLAKLHDRNTYEKYLGEEYVKAVGNLRKCSTSSLLEACTFRRP +>sp|TRFL_HUMAN| +MKLVFLVLLFLGALGLCLAGRRRSVQWCAVSQPEATKCFQWQRNMRKVRG +PPVSCIKRDSPIQCIQAIAENRADAVTLDGGFIYEAGLAPYKLRPVAAEV +YGTERQPRTHYYAVAVVKKGGSFQLNELQGLKSCHTGLRRTAGWNVPIGT +LRPFLNWTGPPEPIEAAVARFFSASCVPGADKGQFPNLCRLCAGTGENKC +AFSSQEPYFSYSGAFKCLRDGAGDVAFIRESTVFEDLSDEAERDEYELLC 
+PDNTRKPVDKFKDCHLARVPSHAVVARSVNGKEDAIWNLLRQAQEKFGKD +KSPKFQLFGSPSGQKDLLFKDSAIGFSRVPPRIDSGLYLGSGYFTAIQNL +RKSEEEVAARRARVVWCAVGEQELRKCNQWSGLSEGSVTCSSASTTEDCI +ALVLKGEADAMSLDGGYVYTAGKCGLVPVLAENYKSQQSSDPDPNCVDRP +VEGYLAVAVVRRSDTSLTWNSVKGKKSCHTAVDRTAGWNIPMGLLFNQTG +SCKFDEYFSQSCAPGSDPRSNLCALCIGDEQGENKCVPNSNERYYGYTGA +FRCLAENAGDVAFVKDVTVLQNTDGNNNEAWAKDLKLADFALLCLDGKRK +PVTEARSCHLAMAPNHAVVSRMDKVERLKQVLLHQQAKFGRNGSDCPDKF +CLFQSETKNLLFNDNTECLARLHGKTTYEKYLGPQYVAGITNLKKCSTSP +LLEACEFLRK +>sp|UB2E1_HUMAN| +MSDDDSRASTSSSSSSSSNQQTEKETNTPKKKESKVSMSKNSKLLSTSAK +RIQKELADITLDPPPNCSAGPKGDNIYEWRSTILGPPGSVYEGGVFFLDI +TFTPEYPFKPPKVTFRTRIYHCNINSQGVICLDILKDNWSPALTISKVLL +SICSLLTDCNPADPLVGSIATQYMTNRAEHDRMARQWTKRYAT +>sp|UBE2C_HUMAN| +MASQNRDPAATSVAAARKGAEPSGGAARGPVGKRLQQELMTLMMSGDKGI +SAFPESDNLFKWVGTIHGAAGTVYEDLRYKLSLEFPSGYPYNAPTVKFLT +PCYHPNVDTQGNICLDILKEKWSALYDVRTILLSIQSLLGEPNIDSPLNT +HAAELWKNPTAFKKYLQETYSKQVTSQEP +>sp|UBE2I_HUMAN| +MSGIALSRLAQERKAWRKDHPFGFVAVPTKNPDGTMNLMNWECAIPGKKG +TPWEGGLFKLRMLFKDDYPSSPPKCKFEPPLFHPNVYPSGTVCLSILEED +KDWRPAITIKQILLGIQELLNEPNIQDPAQAEAYTIYCQNRVEYEKRVRA +QAKKFAPS +>sp|RS27A_HUMAN| +MQIFVKTLTGKTITLEVEPSDTIENVKAKIQDKEGIPPDQQRLIFAGKQL +EDGRTLSDYNIQKESTLHLVLRLRGGAKKRKKKSYTTPKKNKHKRKKVKL +AVLKYYKVDENGKISRLRRECPSDECGAGVFMASHFDRHYCGKCCLTYCF +NKPEDK +>sp|GAG_SCVLA| +MLRFVTKNSQDKSSDLFSICSDRGTFVAHNRVRTDFKFDNLVFNRVYGVS +QKFTLVGNPTVCFNEGSSYLEGIAKKYLTLDGGLAIDNVLNELRSTCGIP +GNAVASHAYNITSWRWYDNHVALLMNMLRAYHLQVLTEQGQYSAGDIPMY +HDGHVKIKLPVTIDDTAGPTQFAWPSDRSTDSYPDWAQFSESFPSIDVPY +LDVRPLTVTEVNFVLMMMSKWHRRTNLAIDYEAPQLADKFAYRHALTVQD +ADEWIEGDRTDDQFRPPSSKVMLSALRKYVNHNRLYNQFYTAAQLLAQIM +MKPVPNCAEGYAWLMHDALVNIPKFGSIRGRYPFLLSGDAALIQATALED +WSAIMAKPELVFTYAMQVSVALNTGLYLRRVKKTGFGTTIDDSYEDGAFL +QPETFVQAALACCTGQDAPLNGMSDVYVTYPDLLEFDAVTQVPITVIEPA +GYNIVDDHLVVVGVPVACSPYMIFPVAAFDTANPYCGNFVIKAANKYLRK +GAVYDKLEAWKLAWALRVAGYDTHFKVYGDTHGLTKFYADNGDTWTHIPE +FVTDGDVMEVFVTAIERRARHFVELPRLNSPAFFRSVEVSTTIYDTHVQA +GAHAVYHASRINLDYVKPVSTGIQVINAGELKNYWGSVRRTQQGLGVVGL +TMPAVMPTGEPTAGAAHEELIEQADNVLVE 
+>sp|CYC_HORSE| +MGDVEKGKKIFVQKCAQCHTVEKGGKHKTGPNLHGLFGRKTGQAPGFTYT +DANKNKGITWKEETLMEYLENPKKYIPGTKMIFAGIKKKTEREDLIAYLK +KATNE +>sp|CAH2_BOVIN| +MSHHWGYGKHNGPEHWHKDFPIANGERQSPVDIDTKAVVQDPALKPLALV +YGEATSRRMVNNGHSFNVEYDDSQDKAVLKDGPLTGTYRLVQFHFHWGSS +DDQGSEHTVDRKKYAAELHLVHWNTKYGDFGTAAQQPDGLAVVGVFLKVG +DANPALQKVLDALDSIKTKGKSTDFPNFDPGSLLPNVLDYWTYPGSLTTP +PLLESVTWIVLKEPISVSSQQMLKFRTLNFNAEGEPELLMLANWRPAQPL +KNRQVRGFPK +>sp|ADH1_YEAST| +MSIPETQKGVIFYESHGKLEHKDIPVPKPKANELLINVKYSGVCHTDLHA +WHGDWPLPVKLPLVGGHEGAGVVVGMGENVKGWKIGDYAGIKWLNGSCMA +CEYCELGNESNCPHADLSGYTHDGSFQQYATADAVQAAHIPQGTDLAQVA +PILCAGITVYKALKSANLMAGHWVAISGAAGGLGSLAVQYAKAMGYRVLG +IDGGEGKEELFRSIGGEVFIDFTKEKDIVGAVLKATDGGAHGVINVSVSE +AAIEASTRYVRANGTTVLVGMPAGAKCCSDVFNQVVKSISIVGSYVGNRA +DTREALDFFARGLVKSPIKVVGLSTLPEIYEKMEKGQIVGRYVVDTSK +>sp|ALDOA_RABIT| +MPHSHPALTPEQKKELSDIAHRIVAPGKGILAADESTGSIAKRLQSIGTE +NTEENRRFYRQLLLTADDRVNPCIGGVILFHETLYQKADDGRPFPQVIKS +KGGVVGIKVDKGVVPLAGTNGETTTQGLDGLSERCAQYKKDGADFAKWRC +VLKIGEHTPSALAIMENANVLARYASICQQNGIVPIVEPEILPDGDHDLK +RCQYVTEKVLAAVYKALSDHHIYLEGTLLKPNMVTPGHACTQKYSHEEIA +MATVTALRRTVPPAVTGVTFLSGGQSEEEASINLNAINKCPLLKPWALTF +SYGRALQASALKAWGGKKENLKAAQEEYVKRALANSLACQGKYTPSGQAG +AAASESLFIS +>sp|LYSC_CHICK| +MRSLLILVLCFLPLAALGKVFGRCELAAAMKRHGLDNYRGYSLGNWVCAA +KFESNFNTQATNRNTDGSTDYGILQINSRWWCNDGRTPGSRNLCNIPCSA +LLSSDITASVNCAKKIVSDGNGMNAWVAWRNRCKGTDVQAWIRGCRL +>sp|MYG_HORSE| +MGLSDGEWQQVLNVWGKVEADIAGHGQEVLIRLFTGHPETLEKFDKFKHL +KTEAEMKASEDLKKHGTVVLTALGGILKKKGHHEAELKPLAQSHATKHKI +PIKYLEFISDAIIHVLHSKHPGDFGADAQGAMTKALELFRNDIAAKYKEL +GFQG +>sp|OVAL_CHICK| +MGSIGAASMEFCFDVFKELKVHHANENIFYCPIAIMSALAMVYLGAKDST +RTQINKVVRFDKLPGFGDSIEAQCGTSVNVHSSLRDILNQITKPNDVYSF +SLASRLYAEERYPILPEYLQCVKELYRGGLEPINFQTAADQARELINSWV +ESQTNGIIRNVLQPSSVDSQTAMVLVNAIVFKGLWEKAFKDEDTQAMPFR +VTEQESKPVQMMYQIGLFRVASMASEKMKILELPFASGTMSMLVLLPDEV +SGLEQLESIINFEKLTEWTSSNVMEERKIKVYLPRMKMEEKYNLTSVLMA +MGITDVFSSSANLSGISSAESLKISQAVHAAHAEINEAGREVVGSAEAGV +DAASVSEEFRADHPFLFCIKHIATNAVLFFGRCVSP +>sp|BGAL_ECOLI| 
+MTMITDSLAVVLQRRDWENPGVTQLNRLAAHPPFASWRNSEEARTDRPSQ +QLRSLNGEWRFAWFPAPEAVPESWLECDLPEADTVVVPSNWQMHGYDAPI +YTNVTYPITVNPPFVPTENPTGCYSLTFNVDESWLQEGQTRIIFDGVNSA +FHLWCNGRWVGYGQDSRLPSEFDLSAFLRAGENRLAVMVLRWSDGSYLED +QDMWRMSGIFRDVSLLHKPTTQISDFHVATRFNDDFSRAVLEAEVQMCGE +LRDYLRVTVSLWQGETQVASGTAPFGGEIIDERGGYADRVTLRLNVENPK +LWSAEIPNLYRAVVELHTADGTLIEAEACDVGFREVRIENGLLLLNGKPL +LIRGVNRHEHHPLHGQVMDEQTMVQDILLMKQNNFNAVRCSHYPNHPLWY +TLCDRYGLYVVDEANIETHGMVPMNRLTDDPRWLPAMSERVTRMVQRDRN +HPSVIIWSLGNESGHGANHDALYRWIKSVDPSRPVQYEGGGADTTATDII +CPMYARVDEDQPFPAVPKWSIKKWLSLPGETRPLILCEYAHAMGNSLGGF +AKYWQAFRQYPRLQGGFVWDWVDQSLIKYDENGNPWSAYGGDFGDTPNDR +QFCMNGLVFADRTPHPALTEAKHQQQFFQFRLSGQTIEVTSEYLFRHSDN +ELLHWMVALDGKPLASGEVPLDVAPQGKQLIELPELPQPESAGQLWLTVR +VVQPNATAWSEAGHISAWQQWRLAENLSVTLPAASHAIPHLTTSEMDFCI +ELGNKRWQFNRQSGFLSQMWIGDKKQLLTPLRDQFTRAPLDNDIGVSEAT +RIDPNAWVERWKAAGHYQAEAALLQCTADTLADAVLITTAHAWQHQGKTL +FISRKTYRIDGSGQMAITVDVEVASDTPHPARIGLNCQLAQVAERVNWLG +LGPQENYPDRLTAACFDRWDLPLSDMYTPYVFPSENGLRCGTRELNYGPH +QWRGDFQFNISRYSQQQLMETSHRHLLHAEEGTWLNIDGFHMGIGGDDSW +SPSVSAEFQLSAGRYHYQLVWCQK +>sp|DHE3_BOVIN| +MYRYLGEALLLSRAGPAALGSASADSAALLGWARGQPAAAPQPGLVPPAR +RHYSEAAADREDDPNFFKMVEGFFDRGASIVEDKLVEDLKTRETEEQKRN +RVRSILRIIKPCNHVLSLSFPIRRDDGSWEVIEGYRAQHSQHRTPCKGGI +RYSTDVSVDEVKALASLMTYKCAVVDVPFGGAKAGVKINPKNYTDNELEK +ITRRFTMELAKKGFIGPGVDVPAPDMSTGEREMSWIADTYASTIGHYDIN +AHACVTGKPISQGGIHGRISATGRGVFHGIENFINEASYMSILGMTPGFG +DKTFVVQGFGNVGLHSMRYLHRFGAKCITVGESDGSIWNPDGIDPKELED +FKLQHGTILGFPKAKIYEGSILEVDCDILIPAASEKQLTKSNAPRVKAKI +IAEGANGPTTPEADKIFLERNIMVIPDLYLNAGGVTVSYFEWLNNLNHVS +YGRLTFKYERDSNYHLLMSVQESLERKFGKHGGTIPIVPTAEFQDRISGA +SEKDIVHSGLAYTMERSARQIMRTAMKYNLGLDLRTAAYVNAIEKVFRVY +NEAGVTFT +>sp|GFP_AEQVI| +MSKGEELFTGVVPILVELDGDVNGHKFSVSGEGEGDATYGKLTLKFICTT +GKLPVPWPTLVTTFSYGVQCFSRYPDHMKQHDFFKSAMPEGYVQERTIF +FKDDGNYKTRAEVKFEGDTLVNRIELKGIDFKEDGNILGHKLEYNYNSH +NVYIMADKQKNGIKVNFKIRHNIEDGSVQLADHYQQNTPIGDGPVLLPD +NHYLSTQSALSKDPNEKRDHMVLLEFVTAAGITHGMDELYK +>sp|SRPP_HEVBR| +MAEEVEEERLKYLDFVRAAGVYAVDSFSTLYLYAKDISGPLKPGVDTIEN 
+VVKTVVTPVYYIPLEAVKFVDKTVDVSVTSLDGVVPPVIKQVSAQTYSVA +QDAPRIVLDVASSVFNTGVQEGAKALYANLEPKAEQYAVITWRALNKLPL +VPQVANVVVPTAVYFSEKYNDVVRGTTEQGYRVSSYLPLLPTEKITKVFG +DEAS +>sp|REF_HEVBR| +MAEDEDNQQGQGEGLKYLGFVQDAATYAVTTFSNVYLFAKDKSGPLQPGV +DIIEGPVKNVAVPLYNRFSYIPNGALKFVDSTVVASVTIIDRSLPPIVKD +ASIQVVSAIRAAPEAARSLASSLPGQTKILAKVFYGEN +>sp|PLMP_GRIFR| +MFSSVMVALVSLAVAVSANPGLSLKVSGPEAVDGVNNLKVVTTITNTGDE +TLKLLNDPRGALHTMPTDTFAITNESGETPSFIGVKVKYVPSMAAKSTGE +NVFAVIAPGQSVNVEHDLSAAYNFTSSGAGTYALEALNVFNYIDPETNEP +VEIWADAEAHTTAVSGKLAVVRATPTLTRPVTYNGCSSSEQSALAAAASA +AQSYVAESLSYLQTHTAATPRYTTWFGSYISSRHSTVLQHYTDMNSNDFS +SYSFDCTCTAAGTFAYVYPNRFGTVYLCGAFWKAPTTGTDSQAGTLVHES +SHFTRNGGTKDYAYGQAAAKSLATMDPDKAVMNADNHEYFSENNPAQS +>KKA1_ECOLX +MSHIQRETSCSRPRLNSNLDADLYGYKWARDNVGQSGATIYRLYGKPDAP +ELFLKHGKGSVANDVTDEMVRLNWLTEFMPLPTIKHFIRTPDDAWLLTTA +IPGKTAFQVLEEYPDSGENIVDALAVFLRRLHSIPVCNCPFNSDRVFRLA +QAQSRMNNGLVDASDFDDERNGWPVEQVWKEMHKLLPFSPDSVVTHGDFS +LDNLIFDEGKLIGCIDVGRVGIADRYQDLAILWNCLGEFSPSLQKRLFQK +YGIDNPDMNKLQFHLMLDEFF diff --git a/tests/test_data/spectrum_output/input_peprec.pkl b/tests/test_data/spectrum_output/input_peprec.pkl new file mode 100644 index 00000000..e55b5f01 Binary files /dev/null and b/tests/test_data/spectrum_output/input_peprec.pkl differ diff --git a/tests/test_data/spectrum_output/input_preds.pkl b/tests/test_data/spectrum_output/input_preds.pkl new file mode 100644 index 00000000..18c5c335 Binary files /dev/null and b/tests/test_data/spectrum_output/input_preds.pkl differ diff --git a/tests/test_data/spectrum_output/target_predictions.mgf b/tests/test_data/spectrum_output/target_predictions.mgf new file mode 100644 index 00000000..5538b0bc --- /dev/null +++ b/tests/test_data/spectrum_output/target_predictions.mgf @@ -0,0 +1,179 @@ +BEGIN IONS +TITLE=sp|AMYS_HUMAN|_069_000_2 LYKMAVGFMLAHPYGFTRVMSSYR/2 0 sp|AMYS_HUMAN| +PEPMASS=1413.2085823739196 +CHARGE=2+ +175.118912 7612.69677734375 +277.154633 862.73876953125 +338.182220 306.3399658203125 +405.249603 1173.1416015625 +425.214264 
125.47180938720703 +512.246338 1824.03955078125 +536.290100 907.391845703125 +607.327209 3567.384521484375 +643.286804 1487.3551025390625 +706.395630 1691.5927734375 +742.355225 2041.1654052734375 +763.417114 1299.4510498046875 +898.456299 496.16607666015625 +910.485535 2427.090576171875 +999.503967 2596.78955078125 +1041.526001 5264.21435546875 +1154.610107 1048.434326171875 +1203.593872 1489.6451416015625 +1225.647217 2918.779296875 +1362.706177 2992.5322265625 +1366.657227 340.6930847167969 +1459.758911 238.18710327148438 +1463.709961 10000.0 +1600.768921 6510.755859375 +1622.822266 413.885009765625 +1671.806030 3304.76611328125 +1784.890137 2934.682373046875 +1826.912231 381.4009704589844 +1915.930664 2700.005859375 +1927.959961 271.4562683105469 +2062.999023 571.2114868164062 +2084.061279 306.1235656738281 +2120.020508 260.2799987792969 +2183.129639 198.59060668945312 +2219.088867 325.4184265136719 +2314.170166 147.9990997314453 +2401.202148 288.29730224609375 +2651.297363 102.8747787475586 +END IONS + +BEGIN IONS +TITLE=sp|CATA_HUMAN|_064_001_2 VWPHKDYPLIPVGK/2 1/0,V,Acetyl sp|CATA_HUMAN| +PEPMASS=845.96993487392 +CHARGE=2+ +142.112350 79.65632629394531 +147.112762 1064.179931640625 +204.134232 3336.057373046875 +303.202637 484.39459228515625 +328.191681 3245.29833984375 +400.255402 10000.0 +513.339478 876.9566040039062 +562.303406 503.4450378417969 +626.423523 21.09709930419922 +690.398376 209.52952575683594 +723.476318 1903.180908203125 +805.425293 196.67640686035156 +886.539673 742.6878051757812 +968.488586 399.8765869140625 +1001.566589 1811.6539306640625 +1129.661499 1234.114013671875 +1178.625366 311.4296569824219 +1266.720459 409.8760986328125 +1291.709473 1984.581787109375 +1363.773193 4492.7001953125 +1388.762207 19.803117752075195 +1487.830566 60.26520919799805 +1544.852051 35.82118225097656 +1549.852539 6.53165340423584 +END IONS + +BEGIN IONS +TITLE=sp|CO5_HUMAN|_055_017_3 EGMLSIMSYR/3 3/0,E,Acetyl/6,M,Oxidation/2,M,Oxidation sp|CO5_HUMAN| 
+PEPMASS=420.8586949266367 +CHARGE=3+ +172.086517 143.6107177734375 +175.118912 3735.07763671875 +229.107986 892.0596313476562 +338.182220 2297.72705078125 +376.147858 1329.711669921875 +425.214264 6692.74853515625 +489.231903 306.2079162597656 +572.254150 10000.0 +576.263977 357.6639404296875 +685.338196 876.877685546875 +689.348022 7.6761651039123535 +772.370239 136.63638305664062 +836.387878 3.2346858978271484 +885.454285 37.848175048828125 +923.419922 25.234107971191406 +1032.494141 8.809098243713379 +1086.483154 8.878386497497559 +END IONS + +BEGIN IONS +TITLE=sp|CO5_HUMAN|_253_048_3 EMMQTAMQNTMLINGIAQVTFDSETAVK/3 2/1,M,Oxidation/2,M,Oxidation sp|CO5_HUMAN| +PEPMASS=1045.4891532599702 +CHARGE=3+ +130.049820 22.036392211914062 +147.112762 1306.5345458984375 +246.181168 1061.819580078125 +277.089691 1080.4100341796875 +317.218262 765.2626342773438 +418.265930 3030.318115234375 +424.129578 133.9978485107422 +547.308533 363.675048828125 +552.188171 775.8615112304688 +634.340576 3094.260986328125 +653.235840 834.9537963867188 +724.272949 777.478759765625 +749.367493 3039.6630859375 +855.313416 728.5900268554688 +896.435913 4310.04541015625 +983.372009 440.79754638671875 +997.483582 6664.56884765625 +1096.552002 10000.0 +1097.414917 123.42723846435547 +1198.462646 65.79039001464844 +1224.610596 1508.0657958984375 +1295.647705 2677.755126953125 +1329.503174 220.1059112548828 +1408.731812 60.41111373901367 +1442.587280 258.4700927734375 +1465.753296 1011.6499633789062 +1555.671387 80.28204345703125 +1579.796265 1034.8062744140625 +1669.714355 12.788238525390625 +1692.880371 333.5540466308594 +1726.735840 5.328736305236816 +1805.964478 71.2789535522461 +2038.052734 8.865133285522461 +2038.915649 1.5556315183639526 +2137.984131 18.362194061279297 +2239.031738 33.115936279296875 +2280.154297 14.037590980529785 +2386.100098 19.145526885986328 +2411.194824 29.84055519104004 +2482.231934 7.72592306137085 +2501.126953 0.356456995010376 +2588.158936 18.203256607055664 
+2711.338135 7.045225143432617 +2717.201416 2.179961681365967 +2818.249023 2.4785099029541016 +2889.286133 3.0733273029327393 +3005.417725 6.69489049911499 +END IONS + +BEGIN IONS +TITLE=sp|K1M1_SHEEP|_060_001_2 ENAELESRILERSQQQEPLVCPNYQSYFR/2 2/20,C,Carbamidomethyl/0,E,Glu->pyro-Glu sp|K1M1_SHEEP| +PEPMASS=1783.36060187392 +CHARGE=2+ +175.118912 2332.532958984375 +226.077454 957.4569091796875 +297.114563 94.21830749511719 +322.187317 893.1830444335938 +426.157135 2421.071533203125 +485.250641 756.0931396484375 +572.282715 771.4932250976562 +668.283813 559.9879760742188 +700.341309 681.98583984375 +863.404663 752.508056640625 +1074.500366 7516.95947265625 +1137.584961 143.07835388183594 +1234.560791 357.1983947753906 +1266.627563 6580.345703125 +1422.728638 596.2440795898438 +1543.765991 10000.0 +1672.808594 1030.6849365234375 +1800.867188 436.93902587890625 +2144.016357 225.71180725097656 +2300.117432 57.6701545715332 +2898.460938 5.564789295196533 +3340.666992 4.477206230163574 +3454.709961 20.332361221313477 +END IONS + diff --git a/tests/test_data/spectrum_output/target_predictions.ms2 b/tests/test_data/spectrum_output/target_predictions.ms2 new file mode 100644 index 00000000..83699245 --- /dev/null +++ b/tests/test_data/spectrum_output/target_predictions.ms2 @@ -0,0 +1,171 @@ +H CreationDate 2020-03-03 14:56:04 +H Extractor MS2PIP predictions +S 0 1413.2085823739196 +Z 2 2824.401514683699 +D seq LYKMAVGFMLAHPYGFTRVMSSYR +D modified seq LYKMAVGFMLAHPYGFTRVMSSYR +175.118912 7612.69677734375 +277.154633 862.73876953125 +338.182220 306.3399963378906 +405.249603 1173.1417236328125 +425.214264 125.47181701660156 +512.246338 1824.0396728515625 +536.290100 907.3919067382812 +607.327209 3567.384521484375 +643.286804 1487.355224609375 +706.395630 1691.5927734375 +742.355225 2041.16552734375 +763.417114 1299.451171875 +898.456299 496.1661376953125 +910.485535 2427.0908203125 +999.503967 2596.78955078125 +1041.526001 5264.21484375 +1154.610107 1048.434326171875 
+1203.593872 1489.6451416015625 +1225.647217 2918.779541015625 +1362.706177 2992.5322265625 +1366.657227 340.6930847167969 +1459.758911 238.1871337890625 +1463.709961 10000.0 +1600.768921 6510.755859375 +1622.822266 413.8850402832031 +1671.806030 3304.766357421875 +1784.890137 2934.6826171875 +1826.912231 381.4010009765625 +1915.930664 2700.006103515625 +1927.959961 271.4562683105469 +2062.999023 571.2114868164062 +2084.061279 306.12359619140625 +2120.020508 260.2799987792969 +2183.129639 198.59060668945312 +2219.088867 325.4184265136719 +2314.170166 147.9990997314453 +2401.202148 288.2973327636719 +2651.297363 102.87478637695312 +S 1 845.96993487392 +Z 2 1689.9242196837 +D seq VWPHKDYPLIPVGK +D modified seq V[+42.0]WPHKDYPLIPVGK +142.112350 79.65632629394531 +147.112762 1064.179931640625 +204.134232 3336.057373046875 +303.202637 484.39459228515625 +328.191681 3245.298583984375 +400.255402 10000.0 +513.339478 876.9566650390625 +562.303406 503.4450378417969 +626.423523 21.09709930419922 +690.398376 209.52952575683594 +723.476318 1903.180908203125 +805.425293 196.67640686035156 +886.539673 742.6878051757812 +968.488586 399.8765869140625 +1001.566589 1811.65380859375 +1129.661499 1234.1138916015625 +1178.625366 311.4296569824219 +1266.720459 409.8760986328125 +1291.709473 1984.581787109375 +1363.773193 4492.7001953125 +1388.762207 19.803117752075195 +1487.830566 60.26520919799805 +1544.852051 35.82118225097656 +1549.852539 6.53165340423584 +S 2 420.8586949266367 +Z 3 1259.5526096837 +D seq EGMLSIMSYR +D modified seq E[+42.0]GM[+16.0]LSIM[+16.0]SYR +172.086517 143.61070251464844 +175.118912 3735.07763671875 +229.107986 892.0596313476562 +338.182220 2297.726806640625 +376.147858 1329.711669921875 +425.214264 6692.748046875 +489.231903 306.2078857421875 +572.254150 10000.0 +576.263977 357.6639404296875 +685.338196 876.8776245117188 +689.348022 7.676164150238037 +772.370239 136.63636779785156 +836.387878 3.2346858978271484 +885.454285 37.848175048828125 +923.419922 
25.234107971191406 +1032.494141 8.809098243713379 +1086.483154 8.878385543823242 +S 3 1045.4891532599702 +Z 3 3133.4439846837 +D seq EMMQTAMQNTMLINGIAQVTFDSETAVK +D modified seq EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK +130.049820 22.036392211914062 +147.112762 1306.5345458984375 +246.181168 1061.819580078125 +277.089691 1080.4100341796875 +317.218262 765.2626342773438 +418.265930 3030.318359375 +424.129578 133.9978485107422 +547.308533 363.675048828125 +552.188171 775.8615112304688 +634.340576 3094.26123046875 +653.235840 834.9537963867188 +724.272949 777.4788208007812 +749.367493 3039.6630859375 +855.313416 728.5900268554688 +896.435913 4310.04541015625 +983.372009 440.79754638671875 +997.483582 6664.5693359375 +1096.552002 10000.0 +1097.414917 123.42723846435547 +1198.462646 65.79039001464844 +1224.610596 1508.0657958984375 +1295.647705 2677.75537109375 +1329.503174 220.10592651367188 +1408.731812 60.41111373901367 +1442.587280 258.4700927734375 +1465.753296 1011.6499633789062 +1555.671387 80.28204345703125 +1579.796265 1034.8062744140625 +1669.714355 12.788238525390625 +1692.880371 333.5540466308594 +1726.735840 5.328736782073975 +1805.964478 71.2789535522461 +2038.052734 8.865133285522461 +2038.915649 1.5556316375732422 +2137.984131 18.362194061279297 +2239.031738 33.115936279296875 +2280.154297 14.037590980529785 +2386.100098 19.145526885986328 +2411.194824 29.84055519104004 +2482.231934 7.72592306137085 +2501.126953 0.35645702481269836 +2588.158936 18.203256607055664 +2711.338135 7.045225143432617 +2717.201416 2.179961681365967 +2818.249023 2.4785099029541016 +2889.286133 3.0733275413513184 +3005.417725 6.694890975952148 +S 4 1783.36060187392 +Z 2 3564.7055536836997 +D seq ENAELESRILERSQQQEPLVCPNYQSYFR +D modified seq E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR +175.118912 2332.532958984375 +226.077454 957.4569091796875 +297.114563 94.21830749511719 +322.187317 893.1830444335938 +426.157135 2421.071533203125 +485.250641 756.0932006835938 +572.282715 
771.4932250976562 +668.283813 559.9879760742188 +700.341309 681.98583984375 +863.404663 752.508056640625 +1074.500366 7516.9599609375 +1137.584961 143.07835388183594 +1234.560791 357.19842529296875 +1266.627563 6580.345703125 +1422.728638 596.244140625 +1543.765991 10000.0 +1672.808594 1030.6849365234375 +1800.867188 436.93902587890625 +2144.016357 225.71182250976562 +2300.117432 57.6701545715332 +2898.460938 5.564789772033691 +3340.666992 4.477206230163574 +3454.709961 20.332361221313477 diff --git a/tests/test_data/spectrum_output/target_predictions.msp b/tests/test_data/spectrum_output/target_predictions.msp new file mode 100644 index 00000000..9be3a8fc --- /dev/null +++ b/tests/test_data/spectrum_output/target_predictions.msp @@ -0,0 +1,174 @@ +Name: LYKMAVGFMLAHPYGFTRVMSSYR/2 +MW: 2824.401514683699 +Comment: Mods=0 Parent=1413.2085823739196 Protein="sp|AMYS_HUMAN|" MS2PIP_ID="sp|AMYS_HUMAN|_069_000_2" +Num peaks: 46 +175.118912 7612 "y1" +277.154633 862 "b2" +338.182220 306 "y2" +405.249603 1173 "b3" +425.214264 125 "y3" +512.246338 1824 "y4" +536.290100 907 "b4" +607.327209 3567 "b5" +643.286804 1487 "y5" +706.395630 1691 "b6" +742.355225 2041 "y6" +763.417114 1299 "b7" +898.456299 496 "y7" +910.485535 2427 "b8" +999.503967 2596 "y8" +1041.526001 5264 "b9" +1154.610107 1048 "b10" +1203.593872 1489 "y10" +1225.647217 2918 "b11" +1362.706177 2992 "b12" +1366.657227 340 "y11" +1459.758911 238 "b13" +1463.709961 10000 "y12" +1600.768921 6510 "y13" +1622.822266 413 "b14" +1671.806030 3304 "y14" +1784.890137 2934 "y15" +1826.912231 381 "b16" +1915.930664 2700 "y16" +1927.959961 271 "b17" +2062.999023 571 "y17" +2084.061279 306 "b18" +2120.020508 260 "y18" +2183.129639 198 "b19" +2219.088867 325 "y19" +2314.170166 147 "b20" +2401.202148 288 "b21" +2651.297363 102 "b23" + +Name: VWPHKDYPLIPVGK/2 +MW: 1689.9242196837 +Comment: Mods=1/0,V,Acetyl Parent=845.96993487392 Protein="sp|CATA_HUMAN|" MS2PIP_ID="sp|CATA_HUMAN|_064_001_2" +Num peaks: 26 +142.112350 79 "b1" 
+147.112762 1064 "y1" +204.134232 3336 "y2" +303.202637 484 "y3" +328.191681 3245 "b2" +400.255402 10000 "y4" +513.339478 876 "y5" +562.303406 503 "b4" +626.423523 21 "y6" +690.398376 209 "b5" +723.476318 1903 "y7" +805.425293 196 "b6" +886.539673 742 "y8" +968.488586 399 "b7" +1001.566589 1811 "y9" +1129.661499 1234 "y10" +1178.625366 311 "b9" +1266.720459 409 "y11" +1291.709473 1984 "b10" +1363.773193 4492 "y12" +1388.762207 19 "b11" +1487.830566 60 "b12" +1544.852051 35 "b13" +1549.852539 6 "y13" + +Name: EGMLSIMSYR/3 +MW: 1259.5526096837 +Comment: Mods=3/0,E,Acetyl/6,M,Oxidation/2,M,Oxidation Parent=420.8586949266367 Protein="sp|CO5_HUMAN|" MS2PIP_ID="sp|CO5_HUMAN|_055_017_3" +Num peaks: 18 +172.086517 143 "b1" +175.118912 3735 "y1" +229.107986 892 "b2" +338.182220 2297 "y2" +376.147858 1329 "b3" +425.214264 6692 "y3" +489.231903 306 "b4" +572.254150 10000 "y4" +576.263977 357 "b5" +685.338196 876 "y5" +689.348022 7 "b6" +772.370239 136 "y6" +836.387878 3 "b7" +885.454285 37 "y7" +923.419922 25 "b8" +1032.494141 8 "y8" +1086.483154 8 "b9" + +Name: EMMQTAMQNTMLINGIAQVTFDSETAVK/3 +MW: 3133.4439846837 +Comment: Mods=2/1,M,Oxidation/2,M,Oxidation Parent=1045.4891532599702 Protein="sp|CO5_HUMAN|" MS2PIP_ID="sp|CO5_HUMAN|_253_048_3" +Num peaks: 54 +130.049820 22 "b1" +147.112762 1306 "y1" +246.181168 1061 "y2" +277.089691 1080 "b2" +317.218262 765 "y3" +418.265930 3030 "y4" +424.129578 133 "b3" +547.308533 363 "y5" +552.188171 775 "b4" +634.340576 3094 "y6" +653.235840 834 "b5" +724.272949 777 "b6" +749.367493 3039 "y7" +855.313416 728 "b7" +896.435913 4310 "y8" +983.372009 440 "b8" +997.483582 6664 "y9" +1096.552002 10000 "y10" +1097.414917 123 "b9" +1198.462646 65 "b10" +1224.610596 1508 "y11" +1295.647705 2677 "y12" +1329.503174 220 "b11" +1408.731812 60 "y13" +1442.587280 258 "b12" +1465.753296 1011 "y14" +1555.671387 80 "b13" +1579.796265 1034 "y15" +1669.714355 12 "b14" +1692.880371 333 "y16" +1726.735840 5 "b15" +1805.964478 71 "y17" +2038.052734 8 "y19" 
+2038.915649 1 "b18" +2137.984131 18 "b19" +2239.031738 33 "b20" +2280.154297 14 "y21" +2386.100098 19 "b21" +2411.194824 29 "y22" +2482.231934 7 "y23" +2501.126953 0 "b22" +2588.158936 18 "b23" +2711.338135 7 "y25" +2717.201416 2 "b24" +2818.249023 2 "b25" +2889.286133 3 "b26" +3005.417725 6 "y27" + +Name: ENAELESRILERSQQQEPLVCPNYQSYFR/2 +MW: 3564.7055536836997 +Comment: Mods=2/20,C,Carbamidomethyl/0,E,Glu->pyro-Glu Parent=1783.36060187392 Protein="sp|K1M1_SHEEP|" MS2PIP_ID="sp|K1M1_SHEEP|_060_001_2" +Num peaks: 56 +175.118912 2332 "y1" +226.077454 957 "b2" +297.114563 94 "b3" +322.187317 893 "y2" +426.157135 2421 "b4" +485.250641 756 "y3" +572.282715 771 "y4" +668.283813 559 "b6" +700.341309 681 "y5" +863.404663 752 "y6" +1074.500366 7516 "y8" +1137.584961 143 "b10" +1234.560791 357 "y9" +1266.627563 6580 "b11" +1422.728638 596 "b12" +1543.765991 10000 "y12" +1672.808594 1030 "y13" +1800.867188 436 "y14" +2144.016357 225 "y17" +2300.117432 57 "y18" +2898.460938 5 "y23" +3340.666992 4 "y27" +3454.709961 20 "y28" + diff --git a/tests/test_data/spectrum_output/target_predictions.ssl b/tests/test_data/spectrum_output/target_predictions.ssl new file mode 100644 index 00000000..37cc1bde --- /dev/null +++ b/tests/test_data/spectrum_output/target_predictions.ssl @@ -0,0 +1,6 @@ +file scan charge sequence score-type score retention-time +target_predictions.ms2 0 2 LYKMAVGFMLAHPYGFTRVMSSYR +target_predictions.ms2 1 2 V[+42.0]WPHKDYPLIPVGK +target_predictions.ms2 2 3 E[+42.0]GM[+16.0]LSIM[+16.0]SYR +target_predictions.ms2 3 3 EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK +target_predictions.ms2 4 2 E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR diff --git a/tests/test_data/spectrum_output/target_predictions_spectronaut.csv b/tests/test_data/spectrum_output/target_predictions_spectronaut.csv new file mode 100644 index 00000000..7f897810 --- /dev/null +++ b/tests/test_data/spectrum_output/target_predictions_spectronaut.csv @@ -0,0 +1,201 @@ 
+ModifiedPeptide,StrippedPeptide,PrecursorCharge,PrecursorMz,ProteinId,FragmentLossType,FragmentCharge,FragmentMz,RelativeIntensity,FragmentType,FragmentNumber +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,130.04982,0.0004783587,b,1 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,277.0897,0.023453183,b,2 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,424.12958,0.002908781,b,3 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,552.1882,0.016842145,b,4 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,653.23584,0.0181249,b,5 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,724.27295,0.016877253,b,6 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,855.3134,0.015815992,b,7 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,983.372,0.0095686875,b,8 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1097.4149,0.0026793177,b,9 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1198.4626,0.001428156,b,10 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1329.5032,0.0047779866,b,11 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1442.5873,0.005610783,b,12 
+_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1555.6714,0.0017427361,b,13 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1669.7144,0.00027760284,b,14 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1726.7358,0.00011567445,b,15 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1839.82,0.0,b,16 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1910.857,0.0,b,17 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2038.9156,3.3769134e-05,b,18 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2137.9841,0.00039860044,b,19 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2239.0317,0.0007188698,b,20 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2386.1,0.00041560477,b,21 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2501.127,7.737851e-06,b,22 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2588.159,0.00039515027,b,23 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2717.2014,4.7321886e-05,b,24 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2818.249,5.3802672e-05,b,25 
+_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2889.2861,6.6714776e-05,b,26 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2988.3545,0.0,b,27 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,147.11276,0.02836182,y,1 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,246.18117,0.023049628,y,2 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,317.21826,0.016612068,y,3 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,418.26593,0.06578115,y,4 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,547.30853,0.007894537,y,5 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,634.3406,0.0671692,y,6 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,749.3675,0.065984,y,7 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,896.4359,0.09356104,y,8 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,997.4836,0.14467226,y,9 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1096.552,0.21707669,y,10 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1224.6106,0.032736592,y,11 
+_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1295.6477,0.058127824,y,12 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1408.7318,0.0013113845,y,13 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1465.7533,0.021960562,y,14 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1579.7963,0.022463232,y,15 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1692.8804,0.007240681,y,16 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1805.9645,0.0015472999,y,17 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,1937.005,0.0,y,18 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2038.0527,0.00019244138,y,19 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2152.0957,0.0,y,20 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2280.1543,0.0003047234,y,21 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2411.1948,0.0006477689,y,22 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2482.232,0.00016771178,y,23 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2583.2795,0.0,y,24 
+_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2711.3381,0.00015293542,y,25 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,2858.378,0.0,y,26 +_EM[+16.0]M[+16.0]QTAMQNTMLINGIAQVTFDSETAVK_,EMMQTAMQNTMLINGIAQVTFDSETAVK,3,1045.4891532599702,sp|CO5_HUMAN|,noloss,1,3005.4177,0.00014533047,y,27 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,142.11235,0.002382793,b,1 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,328.19168,0.09707797,b,2 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,425.24445,0.0,b,3 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,562.3034,0.015059763,b,4 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,690.3984,0.0062677446,b,5 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,805.4253,0.005883264,b,6 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,968.4886,0.011961676,b,7 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1065.5413,0.0,b,8 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1178.6254,0.009315926,b,9 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1291.7095,0.059365626,b,10 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1388.7622,0.00059237896,b,11 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1487.8306,0.0018027385,b,12 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1544.852,0.001071534,b,13 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,147.11276,0.03183326,y,1 
+_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,204.13423,0.09979288,y,2 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,303.20264,0.014489898,y,3 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,400.2554,0.2991342,y,4 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,513.3395,0.026232772,y,5 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,626.4235,0.00063108635,y,6 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,723.4763,0.05693065,y,7 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,886.5397,0.022216331,y,8 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1001.5666,0.054192763,y,9 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1129.6615,0.03691657,y,10 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1266.7205,0.012260796,y,11 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1363.7732,0.13439202,y,12 +_V[+42.0]WPHKDYPLIPVGK_,VWPHKDYPLIPVGK,2,845.96993487392,sp|CATA_HUMAN|,noloss,1,1549.8525,0.00019538408,y,13 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,114.09129,0.0,b,1 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,277.15463,0.012078261,b,2 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,405.2496,0.016423872,b,3 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,536.2901,0.0127034,b,4 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,607.3272,0.049943045,b,5 
+_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,706.3956,0.023682138,b,6 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,763.4171,0.018192193,b,7 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,910.48553,0.03397904,b,8 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1041.526,0.073698506,b,9 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1154.6101,0.014677981,b,10 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1225.6472,0.040862635,b,11 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1362.7062,0.041895166,b,12 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1459.7589,0.003334597,b,13 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1622.8223,0.005794351,b,14 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1679.8438,0.0,b,15 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1826.9122,0.0053395773,b,16 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1927.96,0.003800362,b,17 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2084.0613,0.004285701,b,18 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2183.1296,0.0027802496,b,19 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2314.1702,0.0020719734,b,20 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2401.2021,0.004036135,b,21 
+_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2488.2341,0.0,b,22 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2651.2974,0.0014402372,b,23 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,175.11891,0.10657703,y,1 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,338.18222,0.0042887307,y,2 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,425.21426,0.0017565935,y,3 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,512.24634,0.02553638,y,4 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,643.2868,0.02082283,y,5 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,742.3552,0.028576123,y,6 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,898.4563,0.0069462783,y,7 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,999.50397,0.036354806,y,8 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1146.5724,0.0,y,9 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1203.5939,0.02085489,y,10 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1366.6572,0.004769671,y,11 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1463.71,0.13999905,y,12 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1600.7689,0.09114996,y,13 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1671.806,0.046266414,y,14 
+_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1784.8901,0.041085277,y,15 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,1915.9307,0.037799828,y,16 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2062.999,0.0079969065,y,17 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2120.0205,0.0036438953,y,18 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2219.0889,0.004555827,y,19 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2290.126,0.0,y,20 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2421.1665,0.0,y,21 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2549.2615,0.0,y,22 +_LYKMAVGFMLAHPYGFTRVMSSYR_,LYKMAVGFMLAHPYGFTRVMSSYR,2,1413.2085823739196,sp|AMYS_HUMAN|,noloss,1,2712.3247,0.0,y,23 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,112.034515,0.0,b,1 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,226.07745,0.025741037,b,2 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,297.11456,0.0025330405,b,3 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,426.15714,0.06509002,b,4 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,539.2412,0.0,b,5 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,668.2838,0.015055165,b,6 
+_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,755.31586,0.0,b,7 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,911.41693,0.0,b,8 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1024.501,0.0,b,9 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1137.585,0.003846633,b,10 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1266.6276,0.17691128,b,11 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1422.7286,0.016029904,b,12 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1509.7606,0.0,b,13 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1637.8192,0.0,b,14 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1765.8778,0.0,b,15 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1893.9364,0.0,b,16 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2022.979,0.0,b,17 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2120.032,0.0,b,18 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2233.116,0.0,b,19 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2332.1843,0.0,b,20 
+_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2492.2449,0.0,b,21 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2589.2976,0.0,b,22 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2703.3406,0.0,b,23 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2866.4038,0.0,b,24 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2994.4624,0.0,b,25 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,3081.4944,0.0,b,26 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,3244.5576,0.0,b,27 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,3391.626,0.0,b,28 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,175.11891,0.06270968,y,1 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,322.18732,0.024013048,y,2 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,485.25064,0.020327413,y,3 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,572.2827,0.020741442,y,4 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,700.3413,0.018335054,y,5 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,863.40466,0.020231029,y,6 
+_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,977.4476,0.0,y,7 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1074.5004,0.20209196,y,8 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1234.5608,0.009603208,y,9 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1333.6292,0.0,y,10 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1446.7133,0.0,y,11 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1543.766,0.268848,y,12 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1672.8086,0.027709758,y,13 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1800.8672,0.011747018,y,14 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,1928.9258,0.0,y,15 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2056.9844,0.0,y,16 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2144.0164,0.006068217,y,17 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2300.1174,0.0015504506,y,18 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2429.16,0.0,y,19 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2542.244,0.0,y,20 
+_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2655.328,0.0,y,21 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2811.429,0.0,y,22 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,2898.461,0.00014960826,y,23 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,3027.5034,0.0,y,24 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,3140.5874,0.0,y,25 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,3269.63,0.0,y,26 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,3340.667,0.00012036879,y,27 +_E[-18.0]NAELESRILERSQQQEPLVC[+57.0]PNYQSYFR_,ENAELESRILERSQQQEPLVCPNYQSYFR,2,1783.36060187392,sp|K1M1_SHEEP|,noloss,1,3454.71,0.00054663146,y,28 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,172.08652,0.0053466386,b,1 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,229.10799,0.033211455,b,2 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,376.14786,0.049505275,b,3 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,489.2319,0.0114001455,b,4 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,576.264,0.013315857,b,5 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,689.348,0.00028578425,b,6 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,836.3879,0.00012042762,b,7 
+_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,923.4199,0.0009394679,b,8 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,1086.4832,0.00033054306,b,9 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,175.11891,0.13905725,y,1 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,338.18222,0.085544564,y,2 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,425.21426,0.24917157,y,3 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,572.25415,0.37230083,y,4 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,685.3382,0.032646228,y,5 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,772.37024,0.0050869836,y,6 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,885.4543,0.0014090907,y,7 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,1032.4941,0.00032796344,y,8 +_E[+42.0]GM[+16.0]LSIM[+16.0]SYR_,EGMLSIMSYR,3,420.8586949266367,sp|CO5_HUMAN|,noloss,1,1089.5156,0.0,y,9 diff --git a/tests/test_modifications.py b/tests/test_modifications.py new file mode 100644 index 00000000..76edbafe --- /dev/null +++ b/tests/test_modifications.py @@ -0,0 +1,29 @@ +import pytest + +import ms2pip.modifications + + +class TestModifications: + def test_add_from_ms2pip_modstrings(self): + mods = ms2pip.modifications.Modifications() + mods.add_from_ms2pip_modstrings([ + "Oxidation,15.994915,opt,M", + "Acetyl,42.010565,opt,N-term", + ]) + + assert mods.modifications["Oxidation"]["amino_acid"] == "M" + assert mods.modifications["Acetyl"]["mass_shift"] == 42.010565 + + def test_get_mass_shifts(self): + mods = ms2pip.modifications.Modifications() + + mods.add_from_ms2pip_modstrings([ + "Oxidation,15.994915,opt,M" + ]) + 
assert mods.get_mass_shifts()["Oxidation"] == 15.994915 + + # Test cache clear after adding new modifications + mods.add_from_ms2pip_modstrings([ + "Acetyl,42.010565,opt,N-term", + ]) + assert mods.get_mass_shifts()["Acetyl"] == 42.010565 diff --git a/tests/test_predictions.py b/tests/test_predictions.py index a3da57fa..6d37dda1 100644 --- a/tests/test_predictions.py +++ b/tests/test_predictions.py @@ -2,8 +2,10 @@ import pandas as pd import numpy as np +from ms2pip.ms2pipC import run + # Run ms2pipC to predict peak intensities from a PEPREC file (HCD model) -call(["ms2pip", "tests/test_data/test.peprec", "-c", "tests/test_data/config.txt"]) +run("tests/test_data/test.peprec", config_file='tests/test_data/config.txt') test_data = pd.read_csv("tests/test_data/test_HCD_predictions.csv") target_data = pd.read_csv("tests/test_data/target_HCD_predictions.csv") diff --git a/tests/test_spectrum_output.py b/tests/test_spectrum_output.py new file mode 100644 index 00000000..95be2d64 --- /dev/null +++ b/tests/test_spectrum_output.py @@ -0,0 +1,71 @@ +import pandas as pd + +from ms2pip.ms2pip_tools.spectrum_output import SpectrumOutput + + +class TestSpectrumOutput: + def test_integration(self): + + peprec = pd.read_pickle("tests/test_data/spectrum_output/input_peprec.pkl") + all_preds = pd.read_pickle("tests/test_data/spectrum_output/input_preds.pkl") + + params = { + "ptm": [ + "Oxidation,15.994915,opt,M", + "Carbamidomethyl,57.021464,opt,C", + "Glu->pyro-Glu,-18.010565,opt,E", + "Gln->pyro-Glu,-17.026549,opt,Q", + "Acetyl,42.010565,opt,N-term", + ], + "sptm": [], + "gptm": [], + "model": "HCD", + "frag_error": "0.02", + "out": "csv", + } + + peprec_tmp = peprec.sample(5, random_state=10).copy() + all_preds_tmp = all_preds[ + all_preds["spec_id"].isin(peprec_tmp["spec_id"]) + ].copy() + + so = SpectrumOutput( + all_preds_tmp, + peprec_tmp, + params, + output_filename="test", + return_stringbuffer=True, + ) + + target_filename_base = "tests/test_data/spectrum_output/target" 
+ + # Test general output + test_cases = [ + (so.write_mgf, "_predictions.mgf"), + (so.write_msp, "_predictions.msp"), + (so.write_spectronaut, "_predictions_spectronaut.csv"), + ] + + for test_function, file_ext in test_cases: + test = test_function() + test.seek(0) + with open(target_filename_base + file_ext) as target: + for test_line, target_line in zip(test.readlines(), target.readlines()): + assert test_line == target_line + + # Test bibliospec output + bibliospec_ssl, bibliospec_ms2 = so.write_bibliospec() + test_cases = [ + (bibliospec_ssl, "_predictions.ssl"), + (bibliospec_ms2, "_predictions.ms2"), + ] + + for test, file_ext in test_cases: + test.seek(0) + with open(target_filename_base + file_ext) as target: + for test_line, target_line in zip(test.readlines(), target.readlines()): + test_line = test_line.replace( + "test_predictions.ms2", "target_predictions.ms2" + ) + if not "CreationDate" in target_line: + assert test_line == target_line