Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Linting iii #235

Open
wants to merge 17 commits into
base: linting_II
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions alphabase/psm_reader/alphapept_reader.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import os
from pathlib import Path
from typing import Optional
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this a thing, that Path is preferred over os?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes.. imho it makes path operations much more readable,
I especially like the overloading of the / operator when it comes to using it (e.g. Path("/home/") / "user").
I especially dislike the overloading of the / operator when it comes to mocking it :-)


import h5py
Expand All @@ -18,7 +18,7 @@
def parse_ap(precursor):
"""Parser to parse peptide strings."""
items = precursor.split("_")
decoy = 1 if len(items) == 3 else 0
decoy = 1 if len(items) == 3 else 0 # noqa: PLR2004 magic value
modseq = items[0]
charge = items[-1]

Expand Down Expand Up @@ -77,7 +77,7 @@ def _load_file(self, filename):
with h5py.File(filename, "r") as _hdf:
dataset = _hdf[self.hdf_dataset]
df = pd.DataFrame({col: dataset[col] for col in dataset})
df[PsmDfCols.RAW_NAME] = os.path.basename(filename)[: -len(".ms_data.hdf")]
df[PsmDfCols.RAW_NAME] = Path(filename).name[: -len(".ms_data.hdf")]
df["precursor"] = df["precursor"].str.decode("utf-8")
# df['naked_sequence'] = df['naked_sequence'].str.decode('utf-8')
if "scan_no" in df.columns:
Expand Down
8 changes: 4 additions & 4 deletions alphabase/psm_reader/dia_psm_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class SpectronautReader(MaxQuantReader):

"""

def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
Expand Down Expand Up @@ -66,7 +66,7 @@ def _load_file(self, filename):


class SwathReader(SpectronautReader):
def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
Expand All @@ -90,7 +90,7 @@ def __init__(


class DiannReader(SpectronautReader):
def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
Expand Down Expand Up @@ -144,7 +144,7 @@ class SpectronautReportReader(MaxQuantReader):

"""

def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
Expand Down
38 changes: 19 additions & 19 deletions alphabase/psm_reader/maxquant_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
warnings.filterwarnings("always")

mod_to_unimod_dict = {}
for mod_name, unimod_id in MOD_DF[["mod_name", "unimod_id"]].values:
unimod_id = int(unimod_id)
for mod_name, unimod_id_ in MOD_DF[["mod_name", "unimod_id"]].to_numpy():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this suffix thing a known pattern? I only know it as a private prefix.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I use it to avoid name clashes, e.g. with built-ins or other variables

unimod_id = int(unimod_id_)
if unimod_id in (-1, "-1"):
continue
if mod_name[-2] == "@":
Expand Down Expand Up @@ -81,14 +81,14 @@ def parse_mod_seq(
0 for N-term; -1 for C-term; 1 to N for normal modifications.

"""
PeptideModSeq = modseq
peptide_mod_seq = modseq
underscore_for_ncterm = modseq[0] == "_"
mod_list = []
site_list = []
site = PeptideModSeq.find(mod_sep[0])
site = peptide_mod_seq.find(mod_sep[0])
while site != -1:
site_end = PeptideModSeq.find(mod_sep[1], site + 1) + 1
if site_end < len(PeptideModSeq) and PeptideModSeq[site_end] == mod_sep[1]:
site_end = peptide_mod_seq.find(mod_sep[1], site + 1) + 1
if site_end < len(peptide_mod_seq) and peptide_mod_seq[site_end] == mod_sep[1]:
site_end += 1
if underscore_for_ncterm:
site_list.append(site - 1)
Expand All @@ -97,42 +97,42 @@ def parse_mod_seq(
start_mod = site
if start_mod > 0:
start_mod -= 1
mod_list.append(PeptideModSeq[start_mod:site_end])
PeptideModSeq = PeptideModSeq[:site] + PeptideModSeq[site_end:]
site = PeptideModSeq.find(mod_sep[0], site)
mod_list.append(peptide_mod_seq[start_mod:site_end])
peptide_mod_seq = peptide_mod_seq[:site] + peptide_mod_seq[site_end:]
site = peptide_mod_seq.find(mod_sep[0], site)

# patch for phos. How many other modification formats does MQ have?
site = PeptideModSeq.find("p")
site = peptide_mod_seq.find("p")
while site != -1:
mod_list.append(PeptideModSeq[site : site + 2])
mod_list.append(peptide_mod_seq[site : site + 2])
site_list = [i - 1 if i > site else i for i in site_list]
if underscore_for_ncterm:
site_list.append(site)
else:
site_list.append(site + 1)
PeptideModSeq = PeptideModSeq[:site] + PeptideModSeq[site + 1 :]
site = PeptideModSeq.find("p", site)
peptide_mod_seq = peptide_mod_seq[:site] + peptide_mod_seq[site + 1 :]
site = peptide_mod_seq.find("p", site)

if fixed_C57:
site = PeptideModSeq.find("C")
site = peptide_mod_seq.find("C")
while site != -1:
if underscore_for_ncterm:
site_list.append(site)
else:
site_list.append(site + 1)
mod_list.append("C" + "Carbamidomethyl (C)".join(mod_sep))
site = PeptideModSeq.find("C", site + 1)
sequence = PeptideModSeq.strip("_")
nAA = len(sequence)
site = peptide_mod_seq.find("C", site + 1)
sequence = peptide_mod_seq.strip("_")
n_aa = len(sequence)
return (
sequence,
";".join(mod_list),
";".join([str(i) if i <= nAA else "-1" for i in site_list]),
";".join([str(i) if i <= n_aa else "-1" for i in site_list]),
)


class MaxQuantReader(PSMReaderBase):
def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
Expand Down
8 changes: 4 additions & 4 deletions alphabase/psm_reader/msfragger_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def _is_fragger_decoy(proteins):
mod_mass_tol = psm_reader_yaml["msfragger_pepxml"]["mod_mass_tol"]


def _get_mods_from_masses(sequence, msf_aa_mods):
def _get_mods_from_masses(sequence, msf_aa_mods): # noqa: PLR0912, C901 many branches, too complex TODO: refactor
mods = []
mod_sites = []
aa_mass_diffs = []
Expand Down Expand Up @@ -78,7 +78,7 @@ def _get_mods_from_masses(sequence, msf_aa_mods):
)


class MSFragger_PSM_TSV_Reader(PSMReaderBase):
class MSFragger_PSM_TSV_Reader(PSMReaderBase): # noqa: N801 name should use CapWords convention TODO: refactor
def __init__(
self,
*,
Expand All @@ -93,7 +93,7 @@ def __init__(


class MSFraggerPepXML(PSMReaderBase):
def __init__(
def __init__( # noqa: PLR0913 many arguments in function definition
self,
*,
column_mapping: Optional[dict] = None,
Expand Down Expand Up @@ -129,7 +129,7 @@ def _load_file(self, filename):
msf_df[PsmDfCols.RAW_NAME] = (
msf_df["spectrum"].str.split(".").apply(lambda x: x[0])
)
msf_df["to_remove"] = 0 # TODO revisit
msf_df["to_remove"] = 0 # TODO: revisit
self.column_mapping[PsmDfCols.TO_REMOVE] = "to_remove"
return msf_df

Expand Down
56 changes: 30 additions & 26 deletions alphabase/psm_reader/pfind_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
)


def convert_one_pFind_mod(mod):
def _convert_one_pfind_mod(mod: str) -> Optional[str]: # noqa: C901 too complex (11 > 10) TODO: refactor
if mod[-1] == ")":
mod = mod[: (mod.find("(") - 1)]
idx = mod.rfind("[")
Expand All @@ -22,40 +22,44 @@ def convert_one_pFind_mod(mod):
idx = mod.rfind("[")
name = mod[:idx]
site = mod[(idx + 1) : -1]

if len(site) == 1:
return name + "@" + site
if site == "AnyN-term":
return name + "@" + "Any_N-term"
if site == "ProteinN-term":
return name + "@" + "Protein_N-term"
if site.startswith("AnyN-term"):
return name + "@" + site[-1] + "^Any_N-term"
if site.startswith("ProteinN-term"):
return name + "@" + site[-1] + "^Protein_N-term"
if site == "AnyC-term":
return name + "@" + "Any_C-term"
if site == "ProteinC-term":
return name + "@" + "Protein_C-term"
if site.startswith("AnyC-term"):
return name + "@" + site[-1] + "^Any_C-term"
if site.startswith("ProteinC-term"):
return name + "@" + site[-1] + "^Protein_C-term"
return None


def translate_pFind_mod(mod_str):
return_value = name + "@" + site
elif site == "AnyN-term":
return_value = name + "@" + "Any_N-term"
elif site == "ProteinN-term":
return_value = name + "@" + "Protein_N-term"
elif site.startswith("AnyN-term"):
return_value = name + "@" + site[-1] + "^Any_N-term"
elif site.startswith("ProteinN-term"):
return_value = name + "@" + site[-1] + "^Protein_N-term"
elif site == "AnyC-term":
return_value = name + "@" + "Any_C-term"
elif site == "ProteinC-term":
return_value = name + "@" + "Protein_C-term"
elif site.startswith("AnyC-term"):
return_value = name + "@" + site[-1] + "^Any_C-term"
elif site.startswith("ProteinC-term"):
return_value = name + "@" + site[-1] + "^Protein_C-term"
else:
return_value = None

return return_value


def translate_pFind_mod(mod_str): # noqa: N802 name `get_pFind_mods` should be lowercase TODO: used by peptdeep
if not mod_str:
return ""
ret_mods = []
for mod in mod_str.split(";"):
mod = convert_one_pFind_mod(mod)
for mod_ in mod_str.split(";"):
mod = _convert_one_pfind_mod(mod_)
if not mod or mod not in ap_mod.MOD_INFO_DICT:
return pd.NA
ret_mods.append(mod)
return ";".join(ret_mods)


def get_pFind_mods(pfind_mod_str):
def get_pFind_mods(pfind_mod_str): # noqa: N802 name `get_pFind_mods` should be lowercase TODO: used by peptdeep
pfind_mod_str = pfind_mod_str.strip(";")
if not pfind_mod_str:
return "", ""
Expand Down Expand Up @@ -84,7 +88,7 @@ def parse_pfind_protein(protein, keep_reverse=True):
)


class pFindReader(PSMReaderBase):
class pFindReader(PSMReaderBase): # noqa: N801 name `pFindReader` should use CapWords convention TODO: used by peptdeep, alpharaw
def __init__(
self,
*,
Expand Down
8 changes: 3 additions & 5 deletions alphabase/psm_reader/psm_reader.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import copy
import os
import warnings
from pathlib import Path
from typing import NoReturn, Optional

import numpy as np
Expand Down Expand Up @@ -77,7 +77,7 @@ def _keep_modifications(mod_str: str, mod_set: set) -> str:


#: See `psm_reader.yaml <https://github.com/MannLabs/alphabase/blob/main/alphabase/constants/const_files/psm_reader.yaml>`_
psm_reader_yaml = load_yaml(os.path.join(CONST_FILE_FOLDER, "psm_reader.yaml"))
psm_reader_yaml = load_yaml(Path(CONST_FILE_FOLDER) / "psm_reader.yaml")


class PSMReaderBase:
Expand Down Expand Up @@ -268,9 +268,7 @@ def load(self, _file) -> pd.DataFrame:
return self.import_file(_file)

def import_files(self, file_list: list):
df_list = []
for _file in file_list:
df_list.append(self.import_file(_file))
df_list = [self.import_file(file) for file in file_list]
self._psm_df = pd.concat(df_list, ignore_index=True)
return self._psm_df

Expand Down
Loading
Loading