Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor readers iii #242

Open
wants to merge 5 commits into
base: refactor_readers_II
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion alphabase/psm_reader/maxquant_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ def set_modification_mapping(
super().set_modification_mapping(modification_mapping)
self._add_all_unimod()
self._extend_mod_brackets()
self._reverse_mod_mapping()
self.rev_mod_mapping = self._get_reversed_mod_mapping()

def _add_all_unimod(self) -> None:
for mod_name, unimod in mod_to_unimod_dict.items():
Expand Down
80 changes: 47 additions & 33 deletions alphabase/psm_reader/psm_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import copy
import warnings
from abc import ABC, abstractmethod
from collections import defaultdict
from pathlib import Path
from typing import Dict, List, NoReturn, Optional, Set, Type, Union

Expand Down Expand Up @@ -178,10 +179,11 @@ def __init__(
self.set_modification_mapping()
self.add_modification_mapping(modification_mapping)

if column_mapping is not None:
self.column_mapping = column_mapping
else:
self._init_column_mapping()
self.column_mapping = (
column_mapping
if column_mapping is not None
else self._read_column_mapping()
)

self._psm_df = pd.DataFrame()
self._keep_fdr = fdr
Expand Down Expand Up @@ -219,30 +221,36 @@ def add_modification_mapping(self, modification_mapping: dict) -> None:
```

"""
if (
modification_mapping is None
or len(modification_mapping) == 0
or not isinstance(modification_mapping, dict)
):
if not isinstance(modification_mapping, dict):
return

new_modification_mapping = defaultdict(list)
for key, val in list(modification_mapping.items()):
if key in self.modification_mapping:
if isinstance(val, str):
self.modification_mapping[key].append(val)
else:
self.modification_mapping[key].extend(val)
elif isinstance(val, str):
self.modification_mapping[key] = [val]
if isinstance(val, str):
new_modification_mapping[key].append(val)
else:
self.modification_mapping[key] = val
new_modification_mapping[key].extend(val)
jalew188 marked this conversation as resolved.
Show resolved Hide resolved

self.set_modification_mapping(self.modification_mapping)
if new_modification_mapping:
self.set_modification_mapping(
self.modification_mapping | new_modification_mapping
)

def set_modification_mapping(
self, modification_mapping: Optional[dict] = None
) -> None:
"""Set the modification mapping."""
"""Set the modification mapping for the search engine.

Also creates a reverse mapping from the modification format used by the search engine to the AlphaBase format.

Parameters
----------
modification_mapping:
If dictionary: the current modification_mapping will be overwritten by this.
If str: the parameter will be interpreted as a reader type, and the modification_mapping is read from the
"modification_mapping" section of the psm_reader_yaml

"""
if modification_mapping is None:
self._init_modification_mapping()
elif isinstance(modification_mapping, str):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jalew188 here, a lot of hidden complexity arises (also connected to _init_modification_mapping) as the "column_mapping" parameter in the yaml is overloaded (could either be str or dict).
Could we introduce a (mutually exclusive) "column_mapping_type" in the yaml that serves to reference another reader?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, we shall then add a new method: add_new_column_mapping_item to handle user-defined columns

Copy link
Collaborator

@jalew188 jalew188 Nov 19, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Both modification_mapping and column_mapping are not well designed in alphabase readers. We need a better way.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I absolutely agree. I think the modification mapping is already quite close, but we need to make sure it works the same way everywhere. It's also not yet compatible with custom mods and MP.

Expand All @@ -257,33 +265,39 @@ def set_modification_mapping(
else:
self.modification_mapping = copy.deepcopy(modification_mapping)

self._mods_as_lists()
self._reverse_mod_mapping()
self._str_mods_to_lists()

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason why this attribute is mutated in place, while the others are reassigned?
(see e.g. line 269)

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess for readability? Complex in-place operations are hard to read and track.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch @lucas-diedrich, I was wondering too. As @jalew188 suspected, it was indeed the limited complexity that made me keep _str_mods_to_lists in-place, but that was based more on a gut feeling than on strict reasoning. I could change it for consistency if you like?

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see, thank you for the explanation 👍 So I guess the alternative would then be something like

self.modification_mapping = self._str_mods_to_lists(self.modification_mapping)

With

def _str_mods_to_lists(self, modification_mapping: dict) -> dict[str, list[str]]:
     """Convert all single strings to lists containing one item in self.modification_mapping."""
     for mod, val in list(modification_mapping.items()):
         if isinstance(val, str):
             modification_mapping[mod] = [val]
     return modification_mapping

I don't have a strong opinion on this, and the current way looks great, so no need to change it!

self.rev_mod_mapping = self._get_reversed_mod_mapping()

def _init_modification_mapping(self) -> None:
self.modification_mapping = {}

def _mods_as_lists(self) -> None:
def _str_mods_to_lists(self) -> None:
"""Convert all single strings to lists containing one item in self.modification_mapping."""
for mod, val in list(self.modification_mapping.items()):
if isinstance(val, str):
self.modification_mapping[mod] = [val]

def _reverse_mod_mapping(self) -> None:
self.rev_mod_mapping = {}
for this_mod, other_mod in self.modification_mapping.items():
if isinstance(other_mod, (list, tuple)):
for _mod in other_mod:
if _mod in self.rev_mod_mapping and this_mod.endswith(
"Protein_N-term"
def _get_reversed_mod_mapping(self) -> Dict[str, str]:
"""Create a reverse mapping from the modification format used by the search engine to the AlphaBase format."""
rev_mod_mapping = {}
for mod_alphabase_format, mod_other_format in self.modification_mapping.items():
if isinstance(mod_other_format, (list, tuple)):
for mod_other_format_ in mod_other_format:
if (
mod_other_format_ in rev_mod_mapping
and mod_alphabase_format.endswith("Protein_N-term")
):
continue

self.rev_mod_mapping[_mod] = this_mod
rev_mod_mapping[mod_other_format_] = mod_alphabase_format
else:
self.rev_mod_mapping[other_mod] = this_mod
rev_mod_mapping[mod_other_format] = mod_alphabase_format

return rev_mod_mapping

def _init_column_mapping(self) -> NoReturn:
self.column_mapping = psm_reader_yaml[self._reader_type]["column_mapping"]
def _read_column_mapping(self) -> Dict[str, str]:
"""Read column mapping from psm_reader yaml file."""
return psm_reader_yaml[self._reader_type]["column_mapping"]

def load(self, _file: Union[List[str], str]) -> pd.DataFrame:
"""Wrapper for import_file()."""
Expand Down
2 changes: 1 addition & 1 deletion nbs_tests/psm_reader/psm_reader.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@
"source": [
"#| hide\n",
"class TestReader(PSMReaderBase):\n",
" def _init_column_mapping(self): pass\n",
" def _read_column_mapping(self): pass\n",
" def _load_file(self): pass\n",
" def _load_modifications(self): pass\n",
"\n",
Expand Down
Loading