Skip to content

Commit

Permalink
Merge pull request #336 from MannLabs/rename_metadat_param
Browse files Browse the repository at this point in the history
rename metadata_path -> metadata_path_or_df
  • Loading branch information
mschwoer authored Sep 20, 2024
2 parents e06ae41 + a8a6682 commit 68dbd34
Show file tree
Hide file tree
Showing 13 changed files with 42 additions and 38 deletions.
6 changes: 3 additions & 3 deletions alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,14 @@ class DataSet:
def __init__(
self,
loader: BaseLoader,
metadata_path: Optional[Union[str, pd.DataFrame]] = None,
metadata_path_or_df: Optional[Union[str, pd.DataFrame]] = None,
sample_column: Optional[str] = None,
):
"""Create DataSet
Args:
loader (BaseLoader): loader of class AlphaPeptLoader, MaxQuantLoader, DIANNLoader, FragPipeLoader, SpectronautLoader
metadata_path (str or pd.DataFrame, optional): path to metadata file or an actual df. Defaults to None.
metadata_path_or_df (str or pd.DataFrame, optional): path to metadata file or an actual df. Defaults to None.
sample_column (str, optional): column in metadata file indicating the sample IDs. Defaults to None.
Attributes of a DataSet instance:
Expand Down Expand Up @@ -86,7 +86,7 @@ def __init__(
rawinput=self.rawinput,
index_column=self.index_column,
intensity_column=self._intensity_column,
metadata_path=metadata_path,
metadata_path_or_df=metadata_path_or_df,
sample_column=sample_column,
)

Expand Down
8 changes: 4 additions & 4 deletions alphastats/dataset_factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,14 +15,14 @@ def __init__(
rawinput: pd.DataFrame,
index_column: str,
intensity_column: Union[List[str], str],
metadata_path: Union[str, pd.DataFrame],
metadata_path_or_df: Union[str, pd.DataFrame],
sample_column: str,
):
self.rawinput: pd.DataFrame = rawinput
self.sample_column: str = sample_column
self.index_column: str = index_column
self.intensity_column: Union[List[str], str] = intensity_column
self.metadata_path: Union[str, pd.DataFrame] = metadata_path
self.metadata_path_or_df: Union[str, pd.DataFrame] = metadata_path_or_df

def create_matrix_from_rawinput(self) -> Tuple[pd.DataFrame, pd.DataFrame]:
"""Creates a matrix: features (Proteins) as columns, samples as rows."""
Expand Down Expand Up @@ -61,9 +61,9 @@ def _check_matrix_values(mat: pd.DataFrame) -> None:
def create_metadata(self, mat: pd.DataFrame) -> Tuple[pd.DataFrame, str]:
"""Create metadata DataFrame from metadata file or DataFrame."""

if self.metadata_path is not None:
if self.metadata_path_or_df is not None:
sample = self.sample_column
metadata = self._load_metadata(file_path=self.metadata_path)
metadata = self._load_metadata(file_path=self.metadata_path_or_df)
metadata = self._remove_missing_samples_from_metadata(mat, metadata, sample)
else:
sample = "sample"
Expand Down
2 changes: 1 addition & 1 deletion alphastats/gui/pages/02_Import Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ def _finalize_data_loading(

dataset = DataSet(
loader=loader,
metadata_path=metadatafile_df,
metadata_path_or_df=metadatafile_df,
sample_column=sample_column,
)
metadata_columns = metadatafile_df.columns.to_list()
Expand Down
8 changes: 6 additions & 2 deletions alphastats/gui/utils/import_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,15 +109,19 @@ def load_example_data():

loader = MaxQuantLoader(file=filepath)
# TODO why is this done twice?
dataset = DataSet(loader=loader, metadata_path=metadatapath, sample_column="sample")
dataset = DataSet(
loader=loader, metadata_path_or_df=metadatapath, sample_column="sample"
)
metadatapath = (
os.path.join(_parent_directory, "sample_data", "metadata.xlsx")
.replace("pages/", "")
.replace("pages\\", "")
)

loader = MaxQuantLoader(file=filepath)
dataset = DataSet(loader=loader, metadata_path=metadatapath, sample_column="sample")
dataset = DataSet(
loader=loader, metadata_path_or_df=metadatapath, sample_column="sample"
)

dataset.metadata = dataset.metadata[
[
Expand Down
4 changes: 2 additions & 2 deletions docs/import_data.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ maxquant_data = alphastats.MaxQuantLoader(

dataset = alphastats.DataSet(
loader = maxquant_data,
metadata_path="../testfiles/maxquant/metadata.xlsx",
metadata_path_or_df="../testfiles/maxquant/metadata.xlsx",
sample_column="sample"
)
```
Expand Down Expand Up @@ -126,7 +126,7 @@ maxquant_data = alphastats.MaxQuantLoader(

dataset = alphastats.DataSet(
loader = maxquant_data,
metadata_path="../testfiles/maxquant/metadata.xlsx",
metadata_path_or_df="../testfiles/maxquant/metadata.xlsx",
sample_column="sample"
)
```
2 changes: 1 addition & 1 deletion docs/workflow_mq.html
Original file line number Diff line number Diff line change
Expand Up @@ -781,7 +781,7 @@ <h2>2. Create a DataSet<a class="headerlink" href="#2.-Create-a-DataSet" title="
</div>
<div class="input_area highlight-ipython3 notranslate"><div class="highlight"><pre><span></span><span class="n">ds</span> <span class="o">=</span> <span class="n">alphastats</span><span class="o">.</span><span class="n">DataSet</span><span class="p">(</span>
<span class="n">loader</span> <span class="o">=</span> <span class="n">maxquant_data</span><span class="p">,</span>
<span class="n">metadata_path</span> <span class="o">=</span> <span class="s2">&quot;../testfiles/maxquant/metadata.xlsx&quot;</span><span class="p">,</span>
<span class="n">metadata_path_or_df</span> <span class="o">=</span> <span class="s2">&quot;../testfiles/maxquant/metadata.xlsx&quot;</span><span class="p">,</span>
<span class="n">sample_column</span> <span class="o">=</span> <span class="s2">&quot;sample&quot;</span> <span class="c1"># specify the column that corresponds to the sample names in proteinGroups</span>
<span class="p">)</span>
</pre></div>
Expand Down
2 changes: 1 addition & 1 deletion nbs/getting_started.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -723,7 +723,7 @@
"source": [
"ds = DataSet(\n",
" loader=maxquant_data,\n",
" metadata_path=\"../testfiles/maxquant/metadata.xlsx\",\n",
" metadata_path_or_df=\"../testfiles/maxquant/metadata.xlsx\",\n",
" sample_column=\"sample\", # specify the column that corresponds to the sample names in proteinGroups\n",
")"
]
Expand Down
2 changes: 1 addition & 1 deletion nbs/liu_2019.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@
")\n",
"dataset = DataSet(\n",
" loader=loader,\n",
" metadata_path=\"../testfiles/maxquant/metadata.xlsx\",\n",
" metadata_path_or_df=\"../testfiles/maxquant/metadata.xlsx\",\n",
" sample_column=\"sample\",\n",
")"
]
Expand Down
18 changes: 9 additions & 9 deletions nbs/ramus_2016.ipynb

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion tests/gui/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def create_dataset_alphapept():
metadata_path = TEST_INPUT_FILES_PATH / "alphapept/metadata.csv"
return DataSet(
loader=loader,
metadata_path=str(metadata_path),
metadata_path_or_df=str(metadata_path),
sample_column="sample",
)

Expand Down
22 changes: 11 additions & 11 deletions tests/test_DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ def setUp(self):
self.metadata_path = "testfiles/alphapept/metadata.csv"
self.obj = DataSet(
loader=self.loader,
metadata_path=self.metadata_path,
metadata_path_or_df=self.metadata_path,
sample_column="sample",
)
# expected dimensions of matrix
Expand Down Expand Up @@ -239,7 +239,7 @@ def test_remove_misc_samples_in_metadata(self, mock):
)
obj = DataSet(
loader=self.loader,
metadata_path=df,
metadata_path_or_df=df,
sample_column="sample",
)
#  is sample C removed
Expand All @@ -253,7 +253,7 @@ def test_load_metadata_df(self):
df = pd.read_excel(self.metadata_path)
obj = DataSet(
loader=self.loader,
metadata_path=df,
metadata_path_or_df=df,
sample_column="sample",
)
self.assertIsInstance(obj.metadata, pd.DataFrame)
Expand Down Expand Up @@ -403,7 +403,7 @@ def setUp(self):
self.metadata_path = "testfiles/maxquant/metadata.xlsx"
self.obj = DataSet(
loader=self.loader,
metadata_path=self.metadata_path,
metadata_path_or_df=self.metadata_path,
sample_column="sample",
)
# expected dimensions of matrix
Expand All @@ -419,7 +419,7 @@ def test_load_evidence_wrong_sample_names(self):
)
DataSet(
loader=loader,
metadata_path=self.metadata_path,
metadata_path_or_df=self.metadata_path,
sample_column="sample",
)

Expand Down Expand Up @@ -468,7 +468,7 @@ def test_plot_volcano_with_grouplist_wrong_names(self):
def test_plot_volcano_compare_preprocessing_modes_no_randomforest(self):
obj_ut = DataSet(
loader=self.loader,
metadata_path=self.metadata_path,
metadata_path_or_df=self.metadata_path,
sample_column="sample",
)

Expand All @@ -487,7 +487,7 @@ def test_plot_volcano_compare_preprocessing_modes_no_randomforest(self):
def test_plot_volcano_compare_preprocessing_modes_randomforest(self):
obj_ut = DataSet(
loader=self.loader,
metadata_path=self.metadata_path,
metadata_path_or_df=self.metadata_path,
sample_column="sample",
)

Expand Down Expand Up @@ -801,7 +801,7 @@ def setUp(self):
self.metadata_path = "testfiles/diann/metadata.xlsx"
self.obj = DataSet(
loader=self.loader,
metadata_path=self.metadata_path,
metadata_path_or_df=self.metadata_path,
sample_column="analytical_sample external_id",
)
# expected dimensions of matrix
Expand Down Expand Up @@ -916,7 +916,7 @@ def setUp(self):
self.metadata_path = "testfiles/fragpipe/metadata.xlsx"
self.obj = DataSet(
loader=self.loader,
metadata_path=self.metadata_path,
metadata_path_or_df=self.metadata_path,
sample_column="analytical_sample external_id",
)
# expected dimensions of matrix
Expand All @@ -937,7 +937,7 @@ def setUpClass(cls):
cls.cls_metadata_path = "testfiles/spectronaut/metadata.xlsx"
cls.cls_obj = DataSet(
loader=cls.cls_loader,
metadata_path=cls.cls_metadata_path,
metadata_path_or_df=cls.cls_metadata_path,
sample_column="sample",
)

Expand Down Expand Up @@ -983,7 +983,7 @@ def setUpClass(cls):
cls.cls_metadata_path = "testfiles/fragpipe/metadata2.xlsx"
cls.cls_obj = DataSet(
loader=cls.cls_loader,
metadata_path=cls.cls_metadata_path,
metadata_path_or_df=cls.cls_metadata_path,
sample_column="analytical_sample external_id",
)

Expand Down
2 changes: 1 addition & 1 deletion tests/test_DataSet_Pathway.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ def setUp(self):

self.obj = DataSet(
loader=self.loader,
metadata_path=metadata,
metadata_path_or_df=metadata,
sample_column="sample",
)
self.fg_sample = "AC399"
Expand Down
2 changes: 1 addition & 1 deletion tests/test_gpt.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def setUp(self):
self.metadata_path = "testfiles/maxquant/metadata.xlsx"
self.obj = DataSet(
loader=self.loader,
metadata_path=self.metadata_path,
metadata_path_or_df=self.metadata_path,
sample_column="sample",
)
# expected dimensions of matrix
Expand Down

0 comments on commit 68dbd34

Please sign in to comment.