From 35973c28a86bd04797bda62a4072120833122f99 Mon Sep 17 00:00:00 2001 From: elena-krismer Date: Thu, 31 Aug 2023 11:01:56 +0200 Subject: [PATCH] add replacement of 0 to gui --- HISTORY.md | 3 +++ alphastats/gui/pages/02_Import Data.py | 12 +++++++-- requirements.txt | 7 ++--- tests/test_DataSet.py | 37 ++++++-------------------- 4 files changed, 25 insertions(+), 34 deletions(-) diff --git a/HISTORY.md b/HISTORY.md index ce8f97eb..2c62cedb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -1,5 +1,8 @@ # Changelog +# 0.7.0 +* ADD parameter for loading 0 values as NaNs + # 0.6.5 * FIX coloring of plot_sampledistribution issue #229 * ADD nbformat requirement issue #230 diff --git a/alphastats/gui/pages/02_Import Data.py b/alphastats/gui/pages/02_Import Data.py index aaa07e11..1ccae4b4 100644 --- a/alphastats/gui/pages/02_Import Data.py +++ b/alphastats/gui/pages/02_Import Data.py @@ -102,6 +102,10 @@ def select_columns_for_loaders(software, software_df:None): key="intensity_column", ) + st.checkbox("Replace 0 values with NaN/mark as missing values in the Intensity columns. ", value = True, + key="replace_zero_with_nan") + + st.markdown("Select index column (with ProteinGroups) for further analysis") st.selectbox( @@ -117,6 +121,9 @@ def select_columns_for_loaders(software, software_df:None): key="intensity_column", ) + st.checkbox("Replace 0 values with NaN/mark as missing values in the Intensity columns. ", value = True, + key="replace_zero_with_nan") + st.markdown("Select index column (with ProteinGroups) for further analysis") st.selectbox( @@ -126,10 +133,10 @@ def select_columns_for_loaders(software, software_df:None): ) -def load_proteomics_data(uploaded_file, intensity_column, index_column, software): +def load_proteomics_data(uploaded_file, intensity_column, index_column, replace_zero_with_nan, software): """load software file into loader object from alphastats""" loader = software_options.get(software)["loader_function"]( - uploaded_file, intensity_column, index_column + uploaded_file, intensity_column, index_column, replace_zero_with_nan, ) return loader @@ -191,6 +198,7 @@ def upload_softwarefile(software): softwarefile_df, intensity_column=st.session_state.intensity_column, index_column=st.session_state.index_column, + replace_zero_with_nan=st.session_state.replace_zero_with_nan, software=software, ) st.session_state["loader"] = loader diff --git a/requirements.txt b/requirements.txt index de4ac1e2..38fc0696 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ pandas==2.0.2 -scikit-learn==1.2.2 +scikit-learn==1.3.0 data_cache>=0.1.6 -plotly==5.15.0 +plotly==5.16.1 statsmodels==0.14.0 sklearn_pandas==2.2.0 pingouin==0.5.3 @@ -22,4 +22,5 @@ combat==0.3.3 xlsxwriter==3.1.0 pyteomics==4.6.0 openpyxl>=3.0.10 -nbformat>=5.0 \ No newline at end of file +nbformat>=5.0 +threadpoolctl>=3.0 \ No newline at end of file diff --git a/tests/test_DataSet.py b/tests/test_DataSet.py index 5e7df5d0..734bfd7a 100644 --- a/tests/test_DataSet.py +++ b/tests/test_DataSet.py @@ -390,7 +390,8 @@ def test_plot_clustermap_with_label_bar(self): class TestMaxQuantDataSet(BaseTestDataSet.BaseTest): def setUp(self): - self.loader = MaxQuantLoader(file="testfiles/maxquant/proteinGroups.txt") + self.loader = MaxQuantLoader(file="testfiles/maxquant/proteinGroups.txt", + replace_zero_with_nan=False) self.metadata_path = "testfiles/maxquant/metadata.xlsx" self.obj = DataSet( loader=self.loader, @@ -406,7 +407,7 @@ def test_load_evidence_wrong_sample_names(self): with self.assertRaises(ValueError): loader = MaxQuantLoader( file="testfiles/maxquant/proteinGroups.txt", - evidence_file="testfiles/maxquant_go/evidence.txt", + evidence_file="testfiles/maxquant_go/evidence.txt" ) DataSet( loader=loader, metadata_path=self.metadata_path, sample_column="sample", @@ -737,16 +738,6 @@ def test_multicova_analysis_invalid_covariates(self): ) self.assertEqual(res.shape[1], 45) - # def test_perform_gsea(self): - # df = self.obj.perform_gsea(column="disease", - # group1="healthy", - # group2="liver cirrhosis", - # gene_sets= 'KEGG_2019_Human') - - # cholesterol_enhanced = 'Cholesterol metabolism' in df.index.to_list() - # self.assertTrue(cholersterol_enhanced) - - class TestDIANNDataSet(BaseTestDataSet.BaseTest): def setUp(self): self.loader = DIANNLoader(file="testfiles/diann/report_final.pg_matrix.tsv") @@ -843,27 +834,13 @@ def test_volcano_plot_wrongmethod(self): method="wrongmethod", ) - # def test_diff_expression_analysis_with_list(self): - # self.obj.preprocess(imputation="knn") - # column="grouping1" - # group1="Healthy" - # group2="Disease" - # group1_samples = self.obj.metadata[self.obj.metadata[column] == group1][ - # "sample" - # ].tolist() - # group2_samples = self.obj.metadata[self.obj.metadata[column] == group2][ - # "sample" - # ].tolist() - # self.obj.diff_expression_analysis( - # group1=group1_samples, - # group2=group2_samples) - class TestFragPipeDataSet(BaseTestDataSet.BaseTest): def setUp(self): self.loader = FragPipeLoader( file="testfiles/fragpipe/combined_proteins.tsv", intensity_column="[sample] Razor Intensity", + replace_zero_with_nan=False ) self.metadata_path = "testfiles/fragpipe/metadata.xlsx" self.obj = DataSet( @@ -887,7 +864,8 @@ def setUpClass(cls): ) cls.cls_loader = SpectronautLoader( - file="testfiles/spectronaut/results.tsv", filter_qvalue=False + file="testfiles/spectronaut/results.tsv", filter_qvalue=False, + replace_zero_with_nan=False ) cls.cls_metadata_path = "testfiles/spectronaut/metadata.xlsx" cls.cls_obj = DataSet( @@ -927,7 +905,8 @@ def setUpClass(cls): "S7 Razor Intensity", "S8 Razor Intensity" ], index_column="Protein", - sep="\t" + sep="\t", + replace_zero_with_nan=False ) cls.cls_metadata_path = "testfiles/fragpipe/metadata2.xlsx" cls.cls_obj = DataSet(