Skip to content

Commit

Permalink
add replacement of 0 to gui
Browse files Browse the repository at this point in the history
  • Loading branch information
elena-krismer committed Aug 31, 2023
1 parent 993d25b commit 35973c2
Show file tree
Hide file tree
Showing 4 changed files with 25 additions and 34 deletions.
3 changes: 3 additions & 0 deletions HISTORY.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
# Changelog

# 0.7.0
* ADD parameter for loading 0 values as NaNs

# 0.6.5
* FIX coloring of plot_sampledistribution issue #229
* ADD nbformat requirement issue #230
Expand Down
12 changes: 10 additions & 2 deletions alphastats/gui/pages/02_Import Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def select_columns_for_loaders(software, software_df:None):
key="intensity_column",
)

st.checkbox("Replace 0 values with NaN/mark as missing values in the Intensity columns. ", value = True,
key="replace_zero_with_nan")


st.markdown("Select index column (with ProteinGroups) for further analysis")

st.selectbox(
Expand All @@ -117,6 +121,9 @@ def select_columns_for_loaders(software, software_df:None):
key="intensity_column",
)

st.checkbox("Replace 0 values with NaN/mark as missing values in the Intensity columns. ", value = True,
key="replace_zero_with_nan")

st.markdown("Select index column (with ProteinGroups) for further analysis")

st.selectbox(
Expand All @@ -126,10 +133,10 @@ def select_columns_for_loaders(software, software_df:None):
)


def load_proteomics_data(uploaded_file, intensity_column, index_column, software):
def load_proteomics_data(uploaded_file, intensity_column, index_column, replace_zero_with_nan, software):
"""load software file into loader object from alphastats"""
loader = software_options.get(software)["loader_function"](
uploaded_file, intensity_column, index_column
uploaded_file, intensity_column, index_column, replace_zero_with_nan,
)
return loader

Expand Down Expand Up @@ -191,6 +198,7 @@ def upload_softwarefile(software):
softwarefile_df,
intensity_column=st.session_state.intensity_column,
index_column=st.session_state.index_column,
replace_zero_with_nan=st.session_state.replace_zero_with_nan,
software=software,
)
st.session_state["loader"] = loader
Expand Down
7 changes: 4 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
pandas==2.0.2
scikit-learn==1.2.2
scikit-learn==1.3.0
data_cache>=0.1.6
plotly==5.15.0
plotly==5.16.1
statsmodels==0.14.0
sklearn_pandas==2.2.0
pingouin==0.5.3
Expand All @@ -22,4 +22,5 @@ combat==0.3.3
xlsxwriter==3.1.0
pyteomics==4.6.0
openpyxl>=3.0.10
nbformat>=5.0
nbformat>=5.0
threadpoolctl>=3.0
37 changes: 8 additions & 29 deletions tests/test_DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,8 @@ def test_plot_clustermap_with_label_bar(self):

class TestMaxQuantDataSet(BaseTestDataSet.BaseTest):
def setUp(self):
self.loader = MaxQuantLoader(file="testfiles/maxquant/proteinGroups.txt")
self.loader = MaxQuantLoader(file="testfiles/maxquant/proteinGroups.txt",
replace_zero_with_nan=False)
self.metadata_path = "testfiles/maxquant/metadata.xlsx"
self.obj = DataSet(
loader=self.loader,
Expand All @@ -406,7 +407,7 @@ def test_load_evidence_wrong_sample_names(self):
with self.assertRaises(ValueError):
loader = MaxQuantLoader(
file="testfiles/maxquant/proteinGroups.txt",
evidence_file="testfiles/maxquant_go/evidence.txt",
evidence_file="testfiles/maxquant_go/evidence.txt"
)
DataSet(
loader=loader, metadata_path=self.metadata_path, sample_column="sample",
Expand Down Expand Up @@ -737,16 +738,6 @@ def test_multicova_analysis_invalid_covariates(self):
)
self.assertEqual(res.shape[1], 45)

# def test_perform_gsea(self):
# df = self.obj.perform_gsea(column="disease",
# group1="healthy",
# group2="liver cirrhosis",
# gene_sets= 'KEGG_2019_Human')

# cholesterol_enhanced = 'Cholesterol metabolism' in df.index.to_list()
# self.assertTrue(cholesterol_enhanced)


class TestDIANNDataSet(BaseTestDataSet.BaseTest):
def setUp(self):
self.loader = DIANNLoader(file="testfiles/diann/report_final.pg_matrix.tsv")
Expand Down Expand Up @@ -843,27 +834,13 @@ def test_volcano_plot_wrongmethod(self):
method="wrongmethod",
)

# def test_diff_expression_analysis_with_list(self):
# self.obj.preprocess(imputation="knn")
# column="grouping1"
# group1="Healthy"
# group2="Disease"
# group1_samples = self.obj.metadata[self.obj.metadata[column] == group1][
# "sample"
# ].tolist()
# group2_samples = self.obj.metadata[self.obj.metadata[column] == group2][
# "sample"
# ].tolist()
# self.obj.diff_expression_analysis(
# group1=group1_samples,
# group2=group2_samples)


class TestFragPipeDataSet(BaseTestDataSet.BaseTest):
def setUp(self):
self.loader = FragPipeLoader(
file="testfiles/fragpipe/combined_proteins.tsv",
intensity_column="[sample] Razor Intensity",
replace_zero_with_nan=False
)
self.metadata_path = "testfiles/fragpipe/metadata.xlsx"
self.obj = DataSet(
Expand All @@ -887,7 +864,8 @@ def setUpClass(cls):
)

cls.cls_loader = SpectronautLoader(
file="testfiles/spectronaut/results.tsv", filter_qvalue=False
file="testfiles/spectronaut/results.tsv", filter_qvalue=False,
replace_zero_with_nan=False
)
cls.cls_metadata_path = "testfiles/spectronaut/metadata.xlsx"
cls.cls_obj = DataSet(
Expand Down Expand Up @@ -927,7 +905,8 @@ def setUpClass(cls):
"S7 Razor Intensity", "S8 Razor Intensity"
],
index_column="Protein",
sep="\t"
sep="\t",
replace_zero_with_nan=False
)
cls.cls_metadata_path = "testfiles/fragpipe/metadata2.xlsx"
cls.cls_obj = DataSet(
Expand Down

0 comments on commit 35973c2

Please sign in to comment.