Skip to content

Commit

Permalink
fix merge conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwoer committed Sep 20, 2024
2 parents bdbb6ab + 629b03c commit a300e86
Show file tree
Hide file tree
Showing 41 changed files with 1,890 additions and 1,294 deletions.
20 changes: 15 additions & 5 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: pre-commit/action@v3.0.1

get-code-review-input:
runs-on: ubuntu-latest
#if: contains(github.event.pull_request.labels.*.name, 'code-review')
steps:
- uses: MannLabs/alphashared/actions/get-code-review-input@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.number }}

build-and-test:
runs-on: ubuntu-latest
strategy:
Expand All @@ -39,14 +49,14 @@ jobs:
- name: Print pip freeze
run: |
pip freeze
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
- name: Run notebooks
run: |
python3 -m ipykernel install --user
# TODO add the excluded notebook
TEST_NBS=$(find ./nbs -name "*.ipynb" | grep -v "ramus_2016.ipynb")
python -m pytest --nbmake $(echo $TEST_NBS)
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
12 changes: 1 addition & 11 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,6 @@
}
],
"results": {
"alphastats/gui/utils/ollama_utils.py": [
{
"type": "Secret Keyword",
"filename": "alphastats/gui/utils/ollama_utils.py",
"hashed_secret": "8ed4322e8e2790b8c928d381ce8d07cfd966e909",
"is_verified": false,
"line_number": 68,
"is_secret": false
}
],
"docs/workflow_mq.html": [
{
"type": "Base64 High Entropy String",
Expand All @@ -160,5 +150,5 @@
}
]
},
"generated_at": "2024-09-12T14:19:09Z"
"generated_at": "2024-09-18T09:54:14Z"
}
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ alphastats gui
```
If you get an `AxiosError: Request failed with status code 403` when uploading files, try running `DISABLE_XSRF=1 alphastats gui`.

If you want to use local Large Language Models to help interpret the data,
you need to download and install Ollama (https://ollama.com/download). The URL of the server can be set via the
environment variable `OLLAMA_BASE_URL` (defaults to `http://localhost:11434`).

AlphaStats can be imported as a Python package into any Python script or notebook with the command `import alphastats`.
A brief [Jupyter notebook tutorial](nbs/getting_started.ipynb) on how to use the API is also present in the [nbs folder](nbs).

Expand Down
2 changes: 1 addition & 1 deletion alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _get_preprocess(self) -> Preprocess:

def preprocess(
self,
log2_transform: bool = True,
log2_transform: bool = False,
remove_contaminations: bool = False,
subset: bool = False,
data_completeness: float = 0,
Expand Down
20 changes: 16 additions & 4 deletions alphastats/DataSet_Preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,29 +253,41 @@ def _linear_normalization(self, dataframe: pd.DataFrame):

@ignore_warning(UserWarning)
@ignore_warning(RuntimeWarning)
def _normalization(self, method: str):
def _normalization(self, method: str) -> None:
"""Normalize across samples."""
# TODO make both sample and protein normalization available
if method == "zscore":
scaler = sklearn.preprocessing.StandardScaler()
# normalize samples => for preprocessing
normalized_array = scaler.fit_transform(
self.mat.values.transpose()
).transpose()
# normalize proteins => for downstream processing
# normalized_array = scaler.fit_transform(self.mat.values)

elif method == "quantile":
qt = sklearn.preprocessing.QuantileTransformer(random_state=0)
normalized_array = qt.fit_transform(self.mat.values.transpose()).transpose()
# normalized_array = qt.fit_transform(self.mat.values) # normalize proteins

elif method == "linear":
normalized_array = self._linear_normalization(self.mat)

# normalized_array = self._linear_normalization(
# self.mat.transpose()
# ).transpose() # normalize proteins

elif method == "vst":
minmax = sklearn.preprocessing.MinMaxScaler()
scaler = sklearn.preprocessing.PowerTransformer()
minmaxed_array = minmax.fit_transform(self.mat.values.transpose())
normalized_array = scaler.fit_transform(minmaxed_array).transpose()
# minmaxed_array = minmax.fit_transform(self.mat.values) # normalize proteins
# normalized_array = scaler.fit_transform(minmaxed_array) # normalize proteins

else:
raise ValueError(
"Normalization method: {method} is invalid"
f"Normalization method: {method} is invalid. "
"Choose from 'zscore', 'quantile', 'linear' normalization. or 'vst' for variance stabilization transformation"
)

Expand Down Expand Up @@ -322,7 +334,7 @@ def _normalization(self, method: str):
# return results_list

def _log2_transform(self):
self.mat = np.log2(self.mat + 0.1)
self.mat = np.log2(self.mat)
self.preprocessing_info.update({PreprocessingStateKeys.LOG2_TRANSFORMED: True})
print("Data has been log2-transformed.")

Expand Down Expand Up @@ -351,7 +363,7 @@ def batch_correction(self, batch: str) -> pd.DataFrame:
@ignore_warning(RuntimeWarning)
def preprocess(
self,
log2_transform: bool = True,
log2_transform: bool = False,
remove_contaminations: bool = False,
subset: bool = False,
data_completeness: float = 0,
Expand Down
5 changes: 5 additions & 0 deletions alphastats/gui/.streamlit/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ font = "sans serif"

[server]
maxUploadSize = 500
enableXsrfProtection = false
enableCORS = false

[browser]
gatherUsageStats = true

[logger]
level = "debug" # TODO this seems to have no effect?
9 changes: 5 additions & 4 deletions alphastats/gui/pages/02_Import Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from alphastats.gui.utils.options import SOFTWARE_OPTIONS
from alphastats.gui.utils.ui_helper import (
StateKeys,
empty_session_state,
init_session_state,
sidebar_info,
Expand All @@ -27,11 +28,11 @@ def _finalize_data_loading(
dataset: DataSet,
) -> None:
"""Finalize the data loading process."""
st.session_state["loader"] = (
st.session_state[StateKeys.LOADER] = (
loader # TODO: Figure out if we even need the loader here, as the dataset has the loader as an attribute.
)
st.session_state["metadata_columns"] = metadata_columns
st.session_state["dataset"] = dataset
st.session_state[StateKeys.METADATA_COLUMNS] = metadata_columns
st.session_state[StateKeys.DATASET] = dataset

load_options()
sidebar_info()
Expand Down Expand Up @@ -64,7 +65,7 @@ def _finalize_data_loading(


st.markdown("### Import Proteomics Data")
if "dataset" in st.session_state:
if StateKeys.DATASET in st.session_state:
st.info(f"DataSet already present.")
st.page_link("pages/03_Data Overview.py", label="=> Go to data overview page..")
st.stop()
Expand Down
6 changes: 3 additions & 3 deletions alphastats/gui/pages/03_Data Overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@
get_intensity_distribution_unprocessed,
get_sample_histogram_matrix,
)
from alphastats.gui.utils.ui_helper import init_session_state, sidebar_info
from alphastats.gui.utils.ui_helper import StateKeys, init_session_state, sidebar_info

init_session_state()
sidebar_info()

if "dataset" not in st.session_state:
if StateKeys.DATASET not in st.session_state:
st.info("Import Data first")
st.stop()

st.markdown("### DataSet Info")

display_loaded_dataset(st.session_state["dataset"])
display_loaded_dataset(st.session_state[StateKeys.DATASET])

st.markdown("## DataSet overview")

Expand Down
30 changes: 17 additions & 13 deletions alphastats/gui/pages/03_Preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
run_preprocessing,
update_workflow,
)
from alphastats.gui.utils.ui_helper import init_session_state, sidebar_info
from alphastats.gui.utils.ui_helper import StateKeys, init_session_state, sidebar_info

init_session_state()
sidebar_info()

if "workflow" not in st.session_state:
st.session_state["workflow"] = [
if StateKeys.WORKFLOW not in st.session_state:
st.session_state[StateKeys.WORKFLOW] = [
PREPROCESSING_STEPS.REMOVE_CONTAMINATIONS,
PREPROCESSING_STEPS.SUBSET,
PREPROCESSING_STEPS.LOG2_TRANSFORM,
Expand All @@ -26,29 +26,33 @@
c1, c2 = st.columns([1, 1])

with c2:
if "dataset" in st.session_state:
settings = configure_preprocessing(dataset=st.session_state["dataset"])
if StateKeys.DATASET in st.session_state:
settings = configure_preprocessing(dataset=st.session_state[StateKeys.DATASET])
new_workflow = update_workflow(settings)
if new_workflow != st.session_state.workflow:
st.session_state.workflow = new_workflow
if new_workflow != st.session_state[StateKeys.WORKFLOW]:
st.session_state[StateKeys.WORKFLOW] = new_workflow

with c1:
st.write("#### Flowchart of preprocessing workflow:")

selected_nodes = draw_workflow(st.session_state.workflow)
selected_nodes = draw_workflow(st.session_state[StateKeys.WORKFLOW])

if "dataset" not in st.session_state:
if StateKeys.DATASET not in st.session_state:
st.info("Import data first to configure and run preprocessing")

else:
c11, c12 = st.columns([1, 1])
if c11.button("Run preprocessing", key="_run_preprocessing"):
run_preprocessing(settings, st.session_state["dataset"])
run_preprocessing(settings, st.session_state[StateKeys.DATASET])
# TODO show more info about the preprocessing steps
display_preprocessing_info(st.session_state["dataset"].preprocessing_info)
display_preprocessing_info(
st.session_state[StateKeys.DATASET].preprocessing_info
)

if c12.button("Reset all Preprocessing steps", key="_reset_preprocessing"):
reset_preprocessing(st.session_state["dataset"])
display_preprocessing_info(st.session_state["dataset"].preprocessing_info)
reset_preprocessing(st.session_state[StateKeys.DATASET])
display_preprocessing_info(
st.session_state[StateKeys.DATASET].preprocessing_info
)

# TODO: Add comparison plot of intensity distribution before and after preprocessing
20 changes: 11 additions & 9 deletions alphastats/gui/pages/04_Analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
save_plot_to_session_state,
)
from alphastats.gui.utils.ui_helper import (
StateKeys,
convert_df,
init_session_state,
sidebar_info,
Expand All @@ -24,8 +25,8 @@ def select_analysis():
load_options()
method = st.selectbox(
"Analysis",
options=list(st.session_state.plotting_options.keys())
+ list(st.session_state.statistic_options.keys()),
options=list(st.session_state[StateKeys.PLOTTING_OPTIONS].keys())
+ list(st.session_state[StateKeys.STATISTIC_OPTIONS].keys()),
)
return method

Expand All @@ -50,11 +51,11 @@ def select_analysis():
st.markdown(styl, unsafe_allow_html=True)


if "plot_list" not in st.session_state:
st.session_state["plot_list"] = []
if StateKeys.PLOT_LIST not in st.session_state:
st.session_state[StateKeys.PLOT_LIST] = []


if "dataset" in st.session_state:
if StateKeys.DATASET in st.session_state:
c1, c2 = st.columns((1, 2))

plot_to_display = False
Expand All @@ -64,15 +65,16 @@ def select_analysis():
with c1:
method = select_analysis()

if method in st.session_state.plotting_options.keys():
if method in st.session_state[StateKeys.PLOTTING_OPTIONS].keys():
analysis_result = get_analysis(
method=method, options_dict=st.session_state.plotting_options
method=method, options_dict=st.session_state[StateKeys.PLOTTING_OPTIONS]
)
plot_to_display = True

elif method in st.session_state.statistic_options.keys():
elif method in st.session_state[StateKeys.STATISTIC_OPTIONS].keys():
analysis_result = get_analysis(
method=method, options_dict=st.session_state.statistic_options
method=method,
options_dict=st.session_state[StateKeys.STATISTIC_OPTIONS],
)
df_to_display = True

Expand Down
Loading

0 comments on commit a300e86

Please sign in to comment.