Skip to content

Commit

Permalink
Merge branch 'development' into rename_metadat_param
Browse files Browse the repository at this point in the history
  • Loading branch information
mschwoer authored Sep 20, 2024
2 parents 32b8670 + e06ae41 commit 1863268
Show file tree
Hide file tree
Showing 41 changed files with 1,888 additions and 1,293 deletions.
20 changes: 15 additions & 5 deletions .github/workflows/build_and_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
- uses: pre-commit/action@v3.0.1

get-code-review-input:
runs-on: ubuntu-latest
#if: contains(github.event.pull_request.labels.*.name, 'code-review')
steps:
- uses: MannLabs/alphashared/actions/get-code-review-input@v1
with:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
PR_NUMBER: ${{ github.event.number }}

build-and-test:
runs-on: ubuntu-latest
strategy:
Expand All @@ -39,14 +49,14 @@ jobs:
- name: Print pip freeze
run: |
pip freeze
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
- name: Run notebooks
run: |
python3 -m ipykernel install --user
# TODO add the excluded notebook
TEST_NBS=$(find ./nbs -name "*.ipynb" | grep -v "ramus_2016.ipynb")
python -m pytest --nbmake $(echo $TEST_NBS)
- name: Run tests
run: |
coverage run -m pytest
- name: Upload Coverage to Codecov
uses: codecov/codecov-action@v4
12 changes: 1 addition & 11 deletions .secrets.baseline
Original file line number Diff line number Diff line change
Expand Up @@ -139,16 +139,6 @@
}
],
"results": {
"alphastats/gui/utils/ollama_utils.py": [
{
"type": "Secret Keyword",
"filename": "alphastats/gui/utils/ollama_utils.py",
"hashed_secret": "8ed4322e8e2790b8c928d381ce8d07cfd966e909",
"is_verified": false,
"line_number": 68,
"is_secret": false
}
],
"docs/workflow_mq.html": [
{
"type": "Base64 High Entropy String",
Expand All @@ -160,5 +150,5 @@
}
]
},
"generated_at": "2024-09-12T14:19:09Z"
"generated_at": "2024-09-18T09:54:14Z"
}
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,10 @@ alphastats gui
```
If you get an `AxiosError: Request failed with status code 403` when uploading files, try running `DISABLE_XSRF=1 alphastats gui`.

If you want to use local Large Language Models to help interpret the data,
you need to download and install Ollama (https://ollama.com/download). The URL of the server can be set via the
environment variable `OLLAMA_BASE_URL` (defaults to `http://localhost:11434`).

AlphaStats can be imported as a Python package into any Python script or notebook with the command `import alphastats`.
A brief [Jupyter notebook tutorial](nbs/getting_started.ipynb) on how to use the API is also present in the [nbs folder](nbs).

Expand Down
2 changes: 1 addition & 1 deletion alphastats/DataSet.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ def _get_preprocess(self) -> Preprocess:

def preprocess(
self,
log2_transform: bool = True,
log2_transform: bool = False,
remove_contaminations: bool = False,
subset: bool = False,
data_completeness: float = 0,
Expand Down
20 changes: 16 additions & 4 deletions alphastats/DataSet_Preprocess.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,29 +252,41 @@ def _linear_normalization(self, dataframe: pd.DataFrame):

@ignore_warning(UserWarning)
@ignore_warning(RuntimeWarning)
def _normalization(self, method: str):
def _normalization(self, method: str) -> None:
"""Normalize across samples."""
# TODO make both sample and protein normalization available
if method == "zscore":
scaler = sklearn.preprocessing.StandardScaler()
# normalize samples => for preprocessing
normalized_array = scaler.fit_transform(
self.mat.values.transpose()
).transpose()
# normalize proteins => for downstream processing
# normalized_array = scaler.fit_transform(self.mat.values)

elif method == "quantile":
qt = sklearn.preprocessing.QuantileTransformer(random_state=0)
normalized_array = qt.fit_transform(self.mat.values.transpose()).transpose()
# normalized_array = qt.fit_transform(self.mat.values) # normalize proteins

elif method == "linear":
normalized_array = self._linear_normalization(self.mat)

# normalized_array = self._linear_normalization(
# self.mat.transpose()
# ).transpose() # normalize proteins

elif method == "vst":
minmax = sklearn.preprocessing.MinMaxScaler()
scaler = sklearn.preprocessing.PowerTransformer()
minmaxed_array = minmax.fit_transform(self.mat.values.transpose())
normalized_array = scaler.fit_transform(minmaxed_array).transpose()
# minmaxed_array = minmax.fit_transform(self.mat.values) # normalize proteins
# normalized_array = scaler.fit_transform(minmaxed_array) # normalize proteins

else:
raise ValueError(
"Normalization method: {method} is invalid"
f"Normalization method: {method} is invalid. "
"Choose from 'zscore', 'quantile', 'linear' normalization. or 'vst' for variance stabilization transformation"
)

Expand Down Expand Up @@ -321,7 +333,7 @@ def _normalization(self, method: str):
# return results_list

def _log2_transform(self):
self.mat = np.log2(self.mat + 0.1)
self.mat = np.log2(self.mat)
self.preprocessing_info.update({PreprocessingStateKeys.LOG2_TRANSFORMED: True})
print("Data has been log2-transformed.")

Expand Down Expand Up @@ -350,7 +362,7 @@ def batch_correction(self, batch: str) -> pd.DataFrame:
@ignore_warning(RuntimeWarning)
def preprocess(
self,
log2_transform: bool = True,
log2_transform: bool = False,
remove_contaminations: bool = False,
subset: bool = False,
data_completeness: float = 0,
Expand Down
5 changes: 5 additions & 0 deletions alphastats/gui/.streamlit/config.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,11 @@ font = "sans serif"

[server]
maxUploadSize = 500
enableXsrfProtection = false
enableCORS = false

[browser]
gatherUsageStats = true

[logger]
level = "debug" # TODO this seems to have no effect?
9 changes: 5 additions & 4 deletions alphastats/gui/pages/02_Import Data.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from alphastats.gui.utils.options import SOFTWARE_OPTIONS
from alphastats.gui.utils.ui_helper import (
StateKeys,
empty_session_state,
init_session_state,
sidebar_info,
Expand All @@ -27,11 +28,11 @@ def _finalize_data_loading(
dataset: DataSet,
) -> None:
"""Finalize the data loading process."""
st.session_state["loader"] = (
st.session_state[StateKeys.LOADER] = (
loader # TODO: Figure out if we even need the loader here, as the dataset has the loader as an attribute.
)
st.session_state["metadata_columns"] = metadata_columns
st.session_state["dataset"] = dataset
st.session_state[StateKeys.METADATA_COLUMNS] = metadata_columns
st.session_state[StateKeys.DATASET] = dataset

load_options()
sidebar_info()
Expand Down Expand Up @@ -64,7 +65,7 @@ def _finalize_data_loading(


st.markdown("### Import Proteomics Data")
if "dataset" in st.session_state:
if StateKeys.DATASET in st.session_state:
st.info(f"DataSet already present.")
st.page_link("pages/03_Data Overview.py", label="=> Go to data overview page..")
st.stop()
Expand Down
6 changes: 3 additions & 3 deletions alphastats/gui/pages/03_Data Overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,18 @@
get_intensity_distribution_unprocessed,
get_sample_histogram_matrix,
)
from alphastats.gui.utils.ui_helper import init_session_state, sidebar_info
from alphastats.gui.utils.ui_helper import StateKeys, init_session_state, sidebar_info

init_session_state()
sidebar_info()

if "dataset" not in st.session_state:
if StateKeys.DATASET not in st.session_state:
st.info("Import Data first")
st.stop()

st.markdown("### DataSet Info")

display_loaded_dataset(st.session_state["dataset"])
display_loaded_dataset(st.session_state[StateKeys.DATASET])

st.markdown("## DataSet overview")

Expand Down
30 changes: 17 additions & 13 deletions alphastats/gui/pages/03_Preprocessing.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
run_preprocessing,
update_workflow,
)
from alphastats.gui.utils.ui_helper import init_session_state, sidebar_info
from alphastats.gui.utils.ui_helper import StateKeys, init_session_state, sidebar_info

init_session_state()
sidebar_info()

if "workflow" not in st.session_state:
st.session_state["workflow"] = [
if StateKeys.WORKFLOW not in st.session_state:
st.session_state[StateKeys.WORKFLOW] = [
PREPROCESSING_STEPS.REMOVE_CONTAMINATIONS,
PREPROCESSING_STEPS.SUBSET,
PREPROCESSING_STEPS.LOG2_TRANSFORM,
Expand All @@ -26,29 +26,33 @@
c1, c2 = st.columns([1, 1])

with c2:
if "dataset" in st.session_state:
settings = configure_preprocessing(dataset=st.session_state["dataset"])
if StateKeys.DATASET in st.session_state:
settings = configure_preprocessing(dataset=st.session_state[StateKeys.DATASET])
new_workflow = update_workflow(settings)
if new_workflow != st.session_state.workflow:
st.session_state.workflow = new_workflow
if new_workflow != st.session_state[StateKeys.WORKFLOW]:
st.session_state[StateKeys.WORKFLOW] = new_workflow

with c1:
st.write("#### Flowchart of preprocessing workflow:")

selected_nodes = draw_workflow(st.session_state.workflow)
selected_nodes = draw_workflow(st.session_state[StateKeys.WORKFLOW])

if "dataset" not in st.session_state:
if StateKeys.DATASET not in st.session_state:
st.info("Import data first to configure and run preprocessing")

else:
c11, c12 = st.columns([1, 1])
if c11.button("Run preprocessing", key="_run_preprocessing"):
run_preprocessing(settings, st.session_state["dataset"])
run_preprocessing(settings, st.session_state[StateKeys.DATASET])
# TODO show more info about the preprocessing steps
display_preprocessing_info(st.session_state["dataset"].preprocessing_info)
display_preprocessing_info(
st.session_state[StateKeys.DATASET].preprocessing_info
)

if c12.button("Reset all Preprocessing steps", key="_reset_preprocessing"):
reset_preprocessing(st.session_state["dataset"])
display_preprocessing_info(st.session_state["dataset"].preprocessing_info)
reset_preprocessing(st.session_state[StateKeys.DATASET])
display_preprocessing_info(
st.session_state[StateKeys.DATASET].preprocessing_info
)

# TODO: Add comparison plot of intensity distribution before and after preprocessing
20 changes: 11 additions & 9 deletions alphastats/gui/pages/04_Analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
save_plot_to_session_state,
)
from alphastats.gui.utils.ui_helper import (
StateKeys,
convert_df,
init_session_state,
sidebar_info,
Expand All @@ -24,8 +25,8 @@ def select_analysis():
load_options()
method = st.selectbox(
"Analysis",
options=list(st.session_state.plotting_options.keys())
+ list(st.session_state.statistic_options.keys()),
options=list(st.session_state[StateKeys.PLOTTING_OPTIONS].keys())
+ list(st.session_state[StateKeys.STATISTIC_OPTIONS].keys()),
)
return method

Expand All @@ -50,11 +51,11 @@ def select_analysis():
st.markdown(styl, unsafe_allow_html=True)


if "plot_list" not in st.session_state:
st.session_state["plot_list"] = []
if StateKeys.PLOT_LIST not in st.session_state:
st.session_state[StateKeys.PLOT_LIST] = []


if "dataset" in st.session_state:
if StateKeys.DATASET in st.session_state:
c1, c2 = st.columns((1, 2))

plot_to_display = False
Expand All @@ -64,15 +65,16 @@ def select_analysis():
with c1:
method = select_analysis()

if method in st.session_state.plotting_options:
if method in st.session_state[StateKeys.PLOTTING_OPTIONS]:
analysis_result = get_analysis(
method=method, options_dict=st.session_state.plotting_options
method=method, options_dict=st.session_state[StateKeys.PLOTTING_OPTIONS]
)
plot_to_display = True

elif method in st.session_state.statistic_options:
elif method in st.session_state[StateKeys.STATISTIC_OPTIONS]:
analysis_result = get_analysis(
method=method, options_dict=st.session_state.statistic_options
method=method,
options_dict=st.session_state[StateKeys.STATISTIC_OPTIONS],
)
df_to_display = True

Expand Down
Loading

0 comments on commit 1863268

Please sign in to comment.