Merge pull request #11 from Aura-healthcare/dev
Merge Dev into Main
alexisgcomte authored Dec 7, 2024
2 parents 39a530d + a255b83 commit fe01c4e
Showing 72 changed files with 168,630 additions and 1,391 deletions.
20 changes: 15 additions & 5 deletions .github/workflows/github-actions-seizure-pipline.yml
@@ -15,19 +15,29 @@ permissions:
jobs:
build:

runs-on: ubuntu-latest
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest, windows-latest]
python-version: ['3.6', '3.7', '3.8', '3.9']
#exclude:
#- os: macos-latest
# python-version: '3.8'
#- os: windows-latest
# python-version: '3.6'

steps:
- uses: actions/checkout@v3
- name: Set up Python 3.6.8
- name: Set up Python 3.x
uses: actions/setup-python@v3
with:
python-version: "3.6.8"
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
if [ -f requirements_dev.txt ]; then pip install -r requirements_dev.txt; fi
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
@@ -36,4 +46,4 @@ jobs:
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Test with pytest
run: |
pytest
pytest -s -vvv ./tests --cov=src --cov-fail-under=80
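With `fail-fast: false`, the 3 × 4 matrix above (three operating systems, four Python versions) expands into twelve independent jobs that all run to completion even when one fails, and the commented-out `exclude` block shows how individual os/python pairs could later be dropped from the cross-product. One caveat: the `if [ -f requirements.txt ]` install step assumes a POSIX shell, so the `windows-latest` jobs would likely need a `shell: bash` override on that step.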
3 changes: 2 additions & 1 deletion .gitignore
@@ -136,4 +136,5 @@ output/db/*csv
cloud/
tests/output/
exports/
output/*/*
output/*/*
data/data_pl
58 changes: 48 additions & 10 deletions Makefile
@@ -2,14 +2,17 @@ FOLDER_PATH= .
SRC_PATH=./src
TEST_PATH=./tests

DATA_PATH=data
DATA_PATH=data/PL
EXPORT_PATH=./output

TSE_BI_FORMATTING=dataset
COMPARISON_FOLDER=res-v0_6

# UTILITIES
# ---------

clean:
rm output/db/*csv
find output -mindepth 1 ! -name README.md -delete

flake8:
. $(FOLDER_PATH)/env/bin/activate; \
@@ -23,6 +26,11 @@ test:
. $(FOLDER_PATH)/env/bin/activate; \
pytest -s -vvv $(TEST_PATH)

test_fetch:
. $(FOLDER_PATH)/env/bin/activate; \
pytest -s -vvv $(TEST_PATH)/test_src_usecase/test_fetch_database.py


coverage:
. $(FOLDER_PATH)/env/bin/activate; \
pytest --cov=$(SRC_PATH) --cov-report html $(TEST_PATH)
@@ -31,7 +39,7 @@ coverage:
# -------------
fetch_data:
. $(FOLDER_PATH)/env/bin/activate; \
python3 src/usecase/fetch_database.py --data-folder $(DATA_PATH) --export-folder $(EXPORT_PATH)/fetched_data
python3 src/usecase/fetch_database.py --data-folder-path $(DATA_PATH) --export-folder $(EXPORT_PATH)/fetched_data --infer-database


# PREPROCESSING
@@ -41,7 +49,15 @@ fetch_data:
# PYTHON SCRIPT ON INDIVIDUAL FILES
individual_detect_qrs:
. $(FOLDER_PATH)/env/bin/activate; \
python3 src/usecase/detect_qrs.py --qrs-file-path $(DATA_PATH)/tuh/dev/01_tcp_ar/002/00009578/00009578_s006_t001.edf --method hamilton --exam-id 00009578_s006_t001 --output-folder $(EXPORT_PATH)/individual/res-v0_6
python3 src/usecase/detect_qrs.py --qrs-file-path $(DATA_PATH)/002/00009578/00009578_s006_t001.edf --method hamilton --exam-id 00009578_s006_t001 --output-folder $(EXPORT_PATH)/individual/res-v0_6

individual_apply_ecg_qc:
. $(FOLDER_PATH)/env/bin/activate; \
python3 src/usecase/apply_ecg_qc.py --qrs-file-path data/tuh/dev/01_tcp_ar/002/00009578/00009578_s006_t001.edf --exam-id 00009578_s006_t001 --output-folder $(EXPORT_PATH)/ecg_qc-v0_6 --formatting dataset

individual_compare_qrs_detectors:
. $(FOLDER_PATH)/env/bin/activate; \
python3 src/usecase/compare_qrs_detectors.py --reference-rr-intervals-file-path output/res-v0_6/dev/01_tcp_ar/002/00009578/rr_00009578_s002_t001.csv --comparison-rr-intervals-file-path output/res-v0_6/dev/01_tcp_ar/002/00009578/rr_00009578_s002_t001.csv --output-folder $(EXPORT_PATH)/individual/comp-v0_6 --formatting $(TSE_BI_FORMATTING)

individual_compute_hrvanalysis_features:
. $(FOLDER_PATH)/env/bin/activate; \
@@ -52,17 +68,23 @@ individual_consolidate_feats_and_annot:
python3 src/usecase/consolidate_feats_and_annot.py --features-file-path exports/individual/feats-v0_6/00009578_s006_t001.csv --annotations-file-path $(DATA_PATH)/tuh/dev/01_tcp_ar/002/00009578/00009578_s002_t001.tse_bi --output-folder $(EXPORT_PATH)/individual/cons_v0_6


#WIP
example_ecg_qc:
python3 src/usecase/apply_ecg_qc.py --filepath data/tuh/dev/01_tcp_ar/002/00009578/00009578_s006_t001.edf --output-folder . --sampling-frequency 1000 --exam-id 00009578_s006_t001


# BASH SCRIPT WRAPPING PYTHON SCRIPTS OVER ALL CANDIDATES
# -------------
bash_detect_qrs:
. $(FOLDER_PATH)/env/bin/activate; \
mkdir -p $(EXPORT_PATH); \
./scripts/bash_pipeline/1_detect_qrs_wrapper.sh -i $(DATA_PATH) -o $(EXPORT_PATH)/res-v0_6

bash_apply_ecg_qc:
. $(FOLDER_PATH)/env/bin/activate; \
mkdir -p $(EXPORT_PATH); \
./scripts/bash_pipeline/0_apply_ecg_qc_wrapper.sh -i $(DATA_PATH) -o $(EXPORT_PATH)/ecg_qc-v0_6 -f $(TSE_BI_FORMATTING)

bash_compare_qrs_detectors:
. $(FOLDER_PATH)/env/bin/activate; \
mkdir -p $(EXPORT_PATH); \
./scripts/bash_pipeline/0_compare_qrs_detectors.sh -i $(EXPORT_PATH)/res-v0_6 -c $(EXPORT_PATH)/res-v0_6-comp -o $(EXPORT_PATH)/$(COMPARISON_FOLDER) -f $(TSE_BI_FORMATTING)

bash_compute_hrvanalysis_features:
. $(FOLDER_PATH)/env/bin/activate; \
./scripts/bash_pipeline/2_compute_hrvanalysis_features_wrapper.sh -i $(EXPORT_PATH)/res-v0_6 -o $(EXPORT_PATH)/feats-v0_6
@@ -83,4 +105,20 @@ create_ml_dataset:
# ------------------
train:
. $(FOLDER_PATH)/env/bin/activate; \
python3 src/usecase/train_model.py --ml-dataset-path $(EXPORT_PATH)/ml_dataset/df_ml.csv
python3 src/usecase/train_model.py --ml-dataset-path $(EXPORT_PATH)/ml_dataset/df_ml.csv

train_ml:
. $(FOLDER_PATH)/env/bin/activate; \
python3 src/usecase/train_model.py --ml-dataset-path /home/DATA/DetecTeppe-2022-04-08/ml_dataset_2022_04_08/train/df_ml_train.csv --ml-dataset-path-test /home/DATA/DetecTeppe-2022-04-08/ml_dataset_2022_04_08/test/df_ml_test.csv


## VISUALIZATION
# ------------------
load_ecg:
python3 visualization/ecg_data_loader.py --pg-host localhost --pg-port 5432 --pg-user postgres --pg-password postgres --pg-database postgres --filepath data/tuh/dev/01_tcp_ar/076/00007633/s003_2013_07_09/00007633_s003_t007.edf

load_rr:
python3 visualization/rr_intervals_loader.py --pg-host localhost --pg-port 5432 --pg-user postgres --pg-password postgres --pg-database postgres --filepath data/test_data/rr_00007633_s003_t007.csv --exam 00007633_s003_t007

load_annotations:
python3 visualization/annotations_loader.py --pg-host localhost --pg-port 5432 --pg-user postgres --pg-password postgres --pg-database postgres --annotation-filename data/tuh/dev/01_tcp_ar/076/00007633/s003_2013_07_09/00007633_s003_t007.tse_bi --edf-filename data/tuh/dev/01_tcp_ar/076/00007633/s003_2013_07_09/00007633_s003_t007.edf --exam 00007633_s003_t007
17 changes: 15 additions & 2 deletions README.md
@@ -39,7 +39,17 @@ You need to have [docker](https://docs.docker.com/get-docker/) and [docker-compo
## Getting started

### Setting up environment and launch docker-compose
After cloning this repository, replace the value of the environment variable ```DATA_PATH``` in the *env.sh* file with the absolute path of the data you are working with.

Using a symbolic link is the most convenient way to import data stored in another path. In this case, first create a symbolic link in the data folder:
```sh
$ ln -s -r PATH_TO_DATA_FOLDER data/
```

Then update the *env.sh* file with the name of the symbolic link folder on its last line:

```sh
export SYMLINK_FOLDER='SYMBOLIC_NAME_FOLDER_NAME'
```
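For example, if the dataset lives in `/home/user/datasets/PL` (a hypothetical path), `ln -s -r /home/user/datasets/PL data/` creates the link `data/PL`, and the last line of *env.sh* becomes `export SYMLINK_FOLDER='PL'`.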

You can now run these commands:

@@ -61,7 +71,10 @@ You can now run these commands:
|Flower|5555|
|Redis|6379|


Before running Airflow, you must fetch data with:
```sh
$ make fetch_data
```

### UI
Once the services are up, you can interact with their UIs:
61 changes: 61 additions & 0 deletions dags/config.py
@@ -0,0 +1,61 @@
import os
import sys
from datetime import datetime as dt
from sklearn.ensemble import RandomForestClassifier
import datetime
import xgboost as xgb
import numpy as np

PROJECT_FOLDER = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
DATA_FOLDER = os.path.join(PROJECT_FOLDER, 'data')

ML_DATASET_OUTPUT_FOLDER = "/opt/airflow/output"
AIRFLOW_PREFIX_TO_DATA = '/opt/airflow/data/'
MLRUNS_DIR = '/mlruns'

TRAIN_DATA = os.path.join(AIRFLOW_PREFIX_TO_DATA, "train/df_ml_train.csv")
TEST_DATA = os.path.join(AIRFLOW_PREFIX_TO_DATA , "test/df_ml_test.csv")
FEATURE_TRAIN_PATH= os.path.join(ML_DATASET_OUTPUT_FOLDER, "ml_train.csv")
FEATURE_TEST_PATH= os.path.join(ML_DATASET_OUTPUT_FOLDER, "ml_test.csv")

COL_TO_DROP = ['interval_index', 'interval_start_time', 'set']

START_DATE = dt(2021, 8, 1)
CONCURRENCY = 4
SCHEDULE_INTERVAL = datetime.timedelta(hours=2)
DEFAULT_ARGS = {'owner': 'airflow'}

TRACKING_URI = 'http://mlflow:5000'

MODEL_PARAM = {
'model': xgb.XGBClassifier(),
'grid_parameters': {
'nthread':[4],
'learning_rate': [0.1, 0.01, 0.05],
'max_depth': np.arange(3, 5, 2),
'scale_pos_weight':[1],
'n_estimators': np.arange(15, 25, 2),
'missing':[-999]}
}

MODELS_PARAM = {
'xgboost': {
'model': xgb.XGBClassifier(),
'grid_parameters': {
'nthread':[4],
'learning_rate': [0.1, 0.01, 0.05],
'max_depth': np.arange(3, 5, 2),
'scale_pos_weight':[1],
'n_estimators': np.arange(15, 25, 2),
'missing':[-999]
}
},
'random_forest': {
'model': RandomForestClassifier(),
'grid_parameters': {
'min_samples_leaf': np.arange(1, 5, 1),
'max_depth': np.arange(1, 7, 1),
'max_features': ['auto'],
'n_estimators': np.arange(10, 20, 2)}
}
}
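Both dictionaries share the same shape: an estimator under `model` and a hyperparameter grid under `grid_parameters`. As a minimal sketch of how `MODELS_PARAM` might be consumed — the repository's actual training code is not part of this excerpt, and the `scoring` metric and `cv` fold count below are assumptions — a scikit-learn `GridSearchCV` loop could look like this:

```python
# Hedged sketch: grid-search every estimator configured in MODELS_PARAM.
# The scoring metric and fold count are assumptions, not taken from this commit.
from sklearn.model_selection import GridSearchCV

def fit_all(models_param, X_train, y_train):
    """Return one fitted GridSearchCV per configured model."""
    results = {}
    for name, config in models_param.items():
        search = GridSearchCV(
            estimator=config['model'],
            param_grid=config['grid_parameters'],
            scoring='f1',   # assumed metric
            cv=3,           # assumed fold count
            n_jobs=-1,
        )
        search.fit(X_train, y_train)
        results[name] = search
    return results
```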
28 changes: 28 additions & 0 deletions dags/predict.py
@@ -0,0 +1,28 @@
import os
import sys
from datetime import datetime, timedelta

from airflow.decorators import dag, task
from airflow.utils.dates import days_ago

sys.path.append('.')
from dags.config import (DEFAULT_ARGS, START_DATE, CONCURRENCY, SCHEDULE_INTERVAL)


@dag(default_args=DEFAULT_ARGS,
start_date=START_DATE,
schedule_interval=timedelta(minutes=2),
concurrency=CONCURRENCY)
def predict():
@task
def prepare_features_with_io_task() -> str:
pass

@task
def predict_with_io_task(feature_path: str) -> None:
pass

feature_path = prepare_features_with_io_task()
predict_with_io_task(feature_path)

predict_dag = predict()
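Both task bodies are stubs, but the wiring at the bottom already exercises Airflow's TaskFlow API: the string returned by `prepare_features_with_io_task` travels to `predict_with_io_task` through XCom. A hedged sketch of how the stubs might eventually be filled in — the file paths and model artifact below are illustrative assumptions, not part of this commit:

```python
# Hypothetical task bodies for the stubs above; the paths and the model
# artifact are assumptions for illustration, not part of this commit.
import pandas as pd
from airflow.decorators import task
from joblib import load

from dags.config import COL_TO_DROP

@task
def prepare_features_with_io_task() -> str:
    feature_path = '/opt/airflow/output/ml_test.csv'  # assumed output location
    # ... compute HRV features and write them to feature_path ...
    return feature_path  # the return value reaches the next task via XCom

@task
def predict_with_io_task(feature_path: str) -> None:
    features = pd.read_csv(feature_path)
    model = load('/opt/airflow/output/model.joblib')  # hypothetical artifact
    predictions = model.predict(features.drop(columns=COL_TO_DROP, errors='ignore'))
    pd.DataFrame({'prediction': predictions}).to_csv(
        feature_path.replace('.csv', '_predictions.csv'), index=False)
```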