From 29e5708c6231d69f3e0303724d75b5ae8d4433c9 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 09:55:59 +0000 Subject: [PATCH 01/10] rename ci file --- .github/workflows/{ci.yml => ci-workflow.yml} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename .github/workflows/{ci.yml => ci-workflow.yml} (100%) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci-workflow.yml similarity index 100% rename from .github/workflows/ci.yml rename to .github/workflows/ci-workflow.yml From 4abd4e91276fb718ecc581c9e8e8a947ea5d3c52 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 11:45:30 +0000 Subject: [PATCH 02/10] fix linter --- .github/workflows/ci-workflow.yml | 2 +- dags/get_data_from_engie_hub.py | 4 ++-- dags/train.py | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 690d828..6c6b8e9 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -20,7 +20,7 @@ jobs: - name: Install dev dependencies run: pip install -r requirements_test.txt - name: Run Linter - run: "Trainee should add command to run flake8" + run: python -m flake8 - name: Run Vulture run: "Trainee should add command to run vulture" - name: Run tests diff --git a/dags/get_data_from_engie_hub.py b/dags/get_data_from_engie_hub.py index 7b3f83b..7efbfda 100644 --- a/dags/get_data_from_engie_hub.py +++ b/dags/get_data_from_engie_hub.py @@ -6,11 +6,11 @@ from airflow.operators.python_operator import PythonOperator from airflow.utils.dates import days_ago -sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(__file__)))) # So that airflow can find config files - from dags.config import TRAIN_DATA_PATH, GENERATED_DATA_FOLDER from formation_indus_ds_avancee.data_loading import get_data_from_csv +sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(__file__)))) # So that airflow can find config files + dag = DAG(dag_id='data_generator', description='Get data every 2min from Engie hub CSV', catchup=False, diff --git a/dags/train.py b/dags/train.py index 5c892c2..7265199 100644 --- a/dags/train.py +++ b/dags/train.py @@ -2,8 +2,6 @@ import sys from datetime import timedelta -sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(__file__)))) # So that airflow can find config files - from airflow import DAG from airflow.operators.python_operator import PythonOperator from airflow.utils.dates import days_ago @@ -12,6 +10,8 @@ from formation_indus_ds_avancee.feature_engineering import prepare_features_with_io from formation_indus_ds_avancee.train_and_predict import train_model_with_io +sys.path.insert(0, os.path.abspath(os.path.dirname(os.path.dirname(__file__)))) # So that airflow can find config files + dag = DAG(dag_id='train', description='Training DAG', start_date=days_ago(1), From a57b85d3bfda26883a690cd115f2118dd80d1274 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 11:48:39 +0000 Subject: [PATCH 03/10] fix vulture --- .github/workflows/ci-workflow.yml | 2 +- formation_indus_ds_avancee/train_and_predict.py | 10 ---------- requirements_test.txt | 3 ++- 3 files changed, 3 insertions(+), 12 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 6c6b8e9..bbd3014 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -22,7 +22,7 @@ jobs: - name: Run Linter run: python -m flake8 - name: Run Vulture - run: "Trainee should add command to run vulture" + run: python -m vulture - name: Run tests run: "Trainee should add command to run test" diff --git a/formation_indus_ds_avancee/train_and_predict.py b/formation_indus_ds_avancee/train_and_predict.py index ea8b56f..f148868 100644 --- a/formation_indus_ds_avancee/train_and_predict.py +++ b/formation_indus_ds_avancee/train_and_predict.py @@ -21,16 +21,6 @@ def train_model(features: pd.DataFrame, model_registry_folder: str) -> None: joblib.dump(model, os.path.join(model_registry_folder, 'model.joblib')) -def predict_with_io(features_path: str, model_path: str, predictions_folder: str) -> None: - features = pd.read_parquet(features_path) - features = predict(features, model_path) - time_str = time.strftime('%Y%m%d-%H%M%S') - features['predictions_time'] = time_str - features[['predictions', 'predictions_time']].to_csv(os.path.join(predictions_folder, time_str + '.csv'), - index=False) - features[['predictions', 'predictions_time']].to_csv(os.path.join(predictions_folder, 'latest.csv'), index=False) - - def predict(features: pd.DataFrame, model_path: str) -> pd.DataFrame: model = joblib.load(model_path) features['predictions'] = model.predict(features) diff --git a/requirements_test.txt b/requirements_test.txt index 252c2ce..56d12d3 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,3 +1,4 @@ behave pytest -pytest-cov \ No newline at end of file +pytest-cov +vulture \ No newline at end of file From 16fe27dc94bd954b1a2c1c5818d6dc4c6e714946 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 11:50:12 +0000 Subject: [PATCH 04/10] fix tests --- .github/workflows/ci-workflow.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index bbd3014..15f0f0d 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -24,6 +24,8 @@ jobs: - name: Run Vulture run: python -m vulture - name: Run tests - run: "Trainee should add command to run test" + run: | + python -m pytest --cov=formation_indus_ds_avancee/ tests/test_unit/ -vv -p no:warnings + python -m behave tests/test_functional/features From b1a5c16eb9dd3bfb1dcaf82eb5ac82f24da67628 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 11:51:14 +0000 Subject: [PATCH 05/10] install flake --- requirements_test.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements_test.txt b/requirements_test.txt index 56d12d3..66348b2 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -1,4 +1,5 @@ behave pytest pytest-cov -vulture \ No newline at end of file +vulture +flake8 \ No newline at end of file From 04a6fc8a8625db623f7bad36dbe40d3e36a2c866 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 11:55:08 +0000 Subject: [PATCH 06/10] fix flake8 --- formation_indus_ds_avancee/train_and_predict.py | 1 - 1 file changed, 1 deletion(-) diff --git a/formation_indus_ds_avancee/train_and_predict.py b/formation_indus_ds_avancee/train_and_predict.py index f148868..9c64ce9 100644 --- a/formation_indus_ds_avancee/train_and_predict.py +++ b/formation_indus_ds_avancee/train_and_predict.py @@ -1,5 +1,4 @@ import os -import time import joblib import pandas as pd From f5f3af17027526efa8a6024448b5bd242aa333ed Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 11:59:10 +0000 Subject: [PATCH 07/10] reintroduce removed method --- .github/workflows/ci-workflow.yml | 2 +- formation_indus_ds_avancee/train_and_predict.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 15f0f0d..ab158b8 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -22,7 +22,7 @@ jobs: - name: Run Linter run: python -m flake8 - name: Run Vulture - run: python -m vulture + run: python -m vulture --min-confidence 100 - name: Run tests run: | python -m pytest --cov=formation_indus_ds_avancee/ tests/test_unit/ -vv -p no:warnings diff --git a/formation_indus_ds_avancee/train_and_predict.py b/formation_indus_ds_avancee/train_and_predict.py index 9c64ce9..9827227 100644 --- a/formation_indus_ds_avancee/train_and_predict.py +++ b/formation_indus_ds_avancee/train_and_predict.py @@ -19,6 +19,15 @@ def train_model(features: pd.DataFrame, model_registry_folder: str) -> None: model.fit(X, y) joblib.dump(model, os.path.join(model_registry_folder, 'model.joblib')) +def predict_with_io(features_path: str, model_path: str, predictions_folder: str) -> None: + features = pd.read_parquet(features_path) + features = predict(features, model_path) + time_str = time.strftime('%Y%m%d-%H%M%S') + features['predictions_time'] = time_str + features[['predictions', 'predictions_time']].to_csv(os.path.join(predictions_folder, time_str + '.csv'), + index=False) + features[['predictions', 'predictions_time']].to_csv(os.path.join(predictions_folder, 'latest.csv'), index=False) + def predict(features: pd.DataFrame, model_path: str) -> pd.DataFrame: model = joblib.load(model_path) From 48f7c2f26a73f8b3f9f68ba09a5aea439fd87144 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 12:01:02 +0000 Subject: [PATCH 08/10] fix tests --- .github/workflows/ci-workflow.yml | 2 +- formation_indus_ds_avancee/train_and_predict.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index ab158b8..747542d 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -22,7 +22,7 @@ jobs: - name: Run Linter run: python -m flake8 - name: Run Vulture - run: python -m vulture --min-confidence 100 + run: python -m vulture . --min-confidence 100 - name: Run tests run: | python -m pytest --cov=formation_indus_ds_avancee/ tests/test_unit/ -vv -p no:warnings diff --git a/formation_indus_ds_avancee/train_and_predict.py b/formation_indus_ds_avancee/train_and_predict.py index 9827227..80f66aa 100644 --- a/formation_indus_ds_avancee/train_and_predict.py +++ b/formation_indus_ds_avancee/train_and_predict.py @@ -1,4 +1,5 @@ import os +import time import joblib import pandas as pd From de608477917c8417307fe109b6aace49882d4698 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 12:02:45 +0000 Subject: [PATCH 09/10] fix flake8 --- formation_indus_ds_avancee/train_and_predict.py | 1 + 1 file changed, 1 insertion(+) diff --git a/formation_indus_ds_avancee/train_and_predict.py b/formation_indus_ds_avancee/train_and_predict.py index 80f66aa..ea8b56f 100644 --- a/formation_indus_ds_avancee/train_and_predict.py +++ b/formation_indus_ds_avancee/train_and_predict.py @@ -20,6 +20,7 @@ def train_model(features: pd.DataFrame, model_registry_folder: str) -> None: model.fit(X, y) joblib.dump(model, os.path.join(model_registry_folder, 'model.joblib')) + def predict_with_io(features_path: str, model_path: str, predictions_folder: str) -> None: features = pd.read_parquet(features_path) features = predict(features, model_path) From 2d26879d746cf0fcc33e740f4bdde551fb2bb044 Mon Sep 17 00:00:00 2001 From: Julien Foenet Date: Mon, 7 Oct 2024 12:05:13 +0000 Subject: [PATCH 10/10] fix tests --- .github/workflows/ci-workflow.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/.github/workflows/ci-workflow.yml b/.github/workflows/ci-workflow.yml index 747542d..277814a 100644 --- a/.github/workflows/ci-workflow.yml +++ b/.github/workflows/ci-workflow.yml @@ -24,8 +24,6 @@ jobs: - name: Run Vulture run: python -m vulture . --min-confidence 100 - name: Run tests - run: | - python -m pytest --cov=formation_indus_ds_avancee/ tests/test_unit/ -vv -p no:warnings - python -m behave tests/test_functional/features + run: python -m pytest --cov=formation_indus_ds_avancee/ tests/test_unit/ -vv -p no:warnings