adding metrics to the dvc pipeline

marcellinus-witarsah · Aug 1, 2024 · cb27ed3 · cb27ed3
1 parent 442ce1f
commit cb27ed3
Show file tree

Hide file tree

Showing 7 changed files with 45 additions and 7 deletions.
diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
@@ -37,6 +37,8 @@ jobs:
           DAGSHUB_PASSWORD: ${{ secrets.DAGSHUB_PASSWORD }}
         run: |
           dvc repro --force
+      - name: Report Metrics
+        run: dvc metrics show
       - name: Cleaning Compiled Python Files
         run: make clean
 
diff --git a/credit_score_mlops/modeling/train.py b/credit_score_mlops/modeling/train.py
@@ -14,6 +14,7 @@
 
 from credit_score_mlops.modeling import WOELogisticRegression
 from credit_score_mlops.plots import plot_calibration_curve
+from credit_score_mlops.utils import save_json
 
 app = typer.Typer()
 load_dotenv(find_dotenv())
@@ -33,6 +34,8 @@ def main(train_file: Path, test_file: Path, model_file: Path) -> None:
     params = dvc.api.params_show()
     target = params["target"]
     model_params = params["train"]["model_params"]
+    train_metrics_file = params["train"]["train_metrics_file"]
+    test_metrics_file = params["train"]["test_metrics_file"]
     train_calibration_curve_file = params["train"]["train_calibration_curve_file"]
     test_calibration_curve_file = params["train"]["test_calibration_curve_file"]
     model_name = params["train"]["model_name"]
@@ -114,8 +117,10 @@ def main(train_file: Path, test_file: Path, model_file: Path) -> None:
         # End run
         mlflow.end_run()
 
-    # 4. Save model locally
+    # 4. Save model and metrics locally
     model.save(model_file)
+    save_json(train_metrics_file, train_metrics)
+    save_json(test_metrics_file, test_metrics)
 
 
 if __name__ == "__main__":

diff --git a/dvc.lock b/dvc.lock
@@ -14,8 +14,8 @@ stages:
       size: 1804682
     - path: params.yaml
       hash: md5
-      md5: a4abfcebb0c71836028334791d96786d
-      size: 654
+      md5: 15a9336655f62d07d9695f24558b8492
+      size: 754
     params:
       params.yaml:
         data_preprocessing.random_state: 42
@@ -36,8 +36,8 @@ stages:
     deps:
     - path: credit_score_mlops/modeling/train.py
       hash: md5
-      md5: dca871cf22d816d2fd2561f0ac33493b
-      size: 3836
+      md5: e53c651d9d933be32f445526159cf8b9
+      size: 4115
     - path: credit_score_mlops/modeling/woe_logistic_regression.py
       hash: md5
       md5: bd8c588d01a13ffa52a5f18210d9f446
@@ -60,8 +60,8 @@ stages:
       size: 1240548
     - path: params.yaml
       hash: md5
-      md5: a4abfcebb0c71836028334791d96786d
-      size: 654
+      md5: 15a9336655f62d07d9695f24558b8492
+      size: 754
     params:
       params.yaml:
         target: loan_status
@@ -84,7 +84,9 @@ stages:
           logreg_params:
             random_state: 42
         train.test_calibration_curve_file: reports/figures/test_calibration_curve.png
+        train.test_metrics_file: reports/test_metrics.json
         train.train_calibration_curve_file: reports/figures/train_calibration_curve.png
+        train.train_metrics_file: reports/train_metrics.json
     outs:
     - path: models/model.pkl
       hash: md5
@@ -98,6 +100,14 @@ stages:
       hash: md5
       md5: edbeae05ac2a31534c229868fd69ee56
       size: 50733
+    - path: reports/test_metrics.json
+      hash: md5
+      md5: ef72fda2cc50f15a7afa0237833dc8ae
+      size: 173
+    - path: reports/train_metrics.json
+      hash: md5
+      md5: 08198cc22816feaa80bfbef4d1ffee10
+      size: 173
   evaluate:
     cmd: python credit_score_mlops/modeling/evaluate.py models/model.pkl data/processed/train.csv
       data/processed/test.csv

diff --git a/dvc.yaml b/dvc.yaml
@@ -25,8 +25,15 @@ stages:
     params:
     - target
     - train.model_params
+    - train.train_metrics_file
+    - train.test_metrics_file
     - train.train_calibration_curve_file
     - train.test_calibration_curve_file
+    metrics:
+    - reports/train_metrics.json:
+        cache: false
+    - reports/test_metrics.json:
+        cache: false
     outs:
     - models/model.pkl
     - reports/figures/train_calibration_curve.png

diff --git a/params.yaml b/params.yaml
@@ -11,6 +11,8 @@ train:
       "bins": 5
     }
     logreg_params: {"random_state": 42}
+  train_metrics_file: "reports/train_metrics.json"
+  test_metrics_file: "reports/test_metrics.json"
   train_calibration_curve_file: "reports/figures/train_calibration_curve.png"
   test_calibration_curve_file: "reports/figures/test_calibration_curve.png"
   model_name: "woe-lr-1.0.0"
diff --git a/reports/test_metrics.json b/reports/test_metrics.json
@@ -0,0 +1,6 @@
+{
+    "ROC AUC Score": 0.8716146967317216,
+    "PR AUC Score": 0.7183247007294158,
+    "Gini Score": 0.7432293934634433,
+    "Kolmogorov-Smirnov Score": 0.6121896745267259
+}
diff --git a/reports/train_metrics.json b/reports/train_metrics.json
@@ -0,0 +1,6 @@
+{
+    "ROC AUC Score": 0.8698805394816606,
+    "PR AUC Score": 0.7123467967634151,
+    "Gini Score": 0.7397610789633211,
+    "Kolmogorov-Smirnov Score": 0.6164803829590441
+}