Skip to content

Commit

Permalink
adding metrics to the dvc pipeline
Browse files Browse the repository at this point in the history
  • Loading branch information
marcellinus-witarsah committed Aug 1, 2024
1 parent 442ce1f commit cb27ed3
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 7 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/ci.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ jobs:
DAGSHUB_PASSWORD: ${{ secrets.DAGSHUB_PASSWORD }}
run: |
dvc repro --force
- name: Report Metrics
-
- name: Cleaning Compiled Python Files
run: make clean

7 changes: 6 additions & 1 deletion credit_score_mlops/modeling/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

from credit_score_mlops.modeling import WOELogisticRegression
from credit_score_mlops.plots import plot_calibration_curve
from credit_score_mlops.utils import save_json

app = typer.Typer()
load_dotenv(find_dotenv())
Expand All @@ -33,6 +34,8 @@ def main(train_file: Path, test_file: Path, model_file: Path) -> None:
params = dvc.api.params_show()
target = params["target"]
model_params = params["train"]["model_params"]
train_metrics_file = params["train"]["train_metrics_file"]
test_metrics_file = params["train"]["test_metrics_file"]
train_calibration_curve_file = params["train"]["train_calibration_curve_file"]
test_calibration_curve_file = params["train"]["test_calibration_curve_file"]
model_name = params["train"]["model_name"]
Expand Down Expand Up @@ -114,8 +117,10 @@ def main(train_file: Path, test_file: Path, model_file: Path) -> None:
# End run
mlflow.end_run()

# 4. Save model locally
# 4. Save model and metrics locally
model.save(model_file)
save_json(train_metrics_file, train_metrics)
save_json(test_metrics_file, test_metrics)


if __name__ == "__main__":
Expand Down
22 changes: 16 additions & 6 deletions dvc.lock
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ stages:
size: 1804682
- path: params.yaml
hash: md5
md5: a4abfcebb0c71836028334791d96786d
size: 654
md5: 15a9336655f62d07d9695f24558b8492
size: 754
params:
params.yaml:
data_preprocessing.random_state: 42
Expand All @@ -36,8 +36,8 @@ stages:
deps:
- path: credit_score_mlops/modeling/train.py
hash: md5
md5: dca871cf22d816d2fd2561f0ac33493b
size: 3836
md5: e53c651d9d933be32f445526159cf8b9
size: 4115
- path: credit_score_mlops/modeling/woe_logistic_regression.py
hash: md5
md5: bd8c588d01a13ffa52a5f18210d9f446
Expand All @@ -60,8 +60,8 @@ stages:
size: 1240548
- path: params.yaml
hash: md5
md5: a4abfcebb0c71836028334791d96786d
size: 654
md5: 15a9336655f62d07d9695f24558b8492
size: 754
params:
params.yaml:
target: loan_status
Expand All @@ -84,7 +84,9 @@ stages:
logreg_params:
random_state: 42
train.test_calibration_curve_file: reports/figures/test_calibration_curve.png
train.test_metrics_file: reports/test_metrics.json
train.train_calibration_curve_file: reports/figures/train_calibration_curve.png
train.train_metrics_file: reports/train_metrics.json
outs:
- path: models/model.pkl
hash: md5
Expand All @@ -98,6 +100,14 @@ stages:
hash: md5
md5: edbeae05ac2a31534c229868fd69ee56
size: 50733
- path: reports/test_metrics.json
hash: md5
md5: ef72fda2cc50f15a7afa0237833dc8ae
size: 173
- path: reports/train_metrics.json
hash: md5
md5: 08198cc22816feaa80bfbef4d1ffee10
size: 173
evaluate:
cmd: python credit_score_mlops/modeling/evaluate.py models/model.pkl data/processed/train.csv
data/processed/test.csv
Expand Down
7 changes: 7 additions & 0 deletions dvc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,15 @@ stages:
params:
- target
- train.model_params
- train.train_metrics_file
- train.test_metrics_file
- train.train_calibration_curve_file
- train.test_calibration_curve_file
metrics:
- reports/train_metrics.json:
cache: false
- reports/test_metrics.json:
cache: false
outs:
- models/model.pkl
- reports/figures/train_calibration_curve.png
Expand Down
2 changes: 2 additions & 0 deletions params.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ train:
"bins": 5
}
logreg_params: {"random_state": 42}
train_metrics_file: "reports/train_metrics.json"
test_metrics_file: "reports/test_metrics.json"
train_calibration_curve_file: "reports/figures/train_calibration_curve.png"
test_calibration_curve_file: "reports/figures/test_calibration_curve.png"
model_name: "woe-lr-1.0.0"
6 changes: 6 additions & 0 deletions reports/test_metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"ROC AUC Score": 0.8716146967317216,
"PR AUC Score": 0.7183247007294158,
"Gini Score": 0.7432293934634433,
"Kolmogorov-Smirnov Score": 0.6121896745267259
}
6 changes: 6 additions & 0 deletions reports/train_metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"ROC AUC Score": 0.8698805394816606,
"PR AUC Score": 0.7123467967634151,
"Gini Score": 0.7397610789633211,
"Kolmogorov-Smirnov Score": 0.6164803829590441
}

0 comments on commit cb27ed3

Please sign in to comment.