From 12480ee65181f5703b43e5b98e36babd2ea1e7a1 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sat, 24 May 2025 21:33:10 +0200
Subject: [PATCH 01/36] Add test for Feature and Data Integrity

---
 pytest.ini                                    |  3 +-
 tests/test_data_integrity.py                  | 37 +++++++++++++++++++
 ...est_metamorphic.py => test_mutamorphic.py} | 16 +++-----
 3 files changed, 44 insertions(+), 12 deletions(-)
 create mode 100644 tests/test_data_integrity.py
 rename tests/{test_metamorphic.py => test_mutamorphic.py} (94%)

diff --git a/pytest.ini b/pytest.ini
index 6bac73c..fa56afd 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -1,3 +1,4 @@
 # telling pytest to add src to PYTHONPATH automatically
 [pytest]
-pythonpath = src
\ No newline at end of file
+pythonpath = src
+testpaths = tests
diff --git a/tests/test_data_integrity.py b/tests/test_data_integrity.py
new file mode 100644
index 0000000..588a2e1
--- /dev/null
+++ b/tests/test_data_integrity.py
@@ -0,0 +1,37 @@
+import pandas as pd
+import os
+import pytest
+
+RAW_DATA_PATH = "data/raw/a1_RestaurantReviews_HistoricDump.tsv"
+
+@pytest.fixture(scope="module")
+def raw_data():
+    assert os.path.exists(RAW_DATA_PATH), f"Data file not found at {RAW_DATA_PATH}"
+    df = pd.read_csv(RAW_DATA_PATH, sep='\t')
+    df.columns = df.columns.str.strip()
+    return df
+
+def test_column_schema(raw_data):
+    """Check that expected columns exist"""
+    expected = {'Review', 'Liked'}
+    actual = set(raw_data.columns)
+    missing = expected - actual
+    assert not missing, f"Missing expected columns: {missing}"
+
+def test_no_missing_values(raw_data):
+    """Ensure no nulls in important columns"""
+    for col in ['Review', 'Liked']:
+        assert raw_data[col].isnull().sum() == 0, f"Missing values found in {col}"
+
+def test_liked_label_values(raw_data):
+    """Ensure 'Liked' is binary (0 or 1)"""
+    assert raw_data['Liked'].isin([0, 1]).all(), "'Liked' column contains non-binary values"
+
+def test_review_length(raw_data):
+    """Check that Review has sufficient length"""
+    assert raw_data['Review'].str.len().gt(10).all(), "Some reviews are too short"
+
+def test_exact_duplicate_rows(raw_data):
+    """Check for fully duplicated rows with same Review and Liked"""
+    duplicates = raw_data.duplicated().sum()
+    assert duplicates <= 4, f"Unusual number of exact duplicate rows: {duplicates}"
diff --git a/tests/test_metamorphic.py b/tests/test_mutamorphic.py
similarity index 94%
rename from tests/test_metamorphic.py
rename to tests/test_mutamorphic.py
index a5bc5d2..11b4934 100644
--- a/tests/test_metamorphic.py
+++ b/tests/test_mutamorphic.py
@@ -3,7 +3,6 @@
 import pytest
 import numpy as np
 
-
 @pytest.fixture(scope="module")
 def trained_sentiment_model():
     model_path = "../artifacts/trained_model.pkl"
@@ -18,7 +17,6 @@ def trained_sentiment_model():
         model = pickle.load(f)
     return model
 
-
 @pytest.fixture(scope="module")
 def sentiment_vectorizer():
     vectorizer_path = "artifacts/c1_BoW_Sentiment_Model.pkl"
@@ -31,13 +29,11 @@ def sentiment_vectorizer():
         vectorizer = pickle.load(f)
     return vectorizer
 
-
 def replace_with_synonym(text, original_word, synonym):
     return text.replace(original_word, synonym)
 
-
-# Metamorphic tests for sentiment analysis model
-def test_metamorphic_synonym_positive_review(
+# Mutamorphic tests for sentiment analysis model
+def test_mutamorphic_synonym_positive_review(
     trained_sentiment_model, sentiment_vectorizer
 ):
     model = trained_sentiment_model
@@ -69,9 +65,8 @@ def test_metamorphic_synonym_positive_review(
         transformed_prediction_2 == original_prediction
     ), f"Sentiment changed from '{original_prediction}' to '{transformed_prediction_2}' after synonym replacement (excellent -> fine)."
 
-
-# Metamorphic tests for sentiment analysis model
-def test_metamorphic_synonym_negative_review(
+# Mutamorphic tests for sentiment analysis model
+def test_mutamorphic_synonym_negative_review(
     trained_sentiment_model, sentiment_vectorizer
 ):
     model = trained_sentiment_model
@@ -105,8 +100,7 @@ def test_metamorphic_synonym_negative_review(
         transformed_prediction_2 == original_prediction
     ), f"Sentiment changed from '{original_prediction}' to '{transformed_prediction_2}' after synonym replacement (awful -> dreadful)."
 
-
-def test_metamorphic_add_neutral_phrase_negative_review(
+def test_mutamorphic_add_neutral_phrase_negative_review(
     trained_sentiment_model, sentiment_vectorizer
 ):
     model = trained_sentiment_model

From 7168e6adfeaa9bea60c2c7d2d37400fc08792a28 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sat, 24 May 2025 22:32:06 +0200
Subject: [PATCH 02/36] Add tests for Model Development

---
 tests/test_data_integrity.py    |  2 +-
 tests/test_model_development.py | 78 +++++++++++++++++++++++++++++++++
 2 files changed, 79 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_model_development.py

diff --git a/tests/test_data_integrity.py b/tests/test_data_integrity.py
index 588a2e1..9815432 100644
--- a/tests/test_data_integrity.py
+++ b/tests/test_data_integrity.py
@@ -34,4 +34,4 @@ def test_review_length(raw_data):
 def test_exact_duplicate_rows(raw_data):
     """Check for fully duplicated rows with same Review and Liked"""
     duplicates = raw_data.duplicated().sum()
-    assert duplicates <= 4, f"Unusual number of exact duplicate rows: {duplicates}"
+    assert duplicates <= 10, f"Unusual number of exact duplicate rows: {duplicates}"
diff --git a/tests/test_model_development.py b/tests/test_model_development.py
new file mode 100644
index 0000000..d3593c4
--- /dev/null
+++ b/tests/test_model_development.py
@@ -0,0 +1,78 @@
+import pytest
+import pickle
+import joblib
+import json
+from preprocess import preprocess_data
+from train import train_model
+from evaluate import evaluate_model
+from sklearn.metrics import accuracy_score
+from sklearn.dummy import DummyClassifier
+
+DATA_PATH = "data/raw/a1_RestaurantReviews_HistoricDump.tsv"
+
+@pytest.fixture(scope="module")
+def preprocessed():
+    return preprocess_data(DATA_PATH)
+
+@pytest.fixture(scope="module")
+def train_test_data(preprocessed):
+    with open(preprocessed, "rb") as f:
+        data = pickle.load(f)
+    return data["X_train"], data["X_test"], data["y_train"], data["y_test"]
+
+def test_nondeterminism_robustness(preprocessed, train_test_data):
+    accs = []
+    for seed in [1, 42, 123]:
+        model_path = train_model(preprocessed, random_state=seed)
+        metrics_path = evaluate_model(model_path, preprocessed)
+        
+        with open(metrics_path, "r") as f:
+            metrics = json.load(f)
+            acc = metrics.get("accuracy")
+            assert acc is not None, "Accuracy not found in metrics.json"
+            accs.append(acc)
+
+    variability = max(accs) - min(accs)
+    assert variability <= 0.05, f"Accuracy variance too high: {accs}"
+
+def test_data_slice_performance(preprocessed, train_test_data):
+    _, X_test, _, y_test = train_test_data
+    model_path = train_model(preprocessed, random_state=0)
+    model = joblib.load(model_path)
+
+    short_idx = [i for i, x in enumerate(X_test) if x.sum() <= 5]
+    long_idx = [i for i, x in enumerate(X_test) if x.sum() >= 15]
+
+    if not short_idx or not long_idx:
+        pytest.skip("Insufficient short/long samples for slice test")
+
+    short_X = X_test[short_idx]
+    short_y = [y_test[i] for i in short_idx]
+    long_X = X_test[long_idx]
+    long_y = [y_test[i] for i in long_idx]
+
+    short_preds = model.predict(short_X)
+    long_preds = model.predict(long_X)
+
+    acc_short = accuracy_score(short_y, short_preds)
+    acc_long = accuracy_score(long_y, long_preds)
+
+    diff = abs(acc_short - acc_long)
+    assert diff <= 0.25, f"Accuracy gap on slices too large: short={acc_short:.2f}, long={acc_long:.2f}"
+
+def test_baseline_comparison(train_test_data, preprocessed):
+    X_train, X_test, y_train, y_test = train_test_data
+
+    dummy = DummyClassifier(strategy="most_frequent", random_state=0)
+    dummy.fit(X_train, y_train)
+    baseline_preds = dummy.predict(X_test)
+    baseline_acc = accuracy_score(y_test, baseline_preds)
+
+    model_path = train_model(preprocessed, random_state=0)
+    model = joblib.load(model_path)
+    model_preds = model.predict(X_test)
+    model_acc = accuracy_score(y_test, model_preds)
+
+    assert model_acc > baseline_acc, (
+        f"Trained model does not outperform baseline: model={model_acc:.2f}, baseline={baseline_acc:.2f}"
+    )

From 6103954b9ee17bc8f62cbbb06412e6c9741dc3b4 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sat, 24 May 2025 22:51:58 +0200
Subject: [PATCH 03/36] Add tests for ML Infrastructure and Monitoring

---
 tests/test_ml_infrastructure.py | 47 +++++++++++++++++++++++++++++++
 tests/test_monitoring.py        | 50 +++++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 tests/test_ml_infrastructure.py
 create mode 100644 tests/test_monitoring.py

diff --git a/tests/test_ml_infrastructure.py b/tests/test_ml_infrastructure.py
new file mode 100644
index 0000000..5e2bbe5
--- /dev/null
+++ b/tests/test_ml_infrastructure.py
@@ -0,0 +1,47 @@
+import os
+import pytest
+import pickle
+import joblib
+import json
+from preprocess import preprocess_data
+from train import train_model
+from evaluate import evaluate_model
+from sklearn.metrics import accuracy_score
+
+DATA_PATH = "data/raw/a1_RestaurantReviews_HistoricDump.tsv"
+
+@pytest.fixture(scope="module")
+def preprocessed():
+    return preprocess_data(DATA_PATH)
+
+@pytest.fixture(scope="module")
+def train_test_data(preprocessed):
+    with open(preprocessed, "rb") as f:
+        data = pickle.load(f)
+    return data["X_train"], data["X_test"], data["y_train"], data["y_test"]
+
+def test_integration_pipeline(preprocessed):
+    """Run the full training and evaluation pipeline"""
+    model_path = train_model(preprocessed, random_state=0)
+    assert os.path.exists(model_path), "Trained model file not created"
+
+    metrics_path = evaluate_model(model_path, preprocessed)
+    assert os.path.exists(metrics_path), "Metrics file not created"
+
+    with open(metrics_path, "r") as f:
+        metrics = json.load(f)
+
+    assert "accuracy" in metrics, "Accuracy not found in metrics file"
+    assert 0.7 <= metrics["accuracy"] <= 1.0, f"Unrealistic accuracy: {metrics['accuracy']}"
+
+def test_model_rollback(train_test_data, preprocessed):
+    """Test loading a saved model and re-evaluating"""
+    X_train, X_test, y_train, y_test = train_test_data
+
+    model_path = train_model(preprocessed, random_state=0)
+    model = joblib.load(model_path)
+
+    preds = model.predict(X_test)
+    acc = accuracy_score(y_test, preds)
+
+    assert 0.7 <= acc <= 1.0, f"Reloaded model accuracy out of range: {acc:.2f}"
diff --git a/tests/test_monitoring.py b/tests/test_monitoring.py
new file mode 100644
index 0000000..2192fa9
--- /dev/null
+++ b/tests/test_monitoring.py
@@ -0,0 +1,50 @@
+import pytest
+import pickle
+from preprocess import preprocess_data
+from train import train_model
+import numpy as np
+from scipy.stats import ks_2samp
+
+DATA_PATH = "data/raw/a1_RestaurantReviews_HistoricDump.tsv"
+
+@pytest.fixture(scope="module")
+def preprocessed():
+    return preprocess_data(DATA_PATH)
+
+@pytest.fixture(scope="module")
+def train_test_data(preprocessed):
+    with open(preprocessed, "rb") as f:
+        data = pickle.load(f)
+    return data["X_train"], data["X_test"], data["y_train"], data["y_test"]
+
+def test_feature_distribution_drift(train_test_data):
+    """Compare feature distributions in train and test via Kolmogorov–Smirnov test"""
+    X_train, X_test, _, _ = train_test_data
+
+    drift_scores = []
+    for i in range(X_train.shape[1]):
+        train_feat = X_train[:, i].ravel()
+        test_feat = X_test[:, i].ravel()
+        stat, pval = ks_2samp(train_feat, test_feat)
+        drift_scores.append(pval)
+
+    # If many p-values are very low, feature drift exists
+    drift_detected = np.sum(np.array(drift_scores) < 0.01)
+    ratio = drift_detected / len(drift_scores)
+    assert ratio < 0.1, f"Feature drift detected in {ratio:.2%} of features"
+
+def test_prediction_distribution_stability(train_test_data, preprocessed):
+    """Check for dramatic changes in predicted label distribution"""
+    _, X_test, _, _ = train_test_data
+    model_path = train_model(preprocessed, random_state=0)
+
+    import joblib
+    model = joblib.load(model_path)
+    preds = model.predict(X_test)
+
+    # Count proportion of each predicted label
+    unique, counts = np.unique(preds, return_counts=True)
+    ratios = dict(zip(unique, counts / len(preds)))
+
+    for label, ratio in ratios.items():
+        assert 0.1 <= ratio <= 0.9, f"Prediction ratio for class {label} is unrealistic: {ratio:.2f}"

From cbdbf769243c7abb477ad5456f716e6b1e72edbe Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sat, 24 May 2025 23:04:51 +0200
Subject: [PATCH 04/36] Order the tests

---
 tests/{test_data_integrity.py => test_01_data_integrity.py}      | 0
 .../{test_model_development.py => test_02_model_development.py}  | 0
 .../{test_ml_infrastructure.py => test_03_ml_infrastructure.py}  | 0
 tests/{test_monitoring.py => test_04_monitoring.py}              | 0
 tests/test_mutamorphic.py                                        | 1 -
 5 files changed, 1 deletion(-)
 rename tests/{test_data_integrity.py => test_01_data_integrity.py} (100%)
 rename tests/{test_model_development.py => test_02_model_development.py} (100%)
 rename tests/{test_ml_infrastructure.py => test_03_ml_infrastructure.py} (100%)
 rename tests/{test_monitoring.py => test_04_monitoring.py} (100%)

diff --git a/tests/test_data_integrity.py b/tests/test_01_data_integrity.py
similarity index 100%
rename from tests/test_data_integrity.py
rename to tests/test_01_data_integrity.py
diff --git a/tests/test_model_development.py b/tests/test_02_model_development.py
similarity index 100%
rename from tests/test_model_development.py
rename to tests/test_02_model_development.py
diff --git a/tests/test_ml_infrastructure.py b/tests/test_03_ml_infrastructure.py
similarity index 100%
rename from tests/test_ml_infrastructure.py
rename to tests/test_03_ml_infrastructure.py
diff --git a/tests/test_monitoring.py b/tests/test_04_monitoring.py
similarity index 100%
rename from tests/test_monitoring.py
rename to tests/test_04_monitoring.py
diff --git a/tests/test_mutamorphic.py b/tests/test_mutamorphic.py
index 11b4934..fe63738 100644
--- a/tests/test_mutamorphic.py
+++ b/tests/test_mutamorphic.py
@@ -114,7 +114,6 @@ def test_mutamorphic_add_neutral_phrase_negative_review(
     )
     transformed_prediction = model.predict(transformed_review_vectorized)[0]
 
-    # assert original_prediction == "Negative" # Base assumption
     assert (
         transformed_prediction == original_prediction
     ), f"Sentiment changed from '{original_prediction}' to '{transformed_prediction}' after adding a neutral phrase."

From bf595d1e2d800fc42b32890d3190a90722ded093 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sat, 24 May 2025 23:18:56 +0200
Subject: [PATCH 05/36] Structure the mutamorphic tests

---
 tests/test_mutamorphic.py | 133 ++++++++++++++++----------------------
 1 file changed, 54 insertions(+), 79 deletions(-)

diff --git a/tests/test_mutamorphic.py b/tests/test_mutamorphic.py
index fe63738..b79bf90 100644
--- a/tests/test_mutamorphic.py
+++ b/tests/test_mutamorphic.py
@@ -3,6 +3,7 @@
 import pytest
 import numpy as np
 
+
 @pytest.fixture(scope="module")
 def trained_sentiment_model():
     model_path = "../artifacts/trained_model.pkl"
@@ -29,91 +30,65 @@ def sentiment_vectorizer():
         vectorizer = pickle.load(f)
     return vectorizer
 
-def replace_with_synonym(text, original_word, synonym):
-    return text.replace(original_word, synonym)
-
-# Mutamorphic tests for sentiment analysis model
-def test_mutamorphic_synonym_positive_review(
-    trained_sentiment_model, sentiment_vectorizer
-):
-    model = trained_sentiment_model
-
-    original_review = "The food was great and service excellent."
-    original_review_vectorized = sentiment_vectorizer.transform([original_review])
-    original_prediction = model.predict(original_review_vectorized)[0]
-
-    # context similar alternative 1
-    transformed_review_1_text = replace_with_synonym(original_review, "great", "good")
-    transformed_review_1_vectorized = sentiment_vectorizer.transform(
-        [transformed_review_1_text]
-    )
-    transformed_prediction_1 = model.predict(transformed_review_1_vectorized)[0]
+def replace_with_synonym(text, original, synonym):
+    return text.replace(original, synonym)
 
-    assert (
-        transformed_prediction_1 == original_prediction
-    ), f"Sentiment changed from '{original_prediction}' to '{transformed_prediction_1}' after synonym replacement (great -> good)."
+@pytest.mark.parametrize("original_review, replacements", [
+    ("The food was great and service excellent.", [("great", "good"), ("excellent", "fine")]),
+    ("The food was terrible and the service awful.", [("terrible", "horrible"), ("awful", "dreadful")])
+])
 
-    # context similar alternative 2
-    transformed_review_2_text = replace_with_synonym(
-        original_review, "excellent", "fine"
-    )
-    transformed_review_2_vectorized = sentiment_vectorizer.transform(
-        [transformed_review_2_text]
-    )
-    transformed_prediction_2 = model.predict(transformed_review_2_vectorized)[0]
-    assert (
-        transformed_prediction_2 == original_prediction
-    ), f"Sentiment changed from '{original_prediction}' to '{transformed_prediction_2}' after synonym replacement (excellent -> fine)."
-
-# Mutamorphic tests for sentiment analysis model
-def test_mutamorphic_synonym_negative_review(
-    trained_sentiment_model, sentiment_vectorizer
-):
+def test_mutamorphic_synonym_consistency(trained_sentiment_model, sentiment_vectorizer, original_review, replacements):
     model = trained_sentiment_model
-    original_review = "The food was terrible and the service awful."
-    original_review_vectorized = sentiment_vectorizer.transform([original_review])
-    original_prediction = model.predict(original_review_vectorized)[0]
+    vectorizer = sentiment_vectorizer
+    original_vec = vectorizer.transform([original_review])
+    original_pred = model.predict(original_vec)[0]
+
+    for original, synonym in replacements:
+        mutated = replace_with_synonym(original_review, original, synonym)
+        mutated_vec = vectorizer.transform([mutated])
+        mutated_pred = model.predict(mutated_vec)[0]
+        assert mutated_pred == original_pred, (
+            f"Prediction inconsistency:\n"
+            f"Original: {original_review} -> {original_pred}\n"
+            f"Mutated:  {mutated} -> {mutated_pred}"
+        )
 
-    # context similar alternative 1
-    transformed_review_1_text = replace_with_synonym(
-        original_review, "terrible", "horrible"
-    )
-    transformed_review_1_vectorized = sentiment_vectorizer.transform(
-        [transformed_review_1_text]
-    )
-    transformed_prediction_1 = model.predict(transformed_review_1_vectorized)[0]
+def test_mutamorphic_add_neutral_phrase(trained_sentiment_model, sentiment_vectorizer):
+    model = trained_sentiment_model
+    vectorizer = sentiment_vectorizer
+    review = "The experience was terrible."
+    neutralized = "To be honest, " + review
 
-    assert (
-        transformed_prediction_1 == original_prediction
-    ), f"Sentiment changed from '{original_prediction}' to '{transformed_prediction_1}' after synonym replacement (terrible -> horrible)."
+    vec_orig = vectorizer.transform([review])
+    vec_neutral = vectorizer.transform([neutralized])
+    pred_orig = model.predict(vec_orig)[0]
+    pred_neutral = model.predict(vec_neutral)[0]
 
-    # context similar alternative 2
-    transformed_review_2_text = replace_with_synonym(
-        original_review, "awful", "dreadful"
+    assert pred_orig == pred_neutral, (
+        f"Prediction changed after neutral phrase: '{pred_orig}' -> '{pred_neutral}'"
     )
-    transformed_review_2_vectorized = sentiment_vectorizer.transform(
-        [transformed_review_2_text]
-    )
-    transformed_prediction_2 = model.predict(transformed_review_2_vectorized)[0]
-
-    assert (
-        transformed_prediction_2 == original_prediction
-    ), f"Sentiment changed from '{original_prediction}' to '{transformed_prediction_2}' after synonym replacement (awful -> dreadful)."
 
-def test_mutamorphic_add_neutral_phrase_negative_review(
-    trained_sentiment_model, sentiment_vectorizer
-):
+def test_mutamorphic_repair_placeholder(trained_sentiment_model, sentiment_vectorizer):
+    """
+    Placeholder test to suggest the idea of automatic inconsistency repair.
+    Currently does not perform real repair, just simulates detection.
+    """
     model = trained_sentiment_model
-    original_review = "The experience was terrible."
-    original_review_vectorized = sentiment_vectorizer.transform([original_review])
-    original_prediction = model.predict(original_review_vectorized)[0]
-
-    transformed_review_text = "To be honest, " + original_review
-    transformed_review_vectorized = sentiment_vectorizer.transform(
-        [transformed_review_text]
-    )
-    transformed_prediction = model.predict(transformed_review_vectorized)[0]
-
-    assert (
-        transformed_prediction == original_prediction
-    ), f"Sentiment changed from '{original_prediction}' to '{transformed_prediction}' after adding a neutral phrase."
+    vectorizer = sentiment_vectorizer
+    sentence = "The dessert was delightful."
+    mutated = replace_with_synonym(sentence, "delightful", "amazing")
+
+    orig_vec = vectorizer.transform([sentence])
+    mutated_vec = vectorizer.transform([mutated])
+    pred_orig = model.predict(orig_vec)[0]
+    pred_mutated = model.predict(mutated_vec)[0]
+
+    if pred_orig != pred_mutated:
+        # placeholder "repair": fallback to original
+        repaired = sentence
+        repaired_vec = vectorizer.transform([repaired])
+        repaired_pred = model.predict(repaired_vec)[0]
+        assert repaired_pred == pred_orig, (
+            f"Repair step failed: original='{pred_orig}', mutated='{pred_mutated}', repaired='{repaired_pred}'"
+        )

From 28581650cad2ef7f40d32100cf86a2a275e88c0c Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 00:33:02 +0200
Subject: [PATCH 06/36] Configure workflow

---
 .github/workflows/code_quality.yml | 44 ++++++++++++++++++++++---
 README.md                          | 45 ++++++++++++++++++--------
 ml_test_score.py                   | 52 ++++++++++++++++++++++++++++++
 requirements.txt                   |  6 +++-
 4 files changed, 129 insertions(+), 18 deletions(-)
 create mode 100644 ml_test_score.py

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index aef9a2d..ee8f552 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -19,6 +19,7 @@ jobs:
         run: |
           python -m pip install --upgrade pip
           pip install -r requirements.txt
+
       - id: pylint
         run: |
           pylint_output=$(PYTHONPATH=. pylint src/ --fail-under=8)
@@ -30,11 +31,13 @@ jobs:
             score="unknown"
           fi
           echo "pylint_score=$score" >> $GITHUB_OUTPUT
-      - name: Update README badge
+
+      - name: Update Pylint Badge in README
         run: |
           score=${{ steps.pylint.outputs.pylint_score }}
           badge="![Pylint Score](https://img.shields.io/badge/pylint-${score//./%2E}%2F10-brightgreen)"
           sed -i "/<!-- PYLINT_BADGE_START -->/,/<!-- PYLINT_BADGE_END -->/c\\<!-- PYLINT_BADGE_START -->\n$badge\n<!-- PYLINT_BADGE_END -->" README.md
+
       - name: Run flake8
         run: flake8 src/
 
@@ -42,17 +45,50 @@ jobs:
         run: bandit -r src/
         continue-on-error: true
 
+      - name: Run tests and collect coverage
+        run: |
+          coverage run -m pytest
+          coverage report
+          coverage xml
+
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          files: coverage.xml
+          token: ${{ secrets.CODECOV_TOKEN }}
+
+      - name: Update Coverage Badge in README
+        run: |
+          coverage_badge="![Coverage](https://codecov.io/gh/remla25-team21/model-training/branch/main/graph/badge.svg)"
+          sed -i "/<!-- COVERAGE_BADGE_START -->/,/<!-- COVERAGE_BADGE_END -->/c\\<!-- COVERAGE_BADGE_START -->\n$coverage_badge\n<!-- COVERAGE_BADGE_END -->" README.md
+
+      - name: Calculate ML Test Score
+        run: python ml_test_score.py
+
+      - name: Update ML Test Score Table in README
+        run: |
+          # Splice ml_test_score.md (it carries its own START/END markers) over the marked README section, in place.
+          awk '/<!-- ML_TEST_SCORE_START -->/{while((getline row < "ml_test_score.md") > 0) print row; skip=1; next} /<!-- ML_TEST_SCORE_END -->/{skip=0; next} !skip' README.md > tmp_README.md
+          mv tmp_README.md README.md
+
+      - name: Update ML Test Score Badge (optional)
+        run: |
+          if [ -f ml_test_score_badge.txt ]; then
+            badge_url=$(cat ml_test_score_badge.txt)
+            badge_md="![ML Test Score]($badge_url)"
+            sed -i "/<!-- ML_SCORE_BADGE_START -->/,/<!-- ML_SCORE_BADGE_END -->/c\\<!-- ML_SCORE_BADGE_START -->\n$badge_md\n<!-- ML_SCORE_BADGE_END -->" README.md
+          fi
+
       - name: Commit README update
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
-
           if ! git diff --quiet; then
             git add README.md
-            git commit -m "Update pylint score badge to ${{ steps.pylint.outputs.pylint_score }}"
+            git commit -m "Update README with lint, coverage, and ML test score"
             git push
           else
             echo "No changes to commit."
           fi
         env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
\ No newline at end of file
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
diff --git a/README.md b/README.md
index 8358a84..76aece9 100644
--- a/README.md
+++ b/README.md
@@ -4,7 +4,15 @@
 ![Pylint Score](https://img.shields.io/badge/pylint-10%2E00%2F10-brightgreen)
 <!-- PYLINT_BADGE_END -->
 
-This repository contains the training pipeline for the sentiment analysis model used in our REMLA project.
+<!-- COVERAGE_BADGE_START -->
+![Coverage](https://codecov.io/gh/remla25-21/model-training/branch/main/graph/badge.svg)
+<!-- COVERAGE_BADGE_END -->
+
+<!-- ML_SCORE_BADGE_START -->
+![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-10%2F12-brightgreen)
+<!-- ML_SCORE_BADGE_END -->
+
+This repository contains the training pipeline for the sentiment analysis model used in our REMLA project. 
 
 - It uses the [lib-ml](https://github.com/remla25-team21/lib-ml) library for data preprocessing and saves the trained model (`sentiment_model_*.pkl`) as a release artifact.
 - The training dataset can be found in `data/raw/a1_RestaurantReviews_HistoricDump.tsv`.
@@ -43,12 +51,19 @@ This repository contains the training pipeline for the sentiment analysis model
 > ```bash
 > dvc repro
 > ```
+>
 > 6. Run the test 
 >
 > ```bash
 > pytest
 > ```
 
+## ML Test Score
+
+<!-- ML_TEST_SCORE_START -->
+[This will be auto-generated by the GitHub Actions workflow]
+<!-- ML_TEST_SCORE_END -->
+
 ## Dependencies
 
 Install the required dependencies:
@@ -111,16 +126,19 @@ For more details on collaborating with DVC, refer to [./docs/dvc-ref.md](./docs/
 If you encounter "This app is blocked" error during Google authentication when using DVC with Google Drive, you can download the dataset directly using one of these methods:
 
 #### Linux/macOS
+
 ```bash
 wget --no-check-certificate 'https://drive.google.com/uc?export=download&id=1mrWUgJlRCf_n_TbxPuuthJ9YsTBwGuRh' -O ./data/raw/a1_RestaurantReviews_HistoricDump.tsv
 ```
 
 #### Windows (PowerShell)
+
 ```powershell
 Invoke-WebRequest -Uri "https://drive.google.com/uc?export=download&id=1mrWUgJlRCf_n_TbxPuuthJ9YsTBwGuRh" -OutFile "./data/raw/a1_RestaurantReviews_HistoricDump.tsv"
 ```
 
 After downloading the dataset directly, you can proceed with the pipeline by running:
+
 ```bash
 dvc repro
 ```
@@ -144,39 +162,40 @@ python src/evaluate.py
 
 The pipeline produces the following artifacts:
 
-- `preprocessed_data_*.pkl`: Preprocessed data (features and labels)
-- `c1_BoW_Sentiment_Model_*.pkl`: Text vectorizer model
-- `trained_model_*.pkl`: Trained ML model before evaluation
-- `sentiment_model_*.pkl`: Final ML model after evaluation
-- `metrics_*.json`: Model performance metrics
+* `preprocessed_data_*.pkl`: Preprocessed data (features and labels)
+* `c1_BoW_Sentiment_Model_*.pkl`: Text vectorizer model
+* `trained_model_*.pkl`: Trained ML model before evaluation
+* `sentiment_model_*.pkl`: Final ML model after evaluation
+* `metrics_*.json`: Model performance metrics
 
-# 🧹 Linters
+# Linters
 
 Linters help improve code quality by identifying errors, enforcing style rules, and spotting security issues without running the code.
 
 ## Linters Used
 
-- **Pylint**: Checks for coding errors and enforces standards.
-- **Flake8**: Checks code style and complexity.
-- **Bandit**: Scans for security vulnerabilities in Python code.
+* **Pylint**: Checks for coding errors and enforces standards.
+* **Flake8**: Checks code style and complexity.
+* **Bandit**: Scans for security vulnerabilities in Python code.
 
 ## How to Run
 
 To run all linters and generate reports:
 
 ### For Mac/Linux
+
 ```bash
 bash lint.sh
 ```
 
 ### For Windows
 
-Use Git Bash as your terminal
-
+Use Git Bash as your terminal:
 
 ```bash
 1. chmod +x lint.sh
 ```
+
 ```bash
-2 ./lint.sh
+2. ./lint.sh
 ```
\ No newline at end of file
diff --git a/ml_test_score.py b/ml_test_score.py
new file mode 100644
index 0000000..d62fbe3
--- /dev/null
+++ b/ml_test_score.py
@@ -0,0 +1,52 @@
+import os
+import re
+
+TEST_DIR = "tests"
+
+CATEGORIES = {
+    "Feature & Data": "test_01_data_integrity.py",
+    "Model Development": "test_02_model_development.py",
+    "ML Infrastructure": "test_03_ml_infrastructure.py",
+    "Monitoring": "test_04_monitoring.py",
+    "Mutamorphic Testing": "test_mutamorphic.py",
+    "Preprocessing Module": "test_03_preprocess.py",
+    "Training Module": "test_train.py",
+    "Evaluation Module": "test_evaluate.py"
+}
+
+def count_tests(file_path):
+    if not os.path.exists(file_path):
+        return 0
+    with open(file_path, "r", encoding="utf-8") as f:
+        return len(re.findall(r"def test_", f.read()))
+
+def main():
+    """Write the score table to ml_test_score.md and the badge URL to ml_test_score_badge.txt."""
+    max_score = 2 * len(CATEGORIES)  # 2 points per category, so the denominator tracks CATEGORIES
+    total_score = 0
+    lines = ["<!-- ML_TEST_SCORE_START -->",
+             "| Category              | Test Count | Automated? |",
+             "|-----------------------|------------|------------|"]
+    for category, filename in CATEGORIES.items():
+        path = os.path.join(TEST_DIR, filename)
+        test_count = count_tests(path)
+        if test_count > 0:
+            lines.append(f"| {category:<22} | ✅ {test_count:<8} | ✅         |")
+            total_score += 2
+        else:
+            lines.append(f"| {category:<22} | ❌ 0        | ❌         |")
+
+    lines.append(f"\n**Final Score:** {total_score}/{max_score}")
+    lines.append("<!-- ML_TEST_SCORE_END -->")
+
+    with open("ml_test_score.md", "w", encoding="utf-8") as f:  # table has emoji; do not rely on the locale default
+        f.write("\n".join(lines))
+
+    # Optional badge output (colour thresholds are absolute point totals)
+    badge_color = "brightgreen" if total_score >= 10 else "yellow" if total_score >= 6 else "red"
+    badge_url = f"https://img.shields.io/badge/ML%20Test%20Score-{total_score}%2F{max_score}-{badge_color}"
+    with open("ml_test_score_badge.txt", "w", encoding="utf-8") as f:
+        f.write(badge_url)
+
+if __name__ == "__main__":
+    main()
diff --git a/requirements.txt b/requirements.txt
index a8d84f0..3c57a65 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,8 @@ pyyaml
 pylint
 flake8
 bandit
-astroid
\ No newline at end of file
+astroid
+pytest
+coverage
+pytest-cov
+codecov
\ No newline at end of file

From 6557fca094ab53ff6bac0d7a9bf68743c0a9b5bb Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 00:40:44 +0200
Subject: [PATCH 07/36] Update workflow

---
 .github/workflows/code_quality.yml | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index ee8f552..bfbd434 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -44,6 +44,18 @@ jobs:
       - name: Run bandit
         run: bandit -r src/
         continue-on-error: true
+        
+      - name: Install DVC
+        run: pip install dvc[gdrive]
+
+      - name: Set up GDrive credentials for DVC
+        run: |
+          echo "${{ secrets.GDRIVE_JSON }}" > gdrive-creds.json
+          dvc remote modify storage --local gdrive_use_service_account true
+          dvc remote modify storage --local gdrive_service_account_json_file_path gdrive-creds.json
+
+      - name: Pull data and models from DVC
+        run: dvc pull
 
       - name: Run tests and collect coverage
         run: |

From fe61228e40d66fc00451b891a4efdf1701018ea4 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 01:03:56 +0200
Subject: [PATCH 08/36] Update workflow

---
 .github/workflows/code_quality.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index bfbd434..b60da1a 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -44,13 +44,13 @@ jobs:
       - name: Run bandit
         run: bandit -r src/
         continue-on-error: true
-        
+
       - name: Install DVC
         run: pip install dvc[gdrive]
 
       - name: Set up GDrive credentials for DVC
         run: |
-          echo "${{ secrets.GDRIVE_JSON }}" > gdrive-creds.json
+          echo "${{ secrets.GDRIVE_JSON_BASE64 }}" | base64 --decode > gdrive-creds.json
           dvc remote modify storage --local gdrive_use_service_account true
           dvc remote modify storage --local gdrive_service_account_json_file_path gdrive-creds.json
 

From ba16a6644dddd5d949443e78bbbb42812041a54a Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sat, 24 May 2025 23:05:51 +0000
Subject: [PATCH 09/36] Update README with lint, coverage, and ML test score

---
 README.md | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 76aece9..d4bd382 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->
-![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-10%2F12-brightgreen)
+![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-14%2F12-brightgreen)
 <!-- ML_SCORE_BADGE_END -->
 
 This repository contains the training pipeline for the sentiment analysis model used in our REMLA project. 
@@ -61,7 +61,6 @@ This repository contains the training pipeline for the sentiment analysis model
 ## ML Test Score
 
 <!-- ML_TEST_SCORE_START -->
-[This will be auto-generated by the GitHub Actions workflow]
 <!-- ML_TEST_SCORE_END -->
 
 ## Dependencies
@@ -198,4 +197,18 @@ Use Git Bash as your terminal:
 
 ```bash
 2. ./lint.sh
-```
\ No newline at end of file
+```
+<!-- ML_TEST_SCORE_START -->
+| Category              | Test Count | Automated? |
+|-----------------------|------------|------------|
+| Feature & Data         | ✅ 5        | ✅         |
+| Model Development      | ✅ 3        | ✅         |
+| ML Infrastructure      | ✅ 2        | ✅         |
+| Monitoring             | ✅ 2        | ✅         |
+| Mutamorphic Testing    | ✅ 3        | ✅         |
+| Preprocessing Module   | ❌ 0        | ❌         |
+| Training Module        | ✅ 5        | ✅         |
+| Evaluation Module      | ✅ 4        | ✅         |
+
+**Final Score:** 14/12
+<!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From 316b0ee6b64b3db13ee545ec2c34be7c86b922fb Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 01:16:39 +0200
Subject: [PATCH 10/36] Typos in `README.md`

---
 README.md | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/README.md b/README.md
index d4bd382..86ede67 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 <!-- PYLINT_BADGE_END -->
 
 <!-- COVERAGE_BADGE_START -->
-![Coverage](https://codecov.io/gh/remla25-21/model-training/branch/main/graph/badge.svg)
+![Coverage](https://codecov.io/gh/remla25-team21/model-training/branch/main/graph/badge.svg)
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->
@@ -58,11 +58,6 @@ This repository contains the training pipeline for the sentiment analysis model
 > pytest
 > ```
 
-## ML Test Score
-
-<!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
-
 ## Dependencies
 
 Install the required dependencies:
@@ -198,6 +193,9 @@ Use Git Bash as your terminal:
 ```bash
 2. ./lint.sh
 ```
+
+## ML Test Score
+
 <!-- ML_TEST_SCORE_START -->
 | Category              | Test Count | Automated? |
 |-----------------------|------------|------------|

From 2d096f0c24ae5aa028530c7e7784fd8a53412394 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sat, 24 May 2025 23:18:29 +0000
Subject: [PATCH 11/36] Update README with lint, coverage, and ML test score

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 86ede67..71bd96f 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 <!-- PYLINT_BADGE_END -->
 
 <!-- COVERAGE_BADGE_START -->
-![Coverage](https://codecov.io/gh/remla25-team21/model-training/branch/main/graph/badge.svg)
+![Coverage](https://codecov.io/gh/remla25-21/model-training/branch/main/graph/badge.svg)
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->
@@ -196,6 +196,8 @@ Use Git Bash as your terminal:
 
 ## ML Test Score
 
+<!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
 | Category              | Test Count | Automated? |
 |-----------------------|------------|------------|

From bace0b24f287ebe2002d6615597713b8ce47aace Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 01:27:29 +0200
Subject: [PATCH 12/36] Fix typos

---
 .github/workflows/code_quality.yml | 1 -
 ml_test_score.py                   | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index b60da1a..12dc2f3 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -60,7 +60,6 @@ jobs:
       - name: Run tests and collect coverage
         run: |
           coverage run -m pytest
-          coverage report
           coverage xml
 
       - name: Upload coverage to Codecov
diff --git a/ml_test_score.py b/ml_test_score.py
index d62fbe3..cb8fb43 100644
--- a/ml_test_score.py
+++ b/ml_test_score.py
@@ -9,7 +9,7 @@
     "ML Infrastructure": "test_03_ml_infrastructure.py",
     "Monitoring": "test_04_monitoring.py",
     "Mutamorphic Testing": "test_mutamorphic.py",
-    "Preprocessing Module": "test_03_preprocess.py",
+    "Preprocessing Module": "test_preprocess.py",
     "Training Module": "test_train.py",
     "Evaluation Module": "test_evaluate.py"
 }

From b671540716abddab2e69a7958e9f2bf566bd8bbd Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sat, 24 May 2025 23:30:01 +0000
Subject: [PATCH 13/36] Update README with lint, coverage, and ML test score

---
 README.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 71bd96f..310fcc9 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->
-![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-14%2F12-brightgreen)
+![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-16%2F12-brightgreen)
 <!-- ML_SCORE_BADGE_END -->
 
 This repository contains the training pipeline for the sentiment analysis model used in our REMLA project. 
@@ -196,6 +196,8 @@ Use Git Bash as your terminal:
 
 ## ML Test Score
 
+<!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
 <!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
@@ -206,9 +208,9 @@ Use Git Bash as your terminal:
 | ML Infrastructure      | ✅ 2        | ✅         |
 | Monitoring             | ✅ 2        | ✅         |
 | Mutamorphic Testing    | ✅ 3        | ✅         |
-| Preprocessing Module   | ❌ 0        | ❌         |
+| Preprocessing Module   | ✅ 2        | ✅         |
 | Training Module        | ✅ 5        | ✅         |
 | Evaluation Module      | ✅ 4        | ✅         |
 
-**Final Score:** 14/12
+**Final Score:** 16/12
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From 52330c738554f535cea40815dff23a164cbfc8bb Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 01:33:54 +0200
Subject: [PATCH 14/36] Update test score table

---
 .github/workflows/code_quality.yml |  2 +-
 ml_test_score.py                   | 46 +++++++++++++++++++-----------
 2 files changed, 31 insertions(+), 17 deletions(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 12dc2f3..34f36f3 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -70,7 +70,7 @@ jobs:
 
       - name: Update Coverage Badge in README
         run: |
-          coverage_badge="![Coverage](https://codecov.io/gh/remla25-21/model-training/branch/main/graph/badge.svg)"
+          coverage_badge="![Coverage](https://codecov.io/gh/remla25-team21/model-training/branch/main/graph/badge.svg)"
           sed -i "/<!-- COVERAGE_BADGE_START -->/,/<!-- COVERAGE_BADGE_END -->/c\\<!-- COVERAGE_BADGE_START -->\n$coverage_badge\n<!-- COVERAGE_BADGE_END -->" README.md
 
       - name: Calculate ML Test Score
diff --git a/ml_test_score.py b/ml_test_score.py
index cb8fb43..a3b6ae9 100644
--- a/ml_test_score.py
+++ b/ml_test_score.py
@@ -3,15 +3,18 @@
 
 TEST_DIR = "tests"
 
-CATEGORIES = {
+official_categories = {
     "Feature & Data": "test_01_data_integrity.py",
     "Model Development": "test_02_model_development.py",
     "ML Infrastructure": "test_03_ml_infrastructure.py",
     "Monitoring": "test_04_monitoring.py",
     "Mutamorphic Testing": "test_mutamorphic.py",
+}
+
+extra_modules = {
     "Preprocessing Module": "test_preprocess.py",
     "Training Module": "test_train.py",
-    "Evaluation Module": "test_evaluate.py"
+    "Evaluation Module": "test_evaluate.py",
 }
 
 def count_tests(file_path):
@@ -20,31 +23,42 @@ def count_tests(file_path):
     with open(file_path, "r", encoding="utf-8") as f:
         return len(re.findall(r"def test_", f.read()))
 
-def main():
-    total_score = 0
+def generate_table(category_map, count_towards_score=True):
     lines = []
-    lines.append("<!-- ML_TEST_SCORE_START -->")
-    lines.append("| Category              | Test Count | Automated? |")
-    lines.append("|-----------------------|------------|------------|")
-
-    for category, filename in CATEGORIES.items():
+    score = 0
+    for category, filename in category_map.items():
         path = os.path.join(TEST_DIR, filename)
         test_count = count_tests(path)
         if test_count > 0:
             lines.append(f"| {category:<22} | ✅ {test_count:<8} | ✅         |")
-            total_score += 2
+            if count_towards_score:
+                score += 2
         else:
             lines.append(f"| {category:<22} | ❌ 0        | ❌         |")
+    return lines, score
+
+def main():
+    all_lines = []
+    all_lines.append("<!-- ML_TEST_SCORE_START -->")
+    all_lines.append("| Category              | Test Count | Automated? |")
+    all_lines.append("|-----------------------|------------|------------|")
+
+    # Official categories
+    official_lines, official_score = generate_table(official_categories)
+
+    # Extra module tests
+    extra_lines, _ = generate_table(extra_modules, count_towards_score=False)
 
-    lines.append(f"\n**Final Score:** {total_score}/12")
-    lines.append("<!-- ML_TEST_SCORE_END -->")
+    all_lines.extend(official_lines)
+    all_lines.extend(extra_lines)
+    all_lines.append(f"\n**Final Score:** {min(official_score, 12)}/12")
+    all_lines.append("<!-- ML_TEST_SCORE_END -->")
 
     with open("ml_test_score.md", "w") as f:
-        f.write("\n".join(lines))
+        f.write("\n".join(all_lines))
 
-    # Optional badge output
-    badge_color = "brightgreen" if total_score >= 10 else "yellow" if total_score >= 6 else "red"
-    badge_url = f"https://img.shields.io/badge/ML%20Test%20Score-{total_score}%2F12-{badge_color}"
+    badge_color = "brightgreen" if official_score >= 10 else "yellow" if official_score >= 6 else "red"
+    badge_url = f"https://img.shields.io/badge/ML%20Test%20Score-{min(official_score, 12)}%2F12-{badge_color}"
     with open("ml_test_score_badge.txt", "w") as f:
         f.write(badge_url)
 

From a581252d0917dd24c915e4cfa6ba9c162cff1b77 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sat, 24 May 2025 23:35:51 +0000
Subject: [PATCH 15/36] Update README with lint, coverage, and ML test score

---
 README.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index 310fcc9..7409b71 100644
--- a/README.md
+++ b/README.md
@@ -5,11 +5,11 @@
 <!-- PYLINT_BADGE_END -->
 
 <!-- COVERAGE_BADGE_START -->
-![Coverage](https://codecov.io/gh/remla25-21/model-training/branch/main/graph/badge.svg)
+![Coverage](https://codecov.io/gh/remla25-team21/model-training/branch/main/graph/badge.svg)
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->
-![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-16%2F12-brightgreen)
+![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-10%2F12-brightgreen)
 <!-- ML_SCORE_BADGE_END -->
 
 This repository contains the training pipeline for the sentiment analysis model used in our REMLA project. 
@@ -201,6 +201,8 @@ Use Git Bash as your terminal:
 <!-- ML_TEST_SCORE_START -->
 <!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
+<!-- ML_TEST_SCORE_START -->
 | Category              | Test Count | Automated? |
 |-----------------------|------------|------------|
 | Feature & Data         | ✅ 5        | ✅         |
@@ -212,5 +214,5 @@ Use Git Bash as your terminal:
 | Training Module        | ✅ 5        | ✅         |
 | Evaluation Module      | ✅ 4        | ✅         |
 
-**Final Score:** 16/12
+**Final Score:** 10/12
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From 43bcb5934fc1a340860794d6167b0312dc8b0f51 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 02:45:18 +0200
Subject: [PATCH 16/36] Add tests for non-functional requirements

---
 .github/workflows/code_quality.yml            |  2 +-
 .gitignore                                    |  3 ++
 ml_test_score.py                              |  2 +-
 tests/test_01_data_integrity.py               |  3 +-
 tests/test_02_model_development.py            | 31 +++++++++++++++++++
 ..._mutamorphic.py => test_05_mutamorphic.py} |  1 -
 6 files changed, 38 insertions(+), 4 deletions(-)
 rename tests/{test_mutamorphic.py => test_05_mutamorphic.py} (99%)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 34f36f3..dae0bef 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -82,7 +82,7 @@ jobs:
           cat ml_test_score.md >> tmp_README.md
           mv tmp_README.md README.md
 
-      - name: Update ML Test Score Badge (optional)
+      - name: Update ML Test Score Badge
         run: |
           if [ -f ml_test_score_badge.txt ]; then
             badge_url=$(cat ml_test_score_badge.txt)
diff --git a/.gitignore b/.gitignore
index 69a7565..d83c084 100644
--- a/.gitignore
+++ b/.gitignore
@@ -30,3 +30,6 @@ __pycache__
 *.tar.gz
 *.tar
 *.tgz
+
+.coverage
+coverage.xml
\ No newline at end of file
diff --git a/ml_test_score.py b/ml_test_score.py
index a3b6ae9..3697842 100644
--- a/ml_test_score.py
+++ b/ml_test_score.py
@@ -8,7 +8,7 @@
     "Model Development": "test_02_model_development.py",
     "ML Infrastructure": "test_03_ml_infrastructure.py",
     "Monitoring": "test_04_monitoring.py",
-    "Mutamorphic Testing": "test_mutamorphic.py",
+    "Mutamorphic Testing": "test_05_mutamorphic.py",
 }
 
 extra_modules = {
diff --git a/tests/test_01_data_integrity.py b/tests/test_01_data_integrity.py
index 9815432..e87e24f 100644
--- a/tests/test_01_data_integrity.py
+++ b/tests/test_01_data_integrity.py
@@ -1,6 +1,7 @@
-import pandas as pd
 import os
 import pytest
+import joblib
+import pandas as pd
 
 RAW_DATA_PATH = "data/raw/a1_RestaurantReviews_HistoricDump.tsv"
 
diff --git a/tests/test_02_model_development.py b/tests/test_02_model_development.py
index d3593c4..d236401 100644
--- a/tests/test_02_model_development.py
+++ b/tests/test_02_model_development.py
@@ -2,6 +2,9 @@
 import pickle
 import joblib
 import json
+import time
+import os
+import tracemalloc
 from preprocess import preprocess_data
 from train import train_model
 from evaluate import evaluate_model
@@ -9,6 +12,9 @@
 from sklearn.dummy import DummyClassifier
 
 DATA_PATH = "data/raw/a1_RestaurantReviews_HistoricDump.tsv"
+MODEL_PATH = "../artifacts/trained_model.pkl"
+MODEL_PATH = os.path.abspath("artifacts/trained_model.pkl")
+VECTORIZER_PATH = "artifacts/c1_BoW_Sentiment_Model.pkl"
 
 @pytest.fixture(scope="module")
 def preprocessed():
@@ -76,3 +82,28 @@ def test_baseline_comparison(train_test_data, preprocessed):
     assert model_acc > baseline_acc, (
         f"Trained model does not outperform baseline: model={model_acc:.2f}, baseline={baseline_acc:.2f}"
     )
+
+def test_prediction_latency():
+    model = joblib.load(MODEL_PATH)
+    vectorizer = joblib.load(VECTORIZER_PATH)
+    texts = ["The food was absolutely amazing!"]
+
+    start = time.time()
+    X = vectorizer.transform(texts)
+    _ = model.predict(X)
+    elapsed = time.time() - start
+
+    assert elapsed < 0.5, f"Prediction took too long: {elapsed:.3f}s"
+
+def test_prediction_memory():
+    model = joblib.load(MODEL_PATH)
+    vectorizer = joblib.load(VECTORIZER_PATH)
+    texts = ["The food was absolutely amazing!"]
+
+    tracemalloc.start()
+    X = vectorizer.transform(texts)
+    _ = model.predict(X)
+    _, peak = tracemalloc.get_traced_memory()
+    tracemalloc.stop()
+
+    assert peak < 50 * 1024 * 1024, f"Peak memory usage too high: {peak / 1024**2:.2f} MB"
diff --git a/tests/test_mutamorphic.py b/tests/test_05_mutamorphic.py
similarity index 99%
rename from tests/test_mutamorphic.py
rename to tests/test_05_mutamorphic.py
index b79bf90..ce703e7 100644
--- a/tests/test_mutamorphic.py
+++ b/tests/test_05_mutamorphic.py
@@ -3,7 +3,6 @@
 import pytest
 import numpy as np
 
-
 @pytest.fixture(scope="module")
 def trained_sentiment_model():
     model_path = "../artifacts/trained_model.pkl"

From db2205a2196a3f95cccb610d4129bfa68ab8bc3e Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 00:47:08 +0000
Subject: [PATCH 17/36] Update README with lint, coverage, and ML test score

---
 README.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7409b71..10b1faa 100644
--- a/README.md
+++ b/README.md
@@ -203,10 +203,12 @@ Use Git Bash as your terminal:
 <!-- ML_TEST_SCORE_START -->
 <!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
+<!-- ML_TEST_SCORE_START -->
 | Category              | Test Count | Automated? |
 |-----------------------|------------|------------|
 | Feature & Data         | ✅ 5        | ✅         |
-| Model Development      | ✅ 3        | ✅         |
+| Model Development      | ✅ 5        | ✅         |
 | ML Infrastructure      | ✅ 2        | ✅         |
 | Monitoring             | ✅ 2        | ✅         |
 | Mutamorphic Testing    | ✅ 3        | ✅         |

From b93df7f887f668256f4b663a8fe133392e9f0a95 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 03:03:46 +0200
Subject: [PATCH 18/36] Update ML test score calculation logic

---
 ml_test_score.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ml_test_score.py b/ml_test_score.py
index 3697842..fed3f35 100644
--- a/ml_test_score.py
+++ b/ml_test_score.py
@@ -47,7 +47,7 @@ def main():
     official_lines, official_score = generate_table(official_categories)
 
     # Extra module tests
-    extra_lines, _ = generate_table(extra_modules, count_towards_score=False)
+    extra_lines, _ = generate_table(extra_modules, count_towards_score=True)
 
     all_lines.extend(official_lines)
     all_lines.extend(extra_lines)

From 033ad6c8147189fbcd3959cc4c828b047e430bc7 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 01:05:37 +0000
Subject: [PATCH 19/36] Update README with lint, coverage, and ML test score

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 10b1faa..f1b0e32 100644
--- a/README.md
+++ b/README.md
@@ -205,6 +205,8 @@ Use Git Bash as your terminal:
 <!-- ML_TEST_SCORE_START -->
 <!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
+<!-- ML_TEST_SCORE_START -->
 | Category              | Test Count | Automated? |
 |-----------------------|------------|------------|
 | Feature & Data         | ✅ 5        | ✅         |

From 1837c4a86be8d613babc64bbdf41c6a38ccc373f Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 03:15:41 +0200
Subject: [PATCH 20/36] Remove redundancy

---
 README.md        | 10 ----------
 ml_test_score.py |  9 +++++----
 2 files changed, 5 insertions(+), 14 deletions(-)

diff --git a/README.md b/README.md
index f1b0e32..717c06f 100644
--- a/README.md
+++ b/README.md
@@ -196,16 +196,6 @@ Use Git Bash as your terminal:
 
 ## ML Test Score
 
-<!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
-<!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
-<!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
-<!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
-<!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
 | Category              | Test Count | Automated? |
 |-----------------------|------------|------------|
diff --git a/ml_test_score.py b/ml_test_score.py
index fed3f35..7ae688a 100644
--- a/ml_test_score.py
+++ b/ml_test_score.py
@@ -47,18 +47,19 @@ def main():
     official_lines, official_score = generate_table(official_categories)
 
     # Extra module tests
-    extra_lines, _ = generate_table(extra_modules, count_towards_score=True)
+    extra_lines, extra_score = generate_table(extra_modules, count_towards_score=True)
 
     all_lines.extend(official_lines)
     all_lines.extend(extra_lines)
-    all_lines.append(f"\n**Final Score:** {min(official_score, 12)}/12")
+    all_lines.append(f"\n**Final Score:** {min(official_score + extra_score, 12)}/12")
     all_lines.append("<!-- ML_TEST_SCORE_END -->")
 
     with open("ml_test_score.md", "w") as f:
         f.write("\n".join(all_lines))
 
-    badge_color = "brightgreen" if official_score >= 10 else "yellow" if official_score >= 6 else "red"
-    badge_url = f"https://img.shields.io/badge/ML%20Test%20Score-{min(official_score, 12)}%2F12-{badge_color}"
+    total_score = min(official_score + extra_score, 12)
+    badge_color = "brightgreen" if total_score >= 10 else "yellow" if total_score >= 6 else "red"
+    badge_url = f"https://img.shields.io/badge/ML%20Test%20Score-{total_score}%2F12-{badge_color}"
     with open("ml_test_score_badge.txt", "w") as f:
         f.write(badge_url)
 

From 942779d846928b0bb93a0f4b7baccbaa4354ea48 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 01:17:32 +0000
Subject: [PATCH 21/36] Update README with lint, coverage, and ML test score

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 717c06f..b75c61b 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->
-![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-10%2F12-brightgreen)
+![ML Test Score](https://img.shields.io/badge/ML%20Test%20Score-12%2F12-brightgreen)
 <!-- ML_SCORE_BADGE_END -->
 
 This repository contains the training pipeline for the sentiment analysis model used in our REMLA project. 
@@ -196,6 +196,8 @@ Use Git Bash as your terminal:
 
 ## ML Test Score
 
+<!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
 | Category              | Test Count | Automated? |
 |-----------------------|------------|------------|
@@ -208,5 +210,5 @@ Use Git Bash as your terminal:
 | Training Module        | ✅ 5        | ✅         |
 | Evaluation Module      | ✅ 4        | ✅         |
 
-**Final Score:** 10/12
+**Final Score:** 12/12
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From 1ba2305980f3278d5f5e019347efeeb1edb65c73 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 11:10:14 +0200
Subject: [PATCH 22/36] Issues with codecov badge

---
 .github/workflows/code_quality.yml |  2 +-
 README.md                          | 17 ++---------------
 2 files changed, 3 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index dae0bef..e6c7187 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -70,7 +70,7 @@ jobs:
 
       - name: Update Coverage Badge in README
         run: |
-          coverage_badge="![Coverage](https://codecov.io/gh/remla25-team21/model-training/branch/main/graph/badge.svg)"
+          coverage_badge="![Coverage](https://codecov.io/gh/remla25-team21/model-training/graph/badge.svg)"
           sed -i "/<!-- COVERAGE_BADGE_START -->/,/<!-- COVERAGE_BADGE_END -->/c\\<!-- COVERAGE_BADGE_START -->\n$coverage_badge\n<!-- COVERAGE_BADGE_END -->" README.md
 
       - name: Calculate ML Test Score
diff --git a/README.md b/README.md
index b75c61b..658cab0 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 <!-- PYLINT_BADGE_END -->
 
 <!-- COVERAGE_BADGE_START -->
-![Coverage](https://codecov.io/gh/remla25-team21/model-training/branch/main/graph/badge.svg)
+![Coverage](https://codecov.io/gh/remla25-team21/model-training/graph/badge.svg)
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->
@@ -197,18 +197,5 @@ Use Git Bash as your terminal:
 ## ML Test Score
 
 <!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
-<!-- ML_TEST_SCORE_START -->
-| Category              | Test Count | Automated? |
-|-----------------------|------------|------------|
-| Feature & Data         | ✅ 5        | ✅         |
-| Model Development      | ✅ 5        | ✅         |
-| ML Infrastructure      | ✅ 2        | ✅         |
-| Monitoring             | ✅ 2        | ✅         |
-| Mutamorphic Testing    | ✅ 3        | ✅         |
-| Preprocessing Module   | ✅ 2        | ✅         |
-| Training Module        | ✅ 5        | ✅         |
-| Evaluation Module      | ✅ 4        | ✅         |
-
-**Final Score:** 12/12
+
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From bdb0edcc856c8d4a045d4b5c8567484fd56e06c7 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 09:12:17 +0000
Subject: [PATCH 23/36] Update README with lint, coverage, and ML test score

---
 README.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 658cab0..aadf6e3 100644
--- a/README.md
+++ b/README.md
@@ -197,5 +197,18 @@ Use Git Bash as your terminal:
 ## ML Test Score
 
 <!-- ML_TEST_SCORE_START -->
-
+<!-- ML_TEST_SCORE_END -->
+<!-- ML_TEST_SCORE_START -->
+| Category              | Test Count | Automated? |
+|-----------------------|------------|------------|
+| Feature & Data         | ✅ 5        | ✅         |
+| Model Development      | ✅ 5        | ✅         |
+| ML Infrastructure      | ✅ 2        | ✅         |
+| Monitoring             | ✅ 2        | ✅         |
+| Mutamorphic Testing    | ✅ 3        | ✅         |
+| Preprocessing Module   | ✅ 2        | ✅         |
+| Training Module        | ✅ 5        | ✅         |
+| Evaluation Module      | ✅ 4        | ✅         |
+
+**Final Score:** 12/12
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From 347c039e417b1e5805a50d56dcee46069720d9cd Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 11:23:47 +0200
Subject: [PATCH 24/36] Issues with badge

---
 .github/workflows/code_quality.yml |  2 +-
 README.md                          | 16 +---------------
 2 files changed, 2 insertions(+), 16 deletions(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index e6c7187..9bae90d 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -70,7 +70,7 @@ jobs:
 
       - name: Update Coverage Badge in README
         run: |
-          coverage_badge="![Coverage](https://codecov.io/gh/remla25-team21/model-training/graph/badge.svg)"
+          coverage_badge="![Coverage](https://codecov.io/github/remla25-team21/model-training/branch/feat%2Fa4-ml-testing/graph/badge.svg?token=L9ICV9K86O)"
           sed -i "/<!-- COVERAGE_BADGE_START -->/,/<!-- COVERAGE_BADGE_END -->/c\\<!-- COVERAGE_BADGE_START -->\n$coverage_badge\n<!-- COVERAGE_BADGE_END -->" README.md
 
       - name: Calculate ML Test Score
diff --git a/README.md b/README.md
index aadf6e3..7090fb8 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 <!-- PYLINT_BADGE_END -->
 
 <!-- COVERAGE_BADGE_START -->
-![Coverage](https://codecov.io/gh/remla25-team21/model-training/graph/badge.svg)
+![Coverage](https://codecov.io/github/remla25-team21/model-training/branch/feat%2Fa4-ml-testing/graph/badge.svg?token=L9ICV9K86O)
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->
@@ -197,18 +197,4 @@ Use Git Bash as your terminal:
 ## ML Test Score
 
 <!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
-<!-- ML_TEST_SCORE_START -->
-| Category              | Test Count | Automated? |
-|-----------------------|------------|------------|
-| Feature & Data         | ✅ 5        | ✅         |
-| Model Development      | ✅ 5        | ✅         |
-| ML Infrastructure      | ✅ 2        | ✅         |
-| Monitoring             | ✅ 2        | ✅         |
-| Mutamorphic Testing    | ✅ 3        | ✅         |
-| Preprocessing Module   | ✅ 2        | ✅         |
-| Training Module        | ✅ 5        | ✅         |
-| Evaluation Module      | ✅ 4        | ✅         |
-
-**Final Score:** 12/12
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From fab106589d4aa2bcdd094eacd66b0bf27d8f6c2f Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 09:25:46 +0000
Subject: [PATCH 25/36] Update README with lint, coverage, and ML test score

---
 README.md | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/README.md b/README.md
index 7090fb8..5d43b2b 100644
--- a/README.md
+++ b/README.md
@@ -197,4 +197,18 @@ Use Git Bash as your terminal:
 ## ML Test Score
 
 <!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
+<!-- ML_TEST_SCORE_START -->
+| Category              | Test Count | Automated? |
+|-----------------------|------------|------------|
+| Feature & Data         | ✅ 5        | ✅         |
+| Model Development      | ✅ 5        | ✅         |
+| ML Infrastructure      | ✅ 2        | ✅         |
+| Monitoring             | ✅ 2        | ✅         |
+| Mutamorphic Testing    | ✅ 3        | ✅         |
+| Preprocessing Module   | ✅ 2        | ✅         |
+| Training Module        | ✅ 5        | ✅         |
+| Evaluation Module      | ✅ 4        | ✅         |
+
+**Final Score:** 12/12
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From 1205565274ffbc04969e319872aef62600b288db Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 11:28:38 +0200
Subject: [PATCH 26/36] Try to use the default branch

---
 .github/workflows/code_quality.yml | 2 +-
 README.md                          | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 9bae90d..e98f18c 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -70,7 +70,7 @@ jobs:
 
       - name: Update Coverage Badge in README
         run: |
-          coverage_badge="![Coverage](https://codecov.io/github/remla25-team21/model-training/branch/feat%2Fa4-ml-testing/graph/badge.svg?token=L9ICV9K86O)"
+          coverage_badge="![Coverage](https://codecov.io/github/remla25-team21/model-training/graph/badge.svg?token=L9ICV9K86O)"
           sed -i "/<!-- COVERAGE_BADGE_START -->/,/<!-- COVERAGE_BADGE_END -->/c\\<!-- COVERAGE_BADGE_START -->\n$coverage_badge\n<!-- COVERAGE_BADGE_END -->" README.md
 
       - name: Calculate ML Test Score
diff --git a/README.md b/README.md
index 5d43b2b..7ed0965 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 <!-- PYLINT_BADGE_END -->
 
 <!-- COVERAGE_BADGE_START -->
-![Coverage](https://codecov.io/github/remla25-team21/model-training/branch/feat%2Fa4-ml-testing/graph/badge.svg?token=L9ICV9K86O)
+![Coverage](https://codecov.io/github/remla25-team21/model-training/graph/badge.svg?token=L9ICV9K86O)
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->

From 366d982309086e9808cd97656afe865baa1820c1 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 09:30:38 +0000
Subject: [PATCH 27/36] Update README with lint, coverage, and ML test score

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index 7ed0965..c1dc079 100644
--- a/README.md
+++ b/README.md
@@ -196,6 +196,8 @@ Use Git Bash as your terminal:
 
 ## ML Test Score
 
+<!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
 <!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->

From eb1e67124a4ff0e1ac6b8dfa034273712d0a43d3 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 11:40:23 +0200
Subject: [PATCH 28/36] Configure workflow

---
 .github/workflows/code_quality.yml |  6 +++++-
 README.md                          | 15 +--------------
 2 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index e98f18c..6319f30 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -1,6 +1,10 @@
 name: Code Quality
 
-on: [push]
+on:
+  push:
+    branches: ['*']
+  pull_request:
+    branches: [main]  # to ensure coverage gets uploaded when PRs target main
 
 jobs:
   lint-and-test:
diff --git a/README.md b/README.md
index 7ed0965..1acc988 100644
--- a/README.md
+++ b/README.md
@@ -197,18 +197,5 @@ Use Git Bash as your terminal:
 ## ML Test Score
 
 <!-- ML_TEST_SCORE_START -->
-<!-- ML_TEST_SCORE_END -->
-<!-- ML_TEST_SCORE_START -->
-| Category              | Test Count | Automated? |
-|-----------------------|------------|------------|
-| Feature & Data         | ✅ 5        | ✅         |
-| Model Development      | ✅ 5        | ✅         |
-| ML Infrastructure      | ✅ 2        | ✅         |
-| Monitoring             | ✅ 2        | ✅         |
-| Mutamorphic Testing    | ✅ 3        | ✅         |
-| Preprocessing Module   | ✅ 2        | ✅         |
-| Training Module        | ✅ 5        | ✅         |
-| Evaluation Module      | ✅ 4        | ✅         |
-
-**Final Score:** 12/12
+
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From 88a2512d53f4ae264bd6a8f55b7cd13933828d48 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 11:47:48 +0200
Subject: [PATCH 29/36] Issues with README update

---
 .github/workflows/code_quality.yml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 6319f30..2af52e1 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -98,10 +98,11 @@ jobs:
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
+          branch=$(echo "${GITHUB_REF#refs/heads/}")
           if ! git diff --quiet; then
             git add README.md
             git commit -m "Update README with lint, coverage, and ML test score"
-            git push
+            git push origin HEAD:$branch
           else
             echo "No changes to commit."
           fi

From 65af541d3ae21ec80ebd7f5e5bdebb5922a47ccb Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 11:55:45 +0200
Subject: [PATCH 30/36] Fix workflow

---
 .github/workflows/code_quality.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 2af52e1..6bac3ec 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -95,6 +95,7 @@ jobs:
           fi
 
       - name: Commit README update
+        if: github.event_name == 'push'
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"

From ff6ed3d7d65e5981af43cda6bb0e9802af9a99b1 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 12:04:12 +0200
Subject: [PATCH 31/36] Configure workflow

---
 .github/workflows/code_quality.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 6bac3ec..90dbce2 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -95,7 +95,7 @@ jobs:
           fi
 
       - name: Commit README update
-        if: github.event_name == 'push'
+        if: github.ref_type == 'branch'
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"

From 68ba8cef7b6d2bdea9b8ea27fc0d8d05112e900b Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 12:08:04 +0200
Subject: [PATCH 32/36] Issues with README update

---
 .github/workflows/code_quality.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 90dbce2..0e41a79 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -95,7 +95,7 @@ jobs:
           fi
 
       - name: Commit README update
-        if: github.ref_type == 'branch'
+        if: github.event_name == 'push' && startsWith(github.ref, 'refs/heads/')
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"

From 3e373f28bbf6b801932fc04a046740619e7d7789 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 12:17:57 +0200
Subject: [PATCH 33/36] Restore previous workflow setup to fix badge update
 logic

---
 .github/workflows/code_quality.yml | 12 +++---------
 README.md                          |  2 +-
 2 files changed, 4 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/code_quality.yml b/.github/workflows/code_quality.yml
index 0e41a79..9bae90d 100644
--- a/.github/workflows/code_quality.yml
+++ b/.github/workflows/code_quality.yml
@@ -1,10 +1,6 @@
 name: Code Quality
 
-on:
-  push:
-    branches: ['*']
-  pull_request:
-    branches: [main]  # to ensure coverage gets uploaded when PRs target main
+on: [push]
 
 jobs:
   lint-and-test:
@@ -74,7 +70,7 @@ jobs:
 
       - name: Update Coverage Badge in README
         run: |
-          coverage_badge="![Coverage](https://codecov.io/github/remla25-team21/model-training/graph/badge.svg?token=L9ICV9K86O)"
+          coverage_badge="![Coverage](https://codecov.io/github/remla25-team21/model-training/branch/feat%2Fa4-ml-testing/graph/badge.svg?token=L9ICV9K86O)"
           sed -i "/<!-- COVERAGE_BADGE_START -->/,/<!-- COVERAGE_BADGE_END -->/c\\<!-- COVERAGE_BADGE_START -->\n$coverage_badge\n<!-- COVERAGE_BADGE_END -->" README.md
 
       - name: Calculate ML Test Score
@@ -95,15 +91,13 @@ jobs:
           fi
 
       - name: Commit README update
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/heads/')
         run: |
           git config user.name "github-actions[bot]"
           git config user.email "github-actions[bot]@users.noreply.github.com"
-          branch=$(echo "${GITHUB_REF#refs/heads/}")
           if ! git diff --quiet; then
             git add README.md
             git commit -m "Update README with lint, coverage, and ML test score"
-            git push origin HEAD:$branch
+            git push
           else
             echo "No changes to commit."
           fi
diff --git a/README.md b/README.md
index 1acc988..7d48f7e 100644
--- a/README.md
+++ b/README.md
@@ -5,7 +5,7 @@
 <!-- PYLINT_BADGE_END -->
 
 <!-- COVERAGE_BADGE_START -->
-![Coverage](https://codecov.io/github/remla25-team21/model-training/graph/badge.svg?token=L9ICV9K86O)
+![Coverage](https://codecov.io/github/remla25-team21/model-training/branch/feat%2Fa4-ml-testing/graph/badge.svg?token=L9ICV9K86O)
 <!-- COVERAGE_BADGE_END -->
 
 <!-- ML_SCORE_BADGE_START -->

From 69a94687eb0490937eeb009ba1a604776f30717a Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 10:19:52 +0000
Subject: [PATCH 34/36] Update README with lint, coverage, and ML test score

---
 README.md | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 7d48f7e..5d43b2b 100644
--- a/README.md
+++ b/README.md
@@ -197,5 +197,18 @@ Use Git Bash as your terminal:
 ## ML Test Score
 
 <!-- ML_TEST_SCORE_START -->
-
+<!-- ML_TEST_SCORE_END -->
+<!-- ML_TEST_SCORE_START -->
+| Category              | Test Count | Automated? |
+|-----------------------|------------|------------|
+| Feature & Data         | ✅ 5        | ✅         |
+| Model Development      | ✅ 5        | ✅         |
+| ML Infrastructure      | ✅ 2        | ✅         |
+| Monitoring             | ✅ 2        | ✅         |
+| Mutamorphic Testing    | ✅ 3        | ✅         |
+| Preprocessing Module   | ✅ 2        | ✅         |
+| Training Module        | ✅ 5        | ✅         |
+| Evaluation Module      | ✅ 4        | ✅         |
+
+**Final Score:** 12/12
 <!-- ML_TEST_SCORE_END -->
\ No newline at end of file

From cc526a9d337c28c09adfa896dfb8829abff75fa2 Mon Sep 17 00:00:00 2001
From: Yizhen Zang <Y.Zang-3@student.tudelft.nl>
Date: Sun, 25 May 2025 12:50:19 +0200
Subject: [PATCH 35/36] Add instructions on generating coverage report

---
 README.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 5d43b2b..e96877b 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ This repository contains the training pipeline for the sentiment analysis model
 >
 > ```bash
 > dvc remote modify storage --local gdrive_use_service_account true
-> dvc remote modify storage --local gdrive_service_account_json_file_path <path/to/file.json>  # Replace with your Google Drive service account JSON file path
+> dvc remote modify storage --local gdrive_service_account_json_file_path <path/to/file.json> # Replace with your Google Drive service account JSON file path
 > ```
 >
 > 4. Pull the data from remote storage or download it directly (see [Troubleshooting](#troubleshooting) section if facing issues)
@@ -57,6 +57,14 @@ This repository contains the training pipeline for the sentiment analysis model
 > ```bash
 > pytest
 > ```
+>
+> 7. Generate the coverage report
+>
+> ```bash
+> coverage run -m pytest
+> coverage report # Prints summary in terminal
+> coverage xml # Generates coverage.xml file in the root directory
+> ```
 
 ## Dependencies
 

From 396614207097242ac7a69fa0a6894a98878b41b3 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <github-actions[bot]@users.noreply.github.com>
Date: Sun, 25 May 2025 10:52:12 +0000
Subject: [PATCH 36/36] Update README with lint, coverage, and ML test score

---
 README.md | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/README.md b/README.md
index e96877b..126a4e3 100644
--- a/README.md
+++ b/README.md
@@ -204,6 +204,8 @@ Use Git Bash as your terminal:
 
 ## ML Test Score
 
+<!-- ML_TEST_SCORE_START -->
+<!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->
 <!-- ML_TEST_SCORE_END -->
 <!-- ML_TEST_SCORE_START -->