This repository was archived by the owner on Sep 3, 2025. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
Extend ML Testing #8
Merged
Merged
Changes from all commits
Commits
Show all changes
38 commits
Select commit
Hold shift + click to select a range
12480ee
Add test for Feature and Data Integrity
cactusutcac 7168e6a
Add tests for Model Development
cactusutcac 6103954
Add tests for ML Infrastructure and Monitoring
cactusutcac cbdbf76
Order the tests
cactusutcac bf595d1
Structure the mutamorphic tests
cactusutcac 2858165
Configure workflow
cactusutcac 6557fca
Update workflow
cactusutcac fe61228
Update workflow
cactusutcac ba16a66
Update README with lint, coverage, and ML test score
github-actions[bot] 316b0ee
Typos in `README.md`
cactusutcac 2d096f0
Update README with lint, coverage, and ML test score
github-actions[bot] bace0b2
Fix typos
cactusutcac a71b922
Merge branch 'feat/a4-ml-testing' of https://github.com/remla25-team2…
cactusutcac b671540
Update README with lint, coverage, and ML test score
github-actions[bot] 52330c7
Update test score table
cactusutcac a581252
Update README with lint, coverage, and ML test score
github-actions[bot] 43bcb59
Add tests for non-functional requirements
cactusutcac db2205a
Update README with lint, coverage, and ML test score
github-actions[bot] b93df7f
Update ML test score calculation logic
cactusutcac 033ad6c
Update README with lint, coverage, and ML test score
github-actions[bot] 1837c4a
Remove redundancy
cactusutcac 942779d
Update README with lint, coverage, and ML test score
github-actions[bot] 1ba2305
Issues with codecov badge
cactusutcac bdb0edc
Update README with lint, coverage, and ML test score
github-actions[bot] 347c039
Issues with badge
cactusutcac fab1065
Update README with lint, coverage, and ML test score
github-actions[bot] 1205565
Try to use the default branch
cactusutcac 366d982
Update README with lint, coverage, and ML test score
github-actions[bot] eb1e671
Configure workflow
cactusutcac 1e36aa1
Merge branch 'feat/a4-ml-testing' of https://github.com/remla25-team2…
cactusutcac 88a2512
Issues with README update
cactusutcac 65af541
Fix workflow
cactusutcac ff6ed3d
Configure workflow
cactusutcac 68ba8ce
Issues with README update
cactusutcac 3e373f2
Restore previous workflow setup to fix badge update logic
cactusutcac 69a9468
Update README with lint, coverage, and ML test score
github-actions[bot] cc526a9
Add instructions on generating coverage report
cactusutcac 3966142
Update README with lint, coverage, and ML test score
github-actions[bot] File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -30,3 +30,6 @@ __pycache__ | |
| *.tar.gz | ||
| *.tar | ||
| *.tgz | ||
|
|
||
| .coverage | ||
| coverage.xml | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,67 @@ | ||
| import os | ||
| import re | ||
|
|
||
# Directory, relative to the working directory, that holds the test modules.
TEST_DIR = "tests"

# Score-table categories, each mapped to the test module (looked up inside
# TEST_DIR) whose tests back that category.
official_categories = {
    "Feature & Data": "test_01_data_integrity.py",
    "Model Development": "test_02_model_development.py",
    "ML Infrastructure": "test_03_ml_infrastructure.py",
    "Monitoring": "test_04_monitoring.py",
    "Mutamorphic Testing": "test_05_mutamorphic.py",
}

# Additional per-module test suites reported in the same table. main() passes
# this map with count_towards_score=True, so these rows add to the score too.
extra_modules = {
    "Preprocessing Module": "test_preprocess.py",
    "Training Module": "test_train.py",
    "Evaluation Module": "test_evaluate.py",
}
|
|
||
| def count_tests(file_path): | ||
| if not os.path.exists(file_path): | ||
| return 0 | ||
| with open(file_path, "r", encoding="utf-8") as f: | ||
| return len(re.findall(r"def test_", f.read())) | ||
|
|
||
def generate_table(category_map, count_towards_score=True):
    """Build Markdown table rows and a score for a set of test categories.

    Args:
        category_map: Mapping of category label to the test module filename
            (resolved inside ``TEST_DIR``) that backs it.
        count_towards_score: When true, every category with at least one test
            contributes 2 points; when false the returned score stays 0.

    Returns:
        A ``(rows, score)`` tuple: one Markdown row per category, in mapping
        order, and the accumulated score.
    """
    rows = []
    points = 0
    points_per_category = 2 if count_towards_score else 0
    for label, test_file in category_map.items():
        n_tests = count_tests(os.path.join(TEST_DIR, test_file))
        if n_tests <= 0:
            # Missing file or no tests found: report the category as uncovered.
            rows.append(f"| {label:<22} | ❌ 0 | ❌ |")
            continue
        rows.append(f"| {label:<22} | ✅ {n_tests:<8} | ✅ |")
        points += points_per_category
    return rows, points
|
|
||
def main():
    """Generate the ML Test Score report and its badge URL.

    Writes two files into the current working directory:

    * ``ml_test_score.md`` - a Markdown table, wrapped in
      ``ML_TEST_SCORE_START`` / ``ML_TEST_SCORE_END`` marker comments, with
      one row per category followed by the final score.
    * ``ml_test_score_badge.txt`` - a shields.io badge URL showing the score,
      coloured by threshold (>= 10 brightgreen, >= 6 yellow, otherwise red).

    The score is the sum of the official and extra-module scores, capped at 12.
    """
    # Official categories
    official_lines, official_score = generate_table(official_categories)

    # Extra module tests
    extra_lines, extra_score = generate_table(extra_modules, count_towards_score=True)

    # Compute the capped score once so the report and the badge cannot diverge.
    total_score = min(official_score + extra_score, 12)

    all_lines = [
        "<!-- ML_TEST_SCORE_START -->",
        "| Category | Test Count | Automated? |",
        "|-----------------------|------------|------------|",
        *official_lines,
        *extra_lines,
        f"\n**Final Score:** {total_score}/12",
        "<!-- ML_TEST_SCORE_END -->",
    ]

    # The rows contain non-ASCII check/cross marks, so the encoding must be
    # explicit: the platform default may be unable to encode them.
    with open("ml_test_score.md", "w", encoding="utf-8") as f:
        f.write("\n".join(all_lines))

    if total_score >= 10:
        badge_color = "brightgreen"
    elif total_score >= 6:
        badge_color = "yellow"
    else:
        badge_color = "red"
    badge_url = f"https://img.shields.io/badge/ML%20Test%20Score-{total_score}%2F12-{badge_color}"
    with open("ml_test_score_badge.txt", "w", encoding="utf-8") as f:
        f.write(badge_url)


if __name__ == "__main__":
    main()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,3 +1,4 @@ | ||
| # telling pytest to add src to PYTHONPATH automatically | ||
| [pytest] | ||
| pythonpath = src | ||
| pythonpath = src | ||
| testpaths = tests |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -8,4 +8,8 @@ pyyaml | |
| pylint | ||
| flake8 | ||
| bandit | ||
| astroid | ||
| astroid | ||
| pytest | ||
| coverage | ||
| pytest-cov | ||
| codecov | ||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,38 @@ | ||
| import os | ||
| import pytest | ||
| import joblib | ||
| import pandas as pd | ||
|
|
||
# Location of the raw review dump, relative to the working directory.
RAW_DATA_PATH = "data/raw/a1_RestaurantReviews_HistoricDump.tsv"


@pytest.fixture(scope="module")
def raw_data():
    """Provide the raw review dataset as a DataFrame, loaded once per module.

    Fails with an assertion error if the data file is absent. The file is read
    as tab-separated and surrounding whitespace is stripped from the column
    names before the frame is returned.
    """
    data_file_present = os.path.exists(RAW_DATA_PATH)
    assert data_file_present, f"Data file not found at {RAW_DATA_PATH}"
    reviews = pd.read_csv(RAW_DATA_PATH, sep="\t")
    stripped_names = reviews.columns.str.strip()
    reviews.columns = stripped_names
    return reviews
|
|
||
| def test_column_schema(raw_data): | ||
| """Check that expected columns exist""" | ||
| expected = {'Review', 'Liked'} | ||
| actual = set(raw_data.columns) | ||
| missing = expected - actual | ||
| assert not missing, f"Missing expected columns: {missing}" | ||
|
|
||
| def test_no_missing_values(raw_data): | ||
| """Ensure no nulls in important columns""" | ||
| for col in ['Review', 'Liked']: | ||
| assert raw_data[col].isnull().sum() == 0, f"Missing values found in {col}" | ||
|
|
||
| def test_liked_label_values(raw_data): | ||
| """Ensure 'Liked' is binary (0 or 1)""" | ||
| assert raw_data['Liked'].isin([0, 1]).all(), "'Liked' column contains non-binary values" | ||
|
|
||
| def test_review_length(raw_data): | ||
| """Check that Review has sufficient length""" | ||
| assert raw_data['Review'].str.len().gt(10).all(), "Some reviews are too short" | ||
|
|
||
| def test_exact_duplicate_rows(raw_data): | ||
| """Check for fully duplicated rows with same Review and Liked""" | ||
| duplicates = raw_data.duplicated().sum() | ||
| assert duplicates <= 10, f"Unusual number of exact duplicate rows: {duplicates}" |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice fix! I forgot to add this back after changing the authentication method to service account