Skip to content

Commit

Permalink
new tool to automate group info extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
SkafteNicki committed Jan 21, 2025
1 parent 6d1081c commit 9bd5134
Show file tree
Hide file tree
Showing 3 changed files with 240 additions and 0 deletions.
89 changes: 89 additions & 0 deletions tools/learn_automate/grouped_students_with_links.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
group_nb,student 1,student 2,student 3,student 4,student 5,repository_link
1,s214654,s214610,s237213,https://github.com/albert-moller/MLOpsGroup1
2,s170422,s174152,s194277,s233595,s244492,https://github.com/ZSoleimani/Group-2-mlops-classification
3,s194645,s204605,s204070,s204623,s204617,https://github.com/Fiehn/wiki-classification-mlops
4,s242580,s242591,s246415,s113117,https://github.com/radugrecu97/dtu_mlops_projec
5,s214634,s214605,s214641,s214592,https://github.com/Simo067m/02476_mlops_S25
6,s194572,s214786,s216143,https://github.com/jacobtuxen/MLOPS_DDPM_G6
7,s204145,s230208,s232883,s232924,https://github.com/MrCogito/mlops_dtu_2025_project7
8,huwan,s232189,s232213,s232258,https://github.com/SherryLiu3670/MLops_2025_group8
9,s224775,s224758,s224762,s224773,https://github.com/Hannahtersbol/MLOps
10,s204440,s204504,s243093,s243074,https://github.com/Rasmusdas/chess-board-recognizer
11,s230354,s232793,s233483,https://github.com/LuigiElo/MLPOps
12,jonnil,s233480,s233489,s091969,https://github.com/HackTheOxidation/dtu_mlops_projec
13,s232462,s233095,s240376,https://github.com/yoghurtina/mlops
14,s204218,s204225,s232735,s241645,s242766,https://github.com/arnormoncada/mlops-plant-seedlings
15,s243660,s243933,s243896,s243906,https://github.com/HubertWojcik10/mlops_projec
16,s201189,s214624,s214925,s214940,s216163,https://github.com/Andreas-Sjogren-Furst/ML-OPS-Healthy-vs-Rotten
17,s204654,s204646,s204658,s223517,https://github.com/NusretSalli/dtu_mlops_projec
18,s203957,s232253,s232472,s232414,s243075,https://github.com/hiXixi66/MLOPs_Projec
19,s181486,s194142,s204114,s204243,s204131,https://github.com/nielsenchristianole/mlops_oracle_mnis
20,s224184,s224228,s225786,https://github.com/RasDTU02/02476_FinalProject_Group20
21,s204081,s241981,s241982,https://github.com/vidisha04/MLops-image-classification
22,s214968,s214952,s214963,https://github.com/ThomasSchiolerH/MLOps-Projec
23,s193624,s205336,s204748,s243345,sarste,https://github.com/karlfindhansen/MLOps_Jan2025
24,s201725,s224411,s224397,https://github.com/ChrisVN123/02476_mlops_exam
25,s214655,s214629,s216135,https://github.com/LenaRibena/point_cloud_tree_generation
26,s193992,s215225,s215158,s215133,s242507,https://github.com/Jacopo00811/group26-mlops-project/tree/master
27,diar,s232165,s232515,s233791,s246714,https://github.com/MlOps-DTU/rice-classification
28,s232458,s232469,s233025,s233576,s243169,https://github.com/nandortakacs/face_classification/blob/main/README.md
29,s241925,s242186,s243280,s242906,s243299,https://github.com/mariatmvendas/mlops_projec
30,s232946,s233128,s233148,s233500,s234065,https://github.com/kubekj/mlops_projec
31,s204084,s204150,s240154,s240076,s240056,https://github.com/iasonrap/mlops31
32,s241047,s243805,s250394,s250678,https://github.com/segiITU/dtu_mlops_group32_projec
33,s147082,s205420,s242640,s140035,https://github.com/chen0046/MLOPS_GROUP_33
34,s214625,s214636,s224751,s224803,s224743,https://github.com/MarcusElkjaer/MLOPS
35,s204133,s242540,s242539,s250069,https://github.com/Sunray0466/Project-MLOps
36,s194242,s195398,s243077,s247157,s250797,https://github.com/mmmmaja/starfishDetection
37,s183540,s214238,s233190,s233664,https://github.com/frgorczyca/mlops_project_group_37
38,s201680,s214585,s214647,s214611,https://github.com/gustavn19/MLOps_2025
39,s233162,s233185,s233177,s233564,https://github.com/AfonsoCunha22/MLOpsProjec
40,s183587,s232892,s242066,s242644,s134620,https://github.com/wen2720/group40_dtumlops
41,s180351,s195849,s233353,https://github.com/Gyllz98/pixel_art_diffusion
42,s204052,s214696,s214596,s214638,s214609,https://github.com/KarolineKlan/mlops_nlp
43,s204470,s204424,s204510,https://github.com/niller-g/mlops
44,s193473,s215221,s215231,s215160,s215210,https://github.com/rasmus11423/10_Animal_Classification
45,s242962,s242965,s243296,https://github.com/4Parasonic2/WeVibeMLops
46,s204462,s204684,s233022,s234061,https://github.com/moorekevin/dtu-02476-mlops-project/tree/main
47,s203520,s203581,s203512,s204201,https://github.com/Las02/ml_ops_projec
48,s232507,jalfe,s233142,s233514,s240451,https://github.com/jesusdper/DTU_MLOpsProject/tree/main
49,s204606,s204618,s204621,s214659,s214983,https://github.com/ThorxNxEriksen/ml_ops_02476
50,bjanil,s232425,s233670,s233671,s242577,https://github.com/evittaka/MLOps_catsvsdogs/tree/main
51,s250338,https://github.com/johanna-einsiedler/SoDaOps
53,s242529,s242672,s242954,s243575,s243600,https://github.com/moskitoo/mlops_projec
54,s231255,s234803,s234866,https://github.com/miickii/mlops_projec
55,s154097,s250393,s251116,https://github.com/bertramhojer/mlops-55
56,s203729,s234867,s234829,s236120,https://github.com/Meinert67/mlops_project_fiftysix
57,s194633,s232531,s233249,s233347,https://github.com/erir11/ml_ops_projec
58,s234830,s234823,s234865,https://github.com/tob-euro/02476-Machine-Learning-Operations-Project/tree/main
60,s200513,s241878,s242850,s242804,s243216,https://github.com/siderishub/Mlops-group-60
61,s215002,s214987,s216169,s224190,https://github.com/MagnusBeng2/dtu_mlops_group_61
64,s216136,s234802,s234842,s234854,https://github.com/jonathantybirk/Machine-Yearning-Project-64
65,s241961,s242926,s243416,s244469,https://github.com/luyentru/MLOps/
66,s204103,s204256,s233557,s233559,s243739,https://github.com/GeorgeGeorgioy/MLOps_group_66
67,s204701,sofca,s225083,https://github.com/Cwarburg/MLOPS-group-67
68,s195901,s204431,s240466,s240577,s243867,https://github.com/s204431/mlops-CAPTCHA/
69,s185034,s194045,s200621,s203953,s240398,https://github.com/andrespat01/dtu_mlops_69
70,s204118,s246710,s250088,s250283,https://github.com/lkorinek/mlops-projec
71,s243123,s243133,s243124,s243125,https://github.com/dtu-dl-project/mlops-projec
72,s204259,s204204,s224205,s224235,s224227,https://github.com/EmilieNilsson1/mlops_projec
73,s232775,s233219,s240661,s242781,https://github.com/jofreb/mlops_project/tree/main
74,s240033,s243121,s242943,https://github.com/MarlonBando/Chromify/blob/main/README.md
75,s223750,s234800,s234846,https://github.com/RasmusArnmark/mlops_projec
76,s220006,s230262,s243183,s242947,s246494,https://github.com/aliberkgezgin/MLOPS-76-Projec
77,valfr,s214727,s214681,s214743,https://github.com/Lachrynier/mlops-projec
80,s232811,s232812,s244501,https://github.com/JohnBBB42/dtu_mlops_group_80
83,s203768,s205421,s203822,s205717,s243266,https://github.com/martin5709/Group83-MLOps-02476
84,s204540,s204281,s204558,https://github.com/thomas-tams/mlops_02476_projec
86,s234811,s234873,s234825,https://github.com/philipkierkegaard/english_french_translation
88,s232291,s242519,s242597,https://github.com/ZhuMuMu0216/MLOps
90,s224360,s224401,s224388,https://github.com/Mariooose/mlops_group90
92,s225526,s243418,s242779,s244086,https://github.com/paulobeckhauser/mlops_finances
93,obola,fmfsa,https://github.com/MLSM-at-DTU/ml_metamodels
94,s224176,s224229,No Link Found
95,s201700,s204426,s232773,s144463,https://github.com/sebakeaaen/ml-ops-projec
96,s223481,s242816,s242796,s242798,s246733,https://github.com/kostistzim/Plant_Leaves_Classification_MLOps_DTU02476
97,s193602,s203557,s203572,s203788,https://github.com/anto1282/G-WEB_Fraud_Detection
98,s194613,s194119,s204216,s204227,https://github.com/ludvikpet/Group98_MLOps
99,s220235,s224225,s224197,https://github.com/wkandersen/MLOps_99
100,s214739,s214731,s214742,s214735,s214733,https://github.com/s214735/MLOps-project100
147 changes: 147 additions & 0 deletions tools/learn_automate/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import csv
import os
import shutil
import time
import zipfile
from collections import defaultdict
from pathlib import Path

import typer
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from playwright.sync_api import sync_playwright

# Load credentials from a local .env file. override=True lets the values in
# .env win over variables that already exist in the process environment;
# without it, the USER variable that Unix-like shells export (the OS login
# name) would shadow the username stored in .env.
load_dotenv(override=True)

# Credentials used to sign in; either may be None when the variable is unset.
USER = os.getenv("USER")
PASSWORD = os.getenv("PASSWORD")


def download_from_learn(course: str) -> tuple[str, str]:
    """Download the group membership and project repositories from DTU Learn.

    Drives a visible Firefox window through the DTU Learn UI: signs in with the
    module-level ``USER``/``PASSWORD`` credentials, exports the grades as CSV
    and downloads every submission of the "Project repository link" assignment.
    Both downloads are saved in the current working directory under the file
    names suggested by the server.

    Args:
        course: Course identifier inserted into the ``/d2l/home/{course}`` URL.

    Returns:
        A ``(grades_export, submissions_download)`` tuple of file names,
        relative to the current working directory.

    Raises:
        RuntimeError: If ``USER`` or ``PASSWORD`` is not set.
    """
    # Fail early with a readable message instead of an opaque error from the
    # browser automation when the login form is filled with a missing value.
    if not USER or not PASSWORD:
        raise RuntimeError("USER and PASSWORD must be set in the environment or in a .env file")

    target_dir = Path.cwd()
    with sync_playwright() as p:
        browser = p.firefox.launch(headless=False)
        context = browser.new_context()
        page = context.new_page()

        # Sign in.
        page.goto(f"https://learn.inside.dtu.dk/d2l/home/{course}")
        page.get_by_placeholder("User").fill(USER)
        page.get_by_placeholder("Password").fill(PASSWORD)
        page.get_by_role("button", name="Sign in").click()

        # Export the grades (including group membership) as CSV.
        page.get_by_role("button", name="My Course").click()
        page.get_by_role("link", name="Grades").click()
        page.get_by_role("link", name="Enter Grades").click()
        page.get_by_role("button", name="Export").click()
        page.get_by_label("Both").check()
        page.get_by_role("button", name="Export to CSV").click()
        with page.expect_download() as download1_info:
            page.get_by_role("button", name="Download").click()
        download1 = download1_info.value
        download1.save_as(target_dir / download1.suggested_filename)
        page.get_by_role("button", name="Close").click()
        page.get_by_role("button", name="Cancel").click()

        # Download all submissions of the repository-link assignment.
        page.get_by_role("link", name="Assignments").click()
        page.get_by_role("link", name="Project repository link").click()
        page.get_by_role("checkbox", name="Select all rows").check()
        page.get_by_role("button", name="Download").click()
        time.sleep(2)  # for some reason, the download doesn't work without this delay
        with page.expect_download() as download2_info:
            page.get_by_role("button", name="Download").click()
        download2 = download2_info.value
        download2.save_as(target_dir / download2.suggested_filename)
        page.get_by_role("button", name="Close").click()

        context.close()
        browser.close()
    return download1.suggested_filename, download2.suggested_filename


def _group_sort_key(label: str) -> tuple[int, int]:
    """Sort key placing numerically labelled groups first, ordered by number."""
    tokens = label.split()
    if tokens and tokens[-1].isdigit():
        return (0, int(tokens[-1]))
    # Labels without a trailing number keep their relative order at the end.
    return (1, 0)


def create_grouped_csv(download1: str) -> None:
    """Create a grouped CSV file from the downloaded group membership.

    Reads the export at ``download1``, collects the ``Username`` of every row
    that has a non-empty ``Project Groups`` value, and writes
    ``grouped_students.csv`` to the current working directory with one row per
    group, ordered by group number.

    Every written row has exactly six fields (group label plus five student
    slots). Groups with fewer than five students are padded with empty strings
    so that columns appended later line up with the header; students beyond the
    fifth are dropped.

    Args:
        download1: Path of the exported CSV file to read.
    """
    max_students = 5
    groups = defaultdict(list)
    with open(download1, encoding="utf-8", newline="") as file:
        for row in csv.DictReader(file):
            group = row["Project Groups"]
            if not group:  # Only consider rows with a project group
                continue
            # Extract the student ID, removing the '#' if present
            username = row["Username"].lstrip("#")
            # removeprefix drops the literal "MLOPS" prefix; str.strip("MLOPS ")
            # would instead strip any of those characters from both ends.
            label = group.strip().removeprefix("MLOPS").strip()
            groups[label].append(username)

    # Sort groups by numeric value of group number
    sorted_groups = sorted(groups.items(), key=lambda item: _group_sort_key(item[0]))

    # Write the transformed data
    with open("grouped_students.csv", mode="w", encoding="utf-8", newline="") as file:
        writer = csv.writer(file)
        writer.writerow(["group_nb", "student 1", "student 2", "student 3", "student 4", "student 5"])
        for group, students in sorted_groups:
            # Limit to 5 students per group and pad short groups to full width
            members = students[:max_students]
            members += [""] * (max_students - len(members))
            writer.writerow([group, *members])


def _collect_group_links(extracted_dir: Path) -> dict[str, str]:
    """Map each group number to the first link found in its submission folder."""
    group_links: dict[str, str] = {}
    for folder in extracted_dir.iterdir():
        if not folder.is_dir():
            continue
        # The group label is the second " - "-separated part of the folder
        # name; folders that do not follow that layout are skipped.
        parts = folder.name.split(" - ")
        if len(parts) < 2:
            continue
        # removeprefix drops the literal "MLOPS" prefix, whereas
        # str.strip("MLOPS ") strips any of those characters from both ends.
        group_number = parts[1].strip().removeprefix("MLOPS").strip()
        for file in folder.iterdir():
            if file.suffix != ".html":
                continue
            with open(file, encoding="utf-8") as html_file:
                soup = BeautifulSoup(html_file, "html.parser")
            link_tag = soup.find("a", href=True)
            if link_tag:
                # removesuffix removes exactly one trailing ".git";
                # str.rstrip(".git") would also eat trailing '.', 'g', 'i'
                # and 't' characters of the repository name itself
                # (e.g. "mlops_project" -> "mlops_projec").
                group_links[group_number] = link_tag["href"].removesuffix(".git")
    return group_links


def main(
    course: str = typer.Argument(..., help="The course code"),
    clean: bool = typer.Option(True, help="Clean the extracted files"),
) -> None:
    """Automatically download group membership and project repositories from DTU Learn."""
    download1, download2 = download_from_learn(course)

    # Build grouped_students.csv from the membership export; it is read below
    # and removed again during cleanup.
    create_grouped_csv(download1)

    extracted_dir = Path("extracted_files")
    extracted_dir.mkdir(exist_ok=True)
    with zipfile.ZipFile(download2, "r") as zip_ref:
        zip_ref.extractall(extracted_dir)

    group_links = _collect_group_links(extracted_dir)

    grouped_csv_path = "grouped_students.csv"
    updated_csv_path = "grouped_students_with_links.csv"
    total_students = 0
    total_groups = 0

    with (
        open(grouped_csv_path, encoding="utf-8") as infile,
        open(updated_csv_path, mode="w", encoding="utf-8", newline="") as outfile,
    ):
        reader = csv.reader(infile)
        writer = csv.writer(outfile)

        # Add a new column header
        headers = next(reader)
        student_columns = len(headers) - 1  # every column except the group number
        headers.append("repository_link")
        writer.writerow(headers)

        # Update rows with links
        for row in reader:
            if not row:
                continue
            students = row[1 : student_columns + 1]
            total_students += sum(1 for student in students if student)
            group_number = row[0].split()[-1]  # Extract the numeric part of group number
            repo_link = group_links.get(group_number, "No Link Found")
            # Pad short groups so the link always lands in the
            # "repository_link" column instead of a student column.
            padding = [""] * (student_columns - len(students))
            writer.writerow([row[0], *students, *padding, repo_link])

            total_groups += 1

    print(f"Updated CSV file saved to: {updated_csv_path}")

    # Print totals
    print(f"Total number of students: {total_students}")
    print(f"Total number of groups: {total_groups}")

    if clean:
        shutil.rmtree(extracted_dir)
        Path("grouped_students.csv").unlink()
        Path(download1).unlink()
        Path(download2).unlink()


# Script entry point: hand `main` to typer so it is run as a command-line program.
if __name__ == "__main__":
    typer.run(main)
4 changes: 4 additions & 0 deletions tools/learn_automate/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
playwright==1.49.1
python-dotenv==1.0.1
beautifulsoup4==4.12.3
typer==0.15.1

0 comments on commit 9bd5134

Please sign in to comment.