Skip to content

Commit f5b1dff

Browse files
committed
fix: strict data types when importing courses
1 parent 3f73c3f commit f5b1dff

File tree

3 files changed

12 additions and 3 deletions (+12 / -3 lines changed)

3 files changed

+12
-3
lines changed

docs/db_diagram.pdf

-3 Bytes
Binary file not shown.

ferry/database/models.py

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -415,6 +415,7 @@ class Flag(BaseModel):
415415
"course_id",
416416
ForeignKey("courses.course_id"),
417417
index=True,
418+
nullable=False,
418419
),
419420
Column(
420421
"days_of_week",
@@ -514,7 +515,7 @@ class Professor(BaseModel):
514515

515516
average_rating = Column(
516517
Float,
517-
comment="[computed] Average rating of the professor assessed via the \"Overall assessment\" question in courses taught",
518+
comment='[computed] Average rating of the professor assessed via the "Overall assessment" question in courses taught',
518519
)
519520

520521
average_rating_n = Column(

ferry/transform/import_courses.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -561,7 +561,16 @@ def import_courses(data_dir: Path, seasons: list[str]) -> CourseTables:
561561
if not parsed_courses_file.is_file():
562562
print(f"Skipping season {season}: not found in parsed courses.")
563563
continue
564-
parsed_course_info = pd.read_json(parsed_courses_file, dtype={"crn": int})
564+
parsed_course_info = pd.read_json(
565+
parsed_courses_file,
566+
dtype={
567+
"crn": int,
568+
"primary_crn": pd.Int64Dtype(),
569+
"colsem": bool,
570+
"fysem": bool,
571+
"sysem": bool,
572+
},
573+
)
565574
parsed_course_info["season_code"] = season
566575
all_imported_listings.append(parsed_course_info)
567576

@@ -576,7 +585,6 @@ def import_courses(data_dir: Path, seasons: list[str]) -> CourseTables:
576585
lambda row: f"{row['season_code']}-{row['crn']}",
577586
data_dir / "id_cache" / "listing_id.json",
578587
)
579-
listings["primary_crn"] = listings["primary_crn"].astype(pd.Int64Dtype())
580588
listings, courses = resolve_cross_listings(listings, data_dir)
581589
professors, course_professors = aggregate_professors(courses, data_dir)
582590
flags, course_flags = aggregate_flags(courses, data_dir)

0 commit comments

Comments
 (0)