diff --git a/backend/myapi/db_functions/locations.py b/backend/myapi/db_functions/locations.py
index 6c1bc5d..ce95aa1 100644
--- a/backend/myapi/db_functions/locations.py
+++ b/backend/myapi/db_functions/locations.py
@@ -1,31 +1,65 @@
 from ..models import locations_collection
 
-
-def remove_dining_halls_from_db(names: list[str]) -> None:
-    for name in names:
-        locations_collection.delete_many({"name": name})
-
-
-def add_dining_halls_to_db(dining_halls: list[dict]) -> None:
-    for dh in dining_halls:
-        locations_collection.insert_one(dh)
-
-
-def get_names_of_dining_halls(dining_halls: list[dict]) -> list[str]:
-    names = []
-    for dh in dining_halls:
-        names.append(dh["name"])
-    return names
-
-
-def remove_add_dining_halls_to_db(dining_halls: list[dict]) -> None:
-    # get names of dining halls
-    names = get_names_of_dining_halls(dining_halls)
-    # remove dining halls with the names
-    remove_dining_halls_from_db(names)
-    # add dining halls to db
-    add_dining_halls_to_db(dining_halls)
-
-
-def get_all_dining_halls_from_db() -> list[dict]:
-    return list(locations_collection.find({}))
+# Location documents are looked up by their "name" field (a str).
+
+
+## Basic CRUD
+def set_location(location: dict) -> None:
+    """Insert `location` into the locations collection as a new document."""
+    locations_collection.insert_one(location)
+
+
+def get_location(name: str) -> dict | None:
+    """Return a document whose "name" equals `name`, or None if there is none."""
+    return locations_collection.find_one({"name": name})
+
+
+def update_location(name: str, location: dict) -> None:
+    """
+    create the location if it does not exist, otherwise update it in place
+
+    an existing document is updated with `$set`, so fields that are missing
+    from `location` are left untouched rather than removed
+    """
+    if get_location(name) is None:
+        # nothing stored yet: one insert is enough, so return instead of
+        # also running update_one with the same fields
+        set_location(location)
+        return
+    locations_collection.update_one({"name": name}, {"$set": location})
+
+
+def delete_location(name: str) -> None:
+    """Delete a single document whose "name" equals `name`, if one exists."""
+    locations_collection.delete_one({"name": name})
+
+
+## Bulk CRUD
+def set_locations(locations: list[dict]) -> None:
+    """Insert every document in `locations`; an empty list is a no-op."""
+    # insert_many raises on an empty list, so skip the call entirely
+    if locations:
+        locations_collection.insert_many(locations)
+
+
+def get_locations(names: list[str] | None = None) -> list[dict]:
+    """
+    if no names are given, return all locations
+    else, return the locations with the given names
+    """
+    # None replaces the shared mutable [] default; an empty list still means "all"
+    if not names:
+        return list(locations_collection.find({}))  # get all locations
+
+    return list(locations_collection.find({"name": {"$in": names}}))  # get specific locations
+
+
+def update_locations(locations: list[dict]) -> None:
+    """Create or update each location, matching on its "name" key."""
+    for location in locations:
+        update_location(location["name"], location)
+
+
+def delete_locations(names: list[str]) -> None:
+    """Delete every document whose "name" is in `names`."""
+    locations_collection.delete_many({"name": {"$in": names}})
diff --git a/backend/myapi/views.py b/backend/myapi/views.py
index 9575c9a..479e542 100644
--- a/backend/myapi/views.py
+++ b/backend/myapi/views.py
@@ -1,11 +1,8 @@
-from django.conf.locale import fr
+from requests import get
 from rest_framework.response import Response
 from rest_framework.decorators import api_view
 
-from .db_functions.locations import (
-    get_all_locations_from_db,
-    remove_add_locations_to_db,
-)
+from .db_functions.locations import update_locations, get_locations as get_locations_db
 from .db_functions.tasks import set_task_last_update, get_task_last_update
 from webscraper.food_locations import FoodLocations
 
@@ -36,11 +33,19 @@ def get_locations(request):
     # check if not updated in the last hour
-    if last_update is None or (time_now - last_update).seconds > 3600:
+    # total_seconds() covers the whole gap; .seconds alone drops full days
+    if last_update is None or (time_now - last_update).total_seconds() > 3600:
         print("Locations need to be updated...")
+
         # fetch the locations from the web scraper and add them to the db
         fo = FoodLocations()
-        locations: list[dict] = [dh.to_dict() for dh in fo.get_locations()]
-        # add the locations to the db
-        remove_add_locations_to_db(locations)
+
+        # Filter out the empty locations
+        filtered_locations = fo.get_non_empty_locations()
+
+        # Convert the list of dining halls to a list of dictionaries
+        locations = [dh.to_dict() for dh in filtered_locations]
+
+        # Update the locations in the db
+        update_locations(locations)
 
         # update the last update time
         set_task_last_update(task_name="locations")
@@ -48,7 +53,7 @@ def get_locations(request):
     else:
         print("Locations are up to date. Getting from DB...")
         # Get all locations from the db
-        locations: list[dict] = get_all_locations_from_db()
+        locations: list[dict] = get_locations_db()
 
     # remove the _id field from each dining hall
     for dh in locations:
diff --git a/backend/webscraper/category.py b/backend/webscraper/category.py
index e4ae750..99a2ab4 100644
--- a/backend/webscraper/category.py
+++ b/backend/webscraper/category.py
@@ -1,3 +1,4 @@
+from re import sub
 from bs4.element import Tag, ResultSet
 
 from webscraper.food import Food
@@ -9,6 +10,10 @@ def __init__(self, name: str, html_list: list[Tag]) -> None:
         self.name = name
         self.foods: list[Food] = [Food(html) for html in html_list]
 
+    def is_empty(self) -> bool:
+        """Return True when self.foods contains no items."""
+        return not self.foods
+
     def __str__(self) -> str:
         result = f"{self.name}\n"
         for food in self.foods:
@@ -24,6 +29,11 @@ def __init__(self, name: str, html: Tag) -> None:
         self.name = name
         self.sub_categories: list[SubCategory] = self.__process_data(html)
 
+    def is_empty(self) -> bool:
+        """Return True when there are no sub-categories or all of them are empty."""
+        # all() of an empty iterable is True, which covers the no-sub-category case
+        return all(sub_cat.is_empty() for sub_cat in self.sub_categories)
+
     def __process_data(self, html: Tag) -> list[SubCategory]:
         # find the categories in the meal time
         sub_cat_data: ResultSet = html.find_all("div", class_="shortmenucats")
diff --git a/backend/webscraper/dining_hall.py b/backend/webscraper/dining_hall.py
index 0fc13c0..3fd8967 100644
--- a/backend/webscraper/dining_hall.py
+++ b/backend/webscraper/dining_hall.py
@@ -15,6 +15,11 @@ def __init__(self, url: str) -> None:
         self.categories: list[Category] = self.__retrieve_data(url)
         print(self.name)
 
+    def is_empty(self) -> bool:
+        """Return True when there are no categories or every category is empty."""
+        # all() of an empty iterable is True, which covers the no-category case
+        return all(category.is_empty() for category in self.categories)
+
     def __retrieve_data(self, url: str) -> list[Category]:
         # Set the cookies to be empty to avoid loading nothing
         cookies = {
diff --git a/backend/webscraper/food_locations.py b/backend/webscraper/food_locations.py
index 53fa999..1f2cda5 100644
--- a/backend/webscraper/food_locations.py
+++ b/backend/webscraper/food_locations.py
@@ -15,6 +15,14 @@ def __init__(self) -> None:
     def get_locations(self) -> list[DiningHall]:
         return self.locations
 
+    def get_location_names(self) -> list[str]:
+        """Return the name of every dining hall in self.locations."""
+        return [dh.name for dh in self.locations]
+
+    def get_non_empty_locations(self) -> list[DiningHall]:
+        """Return only the dining halls whose is_empty() is False."""
+        return [dh for dh in self.locations if not dh.is_empty()]
+
     def __retrieve_data(self) -> list[DiningHall]:
         try:
             page = requests.get(self.main_url, verify=UCSC_SSL_CERT)