Skip to content

Commit

Permalink
Merge pull request #263 from jakobkoerber/fix-studentenwerk-menu-parser
Browse files Browse the repository at this point in the history
Fix StudentenWerkMenuParser
  • Loading branch information
CommanderStorm authored Sep 23, 2024
2 parents 0f63ff6 + 1c83d40 commit 0be5b23
Show file tree
Hide file tree
Showing 4 changed files with 47 additions and 56 deletions.
1 change: 1 addition & 0 deletions scripts/reformat.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/bin/python3
# pylint: disable=too-many-positional-arguments
import json
import os
import re
Expand Down
1 change: 1 addition & 0 deletions src/entities.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
# pylint: disable=too-many-positional-arguments

# Postponed Evaluation of Annotations to allow using a class inside a class for annotations
from __future__ import annotations
Expand Down
91 changes: 38 additions & 53 deletions src/menu_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class MenuParser(ABC):
"""

canteens: Set[Canteen]
_label_lookup: Dict[str, Set[Label]]
_label_subclasses: Dict[str, Set[Label]]
# we use datetime %u, so we go from 1-7
weekday_positions: Dict[str, int] = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}

Expand All @@ -54,7 +54,7 @@ def _parse_label(cls, labels_str: str) -> Set[Label]:
for value in split_values:
stripped = value.strip()
if not stripped.isspace():
labels |= cls._label_lookup.get(stripped, set())
labels |= cls._label_subclasses.get(stripped, set())
Label.add_supertype_labels(labels)
return labels

Expand Down Expand Up @@ -112,7 +112,7 @@ def __init__(self, students: float, staff: float, guests: float):
self.guests = guests
self.unit = "100g"

_label_lookup: Dict[str, Set[Label]] = {
_label_subclasses: Dict[str, Set[Label]] = {
"GQB": {Label.BAVARIA},
"MSC": {Label.MSC},
"1": {Label.DYESTUFF},
Expand Down Expand Up @@ -241,65 +241,51 @@ def __get_price(canteen: Canteen, dish: Tuple[str, str, str, str, str], dish_nam
base_price_type = StudentenwerkMenuParser.SelfServiceBasePriceType.PIZZA_VEGIE
return StudentenwerkMenuParser.__get_self_service_prices(base_price_type, price_per_unit_type)

base_url: str = "http://www.studierendenwerk-muenchen-oberbayern.de/mensa/speiseplan/speiseplan_{url_id}_-de.html"
base_url_with_date: str = (
"http://www.studierendenwerk-muenchen-oberbayern.de/mensa/speiseplan/speiseplan_{date}_{url_id}_-de.html"
)
base_url: str = "https://www.studierendenwerk-muenchen-oberbayern.de/mensa/speiseplan/speiseplan_{url_id}_-de.html"

def parse(self, canteen: Canteen) -> Optional[Dict[datetime.date, Menu]]:
menus = {}
for date in self.__get_available_dates(canteen):
page_link: str = self.base_url_with_date.format(url_id=canteen.url_id, date=date.strftime("%Y-%m-%d"))
page: requests.Response = requests.get(page_link, timeout=10.0)
if page.ok:
try:
tree: html.Element = html.fromstring(page.content)
menu = self.get_menu(tree, canteen, date)
page_link: str = self.base_url.format(url_id=canteen.url_id)
page: requests.Response = requests.get(page_link, timeout=10.0)
if page.ok:
try:
tree: html.Element = html.fromstring(page.content)
html_menus: List[html.Element] = self.get_daily_menus_as_html(tree)
for html_menu in html_menus:
# this solves some weird reference? issue where tree.xpath will subsequently always use
# the first element despite looping through seemingly separate elements
html_menu = html.fromstring(html.tostring(html_menu))
menu = self.get_menu(html_menu, canteen)
if menu:
menus[date] = menu
menus[menu.menu_date] = menu
# pylint: disable=broad-except
except Exception as e:
print(f"Exception while parsing menu from {date}. Skipping current date. Exception args: {e.args}")
# pylint: enable=broad-except
except Exception as e:
print(f"Exception while parsing menu. Skipping current date. Exception args: {e.args}")
# pylint: enable=broad-except
return menus

def get_menu(self, page: html.Element, canteen: Canteen, date: datetime.date) -> Optional[Menu]:
# get current menu
current_menu: html.Element = self.__get_daily_menus_as_html(page)[0]
# get html representation of menu
menu_html = html.fromstring(html.tostring(current_menu))

# parse dishes of current menu
dishes: List[Dish] = self.__parse_dishes(menu_html, canteen)
# create menu object
def get_menu(self, page: html.Element, canteen: Canteen) -> Optional[Menu]:
date = self.extract_date_from_html(page)
dishes: List[Dish] = self.__parse_dishes(page, canteen)
menu: Menu = Menu(date, dishes)
return menu

def __get_available_dates(self, canteen: Canteen) -> List[datetime.date]:
page_link: str = self.base_url.format(url_id=canteen.url_id)
page: requests.Response = requests.get(page_link, timeout=10.0)
tree: html.Element = html.fromstring(page.content)
return self.get_available_dates_for_html(tree)

# public for testing
def get_available_dates_for_html(self, tree: html.Element) -> List[datetime.date]:
dates: List[datetime.date] = []
date_strings: List[str] = tree.xpath("//div[@class='c-schedule__item']//strong/text()")
for date_str in date_strings:
# parse date
try:
date: datetime.date = util.parse_date(date_str)
except ValueError:
print(f"Warning: Error during parsing date from html page. Problematic date: {date_str}")
# continue and parse subsequent menus
continue
dates += [date]
return dates
@staticmethod
def extract_date_from_html(tree: html.Element) -> Optional[datetime.date]:
date_str: str = tree.xpath("//div[@class='c-schedule__item']//strong/text()")[0]
try:
date: datetime.date = util.parse_date(date_str)
return date
except ValueError:
warn(f"Error during parsing date from html page. Problematic date: {date_str}")
return None

# public for testing
@staticmethod
def __get_daily_menus_as_html(page):
def get_daily_menus_as_html(tree: html.Element) -> List[html.Element]:
# obtain all daily menus found in the passed html page by xpath query
daily_menus: page.xpath = page.xpath("//div[@class='c-schedule__item']") # type: ignore
daily_menus: List[html.Element] = tree.xpath("//div[@class='c-schedule__item']")
return daily_menus

@staticmethod
Expand Down Expand Up @@ -405,8 +391,7 @@ class DishType(Enum):
VEGETARIAN = auto()
VEGAN = auto()

# if an label is a subclass of another label,
_label_lookup: Dict[str, Set[Label]] = {
_label_subclasses: Dict[str, Set[Label]] = {
"a": {Label.GLUTEN},
"aW": {Label.WHEAT},
"aR": {Label.RYE},
Expand Down Expand Up @@ -759,7 +744,7 @@ def get_menus(self, text, year, week_number):
try:
price_obj = Price(float(price_str))
except ValueError:
print(f"Warning: Error during parsing price: {price_str}")
warn(f"Error during parsing price: {price_str}")
dishes.append(
Dish(
dish_name.strip(),
Expand All @@ -786,7 +771,7 @@ class MedizinerMensaMenuParser(MenuParser):
labels_regex = r"(\s([A-C]|[E-H]|[K-P]|[R-Z]|[1-9])(,([A-C]|[E-H]|[K-P]|[R-Z]|[1-9]))*(\s|\Z))"
price_regex = r"(\d+(,(\d){2})\s?€)"

_label_lookup: Dict[str, Set[Label]] = {
_label_subclasses: Dict[str, Set[Label]] = {
"1": {Label.DYESTUFF},
"2": {Label.PRESERVATIVES},
"3": {Label.ANTIOXIDANTS},
Expand Down Expand Up @@ -986,7 +971,7 @@ class StraubingMensaMenuParser(MenuParser):
url = "https://www.stwno.de/infomax/daten-extern/csv/HS-SR/{calendar_week}.csv"
canteens = {Canteen.MENSA_STRAUBING}

_label_lookup: Dict[str, Set[Label]] = {
_label_subclasses: Dict[str, Set[Label]] = {
"1": {Label.DYESTUFF},
"2": {Label.PRESERVATIVES},
"3": {Label.ANTIOXIDANTS},
Expand Down
10 changes: 7 additions & 3 deletions src/test/test_menu_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import tempfile
import unittest
from datetime import date
from typing import Dict, List
from typing import Dict

from lxml import html # nosec: https://github.com/TUM-Dev/eat-api/issues/19

Expand Down Expand Up @@ -58,11 +58,15 @@ def test_get_all_dates(self) -> None:
working_days.append(start_date)
start_date += datetime.timedelta(days=1)

dates = []
tree = file_util.load_html(
f"{self.base_path_canteen.format(canteen=Canteen.MENSA_GARCHING.canteen_id)}"
f"/for-generation/overview.html",
)
dates: List[date] = self.studentenwerk_menu_parser.get_available_dates_for_html(tree)
menus = StudentenwerkMenuParser.get_daily_menus_as_html(tree)
for menu in menus:
html_menu = html.fromstring(html.tostring(menu))
dates.append(self.studentenwerk_menu_parser.extract_date_from_html(html_menu))
self.assertEqual(dates, working_days)

def test_studentenwerk(self) -> None:
Expand Down Expand Up @@ -92,7 +96,7 @@ def __get_menus(self, canteen: Canteen) -> Dict[date, Menu]:
f"{self.base_path_canteen.format(canteen=canteen.canteen_id)}/for-generation/{date_}.html",
)
studentenwerk_menu_parser = StudentenwerkMenuParser()
menu = studentenwerk_menu_parser.get_menu(tree, canteen, date_)
menu = studentenwerk_menu_parser.get_menu(tree, canteen)
if menu is not None:
menus[date_] = menu
return menus
Expand Down

0 comments on commit 0be5b23

Please sign in to comment.