From fc93c11d3c45c91a5ebe2a969c3b1ded682e837e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakob=20K=C3=B6rber?=
 <56073945+jakobkoerber@users.noreply.github.com>
Date: Wed, 11 Sep 2024 20:04:40 +0200
Subject: [PATCH 1/4] Refactor StudentenwerkMenuParser to Use Overview Page
 Instead of Detail Page

---
 src/menu_parser.py | 92 +++++++++++++++++++---------------------------
 1 file changed, 38 insertions(+), 54 deletions(-)

diff --git a/src/menu_parser.py b/src/menu_parser.py
index 67c27e98a..85a907aa5 100644
--- a/src/menu_parser.py
+++ b/src/menu_parser.py
@@ -192,8 +192,8 @@ def __init__(self, students: float, staff: float, guests: float):
 
     @staticmethod
     def __get_self_service_prices(
-        base_price_type: SelfServiceBasePriceType,
-        price_per_unit_type: SelfServicePricePerUnitType,
+            base_price_type: SelfServiceBasePriceType,
+            price_per_unit_type: SelfServicePricePerUnitType,
     ) -> Prices:
         students: Price = Price(
             base_price_type.price[0],
@@ -242,64 +242,48 @@ def __get_price(canteen: Canteen, dish: Tuple[str, str, str, str, str], dish_nam
         return StudentenwerkMenuParser.__get_self_service_prices(base_price_type, price_per_unit_type)
 
     base_url: str = "http://www.studierendenwerk-muenchen-oberbayern.de/mensa/speiseplan/speiseplan_{url_id}_-de.html"
-    base_url_with_date: str = (
-        "http://www.studierendenwerk-muenchen-oberbayern.de/mensa/speiseplan/speiseplan_{date}_{url_id}_-de.html"
-    )
 
     def parse(self, canteen: Canteen) -> Optional[Dict[datetime.date, Menu]]:
         menus = {}
-        for date in self.__get_available_dates(canteen):
-            page_link: str = self.base_url_with_date.format(url_id=canteen.url_id, date=date.strftime("%Y-%m-%d"))
-            page: requests.Response = requests.get(page_link, timeout=10.0)
-            if page.ok:
-                try:
-                    tree: html.Element = html.fromstring(page.content)
-                    menu = self.get_menu(tree, canteen, date)
+        page_link: str = self.base_url.format(url_id=canteen.url_id)
+        page: requests.Response = requests.get(page_link, timeout=10.0)
+        if page.ok:
+            try:
+                tree: html.Element = html.fromstring(page.content)
+                html_menus: List[html.Element] = self.__get_daily_menus_as_html(tree)
+                for html_menu in html_menus:
+                    html_menu = html.fromstring(html.tostring(html_menu))
+                    menu = self.get_menu(html_menu, canteen)
                     if menu:
-                        menus[date] = menu
+                        menus[menu.menu_date] = menu
                 # pylint: disable=broad-except
-                except Exception as e:
-                    print(f"Exception while parsing menu from {date}. Skipping current date. Exception args: {e.args}")
-                # pylint: enable=broad-except
+            except Exception as e:
+                print(f"Exception while parsing menu. Skipping current date. Exception args: {e.args}")
+        # pylint: enable=broad-except
         return menus
 
-    def get_menu(self, page: html.Element, canteen: Canteen, date: datetime.date) -> Optional[Menu]:
-        # get current menu
-        current_menu: html.Element = self.__get_daily_menus_as_html(page)[0]
-        # get html representation of menu
-        menu_html = html.fromstring(html.tostring(current_menu))
-
+    def get_menu(self, page: html.Element, canteen: Canteen) -> Optional[Menu]:
+        # extract date
+        date = self.extract_date_from_html(page)
         # parse dishes of current menu
-        dishes: List[Dish] = self.__parse_dishes(menu_html, canteen)
+        dishes: List[Dish] = self.__parse_dishes(page, canteen)
         # create menu object
         menu: Menu = Menu(date, dishes)
         return menu
 
-    def __get_available_dates(self, canteen: Canteen) -> List[datetime.date]:
-        page_link: str = self.base_url.format(url_id=canteen.url_id)
-        page: requests.Response = requests.get(page_link, timeout=10.0)
-        tree: html.Element = html.fromstring(page.content)
-        return self.get_available_dates_for_html(tree)
-
     # public for testing
-    def get_available_dates_for_html(self, tree: html.Element) -> List[datetime.date]:
-        dates: List[datetime.date] = []
-        date_strings: List[str] = tree.xpath("//div[@class='c-schedule__item']//strong/text()")
-        for date_str in date_strings:
-            # parse date
-            try:
-                date: datetime.date = util.parse_date(date_str)
-            except ValueError:
-                print(f"Warning: Error during parsing date from html page. Problematic date: {date_str}")
-                # continue and parse subsequent menus
-                continue
-            dates += [date]
-        return dates
+    def extract_date_from_html(self, tree: html.Element) -> Optional[datetime.date]:
+        date_str: str = tree.xpath("//div[@class='c-schedule__item']//strong/text()")[0]
+        try:
+            date: datetime.date = util.parse_date(date_str)
+            return date
+        except ValueError:
+            print(f"Warning: Error during parsing date from html page. Problematic date: {date_str}")
 
     @staticmethod
-    def __get_daily_menus_as_html(page):
+    def __get_daily_menus_as_html(tree: html.Element) -> List[html.Element]:
         # obtain all daily menus found in the passed html page by xpath query
-        daily_menus: page.xpath = page.xpath("//div[@class='c-schedule__item']")  # type: ignore
+        daily_menus: List[html.Element] = tree.xpath("//div[@class='c-schedule__item']")  # type: ignore
         return daily_menus
 
     @staticmethod
@@ -344,12 +328,12 @@ def __parse_dishes(menu_html: html.Element, canteen: Canteen) -> List[Dish]:
             dish_markers_meetless,
         )
         for (
-            dish_name,
-            dish_type,
-            dish_marker_additional,
-            dish_marker_allergen,
-            dish_marker_type,
-            dish_marker_meetless,
+                dish_name,
+                dish_type,
+                dish_marker_additional,
+                dish_marker_allergen,
+                dish_marker_type,
+                dish_marker_meetless,
         ) in dishes_tup:
             dishes_dict[dish_name] = (
                 dish_type,
@@ -551,7 +535,7 @@ def __get_label_str_and_price(self, column_index: int, line: str) -> Optional[Tu
                 # However, according to
                 # https://black.readthedocs.io/en/stable/faq.html#why-are-flake8-s-e203-and-w503-violated,
                 # this is against PEP8
-                line[estimated_column_end - delta : min(estimated_column_end + delta, len(line))],  # noqa: E203
+                line[estimated_column_end - delta: min(estimated_column_end + delta, len(line))],  # noqa: E203
             )[0]
         except IndexError:
             return None
@@ -563,7 +547,7 @@ def __get_label_str_and_price(self, column_index: int, line: str) -> Optional[Tu
                 # However, according to
                 # https://black.readthedocs.io/en/stable/faq.html#why-are-flake8-s-e203-and-w503-violated,
                 # this is against PEP8
-                line[max(estimated_column_begin - delta, 0) : estimated_column_begin + delta],  # noqa: E203
+                line[max(estimated_column_begin - delta, 0): estimated_column_begin + delta],  # noqa: E203
             )[0]
         except IndexError:
             labels_str = ""
@@ -692,7 +676,7 @@ def get_menus(self, text, year, week_number):
         positions4 = [
             (max(a.start() - 3, 0), a.end())
             for a in list(re.finditer(self.split_days_regex_closed, soup_line1))
-            + list(re.finditer(self.split_days_regex_closed, soup_line2))
+                     + list(re.finditer(self.split_days_regex_closed, soup_line2))
         ]
 
         if positions3:  # Two lines "Tagessuppe siehe Aushang"
@@ -718,7 +702,7 @@ def get_menus(self, text, year, week_number):
         lines_weekdays = {"mon": "", "tue": "", "wed": "", "thu": "", "fri": ""}
         # it must be lines[3:] instead of lines[2:] or else the menus would start with "Preis ab 0,90€" (from the
         # soups) instead of the first menu, if there is a day where the bistro is closed.
-        for line in lines[soup_line_index + 3 :]:  # noqa: E203
+        for line in lines[soup_line_index + 3:]:  # noqa: E203
             lines_weekdays["mon"] += " " + line[pos_mon:pos_tue].replace("\n", " ")
             lines_weekdays["tue"] += " " + line[pos_tue:pos_wed].replace("\n", " ")
             lines_weekdays["wed"] += " " + line[pos_wed:pos_thu].replace("\n", " ")

From 4114cc37e3337a19613a2399b46255bba70017e3 Mon Sep 17 00:00:00 2001
From: Friendly-Banana <69007475+Friendly-Banana@users.noreply.github.com>
Date: Thu, 19 Sep 2024 23:54:30 +0200
Subject: [PATCH 2/4] solve linting issues and fix test

---
 src/menu_parser.py           | 59 ++++++++++++++++++------------------
 src/test/test_menu_parser.py | 10 ++++--
 2 files changed, 37 insertions(+), 32 deletions(-)

diff --git a/src/menu_parser.py b/src/menu_parser.py
index 85a907aa5..1acfb30de 100644
--- a/src/menu_parser.py
+++ b/src/menu_parser.py
@@ -29,7 +29,7 @@ class MenuParser(ABC):
     """
 
     canteens: Set[Canteen]
-    _label_lookup: Dict[str, Set[Label]]
+    _label_subclasses: Dict[str, Set[Label]]
     # we use datetime %u, so we go from 1-7
     weekday_positions: Dict[str, int] = {"mon": 1, "tue": 2, "wed": 3, "thu": 4, "fri": 5, "sat": 6, "sun": 7}
 
@@ -54,7 +54,7 @@ def _parse_label(cls, labels_str: str) -> Set[Label]:
         for value in split_values:
             stripped = value.strip()
             if not stripped.isspace():
-                labels |= cls._label_lookup.get(stripped, set())
+                labels |= cls._label_subclasses.get(stripped, set())
         Label.add_supertype_labels(labels)
         return labels
 
@@ -112,7 +112,7 @@ def __init__(self, students: float, staff: float, guests: float):
             self.guests = guests
             self.unit = "100g"
 
-    _label_lookup: Dict[str, Set[Label]] = {
+    _label_subclasses: Dict[str, Set[Label]] = {
         "GQB": {Label.BAVARIA},
         "MSC": {Label.MSC},
         "1": {Label.DYESTUFF},
@@ -192,8 +192,8 @@ def __init__(self, students: float, staff: float, guests: float):
 
     @staticmethod
     def __get_self_service_prices(
-            base_price_type: SelfServiceBasePriceType,
-            price_per_unit_type: SelfServicePricePerUnitType,
+        base_price_type: SelfServiceBasePriceType,
+        price_per_unit_type: SelfServicePricePerUnitType,
     ) -> Prices:
         students: Price = Price(
             base_price_type.price[0],
@@ -241,7 +241,7 @@ def __get_price(canteen: Canteen, dish: Tuple[str, str, str, str, str], dish_nam
                 base_price_type = StudentenwerkMenuParser.SelfServiceBasePriceType.PIZZA_VEGIE
         return StudentenwerkMenuParser.__get_self_service_prices(base_price_type, price_per_unit_type)
 
-    base_url: str = "http://www.studierendenwerk-muenchen-oberbayern.de/mensa/speiseplan/speiseplan_{url_id}_-de.html"
+    base_url: str = "https://www.studierendenwerk-muenchen-oberbayern.de/mensa/speiseplan/speiseplan_{url_id}_-de.html"
 
     def parse(self, canteen: Canteen) -> Optional[Dict[datetime.date, Menu]]:
         menus = {}
@@ -250,8 +250,10 @@ def parse(self, canteen: Canteen) -> Optional[Dict[datetime.date, Menu]]:
         if page.ok:
             try:
                 tree: html.Element = html.fromstring(page.content)
-                html_menus: List[html.Element] = self.__get_daily_menus_as_html(tree)
+                html_menus: List[html.Element] = self.get_daily_menus_as_html(tree)
                 for html_menu in html_menus:
+                    # XPath expressions starting with "//" search the whole document, not just the context
+                    # element, so re-parse each daily menu into a standalone tree before querying it
                     html_menu = html.fromstring(html.tostring(html_menu))
                     menu = self.get_menu(html_menu, canteen)
                     if menu:
@@ -263,27 +265,27 @@ def parse(self, canteen: Canteen) -> Optional[Dict[datetime.date, Menu]]:
         return menus
 
     def get_menu(self, page: html.Element, canteen: Canteen) -> Optional[Menu]:
-        # extract date
         date = self.extract_date_from_html(page)
-        # parse dishes of current menu
         dishes: List[Dish] = self.__parse_dishes(page, canteen)
-        # create menu object
         menu: Menu = Menu(date, dishes)
         return menu
 
     # public for testing
-    def extract_date_from_html(self, tree: html.Element) -> Optional[datetime.date]:
+    @staticmethod
+    def extract_date_from_html(tree: html.Element) -> Optional[datetime.date]:
         date_str: str = tree.xpath("//div[@class='c-schedule__item']//strong/text()")[0]
         try:
             date: datetime.date = util.parse_date(date_str)
             return date
         except ValueError:
-            print(f"Warning: Error during parsing date from html page. Problematic date: {date_str}")
+            warn(f"Error during parsing date from html page. Problematic date: {date_str}")
+            return None
 
+    # public for testing
     @staticmethod
-    def __get_daily_menus_as_html(tree: html.Element) -> List[html.Element]:
+    def get_daily_menus_as_html(tree: html.Element) -> List[html.Element]:
         # obtain all daily menus found in the passed html page by xpath query
-        daily_menus: List[html.Element] = tree.xpath("//div[@class='c-schedule__item']")  # type: ignore
+        daily_menus: List[html.Element] = tree.xpath("//div[@class='c-schedule__item']")
         return daily_menus
 
     @staticmethod
@@ -328,12 +330,12 @@ def __parse_dishes(menu_html: html.Element, canteen: Canteen) -> List[Dish]:
             dish_markers_meetless,
         )
         for (
-                dish_name,
-                dish_type,
-                dish_marker_additional,
-                dish_marker_allergen,
-                dish_marker_type,
-                dish_marker_meetless,
+            dish_name,
+            dish_type,
+            dish_marker_additional,
+            dish_marker_allergen,
+            dish_marker_type,
+            dish_marker_meetless,
         ) in dishes_tup:
             dishes_dict[dish_name] = (
                 dish_type,
@@ -389,8 +391,7 @@ class DishType(Enum):
         VEGETARIAN = auto()
         VEGAN = auto()
 
-    # if an label is a subclass of another label,
-    _label_lookup: Dict[str, Set[Label]] = {
+    _label_subclasses: Dict[str, Set[Label]] = {
         "a": {Label.GLUTEN},
         "aW": {Label.WHEAT},
         "aR": {Label.RYE},
@@ -535,7 +536,7 @@ def __get_label_str_and_price(self, column_index: int, line: str) -> Optional[Tu
                 # However, according to
                 # https://black.readthedocs.io/en/stable/faq.html#why-are-flake8-s-e203-and-w503-violated,
                 # this is against PEP8
-                line[estimated_column_end - delta: min(estimated_column_end + delta, len(line))],  # noqa: E203
+                line[estimated_column_end - delta : min(estimated_column_end + delta, len(line))],  # noqa: E203
             )[0]
         except IndexError:
             return None
@@ -547,7 +548,7 @@ def __get_label_str_and_price(self, column_index: int, line: str) -> Optional[Tu
                 # However, according to
                 # https://black.readthedocs.io/en/stable/faq.html#why-are-flake8-s-e203-and-w503-violated,
                 # this is against PEP8
-                line[max(estimated_column_begin - delta, 0): estimated_column_begin + delta],  # noqa: E203
+                line[max(estimated_column_begin - delta, 0) : estimated_column_begin + delta],  # noqa: E203
             )[0]
         except IndexError:
             labels_str = ""
@@ -676,7 +677,7 @@ def get_menus(self, text, year, week_number):
         positions4 = [
             (max(a.start() - 3, 0), a.end())
             for a in list(re.finditer(self.split_days_regex_closed, soup_line1))
-                     + list(re.finditer(self.split_days_regex_closed, soup_line2))
+            + list(re.finditer(self.split_days_regex_closed, soup_line2))
         ]
 
         if positions3:  # Two lines "Tagessuppe siehe Aushang"
@@ -702,7 +703,7 @@ def get_menus(self, text, year, week_number):
         lines_weekdays = {"mon": "", "tue": "", "wed": "", "thu": "", "fri": ""}
         # it must be lines[3:] instead of lines[2:] or else the menus would start with "Preis ab 0,90€" (from the
         # soups) instead of the first menu, if there is a day where the bistro is closed.
-        for line in lines[soup_line_index + 3:]:  # noqa: E203
+        for line in lines[soup_line_index + 3 :]:  # noqa: E203
             lines_weekdays["mon"] += " " + line[pos_mon:pos_tue].replace("\n", " ")
             lines_weekdays["tue"] += " " + line[pos_tue:pos_wed].replace("\n", " ")
             lines_weekdays["wed"] += " " + line[pos_wed:pos_thu].replace("\n", " ")
@@ -743,7 +744,7 @@ def get_menus(self, text, year, week_number):
                 try:
                     price_obj = Price(float(price_str))
                 except ValueError:
-                    print(f"Warning: Error during parsing price: {price_str}")
+                    warn(f"Error during parsing price: {price_str}")
                 dishes.append(
                     Dish(
                         dish_name.strip(),
@@ -770,7 +771,7 @@ class MedizinerMensaMenuParser(MenuParser):
     labels_regex = r"(\s([A-C]|[E-H]|[K-P]|[R-Z]|[1-9])(,([A-C]|[E-H]|[K-P]|[R-Z]|[1-9]))*(\s|\Z))"
     price_regex = r"(\d+(,(\d){2})\s?€)"
 
-    _label_lookup: Dict[str, Set[Label]] = {
+    _label_subclasses: Dict[str, Set[Label]] = {
         "1": {Label.DYESTUFF},
         "2": {Label.PRESERVATIVES},
         "3": {Label.ANTIOXIDANTS},
@@ -970,7 +971,7 @@ class StraubingMensaMenuParser(MenuParser):
     url = "https://www.stwno.de/infomax/daten-extern/csv/HS-SR/{calendar_week}.csv"
     canteens = {Canteen.MENSA_STRAUBING}
 
-    _label_lookup: Dict[str, Set[Label]] = {
+    _label_subclasses: Dict[str, Set[Label]] = {
         "1": {Label.DYESTUFF},
         "2": {Label.PRESERVATIVES},
         "3": {Label.ANTIOXIDANTS},
diff --git a/src/test/test_menu_parser.py b/src/test/test_menu_parser.py
index e98021e07..d6a9123d9 100644
--- a/src/test/test_menu_parser.py
+++ b/src/test/test_menu_parser.py
@@ -4,7 +4,7 @@
 import tempfile
 import unittest
 from datetime import date
-from typing import Dict, List
+from typing import Dict
 
 from lxml import html  # nosec: https://github.com/TUM-Dev/eat-api/issues/19
 
@@ -58,11 +58,15 @@ def test_get_all_dates(self) -> None:
                 working_days.append(start_date)
             start_date += datetime.timedelta(days=1)
 
+        dates = []
         tree = file_util.load_html(
             f"{self.base_path_canteen.format(canteen=Canteen.MENSA_GARCHING.canteen_id)}"
             f"/for-generation/overview.html",
         )
-        dates: List[date] = self.studentenwerk_menu_parser.get_available_dates_for_html(tree)
+        menus = StudentenwerkMenuParser.get_daily_menus_as_html(tree)
+        for menu in menus:
+            html_menu = html.fromstring(html.tostring(menu))
+            dates.append(self.studentenwerk_menu_parser.extract_date_from_html(html_menu))
         self.assertEqual(dates, working_days)
 
     def test_studentenwerk(self) -> None:
@@ -92,7 +96,7 @@ def __get_menus(self, canteen: Canteen) -> Dict[date, Menu]:
                 f"{self.base_path_canteen.format(canteen=canteen.canteen_id)}/for-generation/{date_}.html",
             )
             studentenwerk_menu_parser = StudentenwerkMenuParser()
-            menu = studentenwerk_menu_parser.get_menu(tree, canteen, date_)
+            menu = studentenwerk_menu_parser.get_menu(tree, canteen)
             if menu is not None:
                 menus[date_] = menu
         return menus

From 4e62829e63df37b7faa5071c4b7fe727a0e4931a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakob=20K=C3=B6rber?=
 <56073945+jakobkoerber@users.noreply.github.com>
Date: Fri, 20 Sep 2024 22:03:12 +0200
Subject: [PATCH 3/4] Disable pylint too-many-arguments in two modules

---
 scripts/reformat.py | 1 +
 src/entities.py     | 1 +
 2 files changed, 2 insertions(+)

diff --git a/scripts/reformat.py b/scripts/reformat.py
index 6c76cbe93..b7ee7e3b0 100755
--- a/scripts/reformat.py
+++ b/scripts/reformat.py
@@ -1,4 +1,5 @@
 #!/bin/python3
+# pylint: disable=too-many-arguments
 import json
 import os
 import re
diff --git a/src/entities.py b/src/entities.py
index b541e1ae5..2cd22a4fb 100644
--- a/src/entities.py
+++ b/src/entities.py
@@ -1,4 +1,5 @@
 # -*- coding: utf-8 -*-
+# pylint: disable=too-many-arguments
 
 # Postponed Evaluation of Annotations to allow using a class inside a class for annotations
 from __future__ import annotations

From 1c83d406f106326f8f5a1fae26d55baf8c78f3c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jakob=20K=C3=B6rber?=
 <56073945+jakobkoerber@users.noreply.github.com>
Date: Fri, 20 Sep 2024 22:05:01 +0200
Subject: [PATCH 4/4] Switch pylint disable to too-many-positional-arguments

---
 scripts/reformat.py | 2 +-
 src/entities.py     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/scripts/reformat.py b/scripts/reformat.py
index b7ee7e3b0..f78ef9a72 100755
--- a/scripts/reformat.py
+++ b/scripts/reformat.py
@@ -1,5 +1,5 @@
 #!/bin/python3
-# pylint: disable=too-many-arguments
+# pylint: disable=too-many-positional-arguments
 import json
 import os
 import re
diff --git a/src/entities.py b/src/entities.py
index 2cd22a4fb..bdb35c3cf 100644
--- a/src/entities.py
+++ b/src/entities.py
@@ -1,5 +1,5 @@
 # -*- coding: utf-8 -*-
-# pylint: disable=too-many-arguments
+# pylint: disable=too-many-positional-arguments
 
 # Postponed Evaluation of Annotations to allow using a class inside a class for annotations
 from __future__ import annotations