From 7d2d4837516f4bb22bf089aa4e25f82e9434f7af Mon Sep 17 00:00:00 2001 From: Browning Date: Wed, 30 Aug 2023 14:46:02 +0100 Subject: [PATCH 01/33] Add code for producing HTML outputs for GTFS data --- .gitignore | 9 +- requirements.txt | 1 + src/transport_performance/gtfs/gtfs_utils.py | 82 ++ .../gtfs/report/css_styles/styles.css | 195 +++++ .../html_templates/evaluation_template.html | 117 +++ .../report/html_templates/stops_template.html | 42 + .../html_templates/summary_template.html | 46 + .../gtfs/report/report_utils.py | 160 ++++ src/transport_performance/gtfs/validation.py | 820 +++++++++++++++++- src/transport_performance/utils/defence.py | 72 ++ tests/data/gtfs/report/gtfs_report/styles.css | 195 +++++ tests/data/gtfs/report/html_template.html | 6 + tests/gtfs/report/test_report_utils.py | 105 +++ tests/gtfs/test_gtfs_utils.py | 63 +- tests/gtfs/test_validation.py | 230 ++++- 15 files changed, 2105 insertions(+), 38 deletions(-) create mode 100644 src/transport_performance/gtfs/report/css_styles/styles.css create mode 100644 src/transport_performance/gtfs/report/html_templates/evaluation_template.html create mode 100644 src/transport_performance/gtfs/report/html_templates/stops_template.html create mode 100644 src/transport_performance/gtfs/report/html_templates/summary_template.html create mode 100644 src/transport_performance/gtfs/report/report_utils.py create mode 100644 tests/data/gtfs/report/gtfs_report/styles.css create mode 100644 tests/data/gtfs/report/html_template.html create mode 100644 tests/gtfs/report/test_report_utils.py diff --git a/.gitignore b/.gitignore index 5a5f920d..11be8265 100644 --- a/.gitignore +++ b/.gitignore @@ -5,10 +5,18 @@ # moved blanket rules above specific exceptions for test fixtures *.zip *.pkl +*.html + # except test fixtures !tests/data/newport-2023-06-13.osm.pbf !tests/data/newport-20230613_gtfs.zip !tests/data/gtfs/route_lookup.pkl +!tests/data/gtfs/report/html_template.html + +# except html templates +!src/transport_performance/gtfs/report/html_templates/evaluation_template.html +!src/transport_performance/gtfs/report/html_templates/stops_template.html +!src/transport_performance/gtfs/report/html_templates/summary_template.html ### Project structure ### data/* @@ -35,7 +43,6 @@ outputs/* *.Rproj -*.html *.pdf *.csv *.rds diff --git a/requirements.txt b/requirements.txt index 6f733cff..8c20ed32 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,5 +25,6 @@ cartopy folium mapclassify seaborn +pretty_html_table numpy>=1.25.0 # test suite will fail if user installed lower than this -e . diff --git a/src/transport_performance/gtfs/gtfs_utils.py b/src/transport_performance/gtfs/gtfs_utils.py index e72df333..e3bede01 100644 --- a/src/transport_performance/gtfs/gtfs_utils.py +++ b/src/transport_performance/gtfs/gtfs_utils.py @@ -3,10 +3,14 @@ import geopandas as gpd from shapely.geometry import box from pyprojroot import here +import plotly.graph_objects as go +import pandas as pd from transport_performance.utils.defence import ( _is_expected_filetype, _check_list, + _dataframe_defence, + _bool_defence, ) @@ -60,3 +64,81 @@ def bbox_filter_gtfs( print(f"Filtered feed written to {out_pth}.") return None + + +# NOTE: Possibly move to a more generalised utils file +def convert_pandas_to_plotly( + df: pd.DataFrame, return_html=False, scheme: str = "dsc" +) -> go.Figure: + """Convert a pandas dataframe to a visual plotly figure. + + Parameters + ---------- + df : pd.DataFrame + A pandas dataframe to convert to plotly + (single index only) + return_html : bool, optional + Whether or not to return the html element, + by default False + scheme : str, optional + The colour scheme of the dataframe, + by default "dsc" + + Returns + ------- + go.Figure + A plotly figure containing the drawn dataframe + + """ + # pre-defined colour schemes + schemes = { + "dsc": { + "header_fill": "#12436D", + "header_font_colour": "white", + "cell_fill": "#A285D1", + "cell_font_colour": "black", + "font_family": "sans-serif", + "line_colour": "black", + } + } + # defences + _dataframe_defence(df, "df") + _bool_defence(return_html, "return_html") + if scheme not in list(schemes.keys()): + raise LookupError( + f"{scheme} is not a valid colour scheme." + f"Valid colour schemes include {list(schemes.keys())}" + ) + if isinstance(df.columns, pd.MultiIndex): + raise TypeError( + "Pandas dataframe must have a single index," "not MultiIndex" + ) + + # create plotly df + fig = go.Figure( + data=go.Table( + header=dict( + values=df.columns.values, + fill_color=schemes[scheme]["header_fill"], + font=dict( + color=schemes[scheme]["header_font_colour"], + family=schemes[scheme]["font_family"], + ), + line_color=schemes[scheme]["line_colour"], + ), + cells=dict( + values=[df[col_name] for col_name in df.columns], + fill_color="#A285D1", + font=dict( + color=schemes[scheme]["cell_font_colour"], + family=schemes[scheme]["font_family"], + ), + align="left", + line_color=schemes[scheme]["line_colour"], + ), + ) + ) + + if return_html: + return fig.to_html(full_html=False) + return fig diff --git a/src/transport_performance/gtfs/report/css_styles/styles.css b/src/transport_performance/gtfs/report/css_styles/styles.css new file mode 100644 index 00000000..1e816520 --- /dev/null +++ b/src/transport_performance/gtfs/report/css_styles/styles.css @@ -0,0 +1,195 @@ +* { + margin: 0; + padding: 0; + box-sizing: border-box; + font-family: 'Poppins', sans-serif; +} + +body { + min-height: 100vh; +} + +a { + text-decoration: none; + +} + +li { + list-style: none; +} + +h1, +h2 { + color: black; +} + +h3 { + color: #999; +} + +table { + overflow: hidden; + overflow-x: scroll; + max-width: 1500px; + display: block; + overflow-y: scroll; + max-height: 800px; +} + +thead { + position: sticky; + top: -10px; +} + +.btn { + background: #f05462; + color: white; + padding: 5px 10px; + text-align: center; +} + +.btn:hover { + color: #f05462; + background: white; + padding: 3px 8px; + border: 2px solid #f05462; +} + +.title { + display: flex; + align-items: center; + justify-content: space-around; + padding: 15px 10px; + border-bottom: 2px solid #999; +} + +table { + padding: 10px; +} + +th, +td { + text-align: left; + padding: 8px; +} + +.side-menu { + position: fixed; + background: #28A197; + width: 20vw; + min-height: 100vh; + display: flex; + flex-direction: column; +} + +.side-menu .side-menu-title { + height: 10vh; + display: flex; + align-items: center; + justify-content: center; + font-weight: bold +} + +.side-menu li { + font-size: 24px; + padding: 10px 40px; + display: flex; + align-items: center; +} + +.side-menu li:hover { + background: white; + color: #28A197; + +} + +.side-menu li:hover .option{ + color: #28A197; + background-color: white; +} + +.side-menu .option { + color: white; +} + +.side-menu .option:hover{ + color: #28A197; + background-color: white; +} + +.container { + position: absolute; + right: 0; + width: 80vw; + height: 100vh; + background: #f1f1f1; +} + +.container .content { + position: relative; + background: #f1f1f1; + padding-left: 20px; + padding-right: 20px; +} + +.container .content .analysis-cont { + display: flex; + justify-content: space-around; + align-items: flex-start; + flex-wrap: wrap; + background-color: #f1f1f1; + padding: 20px; +} + +.container .content .analysis-cont .summary div { + display: block; +} + + +.container .content .analysis-cont .summary dd { + font-weight: bold; + margin-inline-start: 0; + display: inline-block; + min-width: 130px; +} + +.container .content .analysis-cont .summary dt { + display: inline-block; +} + +.analysis-title { + font-weight: bold; + font-size: large; + margin-bottom: 10px; +} + +hr { + border: 0; + clear:both; + display:block; + width: 100%; + background-color: black; + height: 5px; + } + +.container .header { + position: fixed; + top: 0; + right: 0; + width: 80vw; + height: 10vh; + background: #801650; + display: flex; + align-items: center; + justify-content: center; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2); + z-index: 1; +} + +.container .header .header-title { + display: flex; + align-items: center; + color: #F46A25; + font-weight: bold; + font-size: xx-large; +} diff --git a/src/transport_performance/gtfs/report/html_templates/evaluation_template.html b/src/transport_performance/gtfs/report/html_templates/evaluation_template.html new file mode 100644 index 00000000..e4284568 --- /dev/null +++ b/src/transport_performance/gtfs/report/html_templates/evaluation_template.html @@ -0,0 +1,117 @@ + + + + + + + + + GTFS Report [date] + + + +
+
+

GTFS Report

+
+ +
+
+
+
+ GTFS Data Evaluation +
+
+
+
+ GTFS Feed Info +
+
+
+
Publisher Name:
+
[name_placeholder]
+
+
+
Publisher URL:
+
[url_placeholder]
+
+
+
Feed Language:
+
[lang_placeholder]
+
+
+
Feed Start Date:
+
[start_placeholder]
+
+
+
Feed End Data:
+
[end_placeholder]
+
+
+
Feed Version:
+
[version_placeholder]
+
+
+
+
+
+
+ GTFS Feed Counts +
+
+
+
Agencies:
+
[agency_placeholder]
+
+
+
Routes:
+
[routes_placeholder]
+
+
+
Trips:
+
[trips_placeholder]
+
+
+
Stops:
+
[stops_placeholder]
+
+
+
Shapes:
+
[shapes_placeholder]
+
+
+
+
+
+
+ [eval_title_3] +
+
+ [eval_placeholder_3] +
+
+
+
+
+ [eval_title_1] +
+
+ [eval_placeholder_1] +
+
+
+
+
+ [eval_title_2] +
+
+ [eval_placeholder_2] +
+
+
+
+ diff --git a/src/transport_performance/gtfs/report/html_templates/stops_template.html b/src/transport_performance/gtfs/report/html_templates/stops_template.html new file mode 100644 index 00000000..411ea46f --- /dev/null +++ b/src/transport_performance/gtfs/report/html_templates/stops_template.html @@ -0,0 +1,42 @@ + + + + + + + + + GTFS Report - [date] + + + +
+
+

GTFS Report

+
+ +
+
+
+
+ GTFS Trips and Routes Summaries +
+
+
+
+ [stops_title_1] + +
+
+
+
+ [stops_title_2] + +
+
+
+ diff --git a/src/transport_performance/gtfs/report/html_templates/summary_template.html b/src/transport_performance/gtfs/report/html_templates/summary_template.html new file mode 100644 index 00000000..72efdd3b --- /dev/null +++ b/src/transport_performance/gtfs/report/html_templates/summary_template.html @@ -0,0 +1,46 @@ + + + + + + + + + GTFS Report - [date] + + + +
+
+

GTFS Report

+
+ +
+
+
+
+ GTFS Trips and Routes Summaries +
+
+
+
+ [plotly_title_1] +
+ [plotly_placeholder_1] +
+
+
+
+
+ [plotly_title_2] +
+ [plotly_placeholder_2] +
+
+
+
+ diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py new file mode 100644 index 00000000..3826e976 --- /dev/null +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -0,0 +1,160 @@ +"""Utils to assist in the creation of a HTML report for GTFS.""" +from typing import Union +import pathlib +import shutil +import os + +from transport_performance.utils.defence import ( + _bool_defence, + _string_defence, + _is_path_like, + _check_parent_dir_exists, +) + +# Constant to remove non needed columns from repeated +# pair error information. +# This is a messy method however it is the only +# way to ensure that the error report remains +# dynamic and can adadpt to different tables +# in the GTFS file. + +GTFS_UNNEEDED_COLUMNS = { + "routes": [], + "agency": ["agency_phone", "agency_lang"], + "stop_times": [ + "stop_headsign", + "pickup_type", + "drop_off_type", + "shape_dist_traveled", + "timepoint", + ], + "stops": [ + "wheelchair_boarding", + "location_type", + "parent_station", + "platform_code", + ], + "calendar_dates": [], + "calendar": [], + "trips": [ + "trip_headsign", + "block_id", + "shape_id", + "wheelchair_accessible", + ], + "shapes": [], +} + + +class TemplateHTML: + """A class for inserting HTML string into a docstring.""" + + def __init__(self, path: Union[str, pathlib.Path]) -> None: + """Initialise the TemplateHTML object. + + Parameters + ---------- + path : Union[str, pathlib.Path] + The file path of the html template + + Returns + ------- + None + + """ + _is_path_like(path, "path") + with open(path, "r", encoding="utf8") as f: + self.template = f.read() + return None + + def insert( + self, placeholder: str, value: str, replace_multiple: bool = False + ) -> None: + """Insert values into the html template. + + Parameters + ---------- + placeholder : str + The placeholder name in the template. + This is a string. In the template it + should be surrounded by sqsuare brackets. + value : str + The value to place in the placeholder + location. + replace_multiple : bool, optional + Whether or not to replace multiple + placeholders that share the same + placeholder value, + by default False + + Returns + ------- + None + + """ + _string_defence(placeholder, "placeholder") + _string_defence(value, "value") + _bool_defence(replace_multiple, "replace_multiple") + occurences = len(self.template.split(f"[{placeholder}]")) - 1 + if occurences > 1 and not replace_multiple: + raise ValueError( + "You have selected not to replace multiple" + "placeholders of the same value, however" + "placeholders occur more than once. \n" + "If you would like to allow this, set the" + "replace_multiple param to True" + ) + + self.template = self.template.replace(f"[{placeholder}]", value) + + def get_template(self) -> str: + """Get the template attribute of the TemplateHTML object. + + Returns + ------- + str + The template attribute + + """ + return self.template + + +def set_up_report_dir( + path: Union[str, pathlib.Path] = "outputs", overwrite: bool = False +) -> None: + """Set up the directory that will hold the report. + + Parameters + ---------- + path : Union[str, pathlib.Path], optional + The path to the directory, + by default "outputs" + overwrite : bool, optional + Whether or not to overwrite any current reports, + by default False + + Returns + ------- + None + + """ + # defences + _check_parent_dir_exists(path, "path", create=True) + + if os.path.exists(f"{path}/gtfs_report") and not overwrite: + raise FileExistsError( + "Report already exists at path: " + f"[{path}]." + "Consider setting overwrite=True" + "if you'd like to overwrite this." + ) + + try: + os.mkdir(f"{path}/gtfs_report") + except FileExistsError: + pass + shutil.copy( + src="src/transport_performance/gtfs/report/css_styles/styles.css", + dst=f"{path}/gtfs_report", + ) + return None diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index 12da36d7..8adb3b75 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -8,12 +8,32 @@ import numpy as np import os import inspect +import plotly.express as px +import plotly.io as plotly_io +from pretty_html_table import build_table +import zipfile +import pathlib +from typing import Union +from plotly.graph_objects import Figure as PlotlyFigure from transport_performance.gtfs.routes import scrape_route_type_lookup from transport_performance.utils.defence import ( _is_expected_filetype, _check_namespace_export, _check_parent_dir_exists, + _check_column_in_df, + _bool_defence, + _integer_defence, + _string_defence, + _dataframe_defence, + _dict_defence, + _string_and_nonetype_defence, +) + +from transport_performance.gtfs.report.report_utils import ( + TemplateHTML, + set_up_report_dir, + GTFS_UNNEEDED_COLUMNS, ) @@ -116,6 +136,20 @@ def __init__( raise ValueError(f"`units` accepts metric only. Found: {units}") self.feed = gk.read_feed(gtfs_pth, dist_units=units) + self.gtfs_path = gtfs_pth + + def get_gtfs_files(self) -> list: + """Return a list of files making up the GTFS file. + + Returns + ------- + list + A list of files that create the GTFS file + + """ + file_list = zipfile.ZipFile(self.gtfs_path).namelist() + self.file_list = file_list + return self.file_list def is_valid(self): """Check a feed is valid with `gtfs_kit`. @@ -490,16 +524,29 @@ def summarise_trips( # aggregate to mean/median/min/max (default) trips on each day # of the week - day_trip_counts = trip_counts.groupby(["day", "route_type"]).agg( - {"trip_count": summ_ops} + day_trip_counts = ( + trip_counts.groupby(["day", "route_type"]) + .agg({"trip_count": summ_ops}) + .reset_index() ) - day_trip_counts.reset_index(inplace=True) - day_trip_counts = day_trip_counts.round(0) - # order the days (for plotting future purposes) # order the days (for plotting future purposes) day_trip_counts = self._order_dataframe_by_day(df=day_trip_counts) + day_trip_counts = day_trip_counts.round(0) day_trip_counts.reset_index(drop=True, inplace=True) + + # reformat columns + # including normalsing min and max between different + # numpy versions (amin/min, amax/max) + day_trip_counts.columns = day_trip_counts.columns = [ + "_".join(value) if "" not in value else "".join(value) + for value in day_trip_counts.columns.values + ] + day_trip_counts.columns = [ + column.replace("amin", "min").replace("amax", "max") + for column in day_trip_counts.columns.values + ] + self.daily_trip_summary = day_trip_counts.copy() return self.daily_trip_summary @@ -567,8 +614,20 @@ def summarise_routes( day_route_count = self._order_dataframe_by_day(df=day_route_count) day_route_count = day_route_count.round(0) day_route_count.reset_index(drop=True, inplace=True) - self.daily_route_summary = day_route_count.copy() + # reformat columns + # including normalsing min and max between different + # numpy versions (amin/min, amax/max) + day_route_count.columns = day_route_count.columns = [ + "_".join(value) if "" not in value else "".join(value) + for value in day_route_count.columns.values + ] + day_route_count.columns = [ + column.replace("amin", "min").replace("amax", "max") + for column in day_route_count.columns.values + ] + + self.daily_route_summary = day_route_count.copy() return self.daily_route_summary def get_route_modes(self): @@ -601,3 +660,752 @@ def get_route_modes(self): ) self.route_mode_summary_df = out_tab return self.route_mode_summary_df + + def _plot_summary( + self, + summary_df: pd.DataFrame, + target_column: str, + orientation: str = "v", + day_column: str = "day", + width: int = 2000, + height: int = 800, + xlabel: str = None, + ylabel: str = None, + plotly_kwargs: dict = {}, + return_html: bool = False, + save_html: bool = False, + save_image: bool = False, + save_pth: Union[pathlib.Path, str] = pathlib.Path( + os.path.join("outputs", "gtfs") + ), + ) -> Union[PlotlyFigure, str]: + """Plot (and save) a summary table using plotly. + + Parameters + ---------- + summary_df : pd.DataFrame + The dataframe containing the summarised data + target_column : str + The name of the column contianing the + target data (counts) + orientation : str, optional + The orientation of the bar plot ("v" or "h"), + by default "v" + day_column : str, optional + The name of the column containing the day, + by default "day" + width : int, optional + The width of the plot (in pixels), by default 2000 + height : int, optional + The height of the plot (in pixels), by default 800 + xlabel : str, optional + The label for the x axis. + If left empty, the column name will be used, + by default None + ylabel : str, optional + The label for the y axis. + If left empty, the column name will be used, + by default None + plotly_kwargs : dict, optional + Kwargs to pass to fig.update_layout() for + additional plot customisation, + by default {} + return_html : bool, optional + Whether or not to return a html string, + by default False + save_html : bool, optional + Whether or not to save the plot as a html file, + by default False + save_image : bool, optional + Whether or not to save the plot as a PNG, + by default False + save_pth : Union[pathlib.Path, str], optional + The filepath to save the plot to. Including a file extension is + optional (e.g., .html). File paths must include forward slashes if + they aren't passed as a path object (pathlib.Path). + by default pathlib.Path(os.path.join('outputs', 'gtfs')) + + Returns + ------- + Union[PlotlyFigure, str] + Returns either a HTML string or the plotly figure + + """ + # parameter type defences + _dataframe_defence(summary_df, "summary_df") + _string_defence(day_column, "day_column") + _string_defence(target_column, "target_column") + _dict_defence(plotly_kwargs, "plotly_kwargs") + _bool_defence(return_html, "return_html") + _integer_defence(width, "width") + _integer_defence(height, "height") + _string_and_nonetype_defence(xlabel, "xlabel") + _string_and_nonetype_defence(ylabel, "ylabel") + _bool_defence(save_html, "save_html") + _bool_defence(save_image, "save_iamge") + _check_parent_dir_exists(save_pth, "save_pth", create=True) + + # orientation input defences + if orientation.lower() not in ["v", "h"]: + raise ValueError( + "'orientation expected 'v' or 'h'" + " (non case sensitive)." + f" Found {orientation} of type {type(orientation)}." + ) + + # dataframe column defences + _check_column_in_df(df=summary_df, column_name=target_column) + _check_column_in_df(df=summary_df, column_name=day_column) + + # convert column type for better graph plotting + summary_df["route_type"] = summary_df["route_type"].astype("object") + + orientation = orientation.lower() + xlabel = ( + xlabel + if xlabel + else (target_column if orientation == "h" else day_column) + ) + ylabel = ( + ylabel + if ylabel + else (target_column if orientation == "v" else day_column) + ) + + # plot summary using plotly express + fig = px.bar( + summary_df, + x=day_column if orientation == "v" else target_column, + y=target_column if orientation == "v" else day_column, + color="route_type", + barmode="group", + text_auto=True, + height=height, + width=width, + orientation=orientation, + ) + + # format plotly figure + fig.update_layout( + plot_bgcolor="white", + yaxis=dict( + tickfont=dict(size=18), + gridcolor="black", + showline=True, + showgrid=False if orientation == "h" else True, + linecolor="black", + linewidth=2, + title=ylabel, + ), + xaxis=dict( + tickfont=dict(size=18), + gridcolor="black", + showline=True, + showgrid=False if orientation == "v" else True, + linecolor="black", + linewidth=2, + title=xlabel, + ), + font=dict(size=18), + legend=dict( + xanchor="right", + x=0.99, + yanchor="top", + y=0.99, + title="Route Type", + traceorder="normal", + bgcolor="white", + bordercolor="black", + borderwidth=2, + ), + ) + + # apply custom arguments if passed + if plotly_kwargs: + fig.update_layout(**plotly_kwargs) + + # break up the save path + main, ext = os.path.splitext(save_pth) + + # save the plot if specified + if save_html: + if ext != ".png": + print( + "Image save requested but HTML type not specified." + "\nSaving as .html" + ) + plotly_io.write_html( + fig=fig, + file=os.path.normpath(main + ".html"), + full_html=False, + ) + + if save_image: + if ext != ".png": + print( + "HTML save requested but accepted type not specified.\n" + "Accepted image formats include ['.png']\n" + "Saving as .png" + ) + plotly_io.write_image( + fig=fig, file=os.path.normpath(main + ".png") + ) + print(os.path.normpath(main + ".png")) + + if return_html: + return plotly_io.to_html(fig, full_html=False) + return fig + + def plot_route_summary( + self, + target_summary: str, + orientation: str = "v", + plotly_kwargs: dict = {}, + return_html: bool = False, + width: int = 2000, + height: int = 800, + xlabel: str = None, + ylabel: str = None, + save_html: bool = False, + save_image: bool = False, + save_pth: Union[pathlib.Path, str] = pathlib.Path( + os.path.join("outputs", "gtfs") + ), + ) -> Union[PlotlyFigure, str]: + """Plot the summarised route data of a GTFS file. + + Parameters + ---------- + target_summary : str + The name of the summary to plot + (e.g., mean, max, min, median) + orientation : str, optional + The orientation of the bar plot ("v" or "h"), + by default "v" + plotly_kwargs : dict, optional + Kwargs to pass to fig.update_layout() for + additional plot customisation, + by default {} + return_html : bool, optional + Whether or not to return a html string, + by default False + width : int, optional + The width of the plot (in pixels), by default 2000 + height : int, optional + The height of the plot (in pixels), by default 800 + xlabel : str, optional + he label for the x axis. + If left empty, the column name will be used, + by default None + ylabel : str, optional + he label for the y axis. + If left empty, the column name will be used, + by default None + save_html : bool, optional + Whether or not to save the plot as a html file, + by default False + save_image : bool, optional + Whether or not to save the plot as a PNG, + by default False + save_pth : Union[pathlib.Path, str], optional + The filepath to save the plot to. Including a file extension is + optional (e.g., .html). File paths must include forward slashes if + they aren't passed as a path object (pathlib.Path). + by default pathlib.Path(os.path.join('outputs', 'gtfs')) + + Returns + ------- + Union[PlotlyFigure, str] + Returns either a HTML string or the plotly figure + + """ + # defensive checks + if "daily_route_summary" not in self.__dir__(): + raise AttributeError( + "The daily_route_summary table could not be found." + " Did you forget to call '.summarise_routes()' first?" + ) + + # plot the summary + target_col = f"route_count_{target_summary}" + plot = self._plot_summary( + summary_df=self.daily_route_summary, + target_column=target_col, + day_column="day", + orientation=orientation, + plotly_kwargs=plotly_kwargs, + return_html=return_html, + width=width, + height=height, + xlabel=xlabel, + ylabel=ylabel, + save_html=save_html, + save_pth=save_pth, + save_image=save_image, + ) + return plot + + def plot_trip_summary( + self, + target_summary: str, + orientation: str = "v", + plotly_kwargs: dict = {}, + return_html: bool = False, + width: int = 2000, + height: int = 800, + xlabel: str = None, + ylabel: str = None, + save_html: bool = False, + save_image: bool = False, + save_pth: Union[pathlib.Path, str] = pathlib.Path( + os.path.join("outputs", "gtfs") + ), + ): + """Plot the summarised trip data of a GTFS file. + + Parameters + ---------- + target_summary : str + The name of the summary to plot + (e.g., mean, max, min, median) + orientation : str, optional + The orientation of the bar plot ("v" or "h"), + by default "v" + plotly_kwargs : dict, optional + Kwargs to pass to fig.update_layout() for + additional plot customisation, + by default {} + return_html : bool, optional + Whether or not to return a html string, + by default False + width : int, optional + The width of the plot (in pixels), by default 2000 + height : int, optional + The height of the plot (in pixels), by default 800 + xlabel : str, optional + he label for the x axis. + If left empty, the column name will be used, + by default None + ylabel : str, optional + he label for the y axis. + If left empty, the column name will be used, + by default None + save_html : bool, optional + Whether or not to save the plot as a html file, + by default False + save_image : bool, optional + Whether or not to save the plot as a PNG, + by default False + save_pth : Union[pathlib.Path, str], optional + The filepath to save the plot to. Including a file extension is + optional (e.g., .html). File paths must include forward slashes if + they aren't passed as a path object (pathlib.Path). + by default pathlib.Path(os.path.join('outputs', 'gtfs')) + + Returns + ------- + Union[PlotlyFigure, str] + Returns either a HTML string or the plotly figure + + """ + # defensive checks + if "daily_trip_summary" not in self.__dir__(): + raise AttributeError( + "The daily_trip_summary table could not be found." + " Did you forget to call '.summarise_trips()' first?" + ) + + # plot the summary + target_col = f"trip_count_{target_summary}" + plot = self._plot_summary( + summary_df=self.daily_trip_summary, + target_column=target_col, + day_column="day", + orientation=orientation, + plotly_kwargs=plotly_kwargs, + return_html=return_html, + width=width, + height=height, + xlabel=xlabel, + ylabel=ylabel, + save_html=save_html, + save_pth=save_pth, + save_image=save_image, + ) + return plot + + def _create_extended_repeated_pair_table( + self, + table: pd.DataFrame, + join_vars: Union[str, list], + original_rows: list[int], + ) -> pd.DataFrame: + """Generate an extended table for repeated pair warnings. + + Parameters + ---------- + table : pd.DataFrame + The dataframe with the repeated pair warnings + join_vars : Union[str, list] + The variables that have repeated pairs + original_rows : list[int] + The original duplicate rows, contained in + the GTFS validation table (rows column) + + Returns + ------- + pd.DataFrame + An extended dataframe containing repeated pairs + + """ + error_table = table.copy().iloc[original_rows] + remaining = table.copy().loc[~table.index.isin(original_rows)] + joined_rows = error_table.merge( + remaining, + how="left", + on=join_vars, + suffixes=["_original", "_duplicate"], + ) + return joined_rows + + def _extended_validation( + self, output_path: Union[str, pathlib.Path], scheme: str = "green_dark" + ) -> None: + """Generate HTML outputs of impacted rows from GTFS errors/warnings. + + Parameters + ---------- + output_path : Union[str, pathlib.Path] + The path to save the HTML output to + scheme : str, optional + Colour scheme from pretty_html_table, by default "green_dark" + + Returns + ------- + None + + """ + table_map = { + "agency": self.feed.agency, + "routes": self.feed.routes, + "stop_times": self.feed.stop_times, + "stops": self.feed.stops, + "trips": self.feed.trips, + "calendar": self.feed.calendar, + } + + # determine which errors/warnings have rows that can be located + validation_table = self.is_valid() + validation_table["valid_row"] = validation_table["rows"].apply( + lambda x: 1 if len(x) > 0 else 0 + ) + ext_validation_table = validation_table.copy()[ + validation_table["valid_row"] == 1 + ] + # locate the impacted rows for each error + for table, rows, message, msg_type in zip( + ext_validation_table["table"], + ext_validation_table["rows"], + ext_validation_table["message"], + ext_validation_table["type"], + ): + # create a more informative table for repeated pairs + if "Repeated pair" in message: + join_vars = ( + message.split("(")[1] + .replace(")", "") + .replace(" ", "") + .split(",") + ) + drop_cols = [ + col + for col in GTFS_UNNEEDED_COLUMNS[table] + if col not in join_vars + ] + filtered_tbl = table_map[table].copy().drop(drop_cols, axis=1) + impacted_rows = self._create_extended_repeated_pair_table( + table=filtered_tbl, + join_vars=join_vars, + original_rows=rows, + ) + base_columns = [ + item + for item in list(filtered_tbl.columns) + if item not in join_vars + ] + duplicate_counts = {} + for col in base_columns: + duplicate_counts[col] = impacted_rows[ + impacted_rows[f"{col}_original"] + == impacted_rows[f"{col}_duplicate"] + ].shape[0] + else: + impacted_rows = table_map[table].copy().iloc[rows] + + # create the html to display the impacted rows (clean possibly) + table_html = f""" + + + + +

+ Table:{table}
Message: {message}
+ Type: + {msg_type} +

""" + + # Add additional information for repeated pairs + # to the HTML report + try: + for counter, var in enumerate(duplicate_counts): + if counter == 0: + table_html = ( + table_html + + """
+ + Duplicate Counts""" + ) + table_html = table_html + ( + f""" +
+
{var}:
+
+ {duplicate_counts[var]}
+
""" + ) + table_html = table_html + "
" + except NameError: + pass + + table_html = table_html + build_table( + impacted_rows, scheme, padding="10px", escape=False + ) + + table_html = table_html + "" + + # save the output + save_name = f"{'_'.join(message.split(' '))}_{table}" + with open(f"{output_path}/gtfs_report/{save_name}.html", "w") as f: + f.write(table_html) + + return None + + def html_report( + self, + report_dir: Union[str, pathlib.Path] = "outputs", + overwrite: bool = False, + summary_type: str = "mean", + extended_validation: bool = True, + ) -> None: + """Generate a HTML report describing the GTFS data. + + Parameters + ---------- + report_dir : Union[str, pathlib.Path], optional + The directory to save the report to, + by default "outputs" + overwrite : bool, optional + Whether or not to overwrite the existing report + if it already exists in the report_dir, + by default False + summary_type : str, optional + The type of summary to show on the + summaries on the gtfs report., + by default "mean" + extended_validation : bool, optional + Whether or not to create extended reports + for gtfs validation errors/warnings. + + Returns + ------- + None + + """ + _bool_defence(overwrite, "overwrite") + _string_defence(summary_type, "summary_type") + set_up_report_dir(path=report_dir, overwrite=overwrite) + summary_type = summary_type.lower() + if summary_type not in ["mean", "min", "max", "median"]: + raise ValueError("'summary type' must be mean, median, min or max") + + # store todays date + date = datetime.datetime.strftime(datetime.datetime.now(), "%d-%m-%Y") + + # feed evaluation + self.clean_feed() + validation_dataframe = self.is_valid() + + # create extended reports if requested + if extended_validation: + self._extended_validation(output_path=report_dir) + info_href = ( + validation_dataframe["message"].apply( + lambda x: "_".join(x.split(" ")) + ) + + "_" + + validation_dataframe["table"] + + ".html" + ) + validation_dataframe["info"] = [ + f""" Further Info""" + if len(rows) > 1 + else "Unavailable" + for href, rows in zip(info_href, validation_dataframe["rows"]) + ] + + eval_temp = TemplateHTML( + path=( + "src/transport_performance/gtfs/report/" + "html_templates/evaluation_template.html" + ) + ) + eval_temp.insert( + "eval_placeholder_1", + build_table( + validation_dataframe, + "green_dark", + padding="10px", + escape=False, + ), + ) + eval_temp.insert("eval_title_1", "GTFS Feed Warnings and Errors") + + eval_temp.insert( + "eval_placeholder_2", + build_table(self.feed.agency, "green_dark", padding="10px"), + ) + eval_temp.insert("eval_title_2", "GTFS Agency Information") + + eval_temp.insert( + "name_placeholder", self.feed.feed_info["feed_publisher_name"][0] + ) + eval_temp.insert( + "url_placeholder", + self.feed.feed_info["feed_publisher_url"][0], + replace_multiple=True, + ) + eval_temp.insert( + "lang_placeholder", self.feed.feed_info["feed_lang"][0] + ) + eval_temp.insert( + "start_placeholder", self.feed.feed_info["feed_start_date"][0] + ) + eval_temp.insert( + "end_placeholder", self.feed.feed_info["feed_end_date"][0] + ) + eval_temp.insert( + "version_placeholder", self.feed.feed_info["feed_version"][0] + ) + + count_lookup = dict(self.feed.describe().to_numpy()) + eval_temp.insert( + "agency_placeholder", str(len(count_lookup["agencies"])) + ) + eval_temp.insert("routes_placeholder", str(count_lookup["num_routes"])) + eval_temp.insert("trips_placeholder", str(count_lookup["num_trips"])) + eval_temp.insert("stops_placeholder", str(count_lookup["num_stops"])) + eval_temp.insert("shapes_placeholder", str(count_lookup["num_shapes"])) + + self.get_gtfs_files() + file_list_html = "" + for num, file in enumerate(self.file_list, start=1): + file_list_html = ( + file_list_html + + f""" +
+
{num}.
+
{file}
+
""" + ) + + eval_temp.insert("eval_placeholder_3", file_list_html) + eval_temp.insert("eval_title_3", "GTFS Files Included") + + eval_temp.insert("date", date) + + with open( + f"{report_dir}/gtfs_report/index.html", "w", encoding="utf8" + ) as eval_f: + eval_f.writelines(eval_temp.get_template()) + + # stops + self.viz_stops( + out_pth=( + pathlib.Path(f"{report_dir}/gtfs_report/stop_locations.html") + ) + ) + self.viz_stops( + out_pth=pathlib.Path(f"{report_dir}/gtfs_report/convex_hull.html"), + geoms="hull", + geom_crs=27700, + ) + stops_temp = TemplateHTML( + ( + "src/transport_performance/gtfs/report/" + "html_templates/stops_template.html" + ) + ) + stops_temp.insert("stops_placeholder_1", "stop_locations.html") + stops_temp.insert("stops_placeholder_2", "convex_hull.html") + stops_temp.insert("stops_title_1", "Stops from GTFS data") + stops_temp.insert( + "stops_title_2", "Convex Hull Generated from GTFS Data" + ) + stops_temp.insert("date", date) + with open( + f"{report_dir}/gtfs_report/stops.html", "w", encoding="utf8" + ) as stops_f: + stops_f.writelines(stops_temp.get_template()) + + # summaries + self.summarise_routes() + self.summarise_trips() + route_html = self.plot_route_summary( + target_summary=summary_type, + return_html=True, + width=1200, + height=800, + ylabel="Route Count", + xlabel="Day", + ) + trip_html = self.plot_trip_summary( + target_summary=summary_type, + return_html=True, + width=1200, + height=800, + ylabel="Trip Count", + xlabel="Day", + ) + summ_temp = TemplateHTML( + path=( + "src/transport_performance/gtfs/report/" + "html_templates/summary_template.html" + ) + ) + summ_temp.insert("plotly_placeholder_1", route_html) + summ_temp.insert( + "plotly_title_1", + f"Route Summary by Day and Route Type ({summary_type})", + ) + summ_temp.insert("plotly_placeholder_2", trip_html) + summ_temp.insert( + "plotly_title_2", + f"Trip Summary by Day and Route Type ({summary_type})", + ) + summ_temp.insert("date", date) + with open( + f"{report_dir}/gtfs_report/summaries.html", "w", encoding="utf8" + ) as summ_f: + summ_f.writelines(summ_temp.get_template()) + + print( + f"GTFS Report Created at {report_dir}\n" + f"View your report here: {report_dir}/gtfs_report" + ) + + return None diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index 5d60d06f..a0328258 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -3,6 +3,7 @@ import numpy as np import os from typing import Union +import pandas as pd def _is_path_like(pth, param_nm): @@ -169,6 +170,50 @@ def _bool_defence(some_bool, param_nm): return None +def _string_defence(string, param_nm): + """Defence checking. Not exported.""" + if not isinstance(string, str): + raise TypeError(f"'{param_nm}' expected str. Got {type(string)}") + + return None + + +def _integer_defence(some_int, param_nm): + """Defence checking. Not exported.""" + if not isinstance(some_int, int): + raise TypeError(f"'{param_nm}' expected int. Got {type(some_int)}") + + return None + + +def _dict_defence(some_dict, param_nm): + """Defence checking. Not exported.""" + if not isinstance(some_dict, dict): + raise TypeError(f"'{param_nm}' expected dict. Got {type(some_dict)}") + + return None + + +def _dataframe_defence(some_df, param_nm): + """Defence checking. Not exported.""" + if not isinstance(some_df, pd.DataFrame): + raise TypeError( + f"'{param_nm}' expected pd.DataFrame. Got {type(some_df)}" + ) + + return None + + +# main use case for this is to avoid +# complexity limits in +# GtfsInstance._plot_summary() +def _string_and_nonetype_defence(some_value, param_nm): + if not isinstance(some_value, (str, type(None))): + raise TypeError( + f"'{param_nm}' expected type str. Found type {type(some_value)}" + ) + + def _check_list(ls, param_nm, check_elements=True, exp_type=str): """Check a list and its elements for type. @@ -208,3 +253,30 @@ def _check_list(ls, param_nm, check_elements=True, exp_type=str): ) return None + + +def _check_column_in_df(df: pd.DataFrame, column_name: str) -> None: + """Defences to check that a column exists in a df. + + Parameters + ---------- + df : pd.DataFrame + A pandas dataframe to check for the + column withhin + column_name : str + The name of the column to check for + + Raises + ------ + IndexError + 'column_name' is not a column in the dataframe + + Returns + ------- + None + + """ + if column_name not in df.columns: + raise IndexError(f"'{column_name}' is not a column in the dataframe.") + + return None diff --git a/tests/data/gtfs/report/gtfs_report/styles.css b/tests/data/gtfs/report/gtfs_report/styles.css new file mode 100644 index 00000000..1e816520 --- /dev/null +++ b/tests/data/gtfs/report/gtfs_report/styles.css @@ -0,0 +1,195 @@ +* { + margin: 0; + padding: 0; + box-sizing: border-box; + font-family: 'Poppins', sans-serif; +} + +body { + min-height: 100vh; +} + +a { + text-decoration: none; + +} + +li { + list-style: none; +} + +h1, +h2 { + color: black; +} + +h3 { + color: #999; +} + +table { + overflow: hidden; + overflow-x: scroll; + max-width: 1500px; + display: block; + overflow-y: scroll; + max-height: 800px; +} + +thead { + position: sticky; + top: -10px; +} + +.btn { + background: #f05462; + color: white; + padding: 5px 10px; + text-align: center; +} + +.btn:hover { + color: #f05462; + background: white; + padding: 3px 8px; + border: 2px solid #f05462; +} + +.title { + display: flex; + align-items: center; + justify-content: space-around; + padding: 15px 10px; + border-bottom: 2px solid #999; +} + +table { + padding: 10px; +} + +th, +td { + text-align: left; + padding: 8px; +} + +.side-menu { + position: fixed; + background: #28A197; + width: 20vw; + min-height: 100vh; + display: flex; + flex-direction: column; +} + +.side-menu .side-menu-title { + height: 10vh; + display: flex; + align-items: center; + justify-content: center; + font-weight: bold +} + +.side-menu li { + font-size: 24px; + padding: 10px 40px; + display: flex; + align-items: center; +} + +.side-menu li:hover { + background: white; + color: #28A197; + +} + +.side-menu li:hover .option{ + color: #28A197; + background-color: white; +} + +.side-menu .option { + color: white; +} + +.side-menu .option:hover{ + color: #28A197; + background-color: white; +} + +.container { + position: absolute; + right: 0; + width: 80vw; + height: 100vh; + background: #f1f1f1; +} + +.container .content { + position: relative; + background: #f1f1f1; + padding-left: 20px; + padding-right: 20px; +} + +.container .content .analysis-cont { + display: flex; + justify-content: space-around; + align-items: flex-start; + flex-wrap: wrap; + background-color: #f1f1f1; + padding: 20px; +} + +.container .content .analysis-cont .summary div { + display: block; +} + + +.container .content .analysis-cont .summary dd { + font-weight: bold; + margin-inline-start: 0; + display: inline-block; + min-width: 130px; +} + +.container .content .analysis-cont .summary dt { + display: inline-block; +} + +.analysis-title { + font-weight: bold; + font-size: large; + margin-bottom: 10px; +} + +hr { + border: 0; + clear:both; + display:block; + width: 100%; + background-color: black; + height: 5px; + } + +.container .header { + position: fixed; + top: 0; + right: 0; + width: 80vw; + height: 10vh; + background: #801650; + display: flex; + align-items: center; + justify-content: center; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2); + z-index: 1; +} + +.container .header .header-title { + display: flex; + align-items: center; + color: #F46A25; + font-weight: bold; + font-size: xx-large; +} diff --git a/tests/data/gtfs/report/html_template.html b/tests/data/gtfs/report/html_template.html new file mode 100644 index 00000000..8ba13060 --- /dev/null +++ b/tests/data/gtfs/report/html_template.html @@ -0,0 +1,6 @@ + + + + +
[test_placeholder] Tester [test_placeholder]
+ diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py new file mode 100644 index 00000000..d394acb9 --- /dev/null +++ b/tests/gtfs/report/test_report_utils.py @@ -0,0 +1,105 @@ +"""Test scripts for the GTFS report utility functions.""" + +import os +import shutil +import re + +import pytest +from pyprojroot import here + +from transport_performance.gtfs.report.report_utils import ( + TemplateHTML, + set_up_report_dir, +) + + +@pytest.fixture(scope="function") +def template_fixture(): + """Fixture for test funcs expecting a valid feed object.""" + template = TemplateHTML( + path=here("tests/data/gtfs/report/html_template.html") + ) + return template + + +class TestTemplateHTML(object): + """Tests related to the TemplateHTML class.""" + + def test_init(self, template_fixture): + """Test initialising the TemplateHTML class.""" + expected_template = """ + + + +
[test_placeholder] Tester [test_placeholder]
+ +""" + assert ( + expected_template == template_fixture.get_template() + ), "Test template not as expected" + + def test_insert_defence(self, template_fixture): + """Test defences for .insert().""" + with pytest.raises( + ValueError, + match=( + "You have selected not to replace multiple" + "placeholders of the same value, however" + "placeholders occur more than once. \n" + "If you would like to allow this, set the" + "replace_multiple param to True" + ), + ): + template_fixture.insert("test_placeholder", "test_value") + + def test_insert_on_pass(self, template_fixture): + """Test functionality for .insert() when defences are passed.""" + expected_template = """ + + + +
test_value Tester test_value
+ +""" + template_fixture.insert( + placeholder="test_placeholder", + value="test_value", + replace_multiple=True, + ) + assert ( + expected_template == template_fixture.get_template() + ), "Test placeholder replacement not acting as expected" + + +# TODO: Replace all instances of set_up_report_dir() with +# _check_parents_dir_exists() (from src/defences.py) +class TestSetUpReportDir(object): + """Test setting up a dir for a report.""" + + def test_set_up_report_dir_defence(self): + """Test the defences for set_up_report_dir().""" + with pytest.raises( + FileExistsError, + match=( + re.escape( + "Report already exists at path: " + "[tests/data/gtfs/report]." + "Consider setting overwrite=True" + "if you'd like to overwrite this." + ) + ), + ): + set_up_report_dir("tests/data/gtfs/report") + + def test_set_up_report_dir_on_pass(self): + """Test set_up_report_dir() when defences are passed.""" + set_up_report_dir("data/interim") + assert os.path.exists( + here("data/interim/gtfs_report") + ), "Failed to create report in data/interim" + shutil.rmtree(path=here("data/interim/gtfs_report")) + + set_up_report_dir("tests/data/gtfs/report", overwrite=True) + assert os.path.exists( + here("tests/data/gtfs/report/gtfs_report") + ), "Failed to replace report in tests/data/gtfs/report/" diff --git a/tests/gtfs/test_gtfs_utils.py b/tests/gtfs/test_gtfs_utils.py index fbd12b43..25534904 100644 --- a/tests/gtfs/test_gtfs_utils.py +++ b/tests/gtfs/test_gtfs_utils.py @@ -4,8 +4,15 @@ import os import pytest import pathlib +import re -from transport_performance.gtfs.gtfs_utils import bbox_filter_gtfs +import pandas as pd +from plotly.graph_objects import Figure as PlotlyFigure + +from transport_performance.gtfs.gtfs_utils import ( + bbox_filter_gtfs, + convert_pandas_to_plotly, +) from transport_performance.gtfs.validation import GtfsInstance @@ -40,3 +47,57 @@ def test_bbox_filter_gtfs_writes_as_expected(self, tmpdir): assert isinstance( feed, GtfsInstance ), f"Expected class `Gtfs_Instance but found: {type(feed)}`" + + +class TestConvertPandasToPlotly(object): + """Test convert_pandas_to_plotly().""" + + def test_convert_pandas_to_plotly_defences(self): + """Test convert_pandas_to_plotly defences.""" + test_df = pd.DataFrame( + { + "ID": [1, 2, 3, 4, 1], + "score": [45, 34, 23, 12, 23], + "grade": ["A", "B", "C", "D", "C"], + } + ) + with pytest.raises( + LookupError, + match=re.escape( + "dark is not a valid colour scheme." + "Valid colour schemes include ['dsc']" + ), + ): + convert_pandas_to_plotly(test_df, scheme="dark") + + multi_index_df = test_df.groupby(["ID", "grade"]).agg( + {"score": ["mean", "min", "max"]} + ) + with pytest.raises( + TypeError, + match="Pandas dataframe must have a single index," + "not MultiIndex", + ): + convert_pandas_to_plotly(multi_index_df) + + def test_convert_pandas_to_plotly_on_pass(self): + """Test convert_pandas_to_plotly() when defences pass.""" + test_df = pd.DataFrame( + { + "ID": [1, 2, 3, 4, 1], + "score": [45, 34, 23, 12, 23], + "grade": ["A", "B", "C", "D", "C"], + } + ) + # return_html + html_return = convert_pandas_to_plotly(test_df, return_html=True) + assert isinstance(html_return, str), re.escape( + f"Expected type str but {type(html_return)} found" + ) + + # return plotly figure + fig_return = convert_pandas_to_plotly(test_df, return_html=False) + assert isinstance(fig_return, PlotlyFigure), re.escape( + "Expected type plotly.graph_objects.Figure but " + f"{type(fig_return)} found" + ) diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index 30329f87..1b23520f 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -1,14 +1,16 @@ """Tests for validation module.""" +import re +import os + import pytest from pyprojroot import here import gtfs_kit as gk import pandas as pd from unittest.mock import patch, call -import os from geopandas import GeoDataFrame import numpy as np -import re import pathlib +from plotly.graph_objects import Figure as PlotlyFigure from transport_performance.gtfs.validation import ( GtfsInstance, @@ -76,6 +78,24 @@ def test_init_on_pass(self): gtfs2.feed.dist_units == "m" ), f"Expected 'm', found: {gtfs2.feed.dist_units}" + def test_get_gtfs_files(self, gtfs_fixture): + """Assert files that make up the GTFS.""" + expected_files = [ + "agency.txt", + "calendar_dates.txt", + "stop_times.txt", + "frequencies.txt", + "shapes.txt", + "trips.txt", + "feed_info.txt", + "stops.txt", + "calendar.txt", + "routes.txt", + ] + assert ( + gtfs_fixture.get_gtfs_files() == expected_files + ), "GTFS files not as expected" + def test_is_valid(self, gtfs_fixture): """Assertions about validity_df table.""" gtfs_fixture.is_valid() @@ -457,15 +477,16 @@ def test_summarise_trips_on_pass(self, gtfs_fixture): ) found_ds = gtfs_fixture.daily_trip_summary.columns - exp_cols_ds = pd.MultiIndex.from_tuples( + exp_cols_ds = pd.Index( [ - ("day", ""), - ("route_type", ""), - ("trip_count", "max"), - ("trip_count", "mean"), - ("trip_count", "median"), - ("trip_count", "min"), - ] + "day", + "route_type", + "trip_count_max", + "trip_count_mean", + "trip_count_median", + "trip_count_min", + ], + dtype="object", ) assert ( @@ -490,12 +511,12 @@ def test_summarise_trips_on_pass(self, gtfs_fixture): # tests the output of the daily_route_summary table # using tests/data/newport-20230613_gtfs.zip expected_df = { - ("day", ""): {8: "friday", 9: "friday"}, - ("route_type", ""): {8: 3, 9: 200}, - ("trip_count", "max"): {8: 1211, 9: 90}, - ("trip_count", "mean"): {8: 1211.0, 9: 88.0}, - ("trip_count", "median"): {8: 1211.0, 9: 88.0}, - ("trip_count", "min"): {8: 1211, 9: 88}, + "day": {8: "friday", 9: "friday"}, + "route_type": {8: 3, 9: 200}, + "trip_count_max": {8: 1211, 9: 90}, + "trip_count_min": {8: 1211, 9: 88}, + "trip_count_mean": {8: 1211.0, 9: 88.0}, + "trip_count_median": {8: 1211.0, 9: 88.0}, } found_df = gtfs_fixture.daily_trip_summary[ @@ -525,15 +546,16 @@ def test_summarise_routes_on_pass(self, gtfs_fixture): ) found_ds = gtfs_fixture.daily_route_summary.columns - exp_cols_ds = pd.MultiIndex.from_tuples( + exp_cols_ds = pd.Index( [ - ("day", ""), - ("route_count", "max"), - ("route_count", "mean"), - ("route_count", "median"), - ("route_count", "min"), - ("route_type", ""), - ] + "day", + "route_count_max", + "route_count_mean", + "route_count_median", + "route_count_min", + "route_type", + ], + dtype="object", ) assert ( @@ -558,12 +580,12 @@ def test_summarise_routes_on_pass(self, gtfs_fixture): # tests the output of the daily_route_summary table # using tests/data/newport-20230613_gtfs.zip expected_df = { - ("day", ""): {8: "friday", 9: "friday"}, - ("route_count", "max"): {8: 74, 9: 10}, - ("route_count", "mean"): {8: 74.0, 9: 9.0}, - ("route_count", "median"): {8: 74.0, 9: 9.0}, - ("route_count", "min"): {8: 74, 9: 9}, - ("route_type", ""): {8: 3, 9: 200}, + "day": {8: "friday", 9: "friday"}, + "route_count_max": {8: 74, 9: 10}, + "route_count_min": {8: 74, 9: 9}, + "route_count_mean": {8: 74.0, 9: 9.0}, + "route_count_median": {8: 74.0, 9: 9.0}, + "route_type": {8: 3, 9: 200}, } found_df = gtfs_fixture.daily_route_summary[ @@ -580,3 +602,151 @@ def test_summarise_routes_on_pass(self, gtfs_fixture): "Size of date_route_counts not as expected. " "Expected {expected_size}" ) + + def test__plot_summary_defences(self, gtfs_fixture): + """Test defences for _plot_summary().""" + current_fixture = gtfs_fixture + current_fixture.summarise_routes() + + # test parameters that are yet to be tested + with pytest.raises( + ValueError, + match=re.escape( + "'orientation expected 'v' or 'h'" + " (non case sensitive)." + " Found l of type ." + ), + ): + current_fixture._plot_summary( + current_fixture.daily_route_summary, + "route_count_mean", + orientation="l", + ) + + def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): + """Test plotting a summary when defences are passed.""" + current_fixture = gtfs_fixture + current_fixture.summarise_routes() + + # test returning a html string + test_html = gtfs_fixture._plot_summary( + gtfs_fixture.daily_route_summary, + "route_count_mean", + return_html=True, + ) + assert type(test_html) == str, "Failed to return HTML for the plot" + + # test returning a plotly figure + test_image = gtfs_fixture._plot_summary( + gtfs_fixture.daily_route_summary, "route_count_mean" + ) + assert ( + type(test_image) == PlotlyFigure + ), "Failed to return plotly.graph_objects.Figure type" + + # test saving plots in html and png format + gtfs_fixture._plot_summary( + gtfs_fixture.daily_route_summary, + "route_count_mean", + width=1200, + height=800, + save_html=True, + save_image=True, + save_pth=pathlib.Path(os.path.join(tmp_path, "save_test")), + ylabel="Mean", + xlabel="Day", + orientation="h", + plotly_kwargs={"legend": dict(bgcolor="lightgrey")}, + ) + + assert os.path.exists( + "outputs/gtfs" + ), "outputs/gtfs could not be created" + assert os.path.exists( + os.path.join(tmp_path, "save_test.html") + ), "Failed to save summary in HTML" + assert os.path.exists( + os.path.join(tmp_path, "save_test.png") + ), "Failed to save summary as a PNG" + + def test__plot_route_summary_defences(self, gtfs_fixture): + """Test the defences for the small wrapper plot_route_summary().""" + # test attribute checks + with pytest.raises( + AttributeError, + match=( + re.escape( + "The daily_route_summary table could not be found." + " Did you forget to call '.summarise_routes()' first?" + ) + ), + ): + gtfs_fixture.plot_route_summary(target_summary="mean") + + def test__plot_trip_summary_defences(self, gtfs_fixture): + """Test the defences for the small wrapper plot_trip_summary().""" + # test attribute checks + with pytest.raises( + AttributeError, + match=( + re.escape( + "The daily_trip_summary table could not be found." + " Did you forget to call '.summarise_trips()' first?" + ) + ), + ): + gtfs_fixture.plot_trip_summary(target_summary="mean") + + def test__plot_route_summary_on_pass(self, gtfs_fixture): + """Test plot_route_summary() calls _plot_summary() succesfully.""" + gtfs_fixture.summarise_routes() + assert isinstance( + gtfs_fixture.plot_route_summary(target_summary="mean"), + PlotlyFigure, + ), "plot_route_summary() failed to return plotly figure" + + def test__plot_trip_summary_on_pass(self, gtfs_fixture): + """Test plot_trip_summary() calls _plot_summary() succesfully.""" + gtfs_fixture.summarise_trips() + assert isinstance( + gtfs_fixture.plot_trip_summary(target_summary="mean"), PlotlyFigure + ), "plot_trip_summary() failed to return plotly figure" + + def test_html_report_defences(self, gtfs_fixture, tmp_path): + """Test the defences whilst generating a HTML report.""" + with pytest.raises( + ValueError, match="'summary type' must be mean, median, min or max" + ): + gtfs_fixture.html_report( + report_dir=tmp_path, + overwrite=True, + summary_type="test_sum", + ) + + def test_html_report_on_pass(self, gtfs_fixture, tmp_path): + """Test that a HTML report is generated if defences are passed.""" + gtfs_fixture.html_report(report_dir=pathlib.Path(tmp_path)) + + # assert that the report has been completely generated + assert os.path.exists( + pathlib.Path(os.path.join(tmp_path, "gtfs_report")) + ), "gtfs_report dir was not created" + assert os.path.exists( + pathlib.Path(os.path.join(tmp_path, "gtfs_report", "index.html")) + ), "gtfs_report/index.html was not created" + assert os.path.exists( + pathlib.Path(os.path.join(tmp_path, "gtfs_report", "styles.css")) + ), "gtfs_report/styles.css was not created" + assert os.path.exists( + pathlib.Path( + os.path.join(tmp_path, "gtfs_report", "summaries.html") + ) + ), "gtfs_report/summaries.html was not created" + assert os.path.exists( + pathlib.Path( + os.path.join(tmp_path, "gtfs_report", "stop_locations.html") + ) + ), "gtfs_report/stop_locations.html was not created" + assert os.path.exists( + pathlib.Path(os.path.join(tmp_path, "gtfs_report", "stops.html")) + ), "gtfs_report/stops.html was not created" From fc1d25511cdc2b3063a6041c5c97f929e4778c04 Mon Sep 17 00:00:00 2001 From: Charlie Browning <121952297+CBROWN-ONS@users.noreply.github.com> Date: Wed, 30 Aug 2023 14:59:18 +0100 Subject: [PATCH 02/33] Update requirements.txt --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 8c20ed32..a19383b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -26,5 +26,6 @@ folium mapclassify seaborn pretty_html_table +kaleido numpy>=1.25.0 # test suite will fail if user installed lower than this -e . From 6d3292556880a8b9138beaff0638708cdaab4695 Mon Sep 17 00:00:00 2001 From: Browning Date: Wed, 30 Aug 2023 15:23:29 +0100 Subject: [PATCH 03/33] fix: Update tests to reflect changes --- src/transport_performance/gtfs/validation.py | 12 ++++++------ tests/gtfs/test_validation.py | 12 +++++++----- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index 8adb3b75..160a1753 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -676,7 +676,7 @@ def _plot_summary( save_html: bool = False, save_image: bool = False, save_pth: Union[pathlib.Path, str] = pathlib.Path( - os.path.join("outputs", "gtfs") + os.path.join("outputs", "gtfs", "summary_plot") ), ) -> Union[PlotlyFigure, str]: """Plot (and save) a summary table using plotly. @@ -829,10 +829,11 @@ def _plot_summary( # save the plot if specified if save_html: - if ext != ".png": + if ext != ".html": print( - "Image save requested but HTML type not specified." - "\nSaving as .html" + "HTML save requested but accepted type not specified.\n" + "Accepted image formats include ['.html']\n" + "Saving as .html" ) plotly_io.write_html( fig=fig, @@ -843,14 +844,13 @@ def _plot_summary( if save_image: if ext != ".png": print( - "HTML save requested but accepted type not specified.\n" + "Image save requested but accepted type not specified.\n" "Accepted image formats include ['.png']\n" "Saving as .png" ) plotly_io.write_image( fig=fig, file=os.path.normpath(main + ".png") ) - print(os.path.normpath(main + ".png")) if return_html: return plotly_io.to_html(fig, full_html=False) diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index 1b23520f..c86b561c 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -652,7 +652,9 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): height=800, save_html=True, save_image=True, - save_pth=pathlib.Path(os.path.join(tmp_path, "save_test")), + save_pth=pathlib.Path( + os.path.join(tmp_path, "save_test", "test_image") + ), ylabel="Mean", xlabel="Day", orientation="h", @@ -660,13 +662,13 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): ) assert os.path.exists( - "outputs/gtfs" - ), "outputs/gtfs could not be created" + os.path.join(tmp_path, "save_test") + ), "'save_test' dir could not be created'" assert os.path.exists( - os.path.join(tmp_path, "save_test.html") + os.path.join(tmp_path, "save_test", "test_image.html") ), "Failed to save summary in HTML" assert os.path.exists( - os.path.join(tmp_path, "save_test.png") + os.path.join(tmp_path, "save_test", "test_image.png") ), "Failed to save summary as a PNG" def test__plot_route_summary_defences(self, gtfs_fixture): From a71ccada60d89ee260dd6d04e8646e979e74fcf7 Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 31 Aug 2023 12:39:31 +0100 Subject: [PATCH 04/33] Added better file ext checking; update tests; added new tests --- src/transport_performance/gtfs/validation.py | 45 +++++++---- tests/gtfs/report/test_report_utils.py | 16 +--- tests/gtfs/test_validation.py | 80 ++++++++++++++++++++ 3 files changed, 114 insertions(+), 27 deletions(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index 160a1753..a8483ac9 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -13,6 +13,7 @@ from pretty_html_table import build_table import zipfile import pathlib +import warnings from typing import Union from plotly.graph_objects import Figure as PlotlyFigure @@ -827,13 +828,17 @@ def _plot_summary( # break up the save path main, ext = os.path.splitext(save_pth) - # save the plot if specified + # save the plot if specified (with correct file type) if save_html: - if ext != ".html": - print( - "HTML save requested but accepted type not specified.\n" - "Accepted image formats include ['.html']\n" - "Saving as .html" + if ext.lower() != ".html": + warnings.warn( + ( + "HTML save requested but accepted type " + "not specified.\n" + "Accepted image formats include ['.html']\n" + "Saving as .html" + ), + UserWarning, ) plotly_io.write_html( fig=fig, @@ -842,16 +847,26 @@ def _plot_summary( ) if save_image: - if ext != ".png": - print( - "Image save requested but accepted type not specified.\n" - "Accepted image formats include ['.png']\n" - "Saving as .png" + valid_img_formats = [ + ".png", + ".pdf", + "jpg", + "jpeg", + ".webp", + ".svg", + ] + if ext.lower() not in valid_img_formats: + warnings.warn( + ( + "Image save requested but accepted type not " + "specified.\n" + f"Accepted image formats include {valid_img_formats}\n" + "Saving as .png" + ), + UserWarning, ) - plotly_io.write_image( - fig=fig, file=os.path.normpath(main + ".png") - ) - + ext = ".png" + plotly_io.write_image(fig=fig, file=os.path.normpath(main + ext)) if return_html: return plotly_io.to_html(fig, full_html=False) return fig diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index d394acb9..b4c2798d 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -1,7 +1,7 @@ """Test scripts for the GTFS report utility functions.""" import os -import shutil +import pathlib import re import pytest @@ -71,8 +71,6 @@ def test_insert_on_pass(self, template_fixture): ), "Test placeholder replacement not acting as expected" -# TODO: Replace all instances of set_up_report_dir() with -# _check_parents_dir_exists() (from src/defences.py) class TestSetUpReportDir(object): """Test setting up a dir for a report.""" @@ -91,15 +89,9 @@ def test_set_up_report_dir_defence(self): ): set_up_report_dir("tests/data/gtfs/report") - def test_set_up_report_dir_on_pass(self): + def test_set_up_report_dir_on_pass(self, tmp_path): """Test set_up_report_dir() when defences are passed.""" - set_up_report_dir("data/interim") + set_up_report_dir(pathlib.Path(os.path.join(tmp_path)), overwrite=True) assert os.path.exists( - here("data/interim/gtfs_report") - ), "Failed to create report in data/interim" - shutil.rmtree(path=here("data/interim/gtfs_report")) - - set_up_report_dir("tests/data/gtfs/report", overwrite=True) - assert os.path.exists( - here("tests/data/gtfs/report/gtfs_report") + os.path.join(tmp_path, "gtfs_report") ), "Failed to replace report in tests/data/gtfs/report/" diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index c86b561c..790ef99a 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -623,6 +623,7 @@ def test__plot_summary_defences(self, gtfs_fixture): orientation="l", ) + @pytest.mark.filterwarnings("ignore::UserWarning") def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): """Test plotting a summary when defences are passed.""" current_fixture = gtfs_fixture @@ -661,6 +662,7 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): plotly_kwargs={"legend": dict(bgcolor="lightgrey")}, ) + # general save test assert os.path.exists( os.path.join(tmp_path, "save_test") ), "'save_test' dir could not be created'" @@ -671,6 +673,55 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): os.path.join(tmp_path, "save_test", "test_image.png") ), "Failed to save summary as a PNG" + # save test for HTML with an invalid file extension + with pytest.warns( + UserWarning, + match=re.escape( + "HTML save requested but accepted type not specified.\n" + "Accepted image formats include ['.html']\n" + "Saving as .html" + ), + ): + gtfs_fixture._plot_summary( + gtfs_fixture.daily_route_summary, + "route_count_mean", + save_html=True, + save_pth=pathlib.Path( + os.path.join( + tmp_path, "save_test", "invalid_html_ext.nothtml" + ) + ), + ) + + assert os.path.exists( + os.path.join(tmp_path, "save_test", "invalid_html_ext.html") + ), "Failed to save HTML with automatic file extensions" + + # save test for an image with invalid file extension + valid_img_formats = [".png", ".pdf", "jpg", "jpeg", ".webp", ".svg"] + with pytest.warns( + UserWarning, + match=re.escape( + "Image save requested but accepted type not specified.\n" + f"Accepted image formats include {valid_img_formats}\n" + "Saving as .png" + ), + ): + gtfs_fixture._plot_summary( + gtfs_fixture.daily_route_summary, + "route_count_mean", + save_image=True, + save_pth=pathlib.Path( + os.path.join( + tmp_path, "save_test", "invalid_image_ext.notimg" + ) + ), + ) + + assert os.path.exists( + os.path.join(tmp_path, "save_test", "invalid_image_ext.png") + ), "Failed to save HTML with automatic file extensions" + def test__plot_route_summary_defences(self, gtfs_fixture): """Test the defences for the small wrapper plot_route_summary().""" # test attribute checks @@ -714,6 +765,35 @@ def test__plot_trip_summary_on_pass(self, gtfs_fixture): gtfs_fixture.plot_trip_summary(target_summary="mean"), PlotlyFigure ), "plot_trip_summary() failed to return plotly figure" + def test__create_extended_repeated_pair_table(self, gtfs_fixture): + """Test _create_extended_repeated_pair_table().""" + test_table = pd.DataFrame( + { + "trip_name": ["Newport", "Cwmbran", "Cardiff", "Newport"], + "trip_abbrev": ["Newp", "Cwm", "Card", "Newp"], + "type": ["bus", "train", "bus", "train"], + } + ) + + expected_table = pd.DataFrame( + { + "trip_name": {0: "Newport"}, + "trip_abbrev": {0: "Newp"}, + "type_original": {0: "bus"}, + "type_duplicate": {0: "train"}, + } + ).to_dict() + + returned_table = gtfs_fixture._create_extended_repeated_pair_table( + table=test_table, + join_vars=["trip_name", "trip_abbrev"], + original_rows=[0], + ).to_dict() + + assert ( + expected_table == returned_table + ), "_create_extended_repeated_pair_table() failed" + def test_html_report_defences(self, gtfs_fixture, tmp_path): """Test the defences whilst generating a HTML report.""" with pytest.raises( From 2e7f5e14d31e70f4d1e31b435617ab8020e742cf Mon Sep 17 00:00:00 2001 From: Browning Date: Mon, 4 Sep 2023 10:13:41 +0100 Subject: [PATCH 05/33] chore: Clean up docstrings to comply with numpy formatting --- .../gtfs/report/report_utils.py | 40 +++++++++++++++---- src/transport_performance/gtfs/validation.py | 30 +++++++++++++- src/transport_performance/utils/defence.py | 14 +++---- 3 files changed, 68 insertions(+), 16 deletions(-) diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index 3826e976..21536338 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -47,7 +47,21 @@ class TemplateHTML: - """A class for inserting HTML string into a docstring.""" + """A class for inserting HTML string into a docstring. + + Attributes + ---------- + template : str + A string containing the HTML template. + + Methods + ------- + insert(placeholder: str, value: str, replace_multiple: bool = False) + Insert values into the HTML template + get_template() + Returns the template attribute + + """ def __init__(self, path: Union[str, pathlib.Path]) -> None: """Initialise the TemplateHTML object. @@ -75,22 +89,25 @@ def insert( Parameters ---------- placeholder : str - The placeholder name in the template. - This is a string. In the template it - should be surrounded by sqsuare brackets. + The placeholder name in the template. This is a string. In the + template it should be surrounded by square brackets. value : str The value to place in the placeholder location. replace_multiple : bool, optional - Whether or not to replace multiple - placeholders that share the same - placeholder value, - by default False + Whether or not to replace multiple placeholders that share the same + placeholder value, by default False Returns ------- None + Raises + ------ + ValueError + A ValueError is raised if there are multiple instances of a + place-holder but 'replace_multiple' is not True + """ _string_defence(placeholder, "placeholder") _string_defence(value, "value") @@ -137,6 +154,12 @@ def set_up_report_dir( ------- None + Raises + ------ + FileExistsError + Raises an error if you the gtfs report directory already exists in the + given path and overwrite=False + """ # defences _check_parent_dir_exists(path, "path", create=True) @@ -149,6 +172,7 @@ def set_up_report_dir( "if you'd like to overwrite this." ) + # make gtfs_report dir try: os.mkdir(f"{path}/gtfs_report") except FileExistsError: diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index a8483ac9..7be31483 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -731,6 +731,11 @@ def _plot_summary( Union[PlotlyFigure, str] Returns either a HTML string or the plotly figure + Raises + ------ + ValueError + An error is raised if orientation is not 'v' or 'h'. + """ # parameter type defences _dataframe_defence(summary_df, "summary_df") @@ -889,6 +894,8 @@ def plot_route_summary( ) -> Union[PlotlyFigure, str]: """Plot the summarised route data of a GTFS file. + This is a thin wrapper around _plot_summary() + Parameters ---------- target_summary : str @@ -933,6 +940,12 @@ def plot_route_summary( Union[PlotlyFigure, str] Returns either a HTML string or the plotly figure + Raises + ------ + AttributeError + Raise an error if the daily_route_summary table is yet to be # + created. + """ # defensive checks if "daily_route_summary" not in self.__dir__(): @@ -978,6 +991,8 @@ def plot_trip_summary( ): """Plot the summarised trip data of a GTFS file. + This is a thin wrapper around _plot_summary() + Parameters ---------- target_summary : str @@ -1022,6 +1037,12 @@ def plot_trip_summary( Union[PlotlyFigure, str] Returns either a HTML string or the plotly figure + Raises + ------ + AttributeError + Raise an error if the daily_trip_summary table is yet to be + created. + """ # defensive checks if "daily_trip_summary" not in self.__dir__(): @@ -1093,7 +1114,9 @@ def _extended_validation( output_path : Union[str, pathlib.Path] The path to save the HTML output to scheme : str, optional - Colour scheme from pretty_html_table, by default "green_dark" + Colour scheme from pretty_html_table, by default "green_dark". + Colour schemes can be found here: + https://pypi.org/project/pretty-html-table/ Returns ------- @@ -1240,6 +1263,11 @@ def html_report( ------- None + Raises + ------ + ValueError + An error raised if the type of summary passed is invalid + """ _bool_defence(overwrite, "overwrite") _string_defence(summary_type, "summary_type") diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index a0328258..06442b0a 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -261,20 +261,20 @@ def _check_column_in_df(df: pd.DataFrame, column_name: str) -> None: Parameters ---------- df : pd.DataFrame - A pandas dataframe to check for the - column withhin + A pandas dataframe to check if the specified column exists in. column_name : str The name of the column to check for - Raises - ------ - IndexError - 'column_name' is not a column in the dataframe - Returns ------- None + Raises + ------ + IndexError + Raises an error if the column (column_name) does not exist in the + dataframe + """ if column_name not in df.columns: raise IndexError(f"'{column_name}' is not a column in the dataframe.") From cbad5017948b6c25d1a618e3f9b04e1e88694f22 Mon Sep 17 00:00:00 2001 From: Browning Date: Mon, 4 Sep 2023 10:15:46 +0100 Subject: [PATCH 06/33] chore: Add additional raises in docstrings --- src/transport_performance/gtfs/gtfs_utils.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/transport_performance/gtfs/gtfs_utils.py b/src/transport_performance/gtfs/gtfs_utils.py index e3bede01..2865770e 100644 --- a/src/transport_performance/gtfs/gtfs_utils.py +++ b/src/transport_performance/gtfs/gtfs_utils.py @@ -89,6 +89,13 @@ def convert_pandas_to_plotly( go.Figure A plotly figure containing the drawn dataframe + Raises + ------ + LookupError + An error raised if an invalid colour scheme is passed + TypeError + An error raised if the given pandas dataframe is MultiIndex + """ # pre-defined colour schemes schemes = { From 9c3f1b47fa2691ee0d771b04e277308e34326aae Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Mon, 11 Sep 2023 08:53:04 +0100 Subject: [PATCH 07/33] chore: Breaking changes with dev resolved --- src/transport_performance/gtfs/report/report_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index 3826e976..ccb6de10 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -7,7 +7,7 @@ from transport_performance.utils.defence import ( _bool_defence, _string_defence, - _is_path_like, + _handle_path_like, _check_parent_dir_exists, ) @@ -62,7 +62,7 @@ def __init__(self, path: Union[str, pathlib.Path]) -> None: None """ - _is_path_like(path, "path") + _handle_path_like(path, "path") with open(path, "r", encoding="utf8") as f: self.template = f.read() return None From d9da952ae3f49f0fce0363d0678e00aaee712c00 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Mon, 11 Sep 2023 11:57:28 +0100 Subject: [PATCH 08/33] refactor: Several type utils -> _type_defence --- src/transport_performance/gtfs/gtfs_utils.py | 7 +- .../gtfs/report/report_utils.py | 9 ++- src/transport_performance/gtfs/routes.py | 4 +- src/transport_performance/gtfs/validation.py | 33 ++++----- src/transport_performance/osm/osm_utils.py | 4 +- src/transport_performance/utils/defence.py | 68 ++++++------------- tests/gtfs/test_routes.py | 6 +- tests/osm/test_osm_utils.py | 5 +- 8 files changed, 51 insertions(+), 85 deletions(-) diff --git a/src/transport_performance/gtfs/gtfs_utils.py b/src/transport_performance/gtfs/gtfs_utils.py index e3bede01..a78c3940 100644 --- a/src/transport_performance/gtfs/gtfs_utils.py +++ b/src/transport_performance/gtfs/gtfs_utils.py @@ -9,8 +9,7 @@ from transport_performance.utils.defence import ( _is_expected_filetype, _check_list, - _dataframe_defence, - _bool_defence, + _type_defence, ) @@ -102,8 +101,8 @@ def convert_pandas_to_plotly( } } # defences - _dataframe_defence(df, "df") - _bool_defence(return_html, "return_html") + _type_defence(df, "df", pd.DataFrame) + _type_defence(return_html, "return_html", bool) if scheme not in list(schemes.keys()): raise LookupError( f"{scheme} is not a valid colour scheme." diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index ccb6de10..96aa46cd 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -5,8 +5,7 @@ import os from transport_performance.utils.defence import ( - _bool_defence, - _string_defence, + _type_defence, _handle_path_like, _check_parent_dir_exists, ) @@ -92,9 +91,9 @@ def insert( None """ - _string_defence(placeholder, "placeholder") - _string_defence(value, "value") - _bool_defence(replace_multiple, "replace_multiple") + _type_defence(placeholder, "placeholder", str) + _type_defence(value, "value", str) + _type_defence(replace_multiple, "replace_multiple", bool) occurences = len(self.template.split(f"[{placeholder}]")) - 1 if occurences > 1 and not replace_multiple: raise ValueError( diff --git a/src/transport_performance/gtfs/routes.py b/src/transport_performance/gtfs/routes.py index e989645f..d0003f57 100644 --- a/src/transport_performance/gtfs/routes.py +++ b/src/transport_performance/gtfs/routes.py @@ -4,7 +4,7 @@ import requests import warnings -from transport_performance.utils.defence import _url_defence, _bool_defence +from transport_performance.utils.defence import _url_defence, _type_defence warnings.filterwarnings( action="ignore", category=DeprecationWarning, module=".*pkg_resources" @@ -98,7 +98,7 @@ def scrape_route_type_lookup( for url in [gtfs_url, ext_spec_url]: _url_defence(url) - _bool_defence(extended_schema, "extended_schema") + _type_defence(extended_schema, "extended_schema", bool) # Get the basic scheme lookup resp_txt = _get_response_text(gtfs_url) soup = BeautifulSoup(resp_txt, "html.parser") diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index a8483ac9..523ea305 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -23,12 +23,7 @@ _check_namespace_export, _check_parent_dir_exists, _check_column_in_df, - _bool_defence, - _integer_defence, - _string_defence, - _dataframe_defence, - _dict_defence, - _string_and_nonetype_defence, + _type_defence, ) from transport_performance.gtfs.report.report_utils import ( @@ -733,17 +728,17 @@ def _plot_summary( """ # parameter type defences - _dataframe_defence(summary_df, "summary_df") - _string_defence(day_column, "day_column") - _string_defence(target_column, "target_column") - _dict_defence(plotly_kwargs, "plotly_kwargs") - _bool_defence(return_html, "return_html") - _integer_defence(width, "width") - _integer_defence(height, "height") - _string_and_nonetype_defence(xlabel, "xlabel") - _string_and_nonetype_defence(ylabel, "ylabel") - _bool_defence(save_html, "save_html") - _bool_defence(save_image, "save_iamge") + _type_defence(summary_df, "summary_df", pd.DataFrame) + _type_defence(day_column, "day_column", str) + _type_defence(target_column, "target_column", str) + _type_defence(plotly_kwargs, "plotly_kwargs", dict) + _type_defence(return_html, "return_html", bool) + _type_defence(width, "width", int) + _type_defence(height, "height", int) + _type_defence(xlabel, "xlabel", (str, type(None))) + _type_defence(ylabel, "ylabel", (str, type(None))) + _type_defence(save_html, "save_html", bool) + _type_defence(save_image, "save_iamge", bool) _check_parent_dir_exists(save_pth, "save_pth", create=True) # orientation input defences @@ -1241,8 +1236,8 @@ def html_report( None """ - _bool_defence(overwrite, "overwrite") - _string_defence(summary_type, "summary_type") + _type_defence(overwrite, "overwrite", bool) + _type_defence(summary_type, "summary_type", str) set_up_report_dir(path=report_dir, overwrite=overwrite) summary_type = summary_type.lower() if summary_type not in ["mean", "min", "max", "median"]: diff --git a/src/transport_performance/osm/osm_utils.py b/src/transport_performance/osm/osm_utils.py index e4501416..d086eb6a 100644 --- a/src/transport_performance/osm/osm_utils.py +++ b/src/transport_performance/osm/osm_utils.py @@ -3,7 +3,7 @@ from pyprojroot import here from transport_performance.utils.defence import ( - _bool_defence, + _type_defence, _check_list, _check_parent_dir_exists, _is_expected_filetype, @@ -54,7 +54,7 @@ def filter_osm( "tag_filter": tag_filter, "install_osmosis": install_osmosis, }.items(): - _bool_defence(val, param_nm=nm) + _type_defence(val, nm, bool) # check bbox values makes sense, else osmosis will error if not bbox[0] < bbox[2]: raise ValueError( diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index 8567199f..123fa729 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -150,68 +150,40 @@ def _check_namespace_export(pkg=np, func=np.min): def _url_defence(url): """Defence checking. Not exported.""" - if not isinstance(url, str): - raise TypeError(f"url {url} expected string, instead got {type(url)}") - elif not url.startswith((r"http://", r"https://")): + _type_defence(url, "url", str) + if not url.startswith((r"http://", r"https://")): raise ValueError(f"url string expected protocol, instead found {url}") return None -def _bool_defence(some_bool, param_nm): - """Defence checking. Not exported.""" - if not isinstance(some_bool, bool): - raise TypeError( - f"`{param_nm}` expected boolean. Got {type(some_bool)}" - ) - - return None - - -def _string_defence(string, param_nm): - """Defence checking. Not exported.""" - if not isinstance(string, str): - raise TypeError(f"'{param_nm}' expected str. Got {type(string)}") - - return None - - -def _integer_defence(some_int, param_nm): - """Defence checking. Not exported.""" - if not isinstance(some_int, int): - raise TypeError(f"'{param_nm}' expected int. Got {type(some_int)}") - - return None +def _type_defence(some_object, param_nm, types) -> None: + """Defence checking utility. Can handle NoneType. + Parameters + ---------- + some_object : Any + Object to test with isinstance. + param_nm : str + A name for the parameter. Useful when this utility is used in a wrapper + to inherit the parent's parameter name and present in error message. + types : type or tuple + A type or a tuple of types to test `some_object` against. -def _dict_defence(some_dict, param_nm): - """Defence checking. Not exported.""" - if not isinstance(some_dict, dict): - raise TypeError(f"'{param_nm}' expected dict. Got {type(some_dict)}") - - return None - + Raises + ------ + TypeError + `some_object` is not of type `types`. -def _dataframe_defence(some_df, param_nm): - """Defence checking. Not exported.""" - if not isinstance(some_df, pd.DataFrame): + """ + if not isinstance(some_object, types): raise TypeError( - f"'{param_nm}' expected pd.DataFrame. Got {type(some_df)}" + f"`{param_nm}` expected {types}. Got {type(some_object)}" ) return None -# main use case for this is to avoid -# complexity limits in -# GtfsInstance._plot_summary() -def _string_and_nonetype_defence(some_value, param_nm): - if not isinstance(some_value, (str, type(None))): - raise TypeError( - f"'{param_nm}' expected type str. Found type {type(some_value)}" - ) - - def _check_list(ls, param_nm, check_elements=True, exp_type=str): """Check a list and its elements for type. diff --git a/tests/gtfs/test_routes.py b/tests/gtfs/test_routes.py index 38eaa23a..efabfc2f 100644 --- a/tests/gtfs/test_routes.py +++ b/tests/gtfs/test_routes.py @@ -47,12 +47,12 @@ def test_defensive_exceptions(self): """Test the defensive checks raise as expected.""" with pytest.raises( TypeError, - match=r"url 1 expected string, instead got ", + match=r"`url` expected . Got ", ): scrape_route_type_lookup(gtfs_url=1) with pytest.raises( TypeError, - match=r"url False expected string, instead got ", + match=r"`url` expected . Got ", ): scrape_route_type_lookup(ext_spec_url=False) with pytest.raises( @@ -62,7 +62,7 @@ def test_defensive_exceptions(self): scrape_route_type_lookup(gtfs_url="foobar") with pytest.raises( TypeError, - match=r"`extended_schema` expected boolean. Got ", + match=r"`extended_schema` .* . Got ", ): scrape_route_type_lookup(extended_schema="True") diff --git a/tests/osm/test_osm_utils.py b/tests/osm/test_osm_utils.py index 04ce3803..17e145f2 100644 --- a/tests/osm/test_osm_utils.py +++ b/tests/osm/test_osm_utils.py @@ -30,13 +30,14 @@ def test_filter_osm_defense(self): # out_pth is not a path_like filter_osm(out_pth=False) with pytest.raises( - TypeError, match="`tag_filter` expected boolean. Got " + TypeError, + match="`tag_filter` expected . Got ", ): # check for boolean defense filter_osm(tag_filter=1) with pytest.raises( TypeError, - match="`install_osmosis` expected boolean. Got ", + match="`install_osmosis` .* . Got ", ): # check for boolean defense filter_osm(install_osmosis="False") From 7a728b77a7b63f66bc0539fafdfe695742486e08 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Mon, 11 Sep 2023 12:12:24 +0100 Subject: [PATCH 09/33] fix: Undo accidental overwrite with review branch --- src/transport_performance/gtfs/gtfs_utils.py | 7 +- .../gtfs/report/report_utils.py | 9 ++- src/transport_performance/gtfs/routes.py | 4 +- src/transport_performance/gtfs/validation.py | 33 ++++----- src/transport_performance/osm/osm_utils.py | 4 +- src/transport_performance/utils/defence.py | 68 ++++++------------- tests/gtfs/test_routes.py | 6 +- tests/osm/test_osm_utils.py | 5 +- 8 files changed, 51 insertions(+), 85 deletions(-) diff --git a/src/transport_performance/gtfs/gtfs_utils.py b/src/transport_performance/gtfs/gtfs_utils.py index e3bede01..a78c3940 100644 --- a/src/transport_performance/gtfs/gtfs_utils.py +++ b/src/transport_performance/gtfs/gtfs_utils.py @@ -9,8 +9,7 @@ from transport_performance.utils.defence import ( _is_expected_filetype, _check_list, - _dataframe_defence, - _bool_defence, + _type_defence, ) @@ -102,8 +101,8 @@ def convert_pandas_to_plotly( } } # defences - _dataframe_defence(df, "df") - _bool_defence(return_html, "return_html") + _type_defence(df, "df", pd.DataFrame) + _type_defence(return_html, "return_html", bool) if scheme not in list(schemes.keys()): raise LookupError( f"{scheme} is not a valid colour scheme." diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index ccb6de10..96aa46cd 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -5,8 +5,7 @@ import os from transport_performance.utils.defence import ( - _bool_defence, - _string_defence, + _type_defence, _handle_path_like, _check_parent_dir_exists, ) @@ -92,9 +91,9 @@ def insert( None """ - _string_defence(placeholder, "placeholder") - _string_defence(value, "value") - _bool_defence(replace_multiple, "replace_multiple") + _type_defence(placeholder, "placeholder", str) + _type_defence(value, "value", str) + _type_defence(replace_multiple, "replace_multiple", bool) occurences = len(self.template.split(f"[{placeholder}]")) - 1 if occurences > 1 and not replace_multiple: raise ValueError( diff --git a/src/transport_performance/gtfs/routes.py b/src/transport_performance/gtfs/routes.py index e989645f..d0003f57 100644 --- a/src/transport_performance/gtfs/routes.py +++ b/src/transport_performance/gtfs/routes.py @@ -4,7 +4,7 @@ import requests import warnings -from transport_performance.utils.defence import _url_defence, _bool_defence +from transport_performance.utils.defence import _url_defence, _type_defence warnings.filterwarnings( action="ignore", category=DeprecationWarning, module=".*pkg_resources" @@ -98,7 +98,7 @@ def scrape_route_type_lookup( for url in [gtfs_url, ext_spec_url]: _url_defence(url) - _bool_defence(extended_schema, "extended_schema") + _type_defence(extended_schema, "extended_schema", bool) # Get the basic scheme lookup resp_txt = _get_response_text(gtfs_url) soup = BeautifulSoup(resp_txt, "html.parser") diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index a8483ac9..523ea305 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -23,12 +23,7 @@ _check_namespace_export, _check_parent_dir_exists, _check_column_in_df, - _bool_defence, - _integer_defence, - _string_defence, - _dataframe_defence, - _dict_defence, - _string_and_nonetype_defence, + _type_defence, ) from transport_performance.gtfs.report.report_utils import ( @@ -733,17 +728,17 @@ def _plot_summary( """ # parameter type defences - _dataframe_defence(summary_df, "summary_df") - _string_defence(day_column, "day_column") - _string_defence(target_column, "target_column") - _dict_defence(plotly_kwargs, "plotly_kwargs") - _bool_defence(return_html, "return_html") - _integer_defence(width, "width") - _integer_defence(height, "height") - _string_and_nonetype_defence(xlabel, "xlabel") - _string_and_nonetype_defence(ylabel, "ylabel") - _bool_defence(save_html, "save_html") - _bool_defence(save_image, "save_iamge") + _type_defence(summary_df, "summary_df", pd.DataFrame) + _type_defence(day_column, "day_column", str) + _type_defence(target_column, "target_column", str) + _type_defence(plotly_kwargs, "plotly_kwargs", dict) + _type_defence(return_html, "return_html", bool) + _type_defence(width, "width", int) + _type_defence(height, "height", int) + _type_defence(xlabel, "xlabel", (str, type(None))) + _type_defence(ylabel, "ylabel", (str, type(None))) + _type_defence(save_html, "save_html", bool) + _type_defence(save_image, "save_iamge", bool) _check_parent_dir_exists(save_pth, "save_pth", create=True) # orientation input defences @@ -1241,8 +1236,8 @@ def html_report( None """ - _bool_defence(overwrite, "overwrite") - _string_defence(summary_type, "summary_type") + _type_defence(overwrite, "overwrite", bool) + _type_defence(summary_type, "summary_type", str) set_up_report_dir(path=report_dir, overwrite=overwrite) summary_type = summary_type.lower() if summary_type not in ["mean", "min", "max", "median"]: diff --git a/src/transport_performance/osm/osm_utils.py b/src/transport_performance/osm/osm_utils.py index e4501416..d086eb6a 100644 --- a/src/transport_performance/osm/osm_utils.py +++ b/src/transport_performance/osm/osm_utils.py @@ -3,7 +3,7 @@ from pyprojroot import here from transport_performance.utils.defence import ( - _bool_defence, + _type_defence, _check_list, _check_parent_dir_exists, _is_expected_filetype, @@ -54,7 +54,7 @@ def filter_osm( "tag_filter": tag_filter, "install_osmosis": install_osmosis, }.items(): - _bool_defence(val, param_nm=nm) + _type_defence(val, nm, bool) # check bbox values makes sense, else osmosis will error if not bbox[0] < bbox[2]: raise ValueError( diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index 8567199f..123fa729 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -150,68 +150,40 @@ def _check_namespace_export(pkg=np, func=np.min): def _url_defence(url): """Defence checking. Not exported.""" - if not isinstance(url, str): - raise TypeError(f"url {url} expected string, instead got {type(url)}") - elif not url.startswith((r"http://", r"https://")): + _type_defence(url, "url", str) + if not url.startswith((r"http://", r"https://")): raise ValueError(f"url string expected protocol, instead found {url}") return None -def _bool_defence(some_bool, param_nm): - """Defence checking. Not exported.""" - if not isinstance(some_bool, bool): - raise TypeError( - f"`{param_nm}` expected boolean. Got {type(some_bool)}" - ) - - return None - - -def _string_defence(string, param_nm): - """Defence checking. Not exported.""" - if not isinstance(string, str): - raise TypeError(f"'{param_nm}' expected str. Got {type(string)}") - - return None - - -def _integer_defence(some_int, param_nm): - """Defence checking. Not exported.""" - if not isinstance(some_int, int): - raise TypeError(f"'{param_nm}' expected int. Got {type(some_int)}") - - return None +def _type_defence(some_object, param_nm, types) -> None: + """Defence checking utility. Can handle NoneType. + Parameters + ---------- + some_object : Any + Object to test with isinstance. + param_nm : str + A name for the parameter. Useful when this utility is used in a wrapper + to inherit the parent's parameter name and present in error message. + types : type or tuple + A type or a tuple of types to test `some_object` against. -def _dict_defence(some_dict, param_nm): - """Defence checking. Not exported.""" - if not isinstance(some_dict, dict): - raise TypeError(f"'{param_nm}' expected dict. Got {type(some_dict)}") - - return None - + Raises + ------ + TypeError + `some_object` is not of type `types`. -def _dataframe_defence(some_df, param_nm): - """Defence checking. Not exported.""" - if not isinstance(some_df, pd.DataFrame): + """ + if not isinstance(some_object, types): raise TypeError( - f"'{param_nm}' expected pd.DataFrame. Got {type(some_df)}" + f"`{param_nm}` expected {types}. Got {type(some_object)}" ) return None -# main use case for this is to avoid -# complexity limits in -# GtfsInstance._plot_summary() -def _string_and_nonetype_defence(some_value, param_nm): - if not isinstance(some_value, (str, type(None))): - raise TypeError( - f"'{param_nm}' expected type str. Found type {type(some_value)}" - ) - - def _check_list(ls, param_nm, check_elements=True, exp_type=str): """Check a list and its elements for type. diff --git a/tests/gtfs/test_routes.py b/tests/gtfs/test_routes.py index 38eaa23a..efabfc2f 100644 --- a/tests/gtfs/test_routes.py +++ b/tests/gtfs/test_routes.py @@ -47,12 +47,12 @@ def test_defensive_exceptions(self): """Test the defensive checks raise as expected.""" with pytest.raises( TypeError, - match=r"url 1 expected string, instead got ", + match=r"`url` expected . Got ", ): scrape_route_type_lookup(gtfs_url=1) with pytest.raises( TypeError, - match=r"url False expected string, instead got ", + match=r"`url` expected . Got ", ): scrape_route_type_lookup(ext_spec_url=False) with pytest.raises( @@ -62,7 +62,7 @@ def test_defensive_exceptions(self): scrape_route_type_lookup(gtfs_url="foobar") with pytest.raises( TypeError, - match=r"`extended_schema` expected boolean. Got ", + match=r"`extended_schema` .* . Got ", ): scrape_route_type_lookup(extended_schema="True") diff --git a/tests/osm/test_osm_utils.py b/tests/osm/test_osm_utils.py index 04ce3803..17e145f2 100644 --- a/tests/osm/test_osm_utils.py +++ b/tests/osm/test_osm_utils.py @@ -30,13 +30,14 @@ def test_filter_osm_defense(self): # out_pth is not a path_like filter_osm(out_pth=False) with pytest.raises( - TypeError, match="`tag_filter` expected boolean. Got " + TypeError, + match="`tag_filter` expected . Got ", ): # check for boolean defense filter_osm(tag_filter=1) with pytest.raises( TypeError, - match="`install_osmosis` expected boolean. Got ", + match="`install_osmosis` .* . Got ", ): # check for boolean defense filter_osm(install_osmosis="False") From 7ce1b9e43ca69769825d8ad1b5f59fa4587f2a87 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Tue, 12 Sep 2023 09:29:50 +0100 Subject: [PATCH 10/33] test: _type_defence raises with single or multiple types --- tests/utils/test_defence.py | 101 ++++++++++++++++++++++++++++++++++++ 1 file changed, 101 insertions(+) diff --git a/tests/utils/test_defence.py b/tests/utils/test_defence.py index 412202e7..993e3a77 100644 --- a/tests/utils/test_defence.py +++ b/tests/utils/test_defence.py @@ -8,6 +8,7 @@ from transport_performance.utils.defence import ( _check_list, _check_parent_dir_exists, + _type_defence, ) @@ -121,3 +122,103 @@ def test_check_parents_dir_exists(self, tmp_path): "_check_parent_dir_exists did not make parent dir" " when 'create=True' (multiple levels)" ) + + +class Test_TypeDefence(object): + """Assertions for _type_defence().""" + + def test_type_defence_raises_on_single_types(self): + """Assertions for single values to the `types` parameter.""" + with pytest.raises( + TypeError, + match="`empty_list` expected . Got ", + ): + _type_defence(list(), "empty_list", str) + with pytest.raises( + TypeError, + match="`int_1` expected . Got ", + ): + _type_defence(1, "int_1", list) + with pytest.raises( + TypeError, + match="`string_1` expected . Got ", + ): + _type_defence("1", "string_1", int) + with pytest.raises( + TypeError, + match="`float_1` expected . Got ", + ): + _type_defence(1.0, "float_1", int) + with pytest.raises( + TypeError, + match="`empty_dict` expected . Got ", + ): + _type_defence(dict(), "empty_dict", tuple) + with pytest.raises( + TypeError, + match="`empty_tuple` expected . Got ", + ): + _type_defence(tuple(), "empty_tuple", dict) + with pytest.raises( + TypeError, + match="`None` expected . Got ", + ): + _type_defence(None, "None", int) + + def test_type_defence_raises_on_multiple_types(object): + """Assertions for multiple values to the `types` parameter.""" + with pytest.raises( + TypeError, + match=re.escape( + "pected (, ). Got " + ), + ): + _type_defence(1, "int_1", (str, type(None))) + with pytest.raises( + TypeError, + match=re.escape( + "`str_1` expected (, , , , , , , , , , <" + ), + ): + _type_defence( + tuple(), + "empty_tuple", + (type(None), list, dict, str, int, float), + ) + with pytest.raises( + TypeError, + match=re.escape( + "`None` expected (, , Date: Tue, 12 Sep 2023 09:42:20 +0100 Subject: [PATCH 11/33] test: _type_defence pass on single & multiple values to types --- tests/utils/test_defence.py | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/tests/utils/test_defence.py b/tests/utils/test_defence.py index 993e3a77..5e35ce0a 100644 --- a/tests/utils/test_defence.py +++ b/tests/utils/test_defence.py @@ -128,7 +128,7 @@ class Test_TypeDefence(object): """Assertions for _type_defence().""" def test_type_defence_raises_on_single_types(self): - """Assertions for single values to the `types` parameter.""" + """Assert func raises for single values to the `types` parameter.""" with pytest.raises( TypeError, match="`empty_list` expected . Got ", @@ -166,7 +166,7 @@ def test_type_defence_raises_on_single_types(self): _type_defence(None, "None", int) def test_type_defence_raises_on_multiple_types(object): - """Assertions for multiple values to the `types` parameter.""" + """Assert func raises for multiple values to the `types` parameter.""" with pytest.raises( TypeError, match=re.escape( @@ -222,3 +222,23 @@ def test_type_defence_raises_on_multiple_types(object): ), ): _type_defence(None, "None", (int, str, float)) + + def test_type_defence_passes_on_single_types(self): + """Assert func passes on single values to the `types` parameter.""" + _type_defence(1, "int_1", int) + _type_defence(1.0, "float_1", float) + _type_defence("1", "str_1", str) + _type_defence(dict(), "empty_dict", dict) + _type_defence(tuple(), "empty_tuple", tuple) + _type_defence(list(), "empty_list", list) + _type_defence(None, "None", type(None)) + + def test_type_defence_passes_on_multiple_types(self): + """Assert func passes on multiple values to the `types` parameter.""" + _type_defence(1, "int_1", (tuple, int)) + _type_defence("1", "str_1", (int, float, str)) + _type_defence(1.0, "float_1", (float, type(None))) + _type_defence(dict(), "empty_dict", (tuple, dict)) + _type_defence(list(), "empty_list", (type(None), list)) + _type_defence(tuple(), "empty_tuple", (tuple, dict)) + _type_defence(None, "None", (list, dict, type(None))) From 1ca03b01defcaba37dd580ec71b2103c31492120 Mon Sep 17 00:00:00 2001 From: Charlie Browning <121952297+CBROWN-ONS@users.noreply.github.com> Date: Tue, 12 Sep 2023 09:57:19 +0100 Subject: [PATCH 12/33] Refactor save paths for plotting summaries (#124) --- src/transport_performance/gtfs/validation.py | 116 ++++++++++--------- tests/gtfs/test_validation.py | 66 +++-------- 2 files changed, 80 insertions(+), 102 deletions(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index 38c77b95..420cd113 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -13,7 +13,6 @@ from pretty_html_table import build_table import zipfile import pathlib -import warnings from typing import Union from plotly.graph_objects import Figure as PlotlyFigure @@ -671,9 +670,10 @@ def _plot_summary( return_html: bool = False, save_html: bool = False, save_image: bool = False, - save_pth: Union[pathlib.Path, str] = pathlib.Path( - os.path.join("outputs", "gtfs", "summary_plot") + out_dir: Union[pathlib.Path, str] = pathlib.Path( + os.path.join("outputs", "gtfs") ), + img_type: str = "png", ) -> Union[PlotlyFigure, str]: """Plot (and save) a summary table using plotly. @@ -715,11 +715,15 @@ def _plot_summary( save_image : bool, optional Whether or not to save the plot as a PNG, by default False - save_pth : Union[pathlib.Path, str], optional - The filepath to save the plot to. Including a file extension is - optional (e.g., .html). File paths must include forward slashes if - they aren't passed as a path object (pathlib.Path). - by default pathlib.Path(os.path.join('outputs', 'gtfs')) + out_dir : Union[pathlib.Path, str], optional + The directory to save the plot into. If a file extension is added + to this directory, it won't be cleaned. Whatever is passed as the + out dir will be used as the parent directory of the save, leaving + the responsibility on the user to specify the correct path., + by default os.path.join("outputs", "gtfs") + img_type : str, optional + The type of the image to be saved. E.g, .svg or .jpeg., + by defauly "png" Returns ------- @@ -730,6 +734,8 @@ def _plot_summary( ------ ValueError An error is raised if orientation is not 'v' or 'h'. + ValueError + An error is raised if an invalid iamge type is passed. """ # parameter type defences @@ -744,7 +750,13 @@ def _plot_summary( _type_defence(ylabel, "ylabel", (str, type(None))) _type_defence(save_html, "save_html", bool) _type_defence(save_image, "save_iamge", bool) - _check_parent_dir_exists(save_pth, "save_pth", create=True) + _type_defence(img_type, "img_type", str) + + raw_pth = os.path.join( + out_dir, + "summary_" + datetime.datetime.now().strftime("%d_%m_%Y-%H_%M_%S"), + ) + _check_parent_dir_exists(raw_pth, "save_pth", create=True) # orientation input defences if orientation.lower() not in ["v", "h"]: @@ -825,48 +837,34 @@ def _plot_summary( if plotly_kwargs: fig.update_layout(**plotly_kwargs) - # break up the save path - main, ext = os.path.splitext(save_pth) - # save the plot if specified (with correct file type) if save_html: - if ext.lower() != ".html": - warnings.warn( - ( - "HTML save requested but accepted type " - "not specified.\n" - "Accepted image formats include ['.html']\n" - "Saving as .html" - ), - UserWarning, - ) plotly_io.write_html( fig=fig, - file=os.path.normpath(main + ".html"), + file=os.path.normpath(raw_pth + ".html"), full_html=False, ) if save_image: valid_img_formats = [ - ".png", - ".pdf", + "png", + "pdf", "jpg", "jpeg", - ".webp", - ".svg", + "webp", + "svg", ] - if ext.lower() not in valid_img_formats: - warnings.warn( - ( - "Image save requested but accepted type not " - "specified.\n" - f"Accepted image formats include {valid_img_formats}\n" - "Saving as .png" - ), - UserWarning, + if img_type.lower().replace(".", "") not in valid_img_formats: + raise ValueError( + "Please specify a valid image format. Valid formats " + f"include {valid_img_formats}" ) - ext = ".png" - plotly_io.write_image(fig=fig, file=os.path.normpath(main + ext)) + plotly_io.write_image( + fig=fig, + file=os.path.normpath( + raw_pth + f".{img_type.replace('.', '')}" + ), + ) if return_html: return plotly_io.to_html(fig, full_html=False) return fig @@ -883,9 +881,10 @@ def plot_route_summary( ylabel: str = None, save_html: bool = False, save_image: bool = False, - save_pth: Union[pathlib.Path, str] = pathlib.Path( + out_dir: Union[pathlib.Path, str] = pathlib.Path( os.path.join("outputs", "gtfs") ), + img_type: str = "png", ) -> Union[PlotlyFigure, str]: """Plot the summarised route data of a GTFS file. @@ -924,11 +923,15 @@ def plot_route_summary( save_image : bool, optional Whether or not to save the plot as a PNG, by default False - save_pth : Union[pathlib.Path, str], optional - The filepath to save the plot to. Including a file extension is - optional (e.g., .html). File paths must include forward slashes if - they aren't passed as a path object (pathlib.Path). - by default pathlib.Path(os.path.join('outputs', 'gtfs')) + out_dir : Union[pathlib.Path, str], optional + The directory to save the plot into. If a file extension is added + to this directory, it won't be cleaned. Whatever is passed as the + out dir will be used as the parent directory of the save, leaving + the responsibility on the user to specify the correct path., + by default os.path.join("outputs", "gtfs") + img_type : str, optional + The type of the image to be saved. E.g, .svg or .jpeg., + by defauly "png" Returns ------- @@ -963,8 +966,9 @@ def plot_route_summary( xlabel=xlabel, ylabel=ylabel, save_html=save_html, - save_pth=save_pth, + out_dir=out_dir, save_image=save_image, + img_type=img_type, ) return plot @@ -979,10 +983,10 @@ def plot_trip_summary( xlabel: str = None, ylabel: str = None, save_html: bool = False, - save_image: bool = False, - save_pth: Union[pathlib.Path, str] = pathlib.Path( + out_dir: Union[pathlib.Path, str] = pathlib.Path( os.path.join("outputs", "gtfs") ), + img_type: str = "png", ): """Plot the summarised trip data of a GTFS file. @@ -1021,11 +1025,15 @@ def plot_trip_summary( save_image : bool, optional Whether or not to save the plot as a PNG, by default False - save_pth : Union[pathlib.Path, str], optional - The filepath to save the plot to. Including a file extension is - optional (e.g., .html). File paths must include forward slashes if - they aren't passed as a path object (pathlib.Path). - by default pathlib.Path(os.path.join('outputs', 'gtfs')) + out_dir : Union[pathlib.Path, str], optional + The directory to save the plot into. If a file extension is added + to this directory, it won't be cleaned. Whatever is passed as the + out dir will be used as the parent directory of the save, leaving + the responsibility on the user to specify the correct path., + by default os.path.join("outputs", "gtfs") + img_type : str, optional + The type of the image to be saved. E.g, .svg or .jpeg., + by defauly "png" Returns ------- @@ -1060,8 +1068,8 @@ def plot_trip_summary( xlabel=xlabel, ylabel=ylabel, save_html=save_html, - save_pth=save_pth, - save_image=save_image, + out_dir=out_dir, + img_type=img_type, ) return plot diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index d13cf340..8043d5ac 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -653,75 +653,45 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): height=800, save_html=True, save_image=True, - save_pth=pathlib.Path( - os.path.join(tmp_path, "save_test", "test_image") - ), ylabel="Mean", xlabel="Day", orientation="h", plotly_kwargs={"legend": dict(bgcolor="lightgrey")}, + out_dir=os.path.join(tmp_path, "save_test"), ) # general save test + save_dir = os.listdir(os.path.join(tmp_path, "save_test")) + counts = {"html": 0, "png": 0} + for pth in save_dir: + if ".html" in pth: + counts["html"] += 1 + elif ".png" in pth: + counts["png"] += 1 + assert os.path.exists( os.path.join(tmp_path, "save_test") ), "'save_test' dir could not be created'" - assert os.path.exists( - os.path.join(tmp_path, "save_test", "test_image.html") - ), "Failed to save summary in HTML" - assert os.path.exists( - os.path.join(tmp_path, "save_test", "test_image.png") - ), "Failed to save summary as a PNG" - - # save test for HTML with an invalid file extension - with pytest.warns( - UserWarning, - match=re.escape( - "HTML save requested but accepted type not specified.\n" - "Accepted image formats include ['.html']\n" - "Saving as .html" - ), - ): - gtfs_fixture._plot_summary( - gtfs_fixture.daily_route_summary, - "route_count_mean", - save_html=True, - save_pth=pathlib.Path( - os.path.join( - tmp_path, "save_test", "invalid_html_ext.nothtml" - ) - ), - ) - - assert os.path.exists( - os.path.join(tmp_path, "save_test", "invalid_html_ext.html") - ), "Failed to save HTML with automatic file extensions" + assert counts["html"] == 1, "Failed to save plot as HTML" + assert counts["png"] == 1, "Failed to save plot as png" # save test for an image with invalid file extension - valid_img_formats = [".png", ".pdf", "jpg", "jpeg", ".webp", ".svg"] - with pytest.warns( - UserWarning, + valid_img_formats = ["png", "pdf", "jpg", "jpeg", "webp", "svg"] + with pytest.raises( + ValueError, match=re.escape( - "Image save requested but accepted type not specified.\n" - f"Accepted image formats include {valid_img_formats}\n" - "Saving as .png" + "Please specify a valid image format. Valid formats " + f"include {valid_img_formats}" ), ): gtfs_fixture._plot_summary( gtfs_fixture.daily_route_summary, "route_count_mean", save_image=True, - save_pth=pathlib.Path( - os.path.join( - tmp_path, "save_test", "invalid_image_ext.notimg" - ) - ), + out_dir=os.path.join(tmp_path, "outputs"), + img_type="test", ) - assert os.path.exists( - os.path.join(tmp_path, "save_test", "invalid_image_ext.png") - ), "Failed to save HTML with automatic file extensions" - def test__plot_route_summary_defences(self, gtfs_fixture): """Test the defences for the small wrapper plot_route_summary().""" # test attribute checks From d61377247067f1729a97426761249bb0c4e897af Mon Sep 17 00:00:00 2001 From: Browning Date: Wed, 13 Sep 2023 12:03:43 +0100 Subject: [PATCH 13/33] feat: Tests for _check_column_in_df() --- tests/utils/test_defence.py | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/tests/utils/test_defence.py b/tests/utils/test_defence.py index 5e35ce0a..ffa7c4b7 100644 --- a/tests/utils/test_defence.py +++ b/tests/utils/test_defence.py @@ -4,11 +4,13 @@ import pathlib import pytest +import pandas as pd from transport_performance.utils.defence import ( _check_list, _check_parent_dir_exists, _type_defence, + _check_column_in_df, ) @@ -242,3 +244,28 @@ def test_type_defence_passes_on_multiple_types(self): _type_defence(list(), "empty_list", (type(None), list)) _type_defence(tuple(), "empty_tuple", (tuple, dict)) _type_defence(None, "None", (list, dict, type(None))) + + +@pytest.fixture(scope="function") +def test_df(): + """A test fixture for an example dataframe.""" + test_df = pd.DataFrame( + {"test_col_1": [1, 2, 3, 4], "test_col_2": [True, True, False, True]} + ) + return test_df + + +class Test_CheckColumnInDf(object): + """Tests for _check_column_in_df().""" + + def test__check_column_in_df_defence(self, test_df): + """Defensive tests for _check_colum_in_df().""" + with pytest.raises( + IndexError, match="'test' is not a column in the dataframe." + ): + _check_column_in_df(df=test_df, column_name="test") + + def test__check_column_in_df_on_pass(self, test_df): + """General tests for _check_colum_in_df().""" + _check_column_in_df(test_df, "test_col_1") + _check_column_in_df(test_df, "test_col_2") From c32af5d95ca948d6c7032f7ec356a300c61e25f2 Mon Sep 17 00:00:00 2001 From: Browning Date: Wed, 13 Sep 2023 12:08:15 +0100 Subject: [PATCH 14/33] fix: Added another stage of testing for set_up_report_dir() which improves coverage --- tests/gtfs/report/test_report_utils.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index b4c2798d..977974d0 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -91,7 +91,15 @@ def test_set_up_report_dir_defence(self): def test_set_up_report_dir_on_pass(self, tmp_path): """Test set_up_report_dir() when defences are passed.""" + # create original report + set_up_report_dir( + pathlib.Path(os.path.join(tmp_path)), overwrite=False + ) + assert os.path.exists( + os.path.join(tmp_path, "gtfs_report") + ), "Failed to create report in temporary directory" + # attempt to overwrite the previous report set_up_report_dir(pathlib.Path(os.path.join(tmp_path)), overwrite=True) assert os.path.exists( os.path.join(tmp_path, "gtfs_report") - ), "Failed to replace report in tests/data/gtfs/report/" + ), "Failed to create report in temporary directory" From acaf557374622c20c11b020da7ee907122960d37 Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 10:50:11 +0100 Subject: [PATCH 15/33] Updated gtfs_utils.py and corresponding tests. Removed scheme param. Improved type check. Improved type hint. Included test fixture --- src/transport_performance/gtfs/gtfs_utils.py | 21 ++++----- tests/gtfs/test_gtfs_utils.py | 46 ++++++++------------ 2 files changed, 28 insertions(+), 39 deletions(-) diff --git a/src/transport_performance/gtfs/gtfs_utils.py b/src/transport_performance/gtfs/gtfs_utils.py index 4703c5f0..956f05f7 100644 --- a/src/transport_performance/gtfs/gtfs_utils.py +++ b/src/transport_performance/gtfs/gtfs_utils.py @@ -67,7 +67,7 @@ def bbox_filter_gtfs( # NOTE: Possibly move to a more generalised utils file def convert_pandas_to_plotly( - df: pd.DataFrame, return_html=False, scheme: str = "dsc" + df: pd.DataFrame, return_html: bool = False ) -> go.Figure: """Convert a pandas dataframe to a visual plotly figure. @@ -79,9 +79,6 @@ def convert_pandas_to_plotly( return_html : bool, optional Whether or not to return the html element, by default False - scheme : str, optional - The colour scheme of the dataframe, - by default "dsc" Returns ------- @@ -110,16 +107,16 @@ def convert_pandas_to_plotly( # defences _type_defence(df, "df", pd.DataFrame) _type_defence(return_html, "return_html", bool) - if scheme not in list(schemes.keys()): - raise LookupError( - f"{scheme} is not a valid colour scheme." - f"Valid colour schemes include {list(schemes.keys())}" - ) - if isinstance(df.columns, pd.MultiIndex): + if isinstance(df.columns, pd.MultiIndex) or isinstance( + df.index, pd.MultiIndex + ): raise TypeError( - "Pandas dataframe must have a single index," "not MultiIndex" + "Pandas dataframe must have a singular index, not MultiIndex. " + "This means that 'df.columns' or 'df.index' does not return a " + "MultiIndex." ) - + # harcoding scheme for now. Could be changed to param if more are added + scheme = "dsc" # create plotly df fig = go.Figure( data=go.Table( diff --git a/tests/gtfs/test_gtfs_utils.py b/tests/gtfs/test_gtfs_utils.py index 25534904..443a9a57 100644 --- a/tests/gtfs/test_gtfs_utils.py +++ b/tests/gtfs/test_gtfs_utils.py @@ -49,46 +49,38 @@ def test_bbox_filter_gtfs_writes_as_expected(self, tmpdir): ), f"Expected class `Gtfs_Instance but found: {type(feed)}`" +@pytest.fixture(scope="function") +def test_df(): + """A test fixture.""" + test_df = pd.DataFrame( + { + "ID": [1, 2, 3, 4, 1], + "score": [45, 34, 23, 12, 23], + "grade": ["A", "B", "C", "D", "C"], + } + ) + return test_df + + class TestConvertPandasToPlotly(object): """Test convert_pandas_to_plotly().""" - def test_convert_pandas_to_plotly_defences(self): + def test_convert_pandas_to_plotly_defences(self, test_df): """Test convert_pandas_to_plotly defences.""" - test_df = pd.DataFrame( - { - "ID": [1, 2, 3, 4, 1], - "score": [45, 34, 23, 12, 23], - "grade": ["A", "B", "C", "D", "C"], - } - ) - with pytest.raises( - LookupError, - match=re.escape( - "dark is not a valid colour scheme." - "Valid colour schemes include ['dsc']" - ), - ): - convert_pandas_to_plotly(test_df, scheme="dark") - multi_index_df = test_df.groupby(["ID", "grade"]).agg( {"score": ["mean", "min", "max"]} ) with pytest.raises( TypeError, - match="Pandas dataframe must have a single index," - "not MultiIndex", + match="Pandas dataframe must have a singular index, not " + "MultiIndex. " + "This means that 'df.columns' or 'df.index' does not return a " + "MultiIndex.", ): convert_pandas_to_plotly(multi_index_df) - def test_convert_pandas_to_plotly_on_pass(self): + def test_convert_pandas_to_plotly_on_pass(self, test_df): """Test convert_pandas_to_plotly() when defences pass.""" - test_df = pd.DataFrame( - { - "ID": [1, 2, 3, 4, 1], - "score": [45, 34, 23, 12, 23], - "grade": ["A", "B", "C", "D", "C"], - } - ) # return_html html_return = convert_pandas_to_plotly(test_df, return_html=True) assert isinstance(html_return, str), re.escape( From 5c4b07d0289d3a2574ede64362388d3714453ef9 Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 10:57:27 +0100 Subject: [PATCH 16/33] Make TemplateHtml methods private; Update docstring --- .../gtfs/report/report_utils.py | 8 +-- src/transport_performance/gtfs/validation.py | 70 ++++++++++--------- tests/gtfs/report/test_report_utils.py | 18 ++--- 3 files changed, 51 insertions(+), 45 deletions(-) diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index ad0f4a9c..a74a13b1 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -46,7 +46,7 @@ class TemplateHTML: - """A class for inserting HTML string into a docstring. + """A class for inserting HTML string into a template. Attributes ---------- @@ -80,7 +80,7 @@ def __init__(self, path: Union[str, pathlib.Path]) -> None: self.template = f.read() return None - def insert( + def _insert( self, placeholder: str, value: str, replace_multiple: bool = False ) -> None: """Insert values into the html template. @@ -123,7 +123,7 @@ def insert( self.template = self.template.replace(f"[{placeholder}]", value) - def get_template(self) -> str: + def _get_template(self) -> str: """Get the template attribute of the TemplateHTML object. Returns @@ -135,7 +135,7 @@ def get_template(self) -> str: return self.template -def set_up_report_dir( +def _set_up_report_dir( path: Union[str, pathlib.Path] = "outputs", overwrite: bool = False ) -> None: """Set up the directory that will hold the report. diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index 420cd113..8137ddd1 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -27,7 +27,7 @@ from transport_performance.gtfs.report.report_utils import ( TemplateHTML, - set_up_report_dir, + _set_up_report_dir, GTFS_UNNEEDED_COLUMNS, ) @@ -1274,7 +1274,7 @@ def html_report( """ _type_defence(overwrite, "overwrite", bool) _type_defence(summary_type, "summary_type", str) - set_up_report_dir(path=report_dir, overwrite=overwrite) + _set_up_report_dir(path=report_dir, overwrite=overwrite) summary_type = summary_type.lower() if summary_type not in ["mean", "min", "max", "median"]: raise ValueError("'summary type' must be mean, median, min or max") @@ -1310,7 +1310,7 @@ def html_report( "html_templates/evaluation_template.html" ) ) - eval_temp.insert( + eval_temp._insert( "eval_placeholder_1", build_table( validation_dataframe, @@ -1319,43 +1319,47 @@ def html_report( escape=False, ), ) - eval_temp.insert("eval_title_1", "GTFS Feed Warnings and Errors") + eval_temp._insert("eval_title_1", "GTFS Feed Warnings and Errors") - eval_temp.insert( + eval_temp._insert( "eval_placeholder_2", build_table(self.feed.agency, "green_dark", padding="10px"), ) - eval_temp.insert("eval_title_2", "GTFS Agency Information") + eval_temp._insert("eval_title_2", "GTFS Agency Information") - eval_temp.insert( + eval_temp._insert( "name_placeholder", self.feed.feed_info["feed_publisher_name"][0] ) - eval_temp.insert( + eval_temp._insert( "url_placeholder", self.feed.feed_info["feed_publisher_url"][0], replace_multiple=True, ) - eval_temp.insert( + eval_temp._insert( "lang_placeholder", self.feed.feed_info["feed_lang"][0] ) - eval_temp.insert( + eval_temp._insert( "start_placeholder", self.feed.feed_info["feed_start_date"][0] ) - eval_temp.insert( + eval_temp._insert( "end_placeholder", self.feed.feed_info["feed_end_date"][0] ) - eval_temp.insert( + eval_temp._insert( "version_placeholder", self.feed.feed_info["feed_version"][0] ) count_lookup = dict(self.feed.describe().to_numpy()) - eval_temp.insert( + eval_temp._insert( "agency_placeholder", str(len(count_lookup["agencies"])) ) - eval_temp.insert("routes_placeholder", str(count_lookup["num_routes"])) - eval_temp.insert("trips_placeholder", str(count_lookup["num_trips"])) - eval_temp.insert("stops_placeholder", str(count_lookup["num_stops"])) - eval_temp.insert("shapes_placeholder", str(count_lookup["num_shapes"])) + eval_temp._insert( + "routes_placeholder", str(count_lookup["num_routes"]) + ) + eval_temp._insert("trips_placeholder", str(count_lookup["num_trips"])) + eval_temp._insert("stops_placeholder", str(count_lookup["num_stops"])) + eval_temp._insert( + "shapes_placeholder", str(count_lookup["num_shapes"]) + ) self.get_gtfs_files() file_list_html = "" @@ -1369,15 +1373,15 @@ def html_report( """ ) - eval_temp.insert("eval_placeholder_3", file_list_html) - eval_temp.insert("eval_title_3", "GTFS Files Included") + eval_temp._insert("eval_placeholder_3", file_list_html) + eval_temp._insert("eval_title_3", "GTFS Files Included") - eval_temp.insert("date", date) + eval_temp._insert("date", date) with open( f"{report_dir}/gtfs_report/index.html", "w", encoding="utf8" ) as eval_f: - eval_f.writelines(eval_temp.get_template()) + eval_f.writelines(eval_temp._get_template()) # stops self.viz_stops( @@ -1396,17 +1400,17 @@ def html_report( "html_templates/stops_template.html" ) ) - stops_temp.insert("stops_placeholder_1", "stop_locations.html") - stops_temp.insert("stops_placeholder_2", "convex_hull.html") - stops_temp.insert("stops_title_1", "Stops from GTFS data") - stops_temp.insert( + stops_temp._insert("stops_placeholder_1", "stop_locations.html") + stops_temp._insert("stops_placeholder_2", "convex_hull.html") + stops_temp._insert("stops_title_1", "Stops from GTFS data") + stops_temp._insert( "stops_title_2", "Convex Hull Generated from GTFS Data" ) - stops_temp.insert("date", date) + stops_temp._insert("date", date) with open( f"{report_dir}/gtfs_report/stops.html", "w", encoding="utf8" ) as stops_f: - stops_f.writelines(stops_temp.get_template()) + stops_f.writelines(stops_temp._get_template()) # summaries self.summarise_routes() @@ -1433,21 +1437,21 @@ def html_report( "html_templates/summary_template.html" ) ) - summ_temp.insert("plotly_placeholder_1", route_html) - summ_temp.insert( + summ_temp._insert("plotly_placeholder_1", route_html) + summ_temp._insert( "plotly_title_1", f"Route Summary by Day and Route Type ({summary_type})", ) - summ_temp.insert("plotly_placeholder_2", trip_html) - summ_temp.insert( + summ_temp._insert("plotly_placeholder_2", trip_html) + summ_temp._insert( "plotly_title_2", f"Trip Summary by Day and Route Type ({summary_type})", ) - summ_temp.insert("date", date) + summ_temp._insert("date", date) with open( f"{report_dir}/gtfs_report/summaries.html", "w", encoding="utf8" ) as summ_f: - summ_f.writelines(summ_temp.get_template()) + summ_f.writelines(summ_temp._get_template()) print( f"GTFS Report Created at {report_dir}\n" diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index 977974d0..2fce4da5 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -9,7 +9,7 @@ from transport_performance.gtfs.report.report_utils import ( TemplateHTML, - set_up_report_dir, + _set_up_report_dir, ) @@ -35,7 +35,7 @@ def test_init(self, template_fixture): """ assert ( - expected_template == template_fixture.get_template() + expected_template == template_fixture._get_template() ), "Test template not as expected" def test_insert_defence(self, template_fixture): @@ -50,7 +50,7 @@ def test_insert_defence(self, template_fixture): "replace_multiple param to True" ), ): - template_fixture.insert("test_placeholder", "test_value") + template_fixture._insert("test_placeholder", "test_value") def test_insert_on_pass(self, template_fixture): """Test functionality for .insert() when defences are passed.""" @@ -61,13 +61,13 @@ def test_insert_on_pass(self, template_fixture):
test_value Tester test_value
""" - template_fixture.insert( + template_fixture._insert( placeholder="test_placeholder", value="test_value", replace_multiple=True, ) assert ( - expected_template == template_fixture.get_template() + expected_template == template_fixture._get_template() ), "Test placeholder replacement not acting as expected" @@ -87,19 +87,21 @@ def test_set_up_report_dir_defence(self): ) ), ): - set_up_report_dir("tests/data/gtfs/report") + _set_up_report_dir("tests/data/gtfs/report") def test_set_up_report_dir_on_pass(self, tmp_path): """Test set_up_report_dir() when defences are passed.""" # create original report - set_up_report_dir( + _set_up_report_dir( pathlib.Path(os.path.join(tmp_path)), overwrite=False ) assert os.path.exists( os.path.join(tmp_path, "gtfs_report") ), "Failed to create report in temporary directory" # attempt to overwrite the previous report - set_up_report_dir(pathlib.Path(os.path.join(tmp_path)), overwrite=True) + _set_up_report_dir( + pathlib.Path(os.path.join(tmp_path)), overwrite=True + ) assert os.path.exists( os.path.join(tmp_path, "gtfs_report") ), "Failed to create report in temporary directory" From bbd58300df92ba2cb7f73e0c7687eab50bab4a31 Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 11:05:37 +0100 Subject: [PATCH 17/33] Updated error message for replace_multiple=False and multiple placeholders found --- src/transport_performance/gtfs/report/report_utils.py | 7 ++----- tests/gtfs/report/test_report_utils.py | 7 ++----- 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index a74a13b1..4868b1bf 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -114,11 +114,8 @@ def _insert( occurences = len(self.template.split(f"[{placeholder}]")) - 1 if occurences > 1 and not replace_multiple: raise ValueError( - "You have selected not to replace multiple" - "placeholders of the same value, however" - "placeholders occur more than once. \n" - "If you would like to allow this, set the" - "replace_multiple param to True" + "`replace multiple` requires True as found \n" + "multiple placeholder matches in template." ) self.template = self.template.replace(f"[{placeholder}]", value) diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index 2fce4da5..4c90d67b 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -43,11 +43,8 @@ def test_insert_defence(self, template_fixture): with pytest.raises( ValueError, match=( - "You have selected not to replace multiple" - "placeholders of the same value, however" - "placeholders occur more than once. \n" - "If you would like to allow this, set the" - "replace_multiple param to True" + "`replace multiple` requires True as found \n" + "multiple placeholder matches in template." ), ): template_fixture._insert("test_placeholder", "test_value") From ae17f6d0064cb4bf2d02ddc4641755814f28e9f2 Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 11:07:05 +0100 Subject: [PATCH 18/33] Updated test_report_utils.py to reflect private functions --- tests/gtfs/report/test_report_utils.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index 4c90d67b..880ec8cc 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -38,8 +38,8 @@ def test_init(self, template_fixture): expected_template == template_fixture._get_template() ), "Test template not as expected" - def test_insert_defence(self, template_fixture): - """Test defences for .insert().""" + def test__insert_defence(self, template_fixture): + """Test defences for ._insert().""" with pytest.raises( ValueError, match=( @@ -49,8 +49,8 @@ def test_insert_defence(self, template_fixture): ): template_fixture._insert("test_placeholder", "test_value") - def test_insert_on_pass(self, template_fixture): - """Test functionality for .insert() when defences are passed.""" + def test__insert_on_pass(self, template_fixture): + """Test functionality for ._insert() when defences are passed.""" expected_template = """ @@ -71,8 +71,8 @@ def test_insert_on_pass(self, template_fixture): class TestSetUpReportDir(object): """Test setting up a dir for a report.""" - def test_set_up_report_dir_defence(self): - """Test the defences for set_up_report_dir().""" + def test__set_up_report_dir_defence(self): + """Test the defences for _set_up_report_dir().""" with pytest.raises( FileExistsError, match=( @@ -86,8 +86,8 @@ def test_set_up_report_dir_defence(self): ): _set_up_report_dir("tests/data/gtfs/report") - def test_set_up_report_dir_on_pass(self, tmp_path): - """Test set_up_report_dir() when defences are passed.""" + def test__set_up_report_dir_on_pass(self, tmp_path): + """Test _set_up_report_dir() when defences are passed.""" # create original report _set_up_report_dir( pathlib.Path(os.path.join(tmp_path)), overwrite=False From ce0113e7adda0dfebe8374a457e31a9ee258d3e5 Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 11:18:27 +0100 Subject: [PATCH 19/33] Updated _set_up_report_dir() to better suite check_parent_dir_exists() functionality --- .../gtfs/report/report_utils.py | 13 +- .../testing/testing/gtfs_report/styles.css | 195 ++++++++++++++++++ 2 files changed, 204 insertions(+), 4 deletions(-) create mode 100644 testing/testing/testing/gtfs_report/styles.css diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index 4868b1bf..41fd27c3 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -123,6 +123,9 @@ def _insert( def _get_template(self) -> str: """Get the template attribute of the TemplateHTML object. + This is an internal method. + This method also allows for better testing with pytest. + Returns ------- str @@ -157,10 +160,12 @@ def _set_up_report_dir( given path and overwrite=False """ + # create report_dir var + report_dir = os.path.join(path, "gtfs_report") # defences - _check_parent_dir_exists(path, "path", create=True) + _check_parent_dir_exists(report_dir, "path", create=True) - if os.path.exists(f"{path}/gtfs_report") and not overwrite: + if os.path.exists(report_dir) and not overwrite: raise FileExistsError( "Report already exists at path: " f"[{path}]." @@ -170,11 +175,11 @@ def _set_up_report_dir( # make gtfs_report dir try: - os.mkdir(f"{path}/gtfs_report") + os.mkdir(report_dir) except FileExistsError: pass shutil.copy( src="src/transport_performance/gtfs/report/css_styles/styles.css", - dst=f"{path}/gtfs_report", + dst=report_dir, ) return None diff --git a/testing/testing/testing/gtfs_report/styles.css b/testing/testing/testing/gtfs_report/styles.css new file mode 100644 index 00000000..1e816520 --- /dev/null +++ b/testing/testing/testing/gtfs_report/styles.css @@ -0,0 +1,195 @@ +* { + margin: 0; + padding: 0; + box-sizing: border-box; + font-family: 'Poppins', sans-serif; +} + +body { + min-height: 100vh; +} + +a { + text-decoration: none; + +} + +li { + list-style: none; +} + +h1, +h2 { + color: black; +} + +h3 { + color: #999; +} + +table { + overflow: hidden; + overflow-x: scroll; + max-width: 1500px; + display: block; + overflow-y: scroll; + max-height: 800px; +} + +thead { + position: sticky; + top: -10px; +} + +.btn { + background: #f05462; + color: white; + padding: 5px 10px; + text-align: center; +} + +.btn:hover { + color: #f05462; + background: white; + padding: 3px 8px; + border: 2px solid #f05462; +} + +.title { + display: flex; + align-items: center; + justify-content: space-around; + padding: 15px 10px; + border-bottom: 2px solid #999; +} + +table { + padding: 10px; +} + +th, +td { + text-align: left; + padding: 8px; +} + +.side-menu { + position: fixed; + background: #28A197; + width: 20vw; + min-height: 100vh; + display: flex; + flex-direction: column; +} + +.side-menu .side-menu-title { + height: 10vh; + display: flex; + align-items: center; + justify-content: center; + font-weight: bold +} + +.side-menu li { + font-size: 24px; + padding: 10px 40px; + display: flex; + align-items: center; +} + +.side-menu li:hover { + background: white; + color: #28A197; + +} + +.side-menu li:hover .option{ + color: #28A197; + background-color: white; +} + +.side-menu .option { + color: white; +} + +.side-menu .option:hover{ + color: #28A197; + background-color: white; +} + +.container { + position: absolute; + right: 0; + width: 80vw; + height: 100vh; + background: #f1f1f1; +} + +.container .content { + position: relative; + background: #f1f1f1; + padding-left: 20px; + padding-right: 20px; +} + +.container .content .analysis-cont { + display: flex; + justify-content: space-around; + align-items: flex-start; + flex-wrap: wrap; + background-color: #f1f1f1; + padding: 20px; +} + +.container .content .analysis-cont .summary div { + display: block; +} + + +.container .content .analysis-cont .summary dd { + font-weight: bold; + margin-inline-start: 0; + display: inline-block; + min-width: 130px; +} + +.container .content .analysis-cont .summary dt { + display: inline-block; +} + +.analysis-title { + font-weight: bold; + font-size: large; + margin-bottom: 10px; +} + +hr { + border: 0; + clear:both; + display:block; + width: 100%; + background-color: black; + height: 5px; + } + +.container .header { + position: fixed; + top: 0; + right: 0; + width: 80vw; + height: 10vh; + background: #801650; + display: flex; + align-items: center; + justify-content: center; + box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2); + z-index: 1; +} + +.container .header .header-title { + display: flex; + align-items: center; + color: #F46A25; + font-weight: bold; + font-size: xx-large; +} From 2d6c9575df3ecd841b98f0fe8045ef6ea4718da7 Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 11:29:02 +0100 Subject: [PATCH 20/33] Update tests in test_report_utils --- tests/gtfs/report/test_report_utils.py | 41 ++++++++++++++++---------- 1 file changed, 26 insertions(+), 15 deletions(-) diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index 880ec8cc..5f0819ea 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -34,8 +34,10 @@ def test_init(self, template_fixture):
[test_placeholder] Tester [test_placeholder]
""" - assert ( - expected_template == template_fixture._get_template() + assert expected_template.replace( + r"\n", "" + ) == template_fixture._get_template().replace( + r"\n", "" ), "Test template not as expected" def test__insert_defence(self, template_fixture): @@ -51,40 +53,35 @@ def test__insert_defence(self, template_fixture): def test__insert_on_pass(self, template_fixture): """Test functionality for ._insert() when defences are passed.""" - expected_template = """ - - - -
test_value Tester test_value
- -""" template_fixture._insert( placeholder="test_placeholder", - value="test_value", + value="test_value_insert_test", replace_multiple=True, ) assert ( - expected_template == template_fixture._get_template() - ), "Test placeholder replacement not acting as expected" + "test_value_insert_test" + in template_fixture._get_template().replace(r"\n", "") + ), ("Test placeholder replacement not acting as expected") class TestSetUpReportDir(object): """Test setting up a dir for a report.""" - def test__set_up_report_dir_defence(self): + def test__set_up_report_dir_defence(self, tmp_path): """Test the defences for _set_up_report_dir().""" + _set_up_report_dir(os.path.join(tmp_path)) with pytest.raises( FileExistsError, match=( re.escape( "Report already exists at path: " - "[tests/data/gtfs/report]." + f"[{tmp_path}]." "Consider setting overwrite=True" "if you'd like to overwrite this." ) ), ): - _set_up_report_dir("tests/data/gtfs/report") + _set_up_report_dir(os.path.join(tmp_path), overwrite=False) def test__set_up_report_dir_on_pass(self, tmp_path): """Test _set_up_report_dir() when defences are passed.""" @@ -102,3 +99,17 @@ def test__set_up_report_dir_on_pass(self, tmp_path): assert os.path.exists( os.path.join(tmp_path, "gtfs_report") ), "Failed to create report in temporary directory" + # attempt to create report in different paths + _set_up_report_dir(os.path.join(tmp_path, "testing")) + assert os.path.exists( + os.path.join(tmp_path, "testing", "gtfs_report") + ), ( + f"Failed to create report dir in {tmp_path}/testing/" "gtfs_report" + ) + _set_up_report_dir(os.path.join(tmp_path, "testing", "testing")) + assert os.path.exists( + os.path.join(tmp_path, "testing", "testing", "gtfs_report") + ), ( + f"Failed to create report dir in {tmp_path}/testing/testing/" + "gtfs_report" + ) From cf1ed8778bb204889d69e60a95343b6db721b6aa Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 15:11:53 +0100 Subject: [PATCH 21/33] Only .lower() orientation in one place, rather than 2 --- src/transport_performance/gtfs/validation.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index 8137ddd1..f83bdf24 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -752,6 +752,8 @@ def _plot_summary( _type_defence(save_image, "save_iamge", bool) _type_defence(img_type, "img_type", str) + # lower orientation + orientation = orientation.lower() raw_pth = os.path.join( out_dir, "summary_" + datetime.datetime.now().strftime("%d_%m_%Y-%H_%M_%S"), @@ -759,7 +761,7 @@ def _plot_summary( _check_parent_dir_exists(raw_pth, "save_pth", create=True) # orientation input defences - if orientation.lower() not in ["v", "h"]: + if orientation not in ["v", "h"]: raise ValueError( "'orientation expected 'v' or 'h'" " (non case sensitive)." @@ -773,7 +775,6 @@ def _plot_summary( # convert column type for better graph plotting summary_df["route_type"] = summary_df["route_type"].astype("object") - orientation = orientation.lower() xlabel = ( xlabel if xlabel From 151d357e782054be4fce095b67ff896223d9c29c Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 15:50:27 +0100 Subject: [PATCH 22/33] Create small helper function to remove duplicate logic; added testing --- src/transport_performance/gtfs/validation.py | 76 +++++++++++++++----- tests/gtfs/test_validation.py | 20 ++++++ 2 files changed, 80 insertions(+), 16 deletions(-) diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index f83bdf24..d95c06c5 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -108,6 +108,35 @@ def _create_map_title_text(gdf, units, geom_crs): return txt +def _convert_multi_index_to_single(df: pd.DataFrame) -> pd.DataFrame: + """Convert a dataframes index from MultiIndex to a singular index. + + This function also removes any differing names generated from numpy + function + + Parameters + ---------- + df : pd.DataFrame + Pandas dataframe to adjust index (columns) of. + + Returns + ------- + df : pd.DataFrame + Pandas dataframe with a modified index (columns) + + """ + df.columns = df.columns = [ + "_".join(value) if "" not in value else "".join(value) + for value in df.columns.values + ] + df.columns = [ + column.replace("amin", "min").replace("amax", "max") + for column in df.columns.values + ] + + return df + + class GtfsInstance: """Create a feed instance for validation, cleaning & visualisation.""" @@ -471,6 +500,34 @@ def _summary_defence( f" functions. Found {type(summ_ops)}" ) + def _convert_multi_index_to_single(self, df: pd.DataFrame) -> pd.DataFrame: + """Convert a dataframes index from MultiIndex to a singular index. + + This function also removes any differing names generated from numpy + function + + Parameters + ---------- + df : pd.DataFrame + Pandas dataframe to adjust index (columns) of. + + Returns + ------- + df : pd.DataFrame + Pandas dataframe with a modified index (columns) + + """ + df.columns = df.columns = [ + "_".join(value) if "" not in value else "".join(value) + for value in df.columns.values + ] + df.columns = [ + column.replace("amin", "min").replace("amax", "max") + for column in df.columns.values + ] + + return df + def summarise_trips( self, summ_ops: list = [np.min, np.max, np.mean, np.median], @@ -533,14 +590,7 @@ def summarise_trips( # reformat columns # including normalsing min and max between different # numpy versions (amin/min, amax/max) - day_trip_counts.columns = day_trip_counts.columns = [ - "_".join(value) if "" not in value else "".join(value) - for value in day_trip_counts.columns.values - ] - day_trip_counts.columns = [ - column.replace("amin", "min").replace("amax", "max") - for column in day_trip_counts.columns.values - ] + day_trip_counts = _convert_multi_index_to_single(df=day_trip_counts) self.daily_trip_summary = day_trip_counts.copy() return self.daily_trip_summary @@ -613,14 +663,7 @@ def summarise_routes( # reformat columns # including normalsing min and max between different # numpy versions (amin/min, amax/max) - day_route_count.columns = day_route_count.columns = [ - "_".join(value) if "" not in value else "".join(value) - for value in day_route_count.columns.values - ] - day_route_count.columns = [ - column.replace("amin", "min").replace("amax", "max") - for column in day_route_count.columns.values - ] + day_route_count = _convert_multi_index_to_single(day_route_count) self.daily_route_summary = day_route_count.copy() return self.daily_route_summary @@ -754,6 +797,7 @@ def _plot_summary( # lower orientation orientation = orientation.lower() + raw_pth = os.path.join( out_dir, "summary_" + datetime.datetime.now().strftime("%d_%m_%Y-%H_%M_%S"), diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index 8043d5ac..d1a72bf9 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -16,6 +16,7 @@ GtfsInstance, _get_intermediate_dates, _create_map_title_text, + _convert_multi_index_to_single, ) @@ -26,6 +27,25 @@ def gtfs_fixture(): return gtfs +def test__convert_multi_index_to_single(): + """Light testing got _convert_multi_index_to_single().""" + test_df = pd.DataFrame({"test": [1, 2, 3, 4], "id": ["E", "E", "C", "D"]}) + test_df = test_df.groupby("id").agg({"test": ["min", "mean", "max"]}) + expected_cols = pd.Index( + ["test_min", "test_mean", "test_max"], dtype="object" + ) + output_cols = _convert_multi_index_to_single(df=test_df).columns + assert isinstance( + output_cols, pd.Index + ), "_convert_multi_index_to_single() not behaving as expected" + expected_cols = list(expected_cols) + output_cols = list(output_cols) + for col in output_cols: + assert col in expected_cols, f"{col} not an expected column" + expected_cols.remove(col) + assert len(expected_cols) == 0, "Not all expected cols in output cols" + + class TestGtfsInstance(object): """Tests related to the GtfsInstance class.""" From 47a35e9ce51af3404f497b58e026a350c64c1602 Mon Sep 17 00:00:00 2001 From: Browning Date: Thu, 14 Sep 2023 15:56:24 +0100 Subject: [PATCH 23/33] Move test to test__plot_summary_defences() as it is more appropriate --- tests/gtfs/test_validation.py | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index d1a72bf9..b791e92b 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -623,7 +623,7 @@ def test_summarise_routes_on_pass(self, gtfs_fixture): "Expected {expected_size}" ) - def test__plot_summary_defences(self, gtfs_fixture): + def test__plot_summary_defences(self, tmp_path, gtfs_fixture): """Test defences for _plot_summary().""" current_fixture = gtfs_fixture current_fixture.summarise_routes() @@ -643,6 +643,23 @@ def test__plot_summary_defences(self, gtfs_fixture): orientation="l", ) + # save test for an image with invalid file extension + valid_img_formats = ["png", "pdf", "jpg", "jpeg", "webp", "svg"] + with pytest.raises( + ValueError, + match=re.escape( + "Please specify a valid image format. Valid formats " + f"include {valid_img_formats}" + ), + ): + gtfs_fixture._plot_summary( + gtfs_fixture.daily_route_summary, + "route_count_mean", + save_image=True, + out_dir=os.path.join(tmp_path, "outputs"), + img_type="test", + ) + @pytest.mark.filterwarnings("ignore::UserWarning") def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): """Test plotting a summary when defences are passed.""" @@ -695,23 +712,6 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): assert counts["html"] == 1, "Failed to save plot as HTML" assert counts["png"] == 1, "Failed to save plot as png" - # save test for an image with invalid file extension - valid_img_formats = ["png", "pdf", "jpg", "jpeg", "webp", "svg"] - with pytest.raises( - ValueError, - match=re.escape( - "Please specify a valid image format. Valid formats " - f"include {valid_img_formats}" - ), - ): - gtfs_fixture._plot_summary( - gtfs_fixture.daily_route_summary, - "route_count_mean", - save_image=True, - out_dir=os.path.join(tmp_path, "outputs"), - img_type="test", - ) - def test__plot_route_summary_defences(self, gtfs_fixture): """Test the defences for the small wrapper plot_route_summary().""" # test attribute checks From 291b3b5528d07d7e230b47cdf31c39229c4eb3d3 Mon Sep 17 00:00:00 2001 From: Browning Date: Fri, 15 Sep 2023 11:23:17 +0100 Subject: [PATCH 24/33] Refactore validation.py to only use _plot_summary();Added additional defences;Included and updated testing for all changes/additions --- .gitignore | 4 +- src/transport_performance/gtfs/validation.py | 303 ++++-------------- src/transport_performance/utils/defence.py | 41 +++ tests/data/gtfs/report/gtfs_report/styles.css | 195 ----------- tests/gtfs/test_validation.py | 151 +++++---- tests/utils/test_defence.py | 61 ++++ 6 files changed, 234 insertions(+), 521 deletions(-) delete mode 100644 tests/data/gtfs/report/gtfs_report/styles.css diff --git a/.gitignore b/.gitignore index 11be8265..e79360e9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,13 +7,13 @@ *.pkl *.html -# except test fixtures +# exception for test fixtures !tests/data/newport-2023-06-13.osm.pbf !tests/data/newport-20230613_gtfs.zip !tests/data/gtfs/route_lookup.pkl !tests/data/gtfs/report/html_template.html -# except html templates +# exception for html templates !src/transport_performance/gtfs/report/html_templates/evaluation_template.html !src/transport_performance/gtfs/report/html_templates/stops_template.html !src/transport_performance/gtfs/report/html_templates/summary_template.html diff --git a/src/transport_performance/gtfs/validation.py b/src/transport_performance/gtfs/validation.py index d95c06c5..80f7132f 100644 --- a/src/transport_performance/gtfs/validation.py +++ b/src/transport_performance/gtfs/validation.py @@ -23,6 +23,8 @@ _check_parent_dir_exists, _check_column_in_df, _type_defence, + _check_item_in_list, + _check_attribute, ) from transport_performance.gtfs.report.report_utils import ( @@ -500,34 +502,6 @@ def _summary_defence( f" functions. Found {type(summ_ops)}" ) - def _convert_multi_index_to_single(self, df: pd.DataFrame) -> pd.DataFrame: - """Convert a dataframes index from MultiIndex to a singular index. - - This function also removes any differing names generated from numpy - function - - Parameters - ---------- - df : pd.DataFrame - Pandas dataframe to adjust index (columns) of. - - Returns - ------- - df : pd.DataFrame - Pandas dataframe with a modified index (columns) - - """ - df.columns = df.columns = [ - "_".join(value) if "" not in value else "".join(value) - for value in df.columns.values - ] - df.columns = [ - column.replace("amin", "min").replace("amax", "max") - for column in df.columns.values - ] - - return df - def summarise_trips( self, summ_ops: list = [np.min, np.max, np.mean, np.median], @@ -701,8 +675,8 @@ def get_route_modes(self): def _plot_summary( self, - summary_df: pd.DataFrame, target_column: str, + which: str = "trip", orientation: str = "v", day_column: str = "day", width: int = 2000, @@ -722,11 +696,12 @@ def _plot_summary( Parameters ---------- - summary_df : pd.DataFrame - The dataframe containing the summarised data target_column : str The name of the column contianing the target data (counts) + which : str, optional + Which summary to plot. Options include 'trip' and 'route', + by default "trip" orientation : str, optional The orientation of the bar plot ("v" or "h"), by default "v" @@ -782,7 +757,7 @@ def _plot_summary( """ # parameter type defences - _type_defence(summary_df, "summary_df", pd.DataFrame) + _type_defence(which, "which", str) _type_defence(day_column, "day_column", str) _type_defence(target_column, "target_column", str) _type_defence(plotly_kwargs, "plotly_kwargs", dict) @@ -795,8 +770,14 @@ def _plot_summary( _type_defence(save_image, "save_iamge", bool) _type_defence(img_type, "img_type", str) - # lower orientation + # lower params orientation = orientation.lower() + which = which.lower() + + # ensure 'which' is valid + _check_item_in_list( + item=which, _list=["trip", "route"], param_nm="which" + ) raw_pth = os.path.join( out_dir, @@ -805,11 +786,42 @@ def _plot_summary( _check_parent_dir_exists(raw_pth, "save_pth", create=True) # orientation input defences - if orientation not in ["v", "h"]: - raise ValueError( - "'orientation expected 'v' or 'h'" - " (non case sensitive)." - f" Found {orientation} of type {type(orientation)}." + _check_item_in_list( + item=orientation, _list=["v", "h"], param_nm="orientation" + ) + + # assign the correct values depending on which breakdown has been + # chosen + if which == "trip": + _check_attribute( + obj=self, + attr="daily_trip_summary", + message=( + "The daily_trip_summary table could not be found." + " Did you forget to call '.summarise_trips()' first?" + ), + ) + summary_df = self.daily_trip_summary + target_column = ( + f"trip_count_{target_column}" + if "trip_count" not in target_column + else target_column + ) + + if which == "route": + _check_attribute( + obj=self, + attr="daily_route_summary", + message=( + "The daily_route_summary table could not be found." + " Did you forget to call '.summarise_routes()' first?" + ), + ) + summary_df = self.daily_route_summary + target_column = ( + f"route_count_{target_column}" + if "route_count" not in target_column + else target_column ) # dataframe column defences @@ -914,210 +926,6 @@ def _plot_summary( return plotly_io.to_html(fig, full_html=False) return fig - def plot_route_summary( - self, - target_summary: str, - orientation: str = "v", - plotly_kwargs: dict = {}, - return_html: bool = False, - width: int = 2000, - height: int = 800, - xlabel: str = None, - ylabel: str = None, - save_html: bool = False, - save_image: bool = False, - out_dir: Union[pathlib.Path, str] = pathlib.Path( - os.path.join("outputs", "gtfs") - ), - img_type: str = "png", - ) -> Union[PlotlyFigure, str]: - """Plot the summarised route data of a GTFS file. - - This is a thin wrapper around _plot_summary() - - Parameters - ---------- - target_summary : str - The name of the summary to plot - (e.g., mean, max, min, median) - orientation : str, optional - The orientation of the bar plot ("v" or "h"), - by default "v" - plotly_kwargs : dict, optional - Kwargs to pass to fig.update_layout() for - additional plot customisation, - by default {} - return_html : bool, optional - Whether or not to return a html string, - by default False - width : int, optional - The width of the plot (in pixels), by default 2000 - height : int, optional - The height of the plot (in pixels), by default 800 - xlabel : str, optional - he label for the x axis. - If left empty, the column name will be used, - by default None - ylabel : str, optional - he label for the y axis. - If left empty, the column name will be used, - by default None - save_html : bool, optional - Whether or not to save the plot as a html file, - by default False - save_image : bool, optional - Whether or not to save the plot as a PNG, - by default False - out_dir : Union[pathlib.Path, str], optional - The directory to save the plot into. If a file extension is added - to this directory, it won't be cleaned. Whatever is passed as the - out dir will be used as the parent directory of the save, leaving - the responsibility on the user to specify the correct path., - by default os.path.join("outputs", "gtfs") - img_type : str, optional - The type of the image to be saved. E.g, .svg or .jpeg., - by defauly "png" - - Returns - ------- - Union[PlotlyFigure, str] - Returns either a HTML string or the plotly figure - - Raises - ------ - AttributeError - Raise an error if the daily_route_summary table is yet to be # - created. - - """ - # defensive checks - if "daily_route_summary" not in self.__dir__(): - raise AttributeError( - "The daily_route_summary table could not be found." - " Did you forget to call '.summarise_routes()' first?" - ) - - # plot the summary - target_col = f"route_count_{target_summary}" - plot = self._plot_summary( - summary_df=self.daily_route_summary, - target_column=target_col, - day_column="day", - orientation=orientation, - plotly_kwargs=plotly_kwargs, - return_html=return_html, - width=width, - height=height, - xlabel=xlabel, - ylabel=ylabel, - save_html=save_html, - out_dir=out_dir, - save_image=save_image, - img_type=img_type, - ) - return plot - - def plot_trip_summary( - self, - target_summary: str, - orientation: str = "v", - plotly_kwargs: dict = {}, - return_html: bool = False, - width: int = 2000, - height: int = 800, - xlabel: str = None, - ylabel: str = None, - save_html: bool = False, - out_dir: Union[pathlib.Path, str] = pathlib.Path( - os.path.join("outputs", "gtfs") - ), - img_type: str = "png", - ): - """Plot the summarised trip data of a GTFS file. - - This is a thin wrapper around _plot_summary() - - Parameters - ---------- - target_summary : str - The name of the summary to plot - (e.g., mean, max, min, median) - orientation : str, optional - The orientation of the bar plot ("v" or "h"), - by default "v" - plotly_kwargs : dict, optional - Kwargs to pass to fig.update_layout() for - additional plot customisation, - by default {} - return_html : bool, optional - Whether or not to return a html string, - by default False - width : int, optional - The width of the plot (in pixels), by default 2000 - height : int, optional - The height of the plot (in pixels), by default 800 - xlabel : str, optional - he label for the x axis. - If left empty, the column name will be used, - by default None - ylabel : str, optional - he label for the y axis. - If left empty, the column name will be used, - by default None - save_html : bool, optional - Whether or not to save the plot as a html file, - by default False - save_image : bool, optional - Whether or not to save the plot as a PNG, - by default False - out_dir : Union[pathlib.Path, str], optional - The directory to save the plot into. If a file extension is added - to this directory, it won't be cleaned. Whatever is passed as the - out dir will be used as the parent directory of the save, leaving - the responsibility on the user to specify the correct path., - by default os.path.join("outputs", "gtfs") - img_type : str, optional - The type of the image to be saved. E.g, .svg or .jpeg., - by defauly "png" - - Returns - ------- - Union[PlotlyFigure, str] - Returns either a HTML string or the plotly figure - - Raises - ------ - AttributeError - Raise an error if the daily_trip_summary table is yet to be - created. - - """ - # defensive checks - if "daily_trip_summary" not in self.__dir__(): - raise AttributeError( - "The daily_trip_summary table could not be found." - " Did you forget to call '.summarise_trips()' first?" - ) - - # plot the summary - target_col = f"trip_count_{target_summary}" - plot = self._plot_summary( - summary_df=self.daily_trip_summary, - target_column=target_col, - day_column="day", - orientation=orientation, - plotly_kwargs=plotly_kwargs, - return_html=return_html, - width=width, - height=height, - xlabel=xlabel, - ylabel=ylabel, - save_html=save_html, - out_dir=out_dir, - img_type=img_type, - ) - return plot - def _create_extended_repeated_pair_table( self, table: pd.DataFrame, @@ -1460,22 +1268,25 @@ def html_report( # summaries self.summarise_routes() self.summarise_trips() - route_html = self.plot_route_summary( - target_summary=summary_type, + route_html = self._plot_summary( + which="route", + target_column=summary_type, return_html=True, width=1200, height=800, ylabel="Route Count", xlabel="Day", ) - trip_html = self.plot_trip_summary( - target_summary=summary_type, + trip_html = self._plot_summary( + which="trip", + target_column=summary_type, return_html=True, width=1200, height=800, ylabel="Trip Count", xlabel="Day", ) + summ_temp = TemplateHTML( path=( "src/transport_performance/gtfs/report/" diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index a56e92a9..74906150 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -250,3 +250,44 @@ def _check_column_in_df(df: pd.DataFrame, column_name: str) -> None: raise IndexError(f"'{column_name}' is not a column in the dataframe.") return None + + +def _check_item_in_list(item: str, _list: list, param_nm: str) -> None: + """Defence to check if an item is present in a list. + + Parameters + ---------- + item : str + THe item to check the list for + _list : list + The list to check that the item is in + param_nm : str + The name of the param that the item has been passed to + + Returns + ------- + None + + Raises + ------ + ValueError + Error raised when item not in the list. + + """ + if item not in _list: + raise ValueError( + f"'{param_nm}' expected one of the following:" + f"{_list} Got {item}" + ) + return None + + +def _check_attribute(obj, attr: str, message: str = None): + err_msg = ( + message + if message + else (f"{obj.__class__.__name__} has no attribute {attr}") + ) + + if attr not in obj.__dir__(): + raise AttributeError(err_msg) diff --git a/tests/data/gtfs/report/gtfs_report/styles.css b/tests/data/gtfs/report/gtfs_report/styles.css deleted file mode 100644 index 1e816520..00000000 --- a/tests/data/gtfs/report/gtfs_report/styles.css +++ /dev/null @@ -1,195 +0,0 @@ -* { - margin: 0; - padding: 0; - box-sizing: border-box; - font-family: 'Poppins', sans-serif; -} - -body { - min-height: 100vh; -} - -a { - text-decoration: none; - -} - -li { - list-style: none; -} - -h1, -h2 { - color: black; -} - -h3 { - color: #999; -} - -table { - overflow: hidden; - overflow-x: scroll; - max-width: 1500px; - display: block; - overflow-y: scroll; - max-height: 800px; -} - -thead { - position: sticky; - top: -10px; -} - -.btn { - background: #f05462; - color: white; - padding: 5px 10px; - text-align: center; -} - -.btn:hover { - color: #f05462; - background: white; - padding: 3px 8px; - border: 2px solid #f05462; -} - -.title { - display: flex; - align-items: center; - justify-content: space-around; - padding: 15px 10px; - border-bottom: 2px solid #999; -} - -table { - padding: 10px; -} - -th, -td { - text-align: left; - padding: 8px; -} - -.side-menu { - position: fixed; - background: #28A197; - width: 20vw; - min-height: 100vh; - display: flex; - flex-direction: column; -} - -.side-menu .side-menu-title { - height: 10vh; - display: flex; - align-items: center; - justify-content: center; - font-weight: bold -} - -.side-menu li { - font-size: 24px; - padding: 10px 40px; - display: flex; - align-items: center; -} - -.side-menu li:hover { - background: white; - color: #28A197; - -} - -.side-menu li:hover .option{ - color: #28A197; - background-color: white; -} - -.side-menu .option { - color: white; -} - -.side-menu .option:hover{ - color: #28A197; - background-color: white; -} - -.container { - position: absolute; - right: 0; - width: 80vw; - height: 100vh; - background: #f1f1f1; -} - -.container .content { - position: relative; - background: #f1f1f1; - padding-left: 20px; - padding-right: 20px; -} - -.container .content .analysis-cont { - display: flex; - justify-content: space-around; - align-items: flex-start; - flex-wrap: wrap; - background-color: #f1f1f1; - padding: 20px; -} - -.container .content .analysis-cont .summary div { - display: block; -} - - -.container .content .analysis-cont .summary dd { - font-weight: bold; - margin-inline-start: 0; - display: inline-block; - min-width: 130px; -} - -.container .content .analysis-cont .summary dt { - display: inline-block; -} - -.analysis-title { - font-weight: bold; - font-size: large; - margin-bottom: 10px; -} - -hr { - border: 0; - clear:both; - display:block; - width: 100%; - background-color: black; - height: 5px; - } - -.container .header { - position: fixed; - top: 0; - right: 0; - width: 80vw; - height: 10vh; - background: #801650; - display: flex; - align-items: center; - justify-content: center; - box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2); - z-index: 1; -} - -.container .header .header-title { - display: flex; - align-items: center; - color: #F46A25; - font-weight: bold; - font-size: xx-large; -} diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index b791e92b..a9de95d4 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -27,25 +27,6 @@ def gtfs_fixture(): return gtfs -def test__convert_multi_index_to_single(): - """Light testing got _convert_multi_index_to_single().""" - test_df = pd.DataFrame({"test": [1, 2, 3, 4], "id": ["E", "E", "C", "D"]}) - test_df = test_df.groupby("id").agg({"test": ["min", "mean", "max"]}) - expected_cols = pd.Index( - ["test_min", "test_mean", "test_max"], dtype="object" - ) - output_cols = _convert_multi_index_to_single(df=test_df).columns - assert isinstance( - output_cols, pd.Index - ), "_convert_multi_index_to_single() not behaving as expected" - expected_cols = list(expected_cols) - output_cols = list(output_cols) - for col in output_cols: - assert col in expected_cols, f"{col} not an expected column" - expected_cols.remove(col) - assert len(expected_cols) == 0, "Not all expected cols in output cols" - - class TestGtfsInstance(object): """Tests related to the GtfsInstance class.""" @@ -304,6 +285,26 @@ def test__get_intermediate_dates(self): pd.Timestamp("2023-05-08"), ] + def test__convert_multi_index_to_single(self): + """Light testing got _convert_multi_index_to_single().""" + test_df = pd.DataFrame( + {"test": [1, 2, 3, 4], "id": ["E", "E", "C", "D"]} + ) + test_df = test_df.groupby("id").agg({"test": ["min", "mean", "max"]}) + expected_cols = pd.Index( + ["test_min", "test_mean", "test_max"], dtype="object" + ) + output_cols = _convert_multi_index_to_single(df=test_df).columns + assert isinstance( + output_cols, pd.Index + ), "_convert_multi_index_to_single() not behaving as expected" + expected_cols = list(expected_cols) + output_cols = list(output_cols) + for col in output_cols: + assert col in expected_cols, f"{col} not an expected column" + expected_cols.remove(col) + assert len(expected_cols) == 0, "Not all expected cols in output cols" + def test__order_dataframe_by_day_defence(self, gtfs_fixture): """Test __order_dataframe_by_day defences.""" with pytest.raises( @@ -625,22 +626,40 @@ def test_summarise_routes_on_pass(self, gtfs_fixture): def test__plot_summary_defences(self, tmp_path, gtfs_fixture): """Test defences for _plot_summary().""" - current_fixture = gtfs_fixture - current_fixture.summarise_routes() + # test defences for checks summaries exist + with pytest.raises( + AttributeError, + match=re.escape( + "The daily_trip_summary table could not be found." + " Did you forget to call '.summarise_trips()' first?" + ), + ): + gtfs_fixture._plot_summary(which="trip", target_column="mean") + + with pytest.raises( + AttributeError, + match=re.escape( + "The daily_route_summary table could not be found." + " Did you forget to call '.summarise_routes()' first?" + ), + ): + gtfs_fixture._plot_summary(which="route", target_column="mean") + + gtfs_fixture.summarise_routes() # test parameters that are yet to be tested + options = ["v", "h"] with pytest.raises( ValueError, match=re.escape( - "'orientation expected 'v' or 'h'" - " (non case sensitive)." - " Found l of type ." + "'orientation' expected one of the following:" + f"{options} Got i" ), ): - current_fixture._plot_summary( - current_fixture.daily_route_summary, - "route_count_mean", - orientation="l", + gtfs_fixture._plot_summary( + which="route", + target_column="route_count_mean", + orientation="i", ) # save test for an image with invalid file extension @@ -653,13 +672,23 @@ def test__plot_summary_defences(self, tmp_path, gtfs_fixture): ), ): gtfs_fixture._plot_summary( - gtfs_fixture.daily_route_summary, - "route_count_mean", + which="route", + target_column="route_count_mean", save_image=True, out_dir=os.path.join(tmp_path, "outputs"), img_type="test", ) + # test choosing an invalid value for 'which' + with pytest.raises( + ValueError, + match=re.escape( + "'which' expected one of the following:" + "['trip', 'route'] Got tester" + ), + ): + gtfs_fixture._plot_summary(which="tester", target_column="tester") + @pytest.mark.filterwarnings("ignore::UserWarning") def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): """Test plotting a summary when defences are passed.""" @@ -668,15 +697,24 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): # test returning a html string test_html = gtfs_fixture._plot_summary( - gtfs_fixture.daily_route_summary, - "route_count_mean", + which="route", + target_column="route_count_mean", return_html=True, ) assert type(test_html) == str, "Failed to return HTML for the plot" # test returning a plotly figure test_image = gtfs_fixture._plot_summary( - gtfs_fixture.daily_route_summary, "route_count_mean" + which="route", target_column="route_count_mean" + ) + assert ( + type(test_image) == PlotlyFigure + ), "Failed to return plotly.graph_objects.Figure type" + + # test returning a plotly for trips + gtfs_fixture.summarise_trips() + test_image = gtfs_fixture._plot_summary( + which="trip", target_column="trip_count_mean" ) assert ( type(test_image) == PlotlyFigure @@ -684,8 +722,8 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): # test saving plots in html and png format gtfs_fixture._plot_summary( - gtfs_fixture.daily_route_summary, - "route_count_mean", + which="route", + target_column="mean", width=1200, height=800, save_html=True, @@ -712,49 +750,6 @@ def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): assert counts["html"] == 1, "Failed to save plot as HTML" assert counts["png"] == 1, "Failed to save plot as png" - def test__plot_route_summary_defences(self, gtfs_fixture): - """Test the defences for the small wrapper plot_route_summary().""" - # test attribute checks - with pytest.raises( - AttributeError, - match=( - re.escape( - "The daily_route_summary table could not be found." - " Did you forget to call '.summarise_routes()' first?" - ) - ), - ): - gtfs_fixture.plot_route_summary(target_summary="mean") - - def test__plot_trip_summary_defences(self, gtfs_fixture): - """Test the defences for the small wrapper plot_trip_summary().""" - # test attribute checks - with pytest.raises( - AttributeError, - match=( - re.escape( - "The daily_trip_summary table could not be found." - " Did you forget to call '.summarise_trips()' first?" - ) - ), - ): - gtfs_fixture.plot_trip_summary(target_summary="mean") - - def test__plot_route_summary_on_pass(self, gtfs_fixture): - """Test plot_route_summary() calls _plot_summary() succesfully.""" - gtfs_fixture.summarise_routes() - assert isinstance( - gtfs_fixture.plot_route_summary(target_summary="mean"), - PlotlyFigure, - ), "plot_route_summary() failed to return plotly figure" - - def test__plot_trip_summary_on_pass(self, gtfs_fixture): - """Test plot_trip_summary() calls _plot_summary() succesfully.""" - gtfs_fixture.summarise_trips() - assert isinstance( - gtfs_fixture.plot_trip_summary(target_summary="mean"), PlotlyFigure - ), "plot_trip_summary() failed to return plotly figure" - def test__create_extended_repeated_pair_table(self, gtfs_fixture): """Test _create_extended_repeated_pair_table().""" test_table = pd.DataFrame( diff --git a/tests/utils/test_defence.py b/tests/utils/test_defence.py index ffa7c4b7..2cf617f6 100644 --- a/tests/utils/test_defence.py +++ b/tests/utils/test_defence.py @@ -11,6 +11,8 @@ _check_parent_dir_exists, _type_defence, _check_column_in_df, + _check_item_in_list, + _check_attribute, ) @@ -269,3 +271,62 @@ def test__check_column_in_df_on_pass(self, test_df): """General tests for _check_colum_in_df().""" _check_column_in_df(test_df, "test_col_1") _check_column_in_df(test_df, "test_col_2") + + +@pytest.fixture(scope="function") +def test_list(): + """Test fixture.""" + my_list = ["test", "test2", "tester", "definitely_testing"] + return my_list + + +class TestCheckItemInList(object): + """Tests for _check_item_in_list().""" + + def test_check_item_in_list_defence(self, test_list): + """Defensive tests for check_item_in_list().""" + with pytest.raises( + ValueError, + match=re.escape( + "'test' expected one of the following:" + f"{test_list} Got not_in_test" + ), + ): + _check_item_in_list( + item="not_in_test", _list=test_list, param_nm="test" + ) + + def test_check_item_in_list_on_pass(self, test_list): + """General tests for check_item_in_list().""" + _check_item_in_list(item="test", _list=test_list, param_nm="test") + + +@pytest.fixture(scope="function") +def dummy_obj(): + """Fixture to assist with tests.""" + + class dummy: + """Dummy class for testing.""" + + def __init__(self) -> None: + """Intialise dummy object.""" + self.tester = "test" + self.tester_also = "also_test" + + new_dummy = dummy() + return new_dummy + + +class TestCheckAttribute(object): + """Tests for _check_item_in_list().""" + + def test_check_attribute_defence(self, dummy_obj): + """Defensive tests for check_attribute.""" + with pytest.raises(AttributeError, match="dummy test msg"): + _check_attribute( + obj=dummy_obj, attr="not_in_test", message="dummy test msg" + ) + + def test_check_attribute_on_pass(self, dummy_obj): + """General tests for check_attribute().""" + _check_attribute(dummy_obj, "tester") From c1d415e1f82f451155c37e499012d48511acede7 Mon Sep 17 00:00:00 2001 From: Charlie Browning <121952297+CBROWN-ONS@users.noreply.github.com> Date: Fri, 15 Sep 2023 11:45:19 +0100 Subject: [PATCH 25/33] Update check_attribute() due to save error --- src/transport_performance/utils/defence.py | 27 ++++++++++++++++++---- 1 file changed, 22 insertions(+), 5 deletions(-) diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index 74906150..ec362b9f 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -283,11 +283,28 @@ def _check_item_in_list(item: str, _list: list, param_nm: str) -> None: def _check_attribute(obj, attr: str, message: str = None): - err_msg = ( - message - if message - else (f"{obj.__class__.__name__} has no attribute {attr}") + """A test to check if an attribute exists in an object. + + Parameters + ---------- + obj : any + The object to check that the attr exists in + attr : str + The attribute to check exists in an object + message : str, optional + The error message to display, by default None + + Raises + ------ + AttributeError + An error raised if the attr does not exist + + """ + err_msg = message if message else ( + f"{obj.__class__.__name__} has no attribute {attr}" ) if attr not in obj.__dir__(): - raise AttributeError(err_msg) + raise AttributeError( + err_msg + ) From 14442ff2180ec20a4a8cd7ba80b21253b51f9006 Mon Sep 17 00:00:00 2001 From: Charlie Browning <121952297+CBROWN-ONS@users.noreply.github.com> Date: Fri, 15 Sep 2023 11:54:07 +0100 Subject: [PATCH 26/33] Update defence.py --- src/transport_performance/utils/defence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index ec362b9f..68504942 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -283,7 +283,7 @@ def _check_item_in_list(item: str, _list: list, param_nm: str) -> None: def _check_attribute(obj, attr: str, message: str = None): - """A test to check if an attribute exists in an object. + ""Test to check if an attribute exists in an object. Parameters ---------- From 8fed07dbf48a3577914f589a7e01c5e40663d702 Mon Sep 17 00:00:00 2001 From: Charlie Browning <121952297+CBROWN-ONS@users.noreply.github.com> Date: Fri, 15 Sep 2023 12:08:57 +0100 Subject: [PATCH 27/33] Update defence.py for missing char --- src/transport_performance/utils/defence.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index 68504942..c2985516 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -283,7 +283,7 @@ def _check_item_in_list(item: str, _list: list, param_nm: str) -> None: def _check_attribute(obj, attr: str, message: str = None): - ""Test to check if an attribute exists in an object. + """Test to check if an attribute exists in an object. Parameters ---------- From 3dfef83cb210990f86fc5e9307cb800c78a3a3f0 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Fri, 15 Sep 2023 13:08:19 +0100 Subject: [PATCH 28/33] chore: Run pre-commit against all files --- src/transport_performance/utils/defence.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/transport_performance/utils/defence.py b/src/transport_performance/utils/defence.py index c2985516..69df5670 100644 --- a/src/transport_performance/utils/defence.py +++ b/src/transport_performance/utils/defence.py @@ -298,13 +298,13 @@ def _check_attribute(obj, attr: str, message: str = None): ------ AttributeError An error raised if the attr does not exist - + """ - err_msg = message if message else ( - f"{obj.__class__.__name__} has no attribute {attr}" + err_msg = ( + message + if message + else (f"{obj.__class__.__name__} has no attribute {attr}") ) if attr not in obj.__dir__(): - raise AttributeError( - err_msg - ) + raise AttributeError(err_msg) From c45e4ed986c5f243f7096c8f53115e28a7777f32 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Fri, 15 Sep 2023 13:13:06 +0100 Subject: [PATCH 29/33] docs: Update private methods in docstring --- src/transport_performance/gtfs/report/report_utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index 41fd27c3..084fe819 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -55,9 +55,9 @@ class TemplateHTML: Methods ------- - insert(placeholder: str, value: str, replace_multiple: bool = False) + _insert(placeholder: str, value: str, replace_multiple: bool = False) Insert values into the HTML template - get_template() + _get_template() Returns the template attribute """ From 2b6fd163d2ed34bcb89c7eb43fbdb5c9413f66c0 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Fri, 15 Sep 2023 13:40:47 +0100 Subject: [PATCH 30/33] chore: Remove lint --- .../testing/testing/gtfs_report/styles.css | 195 ------------------ 1 file changed, 195 deletions(-) delete mode 100644 testing/testing/testing/gtfs_report/styles.css diff --git a/testing/testing/testing/gtfs_report/styles.css b/testing/testing/testing/gtfs_report/styles.css deleted file mode 100644 index 1e816520..00000000 --- a/testing/testing/testing/gtfs_report/styles.css +++ /dev/null @@ -1,195 +0,0 @@ -* { - margin: 0; - padding: 0; - box-sizing: border-box; - font-family: 'Poppins', sans-serif; -} - -body { - min-height: 100vh; -} - -a { - text-decoration: none; - -} - -li { - list-style: none; -} - -h1, -h2 { - color: black; -} - -h3 { - color: #999; -} - -table { - overflow: hidden; - overflow-x: scroll; - max-width: 1500px; - display: block; - overflow-y: scroll; - max-height: 800px; -} - -thead { - position: sticky; - top: -10px; -} - -.btn { - background: #f05462; - color: white; - padding: 5px 10px; - text-align: center; -} - -.btn:hover { - color: #f05462; - background: white; - padding: 3px 8px; - border: 2px solid #f05462; -} - -.title { - display: flex; - align-items: center; - justify-content: space-around; - padding: 15px 10px; - border-bottom: 2px solid #999; -} - -table { - padding: 10px; -} - -th, -td { - text-align: left; - padding: 8px; -} - -.side-menu { - position: fixed; - background: #28A197; - width: 20vw; - min-height: 100vh; - display: flex; - flex-direction: column; -} - -.side-menu .side-menu-title { - height: 10vh; - display: flex; - align-items: center; - justify-content: center; - font-weight: bold -} - -.side-menu li { - font-size: 24px; - padding: 10px 40px; - display: flex; - align-items: center; -} - -.side-menu li:hover { - background: white; - color: #28A197; - -} - -.side-menu li:hover .option{ - color: #28A197; - background-color: white; -} - -.side-menu .option { - color: white; -} - -.side-menu .option:hover{ - color: #28A197; - background-color: white; -} - -.container { - position: absolute; - right: 0; - width: 80vw; - height: 100vh; - background: #f1f1f1; -} - -.container .content { - position: relative; - background: #f1f1f1; - padding-left: 20px; - padding-right: 20px; -} - -.container .content .analysis-cont { - display: flex; - justify-content: space-around; - align-items: flex-start; - flex-wrap: wrap; - background-color: #f1f1f1; - padding: 20px; -} - -.container .content .analysis-cont .summary div { - display: block; -} - - -.container .content .analysis-cont .summary dd { - font-weight: bold; - margin-inline-start: 0; - display: inline-block; - min-width: 130px; -} - -.container .content .analysis-cont .summary dt { - display: inline-block; -} - -.analysis-title { - font-weight: bold; - font-size: large; - margin-bottom: 10px; -} - -hr { - border: 0; - clear:both; - display:block; - width: 100%; - background-color: black; - height: 5px; - } - -.container .header { - position: fixed; - top: 0; - right: 0; - width: 80vw; - height: 10vh; - background: #801650; - display: flex; - align-items: center; - justify-content: center; - box-shadow: 0 4px 8px 0 rgba(0, 0, 0, 0.2); - z-index: 1; -} - -.container .header .header-title { - display: flex; - align-items: center; - color: #F46A25; - font-weight: bold; - font-size: xx-large; -} From 77d8c6e8582f95eca34612853a516c99f5e2db0c Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Fri, 15 Sep 2023 13:54:44 +0100 Subject: [PATCH 31/33] refactor: Param formatting in Error message --- src/transport_performance/gtfs/report/report_utils.py | 2 +- tests/gtfs/report/test_report_utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/transport_performance/gtfs/report/report_utils.py b/src/transport_performance/gtfs/report/report_utils.py index 084fe819..96f963a1 100644 --- a/src/transport_performance/gtfs/report/report_utils.py +++ b/src/transport_performance/gtfs/report/report_utils.py @@ -114,7 +114,7 @@ def _insert( occurences = len(self.template.split(f"[{placeholder}]")) - 1 if occurences > 1 and not replace_multiple: raise ValueError( - "`replace multiple` requires True as found \n" + "`replace_multiple` requires True as found \n" "multiple placeholder matches in template." ) diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index 5f0819ea..c523cdfe 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -45,7 +45,7 @@ def test__insert_defence(self, template_fixture): with pytest.raises( ValueError, match=( - "`replace multiple` requires True as found \n" + "`replace_multiple` requires True as found \n" "multiple placeholder matches in template." ), ): From 6adc2455e1f771c83529b7d4f14989029e9b16cb Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Fri, 15 Sep 2023 14:48:30 +0100 Subject: [PATCH 32/33] refactor: Rm str replace carriage returns logic in test --- tests/gtfs/report/test_report_utils.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/gtfs/report/test_report_utils.py b/tests/gtfs/report/test_report_utils.py index c523cdfe..4a3727df 100644 --- a/tests/gtfs/report/test_report_utils.py +++ b/tests/gtfs/report/test_report_utils.py @@ -34,10 +34,8 @@ def test_init(self, template_fixture):
[test_placeholder] Tester [test_placeholder]
""" - assert expected_template.replace( - r"\n", "" - ) == template_fixture._get_template().replace( - r"\n", "" + assert ( + expected_template == template_fixture._get_template() ), "Test template not as expected" def test__insert_defence(self, template_fixture): From b516ce5a98ea6277afc60468f3bde7dbf5081ad2 Mon Sep 17 00:00:00 2001 From: r-leyshon Date: Fri, 15 Sep 2023 14:54:36 +0100 Subject: [PATCH 33/33] chore: Remove filterwarning --- tests/gtfs/test_validation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/gtfs/test_validation.py b/tests/gtfs/test_validation.py index a9de95d4..40b9c3e8 100644 --- a/tests/gtfs/test_validation.py +++ b/tests/gtfs/test_validation.py @@ -689,7 +689,6 @@ def test__plot_summary_defences(self, tmp_path, gtfs_fixture): ): gtfs_fixture._plot_summary(which="tester", target_column="tester") - @pytest.mark.filterwarnings("ignore::UserWarning") def test__plot_summary_on_pass(self, gtfs_fixture, tmp_path): """Test plotting a summary when defences are passed.""" current_fixture = gtfs_fixture