Skip to content

Commit

Permalink
Merge pull request #1111 from cal-itp/holiday-service-update
Browse files Browse the repository at this point in the history
Holiday service update
  • Loading branch information
vevetron authored May 13, 2024
2 parents 1ce0303 + 5b8e2b7 commit b1e5d4f
Show file tree
Hide file tree
Showing 4 changed files with 1,201 additions and 417 deletions.
99 changes: 87 additions & 12 deletions holiday_service_research/funcs_vars.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
from matplotlib.colors import ListedColormap

import numpy as np
import pandas as pd
import seaborn as sns # For visualization (optional)
Expand Down Expand Up @@ -56,10 +59,10 @@
'date': '2024-01-15',
}, {
'name': "Reference Weekday",
'date': '2023-12-14',
'date': '2023-12-15',
}, {
'name': "Reference Saturday",
'date': '2023-11-25',
'date': '2023-12-16',
}, {
'name': "Reference Sunday",
'date': '2023-12-17',
Expand Down Expand Up @@ -88,21 +91,93 @@

def plot_confusion_matrices(df, y_true, y_pred, title):
    """Draw a colour-coded, percentage-based confusion matrix and save it as a PNG.

    A 3x3 confusion matrix over the service levels 'No service',
    'Reduced service' and 'Regular service' is computed from ``df[y_true]``
    and ``df[y_pred]``, converted to percentages of the grand total, and drawn
    as three overlaid single-colour heatmaps:

    * blue   - row label equals column label,
    * green  - row service level is higher than the column service level,
    * orange - row service level is lower than the column service level.

    Rows are labelled as GTFS service levels and columns as website service
    levels. NOTE(review): this assumes ``confusion_matrix`` is scikit-learn's
    (rows follow ``y_true``, columns follow ``y_pred``), so callers are
    expected to pass the GTFS column as ``y_true`` - confirm against callers.

    Args:
        df: DataFrame holding both service-level columns.
        y_true: Name of the column passed to ``confusion_matrix`` as ``y_true``.
        y_pred: Name of the column passed to ``confusion_matrix`` as ``y_pred``.
        title: Plot title; also used as the file name, ``plots/<title>.png``.

    Returns:
        None. The figure is left open on the current pyplot state and written
        to disk. The global ``font.size`` rcParam is set to 13 as a side effect.
    """
    import os  # local import: only needed to guarantee the output folder exists

    desired_order = ['No service', 'Reduced service', 'Regular service']
    x_desired_order = desired_order
    # Rows run from 'Regular service' (top) down to 'No service' (bottom), so
    # agreement between the two sources sits on the anti-diagonal.
    y_desired_order = desired_order[::-1]

    cm = confusion_matrix(y_true=df[y_true], y_pred=df[y_pred], labels=desired_order)
    df_cm = pd.DataFrame(cm, index=desired_order, columns=desired_order)
    df_cm = df_cm.reindex(y_desired_order, axis=0)  # Rows
    df_cm = df_cm.reindex(x_desired_order, axis=1)  # Columns
    df_cm = (df_cm / df_cm.sum().sum()).round(2) * 100  # Make cm based on percentages

    # Boolean masks selecting the cells of each category in the reordered
    # layout above. The two hand-written masks assume the 3x3 shape that
    # `desired_order` produces.
    gtfs_matches_website = np.fliplr(np.eye(*cm.shape, dtype=bool))
    gtfs_greater_website = np.array([
        [True, True, False],
        [True, False, False],
        [False, False, False]])
    gtfs_less_website = np.array([
        [False, False, False],
        [False, False, True],
        [False, True, True]])

    # Colours were matched to the comparison graph with
    # https://redketchup.io/color-picker.
    # Each layer: (cells to show, fill colour, vmin, vmax).
    layers = [
        (gtfs_matches_website, '#1F77B4', 0, 1),
        (gtfs_greater_website, '#2CA02C', .5, .6),
        (gtfs_less_website, '#FF7F0E', .04, .06),
    ]

    plt.rcParams.update({'font.size': 13})
    plt.figure(figsize=(8, 6))
    font_size = {"fontsize": 18}
    # Two-colour-scheme heatmap technique:
    # https://stackoverflow.com/questions/64800003/seaborn-confusion-matrix-heatmap-2-color-schemes-correct-diagonal-vs-wrong-re
    for shown_cells, hex_color, layer_vmin, layer_vmax in layers:
        single_color_cmap = mcolors.ListedColormap([mcolors.to_rgb(hex_color)])
        # seaborn hides cells where mask is True, so invert the selection.
        sns.heatmap(df_cm, annot=True, annot_kws=font_size, fmt='g',
                    mask=~shown_cells, cmap=single_color_cmap,
                    vmin=layer_vmin, vmax=layer_vmax, cbar=False,
                    linewidths=0.8, linecolor='k')

    plt.xlabel('Service Level on Website (% of agencies)', fontweight='bold')
    plt.ylabel('GTFS Service Levels (% of agencies)', fontweight='bold')
    plt.title(title, fontweight='bold')

    # savefig does not create missing directories.
    os.makedirs("plots", exist_ok=True)
    plt.savefig(f"plots/{title}.png")

# Column order, presumably for the Excel export (the consumer is not visible
# in this part of the file): agency/reference columns first, then four columns
# per holiday, then the free-form notes column.
#
# Each pair is (label used in the "Holiday Schedule – ..." column,
#               label used in the plain / score / score_text columns).
# The two labels are deliberately kept separate because they are not always
# spelled identically.
_excel_holiday_labels = [
    ("Thanksgiving Day", "Thanksgiving Day"),
    ("Christmas Day", "Christmas Day"),
    ("New Year's Day", "New Year's Day"),
    ("MLK Day", "MLK Day"),
    ("Veterans Day (Observed)", "Veterans Day (Observed)"),
    ("Veterans Day", "Veterans Day"),
    ("Day after Thanksgiving Day", "Day After Thanksgiving"),
    ("Christmas Eve", "Christmas Eve"),
    ("New Year’s Eve", "New Year's Eve"),
]

excel_col_order = [
    'Name',
    'Notes',
    'gtfs_dataset_name',
    'Total VOMS (NTD) (from Provider)',
    'Customer Facing',
    "name",
    "Reference Saturday",
    "Reference Sunday",
    "Reference Weekday",
    *(
        column
        for schedule_label, holiday in _excel_holiday_labels
        for column in (
            f"Holiday Schedule – {schedule_label}",
            holiday,
            f"score - {holiday}",
            f"score_text - {holiday}",
        )
    ),
    "Holiday Schedule Notes",
]
Loading

0 comments on commit b1e5d4f

Please sign in to comment.