diff --git a/gtfs_digest/typologies.ipynb b/gtfs_digest/typologies.ipynb
new file mode 100644
index 000000000..704684952
--- /dev/null
+++ b/gtfs_digest/typologies.ipynb
@@ -0,0 +1,550 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3e65b7c6-a202-42dc-8fde-fbf1e9fc4369",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "\n",
+ "import calitp_data_analysis.magics\n",
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "from segment_speed_utils.project_vars import (RT_SCHED_GCS, \n",
+ " SCHED_GCS,\n",
+ " GTFS_DATA_DICT\n",
+ " )\n",
+ "\n",
+ "import altair as alt\n",
+ "\n",
+ "from IPython.display import HTML\n",
+ "from calitp_data_analysis import calitp_color_palette as cp\n",
+ "\n",
+ "alt.renderers.enable(\"html\")\n",
+ "alt.data_transformers.enable('default', max_rows=None)\n",
+ "\n",
+ "\n",
+ "import great_tables as gt\n",
+ "from great_tables import md\n",
+ "\n",
+ "import yaml\n",
+ "\n",
+ "with open(\"readable.yml\") as f:\n",
+ " readable_dict = yaml.safe_load(f)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2aaa2608-4172-4228-b66d-a501dca88798",
+ "metadata": {
+ "tags": [
+ "parameters"
+ ]
+ },
+ "outputs": [],
+ "source": [
+ "#name = \"City of Santa Monica\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1d278a37-a4d2-40ff-9f2d-f859531c9795",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%capture_parameters\n",
+ "name"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5c729b02-09e9-43bb-9af6-ee40992bf4e0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "FILE = GTFS_DATA_DICT.digest_tables.route_schedule_vp\n",
+ "\n",
+ "df = pd.read_parquet(\n",
+ " f\"{RT_SCHED_GCS}{FILE}.parquet\",\n",
+ " filters = [[\n",
+ " (\"time_period\", \"==\", \"all_day\"),\n",
+ " (\"organization_name\", \"==\", name)]]\n",
+ " \n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bfd9acff-4d99-4ba8-a647-74b647894772",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7897465d-14b8-4087-a230-32179c167ab8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display(df.typology.value_counts())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1c10849d-1023-462e-8042-b0ef5341395e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Import data \n",
+ "most_recent_date = df.service_date.max()\n",
+ "\n",
+ "# Operator data\n",
+ "operator_df = pd.read_parquet(\n",
+ " f\"{RT_SCHED_GCS}digest/operator_profiles.parquet\",\n",
+ " filters = [[\n",
+ " (\"organization_name\", \"==\", name), \n",
+ " (\"service_date\", \"==\", most_recent_date)]]\n",
+ ")\n",
+ "\n",
+ "# Operator route gdf to plot map\n",
+ "operator_route_gdf = gpd.read_parquet(\n",
+ " f\"{RT_SCHED_GCS}digest/operator_routes.parquet\",\n",
+ " filters = [[\n",
+ " (\"organization_name\", \"==\", name), \n",
+ " (\"service_date\", \"==\", most_recent_date)]]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "53e888ad-bd77-4c7e-9f17-8fdd2be34de6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_map(gdf: gpd.GeoDataFrame):\n",
+ " m = gdf[[\"route_id\", \"route_combined_name\", \n",
+ " \"geometry\"]].explore(\n",
+ " \"route_combined_name\", \n",
+ " tiles = \"CartoDB Positron\", \n",
+ " legend=False\n",
+ " )\n",
+ " return m"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e3cb2c4f-dac7-4cdd-96bf-ea9b0036d05d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "operator_p1 = [\n",
+ " 'operator_n_routes', \n",
+ " 'operator_n_trips',\n",
+ " 'operator_n_shapes', \n",
+ " 'operator_n_stops', \n",
+ "]\n",
+ "\n",
+ "operator_p2 = [\n",
+ " 'operator_n_arrivals',\n",
+ " 'operator_route_length_miles', \n",
+ " 'operator_arrivals_per_stop'\n",
+ "]\n",
+ "\n",
+ "operator_p3 = [\n",
+ " 'n_coverage_routes', \n",
+ " 'n_downtown_local_routes', \n",
+ " 'n_local_routes',\n",
+ " 'n_rapid_routes'\n",
+ "]\n",
+ "\n",
+ "def readable(column_name: str, readable_dict: dict) -> str:\n",
+ " try:\n",
+ " return readable_dict[column_name][\"readable\"]\n",
+ " except:\n",
+ " return readable_dict[column_name]\n",
+ " \n",
+ "def great_table_config(table: gt.GT) -> gt.GT:\n",
+ " table = (table\n",
+ " .tab_options(container_width = \"75%\")\n",
+ " .tab_options(table_font_size=\"16px\")\n",
+ " .cols_align(align=\"center\")\n",
+ " )\n",
+ " \n",
+ " return table"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c3ed017c-933b-47d9-b18b-1414b3727a76",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "operator_p1_dict = {k: readable(k, readable_dict) for k in operator_p1}\n",
+ "operator_p2_dict = {k: readable(k, readable_dict) for k in operator_p2}\n",
+ "operator_p3_dict = {k: readable(k, readable_dict) for k in operator_p3}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "06f0afac-f7c9-4368-9fc6-1f2fb6da8d3a",
+ "metadata": {},
+ "source": [
+ "# {name}\n",
+ "## Operator Stats"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "69f8fe1c-7baf-438e-8e25-23819397ae1a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "table1 = (gt.GT(data=operator_df[[\"name\"] + operator_p1])\n",
+ " .fmt_integer(\n",
+ " columns = operator_p1,\n",
+ " compact=True\n",
+ " ).cols_align(align=\"center\")\n",
+ " .cols_label(**operator_p1_dict)\n",
+ " .tab_header(\n",
+ " title=md(f\"### {operator_df.name.iloc[0]}: Daily Stats\"),\n",
+ " subtitle=md(f\"#### {most_recent_date.date()}\")\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "table2 = (gt.GT(data=operator_df[operator_p2])\n",
+ " .fmt_integer(\n",
+ " columns = [\"operator_n_arrivals\"],\n",
+ " compact=True\n",
+ " ).cols_label(**operator_p2_dict)\n",
+ ".tab_source_note(\n",
+ " source_note=md(\n",
+ " \"Service area (miles) is the the sum of miles across routes. \"\n",
+ " \"
The longest shape is selected for each route.\"\n",
+ " )\n",
+ "))\n",
+ " \n",
+ "nacto_url = (\n",
+ " \"https://nacto.org/\"\n",
+ " \"publication/transit-street-design-guide/\"\n",
+ " \"introduction/service-context/transit-route-types/\"\n",
+ ")\n",
+ "\n",
+ "table3 = (gt.GT(data=operator_df[operator_p3])\n",
+ " .cols_label(**operator_p3_dict)\n",
+ " ).tab_header(\n",
+ " title=md(f\"#### Route Typologies\"),\n",
+ " subtitle=md(f\"#### Routes Classified in Each Typology\")\n",
+ " ).tab_source_note(\n",
+ " source_note=md(\n",
+ " f\"Source: [NACTO Route Types]({nacto_url})\"\n",
+ " \"
Transit routes can have multiple typologies. \"\n",
+ " \"
A typology is selected by plurality.\"\n",
+ " )\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "a7db2820-39a2-41de-b59b-99786530f98f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "display(great_table_config(table1))\n",
+ "display(great_table_config(table2))\n",
+ "display(great_table_config(table3))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "af7627c4-01c9-4695-8e91-761914d6082b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "make_map(operator_route_gdf)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2c9fd0bc-5cc0-4836-88b4-8d9a2d64a5ac",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def base_route_chart(df: pd.DataFrame, y_col: str) -> alt.Chart:\n",
+ " \"\"\"\n",
+ " \"\"\"\n",
+ " selected_colors = [\n",
+ " cp.CALITP_CATEGORY_BRIGHT_COLORS[0], # blue\n",
+ " cp.CALITP_CATEGORY_BRIGHT_COLORS[3], # green\n",
+ " cp.CALITP_CATEGORY_BOLD_COLORS[1], # orange,\n",
+ " ]\n",
+ " \n",
+ " #https://stackoverflow.com/questions/26454649/python-round-up-to-the-nearest-ten\n",
+ "\n",
+ " chart = (\n",
+ " alt.Chart(df)\n",
+ " .mark_line()\n",
+ " .encode(\n",
+ " x = alt.X(\"yearmonthdate(service_date):O\", title = \"Date\",\n",
+ " axis = alt.Axis(format = '%b %Y')\n",
+ " ),\n",
+ " y = alt.Y(f\"{y_col}:Q\"),\n",
+ " color = alt.Color(\"time_period:N\"),\n",
+ " tooltip = [\"route_combined_name\", \"route_id\", \"direction_id\", \n",
+ " \"time_period\", y_col]\n",
+ " ).facet(\n",
+ " column = alt.Column(\"direction_id:N\"),\n",
+ " ).interactive()\n",
+ " )\n",
+ " \n",
+ " return chart"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9032ec4d-db58-46a5-baa3-b6d1af4ebf20",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://stackoverflow.com/questions/62103632/altair-change-the-position-of-a-slider\n",
+ "display(\n",
+ " HTML(\n",
+ " \"\"\"\n",
+ " \n",
+ " \"\"\"\n",
+ " )\n",
+ ")\n",
+ "\n",
+ "def filtered_route_charts(\n",
+ " df: pd.DataFrame,\n",
+ " control_field: str = \"route_combined_name\",\n",
+ ") -> alt.Chart:\n",
+ " \"\"\"\n",
+ " https://stackoverflow.com/questions/58919888/multiple-selections-in-altair\n",
+ " \"\"\"\n",
+ "\n",
+ " route_dropdown = alt.binding_select(\n",
+ " options=sorted(df[control_field].unique().tolist()), \n",
+ " name='Routes ', \n",
+ " )\n",
+ " \n",
+ " # Column that controls the bar charts\n",
+ " route_selector = alt.selection_point(\n",
+ " fields=[control_field], \n",
+ " bind=route_dropdown,\n",
+ " )\n",
+ " \n",
+ " vp_df = df[df.sched_rt_category != \"schedule_only\"]\n",
+ "\n",
+ " speeds_chart = base_route_chart(\n",
+ " vp_df, \"speed_mph\"\n",
+ " ).add_params(route_selector).transform_filter(route_selector)\n",
+ " \n",
+ " ping_density_chart = base_route_chart(\n",
+ " vp_df, \"vp_per_minute\"\n",
+ " ).add_params(route_selector).transform_filter(route_selector)\n",
+ " \n",
+ " spatial_accuracy_chart = base_route_chart(\n",
+ " vp_df, \"pct_in_shape\"\n",
+ " ).add_params(route_selector).transform_filter(route_selector)\n",
+ "\n",
+ " atleast2vp_chart = base_route_chart(\n",
+ " vp_df, \"pct_rt_journey_atleast2_vp\"\n",
+ " ).add_params(route_selector).transform_filter(route_selector) \n",
+ " \n",
+ " chart_list = [\n",
+ " speeds_chart,\n",
+ " ping_density_chart, \n",
+ " spatial_accuracy_chart,\n",
+ " atleast2vp_chart\n",
+ " ]\n",
+ " \n",
+ " chart = alt.vconcat(*chart_list).resolve_scale(y=\"independent\")\n",
+ " \n",
+ " return chart\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "2589bba0-1276-413d-8980-fc635f3bceee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "available_typologies = df.typology.unique()\n",
+ "print(available_typologies)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e0a5114c-e4d2-4365-b484-580c6b537692",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def make_chart(df, t: str):\n",
+ " subset_df = df[df.typology==t]\n",
+ " if len(subset_df) == 0:\n",
+ " chart = alt.LayerChart()\n",
+ " else:\n",
+ " chart = filtered_route_charts(subset_df)\n",
+ " \n",
+ " return chart"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "65a389bb-1470-4996-bf08-4bea18663b29",
+ "metadata": {},
+ "source": [
+ "## Downtown Local"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "38192789-0b3d-4dd4-8dab-d9000ca0fc1c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = \"downtown_local\"\n",
+ "chart = make_chart(df, t)\n",
+ "chart"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3e082f71-c624-477f-a327-a8d18edc9931",
+ "metadata": {},
+ "source": [
+ "## Local"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "47f2f3f3-e948-492a-bd2f-f71e8870f63f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = \"local\"\n",
+ "chart = make_chart(df, t)\n",
+ "chart"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "71b9491e-6bd6-4ff8-8725-811a5c97b272",
+ "metadata": {},
+ "source": [
+ "## Coverage"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "06129e3d-b8ba-4c29-8dfb-4036f7fb81ef",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "t = \"coverage\"\n",
+ "chart = make_chart(df, t)\n",
+ "chart"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2a760b1a-284d-47c3-a227-8078b15708df",
+ "metadata": {},
+ "source": [
+ "## Rapid"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "649c6f2c-5979-4f97-a431-3bc4cbba033b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = \"rapid\"\n",
+ "chart = make_chart(df, t)\n",
+ "chart"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ee30aa32-9dde-427c-9d3d-5db8a7f2fad9",
+ "metadata": {},
+ "source": [
+ "## Unknown"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "30c64ec5-1b98-44fc-80f0-9ca7f9b370ce",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "t = \"unknown\"\n",
+ "chart = make_chart(df, t)\n",
+ "chart"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "80e0d88a-afca-4916-89eb-e914aec74cdf",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}