From 94d18081d411a2955fbe2469fb3fbeebf99703d6 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 12 Apr 2024 22:12:03 +0000 Subject: [PATCH] add digest typologies notebook --- gtfs_digest/typologies.ipynb | 550 +++++++++++++++++++++++++++++++++++ 1 file changed, 550 insertions(+) create mode 100644 gtfs_digest/typologies.ipynb diff --git a/gtfs_digest/typologies.ipynb b/gtfs_digest/typologies.ipynb new file mode 100644 index 000000000..704684952 --- /dev/null +++ b/gtfs_digest/typologies.ipynb @@ -0,0 +1,550 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "3e65b7c6-a202-42dc-8fde-fbf1e9fc4369", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "\n", + "import calitp_data_analysis.magics\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "from segment_speed_utils.project_vars import (RT_SCHED_GCS, \n", + " SCHED_GCS,\n", + " GTFS_DATA_DICT\n", + " )\n", + "\n", + "import altair as alt\n", + "\n", + "from IPython.display import HTML\n", + "from calitp_data_analysis import calitp_color_palette as cp\n", + "\n", + "alt.renderers.enable(\"html\")\n", + "alt.data_transformers.enable('default', max_rows=None)\n", + "\n", + "\n", + "import great_tables as gt\n", + "from great_tables import md\n", + "\n", + "import yaml\n", + "\n", + "with open(\"readable.yml\") as f:\n", + " readable_dict = yaml.safe_load(f)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2aaa2608-4172-4228-b66d-a501dca88798", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "#name = \"City of Santa Monica\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1d278a37-a4d2-40ff-9f2d-f859531c9795", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture_parameters\n", + "name" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c729b02-09e9-43bb-9af6-ee40992bf4e0", + "metadata": {}, + "outputs": [], + "source": [ + "FILE = 
GTFS_DATA_DICT.digest_tables.route_schedule_vp\n", + "\n", + "df = pd.read_parquet(\n", + " f\"{RT_SCHED_GCS}{FILE}.parquet\",\n", + " filters = [[\n", + " (\"time_period\", \"==\", \"all_day\"),\n", + " (\"organization_name\", \"==\", name)]]\n", + " \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bfd9acff-4d99-4ba8-a647-74b647894772", + "metadata": {}, + "outputs": [], + "source": [ + "df.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7897465d-14b8-4087-a230-32179c167ab8", + "metadata": {}, + "outputs": [], + "source": [ + "display(df.typology.value_counts())" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1c10849d-1023-462e-8042-b0ef5341395e", + "metadata": {}, + "outputs": [], + "source": [ + "# Import data \n", + "most_recent_date = df.service_date.max()\n", + "\n", + "# Operator data\n", + "operator_df = pd.read_parquet(\n", + " f\"{RT_SCHED_GCS}digest/operator_profiles.parquet\",\n", + " filters = [[\n", + " (\"organization_name\", \"==\", name), \n", + " (\"service_date\", \"==\", most_recent_date)]]\n", + ")\n", + "\n", + "# Operator route gdf to plot map\n", + "operator_route_gdf = gpd.read_parquet(\n", + " f\"{RT_SCHED_GCS}digest/operator_routes.parquet\",\n", + " filters = [[\n", + " (\"organization_name\", \"==\", name), \n", + " (\"service_date\", \"==\", most_recent_date)]]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53e888ad-bd77-4c7e-9f17-8fdd2be34de6", + "metadata": {}, + "outputs": [], + "source": [ + "def make_map(gdf: gpd.GeoDataFrame):\n", + " m = gdf[[\"route_id\", \"route_combined_name\", \n", + " \"geometry\"]].explore(\n", + " \"route_combined_name\", \n", + " tiles = \"CartoDB Positron\", \n", + " legend=False\n", + " )\n", + " return m" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3cb2c4f-dac7-4cdd-96bf-ea9b0036d05d", + "metadata": {}, + "outputs": [], + "source": [ + "operator_p1 
= [\n", + " 'operator_n_routes', \n", + " 'operator_n_trips',\n", + " 'operator_n_shapes', \n", + " 'operator_n_stops', \n", + "]\n", + "\n", + "operator_p2 = [\n", + " 'operator_n_arrivals',\n", + " 'operator_route_length_miles', \n", + " 'operator_arrivals_per_stop'\n", + "]\n", + "\n", + "operator_p3 = [\n", + " 'n_coverage_routes', \n", + " 'n_downtown_local_routes', \n", + " 'n_local_routes',\n", + " 'n_rapid_routes'\n", + "]\n", + "\n", + "def readable(column_name: str, readable_dict: dict) -> str:\n", + " try:\n", + " return readable_dict[column_name][\"readable\"]\n", + " except:\n", + " return readable_dict[column_name]\n", + " \n", + "def great_table_config(table: gt.GT) -> gt.GT:\n", + " table = (table\n", + " .tab_options(container_width = \"75%\")\n", + " .tab_options(table_font_size=\"16px\")\n", + " .cols_align(align=\"center\")\n", + " )\n", + " \n", + " return table" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3ed017c-933b-47d9-b18b-1414b3727a76", + "metadata": {}, + "outputs": [], + "source": [ + "operator_p1_dict = {k: readable(k, readable_dict) for k in operator_p1}\n", + "operator_p2_dict = {k: readable(k, readable_dict) for k in operator_p2}\n", + "operator_p3_dict = {k: readable(k, readable_dict) for k in operator_p3}" + ] + }, + { + "cell_type": "markdown", + "id": "06f0afac-f7c9-4368-9fc6-1f2fb6da8d3a", + "metadata": {}, + "source": [ + "# {name}\n", + "## Operator Stats" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "69f8fe1c-7baf-438e-8e25-23819397ae1a", + "metadata": {}, + "outputs": [], + "source": [ + "table1 = (gt.GT(data=operator_df[[\"name\"] + operator_p1])\n", + " .fmt_integer(\n", + " columns = operator_p1,\n", + " compact=True\n", + " ).cols_align(align=\"center\")\n", + " .cols_label(**operator_p1_dict)\n", + " .tab_header(\n", + " title=md(f\"### {operator_df.name.iloc[0]}: Daily Stats\"),\n", + " subtitle=md(f\"#### {most_recent_date.date()}\")\n", + " )\n", + ")\n", + 
"\n", + "table2 = (gt.GT(data=operator_df[operator_p2])\n", + " .fmt_integer(\n", + " columns = [\"operator_n_arrivals\"],\n", + " compact=True\n", + " ).cols_label(**operator_p2_dict)\n", + ".tab_source_note(\n", + " source_note=md(\n", + " \"Service area (miles) is the the sum of miles across routes. \"\n", + " \"
The longest shape is selected for each route.\"\n", + " )\n", + "))\n", + " \n", + "nacto_url = (\n", + " \"https://nacto.org/\"\n", + " \"publication/transit-street-design-guide/\"\n", + " \"introduction/service-context/transit-route-types/\"\n", + ")\n", + "\n", + "table3 = (gt.GT(data=operator_df[operator_p3])\n", + " .cols_label(**operator_p3_dict)\n", + " ).tab_header(\n", + " title=md(f\"#### Route Typologies\"),\n", + " subtitle=md(f\"#### Routes Classified in Each Typology\")\n", + " ).tab_source_note(\n", + " source_note=md(\n", + " f\"Source: [NACTO Route Types]({nacto_url})\"\n", + " \"
Transit routes can have multiple typologies. \"\n", + " \"
A typology is selected by plurality.\"\n", + " )\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a7db2820-39a2-41de-b59b-99786530f98f", + "metadata": {}, + "outputs": [], + "source": [ + "display(great_table_config(table1))\n", + "display(great_table_config(table2))\n", + "display(great_table_config(table3))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af7627c4-01c9-4695-8e91-761914d6082b", + "metadata": {}, + "outputs": [], + "source": [ + "make_map(operator_route_gdf)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2c9fd0bc-5cc0-4836-88b4-8d9a2d64a5ac", + "metadata": {}, + "outputs": [], + "source": [ + "def base_route_chart(df: pd.DataFrame, y_col: str) -> alt.Chart:\n", + " \"\"\"\n", + " \"\"\"\n", + " selected_colors = [\n", + " cp.CALITP_CATEGORY_BRIGHT_COLORS[0], # blue\n", + " cp.CALITP_CATEGORY_BRIGHT_COLORS[3], # green\n", + " cp.CALITP_CATEGORY_BOLD_COLORS[1], # orange,\n", + " ]\n", + " \n", + " #https://stackoverflow.com/questions/26454649/python-round-up-to-the-nearest-ten\n", + "\n", + " chart = (\n", + " alt.Chart(df)\n", + " .mark_line()\n", + " .encode(\n", + " x = alt.X(\"yearmonthdate(service_date):O\", title = \"Date\",\n", + " axis = alt.Axis(format = '%b %Y')\n", + " ),\n", + " y = alt.Y(f\"{y_col}:Q\"),\n", + " color = alt.Color(\"time_period:N\"),\n", + " tooltip = [\"route_combined_name\", \"route_id\", \"direction_id\", \n", + " \"time_period\", y_col]\n", + " ).facet(\n", + " column = alt.Column(\"direction_id:N\"),\n", + " ).interactive()\n", + " )\n", + " \n", + " return chart" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9032ec4d-db58-46a5-baa3-b6d1af4ebf20", + "metadata": {}, + "outputs": [], + "source": [ + "# https://stackoverflow.com/questions/62103632/altair-change-the-position-of-a-slider\n", + "display(\n", + " HTML(\n", + " \"\"\"\n", + " \n", + " \"\"\"\n", + " )\n", + ")\n", + "\n", + "def 
filtered_route_charts(\n", + " df: pd.DataFrame,\n", + " control_field: str = \"route_combined_name\",\n", + ") -> alt.Chart:\n", + " \"\"\"\n", + " https://stackoverflow.com/questions/58919888/multiple-selections-in-altair\n", + " \"\"\"\n", + "\n", + " route_dropdown = alt.binding_select(\n", + " options=sorted(df[control_field].unique().tolist()), \n", + " name='Routes ', \n", + " )\n", + " \n", + " # Column that controls the line charts\n", + " route_selector = alt.selection_point(\n", + " fields=[control_field], \n", + " bind=route_dropdown,\n", + " )\n", + " \n", + " vp_df = df[df.sched_rt_category != \"schedule_only\"]\n", + "\n", + " speeds_chart = base_route_chart(\n", + " vp_df, \"speed_mph\"\n", + " ).add_params(route_selector).transform_filter(route_selector)\n", + " \n", + " ping_density_chart = base_route_chart(\n", + " vp_df, \"vp_per_minute\"\n", + " ).add_params(route_selector).transform_filter(route_selector)\n", + " \n", + " spatial_accuracy_chart = base_route_chart(\n", + " vp_df, \"pct_in_shape\"\n", + " ).add_params(route_selector).transform_filter(route_selector)\n", + "\n", + " atleast2vp_chart = base_route_chart(\n", + " vp_df, \"pct_rt_journey_atleast2_vp\"\n", + " ).add_params(route_selector).transform_filter(route_selector) \n", + " \n", + " chart_list = [\n", + " speeds_chart,\n", + " ping_density_chart, \n", + " spatial_accuracy_chart,\n", + " atleast2vp_chart\n", + " ]\n", + " \n", + " chart = alt.vconcat(*chart_list).resolve_scale(y=\"independent\")\n", + " \n", + " return chart\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2589bba0-1276-413d-8980-fc635f3bceee", + "metadata": {}, + "outputs": [], + "source": [ + "available_typologies = df.typology.unique()\n", + "print(available_typologies)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e0a5114c-e4d2-4365-b484-580c6b537692", + "metadata": {}, + "outputs": [], + "source": [ + "def make_chart(df, t: str):\n", + " subset_df = 
df[df.typology==t]\n", + " if len(subset_df) == 0:\n", + " chart = alt.LayerChart()\n", + " else:\n", + " chart = filtered_route_charts(subset_df)\n", + " \n", + " return chart" + ] + }, + { + "cell_type": "markdown", + "id": "65a389bb-1470-4996-bf08-4bea18663b29", + "metadata": {}, + "source": [ + "## Downtown Local" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38192789-0b3d-4dd4-8dab-d9000ca0fc1c", + "metadata": {}, + "outputs": [], + "source": [ + "t = \"downtown_local\"\n", + "chart = make_chart(df, t)\n", + "chart" + ] + }, + { + "cell_type": "markdown", + "id": "3e082f71-c624-477f-a327-a8d18edc9931", + "metadata": {}, + "source": [ + "## Local" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "47f2f3f3-e948-492a-bd2f-f71e8870f63f", + "metadata": {}, + "outputs": [], + "source": [ + "t = \"local\"\n", + "chart = make_chart(df, t)\n", + "chart" + ] + }, + { + "cell_type": "markdown", + "id": "71b9491e-6bd6-4ff8-8725-811a5c97b272", + "metadata": {}, + "source": [ + "## Coverage" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "06129e3d-b8ba-4c29-8dfb-4036f7fb81ef", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "t = \"coverage\"\n", + "chart = make_chart(df, t)\n", + "chart" + ] + }, + { + "cell_type": "markdown", + "id": "2a760b1a-284d-47c3-a227-8078b15708df", + "metadata": {}, + "source": [ + "## Rapid" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "649c6f2c-5979-4f97-a431-3bc4cbba033b", + "metadata": {}, + "outputs": [], + "source": [ + "t = \"rapid\"\n", + "chart = make_chart(df, t)\n", + "chart" + ] + }, + { + "cell_type": "markdown", + "id": "ee30aa32-9dde-427c-9d3d-5db8a7f2fad9", + "metadata": {}, + "source": [ + "## Unknown" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "30c64ec5-1b98-44fc-80f0-9ca7f9b370ce", + "metadata": {}, + "outputs": [], + "source": [ + "t = \"unknown\"\n", + "chart = make_chart(df, t)\n", + 
"chart" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "80e0d88a-afca-4916-89eb-e914aec74cdf", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}