From e23363612d54b2edbb8cc093f4181d1ae6dce791 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 12 Jun 2024 21:43:33 +0000 Subject: [PATCH 01/10] (remove): logs, old notebooks --- .../check-route-categories.ipynb | 345 ------------------ .../check-stable-route-categories.ipynb | 319 ---------------- .../logs/quarterly_performance_pipeline.log | 20 - 3 files changed, 684 deletions(-) delete mode 100644 quarterly_performance_objective/check-route-categories.ipynb delete mode 100644 quarterly_performance_objective/check-stable-route-categories.ipynb delete mode 100644 quarterly_performance_objective/logs/quarterly_performance_pipeline.log diff --git a/quarterly_performance_objective/check-route-categories.ipynb b/quarterly_performance_objective/check-route-categories.ipynb deleted file mode 100644 index cd8d1f84a..000000000 --- a/quarterly_performance_objective/check-route-categories.ipynb +++ /dev/null @@ -1,345 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0fe6a2ee-2b13-4541-b88d-767967d336cf", - "metadata": {}, - "source": [ - "# Categorize `on_shn`, `parallel` (affected by SHN), and `other`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "49a3bed2-9a44-4f33-a882-f4bb46ff5853", - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "from update_vars import (ANALYSIS_DATE, \n", - " BUS_SERVICE_GCS, COMPILED_CACHED_GCS)" - ] - }, - { - "cell_type": "markdown", - "id": "a4d90a10-fffb-4da3-a491-46d348979d8a", - "metadata": {}, - "source": [ - "## `on_shn`\n", - "* Since `on_shn` is the primary category, and it's drawn with a 50 ft buffer around hwy centerline, no longer need to use `pct_highway` (set `pct_highway > 0`)\n", - "* Is 25% too high of a threshold? \n", - "* `pct_route` threshold of 20% and 25% both fall within the top 70%-75% of routes\n", - "* Settle for at least 20% of route length runs within 50 ft of hwy (on hwy)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f6351dd0-74e5-4e70-9bd7-534aa391630e", - "metadata": {}, - "outputs": [], - "source": [ - "df = gpd.read_parquet(f\"{BUS_SERVICE_GCS}routes_on_shn_{ANALYSIS_DATE}.parquet\")\n", - "\n", - "print(f\"# rows (route_id-Route pairs): {len(df)}\")\n", - "print(f\"# route_id: {len(df[['itp_id', 'route_id']].drop_duplicates())}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "da4f50a0-d90d-4f9e-bd0e-43f4627e3875", - "metadata": {}, - "outputs": [], - "source": [ - "unique_routes = (df.sort_values([\"itp_id\", \"route_id\", \"pct_route\"], \n", - " ascending=[True, True, False])\n", - " .drop_duplicates(subset=[\"itp_id\", \"route_id\"])\n", - " .reset_index(drop=True)\n", - " )\n", - "\n", - "ptile = []\n", - "\n", - "for i in range(5, 100, 5):\n", - " ptile.append(i/100)\n", - "\n", - "unique_routes.pct_route.describe(percentiles=ptile)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f73424c-12ec-4704-839b-99b993f1422b", - "metadata": {}, - "outputs": [], - "source": [ - "for r in range(20, 35, 5):\n", - " subset = unique_routes[unique_routes.pct_route >= r/100]\n", - " \n", - " print(f\"route threshold: {r/100} - {len(subset)}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7923082e-901a-4b79-807d-fdfd4d6f2324", - "metadata": {}, - "outputs": [], - "source": [ - "twenty = unique_routes[unique_routes.pct_route >= 0.20]\n", - "twentyfive = unique_routes[unique_routes.pct_route >= 0.25]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3d9928a-b126-42ef-b31a-8db35d361693", - "metadata": {}, - "outputs": [], - "source": [ - "def make_map(gdf: gpd.GeoDataFrame): \n", - " \n", - " cols = [\"itp_id\", \"route_id\", \"geometry\"]\n", - "\n", - " m = (gdf[cols].drop_duplicates()\n", - " .explore(\"itp_id\", categorical=True, tiles = \"CartoDB Positron\")\n", - " )\n", - " \n", - " print(f\"route threshold: {gdf.pct_route.min()}\")\n", - " display(m)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c2219597-792a-4d68-b63a-04dc898ecdcc", - "metadata": {}, - "outputs": [], - "source": [ - "#make_map(twenty)\n", - "#make_map(twentyfive)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7bad865c-4c0f-433d-81f2-f695564fa6ca", - "metadata": {}, - "outputs": [], - "source": [ - "itp_id = 182\n", - "\n", - "operator_twenty = twenty[twenty.itp_id==itp_id]\n", - "operator_twentyfive = twentyfive[twentyfive.itp_id==itp_id]\n", - "\n", - "difference_routes = list(set(operator_twentyfive.route_id)\n", - " .symmetric_difference(set(operator_twenty.route_id)))\n", - "\n", - "make_map(operator_twenty)\n", - "make_map(operator_twentyfive)\n", - "\n", - "print(\"Routes Included if Threshold is 20%\")\n", - "make_map(operator_twenty[operator_twenty.route_id.isin(difference_routes)])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af1e8a15-13ec-4b1e-b24a-543e3b28c4cc", - "metadata": {}, - "outputs": [], - "source": [ - "itp_id = 4\n", - "\n", - "operator_twenty = twenty[twenty.itp_id==itp_id]\n", - "operator_twentyfive = twentyfive[twentyfive.itp_id==itp_id]\n", - "\n", - "difference_routes = list(set(operator_twentyfive.route_id)\n", - " .symmetric_difference(set(operator_twenty.route_id)))\n", - "\n", - "make_map(operator_twenty)\n", - "make_map(operator_twentyfive)\n", - "\n", - "print(\"Routes Included if Threshold is 20%\")\n", - "make_map(operator_twenty[operator_twenty.route_id.isin(difference_routes)])" - ] - }, - { - "cell_type": "markdown", - "id": "915efd7b-dfd8-4285-96b4-6257f3392885", - "metadata": {}, - "source": [ - "## `parallel`\n", - "\n", - "* These are routes that are affected by SHN, where bottlenecks might occur because bus routes have to pass through where there are on-ramps. \n", - "* Use a 0.5 mile buffer from SHN, and see whether threshold should be 30%? 20%? lower? higher?\n", - "* It's much more marginal to add a couple more routes in this category, go with 20%, which will grab about 60% of the 260 routes that was tagged as being `parallel` and is not `on_shn`" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4251cc97-0a2d-4acb-b4cf-3493a528f323", - "metadata": {}, - "outputs": [], - "source": [ - "df2 = gpd.read_parquet(f\"{BUS_SERVICE_GCS}parallel_or_intersecting_{ANALYSIS_DATE}.parquet\")\n", - "\n", - "print(f\"# rows (route_id-Route pairs): {len(df2)}\")\n", - "print(f\"# route_id: {len(df2[['itp_id', 'route_id']].drop_duplicates())}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7e0278d5-1a80-4a50-8a98-9643f639751c", - "metadata": {}, - "outputs": [], - "source": [ - "unique_routes2 = (df2.sort_values([\"itp_id\", \"route_id\", \"pct_route\"], \n", - " ascending=[True, True, False])\n", - " .drop_duplicates(subset=[\"itp_id\", \"route_id\"])\n", - " .reset_index(drop=True)\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "568aaa67-1468-449b-b58c-77a8217b7937", - "metadata": {}, - "outputs": [], - "source": [ - "route_cols = [\"itp_id\", \"route_id\"]\n", - "\n", - "on_shn = (unique_routes[unique_routes.pct_route >= 0.2]\n", - " .assign(category=\"on_shn\")\n", - " .rename(columns = {\"pct_route\": \"pct_route_on_hwy\"})\n", - " [route_cols + [\"pct_route_on_hwy\"]]\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d09c3c4b-8c44-4a18-9164-c7b134060a63", - "metadata": {}, - "outputs": [], - "source": [ - "unique_routes3 = pd.merge(\n", - " on_shn,\n", - " unique_routes2[route_cols + [\"pct_route\"]],\n", - " on = route_cols,\n", - " how = \"outer\",\n", - " validate = \"1:1\",\n", - " indicator=True\n", - ")\n", - "\n", - "unique_routes3._merge.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "187e5c99-2a62-4076-9b18-f0929b0035a3", - "metadata": {}, - "outputs": [], - "source": [ - "ptile = []\n", - "\n", - "for i in range(5, 100, 5):\n", - " ptile.append(i/100)\n", - "\n", - "unique_routes3[unique_routes3._merge==\"right_only\"].pct_route.describe(percentiles=ptile)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3344e39-0cec-4578-a966-d99d11d49069", - "metadata": {}, - "outputs": [], - "source": [ - "# There's quite a lot more routes we can add\n", - "# in this intersects_shn group\n", - "for r in range(20, 35, 5):\n", - " subset = unique_routes3[(unique_routes3._merge==\"right_only\") &\n", - " (unique_routes3.pct_route >= r/100)]\n", - " \n", - " print(f\"route threshold: {r/100} - {len(subset)}\")\n", - " \n", - " make_map(df2[df2.route_id.isin(subset.route_id)])" - ] - }, - { - "cell_type": "markdown", - "id": "441f9dc1-ec2d-4cb8-8317-5658dc82c836", - "metadata": {}, - "source": [ - "Depends whether we want another 50% of the routes of the 1,900 routes (`pct_route >= 0.35`)\n", - "\n", - "Looking at LA Metro, we do want to grab all the routes that span big boulevards, and don't want to be too restrictive. Stick with `pct_route >= 0.35`, since that's close to 1/3 of the route, and gives more options for improvements. Grabbing another 50% of the 1,900 routes is ok." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a143425e-0067-431d-bf3c-ef41062589f6", - "metadata": {}, - "outputs": [], - "source": [ - "unique_routes3[unique_routes3._merge==\"right_only\"].itp_id.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "67e8defe-2aef-4d85-8fdb-fd23d1c2e850", - "metadata": {}, - "outputs": [], - "source": [ - "itp_id = 182\n", - "\n", - "for r in range(25, 50, 5):\n", - " subset = unique_routes3[(unique_routes3._merge==\"right_only\") &\n", - " (unique_routes3.pct_route >= r/100)]\n", - " \n", - " print(f\"route threshold: {r/100} - {len(subset)}\")\n", - " \n", - " make_map(df2[(df2.itp_id==itp_id) & \n", - " (df2.route_id.isin(subset.route_id))])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ffbfe1a4-b37c-416e-a87b-4ce83a9d549d", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/quarterly_performance_objective/check-stable-route-categories.ipynb b/quarterly_performance_objective/check-stable-route-categories.ipynb deleted file mode 100644 index 19beede4a..000000000 --- a/quarterly_performance_objective/check-stable-route-categories.ipynb +++ /dev/null @@ -1,319 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "037b6307-b03d-4630-9e99-00c94fcae40d", - "metadata": {}, - "source": [ - "# Are route categories stable quarter to quarter?\n", - "\n", - "If a `route_id` is `parallel` in one quarter, would it change to `on_shn` in another? It should be pretty stable, since how often would a bus route drastically deviate from its original route? \n", - "\n", - "Freeways don't change quarter to quarter.\n", - "\n", - "Check if there are large shifts in categories from current quarter to prior quarter." - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "id": "519e516a-f08e-44c4-9315-2f1848991ba8", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:123: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n", - " warnings.warn(\n", - "/tmp/ipykernel_1812/2943624353.py:1: UserWarning: Shapely 2.0 is installed, but because PyGEOS is also installed, GeoPandas will still use PyGEOS by default for now. To force to use and test Shapely 2.0, you have to set the environment variable USE_PYGEOS=0. You can do this before starting the Python process, or in your code before importing geopandas:\n", - "\n", - "import os\n", - "os.environ['USE_PYGEOS'] = '0'\n", - "import geopandas\n", - "\n", - "In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).\n", - " import geopandas as gpd\n" - ] - } - ], - "source": [ - "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "from shared_utils import rt_dates\n", - "from update_vars import BUS_SERVICE_GCS" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "4f723d37-9ee1-4184-9b8e-e641debb5de4", - "metadata": {}, - "outputs": [], - "source": [ - "# Only look at v2 warehouse\n", - "dfs = {}\n", - "for key, date in rt_dates.PMAC.items():\n", - " if \"2023\" in date:\n", - " df = gpd.read_parquet(f\"{BUS_SERVICE_GCS}routes_categorized_{date}.parquet\")\n", - " dfs[key] = df" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "212768d2-44d6-4751-9d6b-3308e95b8f9d", - "metadata": {}, - "outputs": [], - "source": [ - "keep_cols = [\n", - " \"feed_key\", \"name\",\n", - " \"category\", \"route_id\", \n", - " \"district\"\n", - "]\n", - "\n", - "df1 = dfs[\"Q1_2023\"][keep_cols]\n", - "df2 = dfs[\"Q2_2023\"][keep_cols]" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "bbbea287-3737-4eae-8c78-ebb388e5c382", - "metadata": {}, - "outputs": [], - "source": [ - "def compare_col(df1, df2, col):\n", - " print(df1[col].value_counts())\n", - " print(df2[col].value_counts())\n", - " print(df1[col].value_counts(normalize=True))\n", - " print(df2[col].value_counts(normalize=True))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "6b0c0c1b-f1f4-4bdd-aa09-a4afd03525ca", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "intersects_shn 1229\n", - "on_shn 548\n", - "other 492\n", - "Name: category, dtype: int64\n", - "intersects_shn 1248\n", - "on_shn 566\n", - "other 519\n", - "Name: category, dtype: int64\n", - "intersects_shn 0.541648\n", - "on_shn 0.241516\n", - "other 0.216836\n", - "Name: category, dtype: float64\n", - "intersects_shn 0.534934\n", - "on_shn 0.242606\n", - "other 0.222460\n", - "Name: category, dtype: float64\n" - ] - } - ], - "source": [ - "compare_col(df1, df2, \"category\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "4a36a890-7cf0-4333-836c-f8660118cd46", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "4.0 664\n", - "7.0 518\n", - "3.0 216\n", - "8.0 184\n", - "11.0 151\n", - "5.0 142\n", - "10.0 93\n", - "6.0 86\n", - "12.0 71\n", - "2.0 41\n", - "1.0 39\n", - "9.0 16\n", - "Name: district, dtype: int64\n", - "4.0 647\n", - "7.0 529\n", - "3.0 214\n", - "8.0 181\n", - "11.0 163\n", - "5.0 144\n", - "10.0 129\n", - "6.0 116\n", - "12.0 71\n", - "1.0 40\n", - "2.0 35\n", - "9.0 16\n", - "Name: district, dtype: int64\n", - "4.0 0.298964\n", - "7.0 0.233228\n", - "3.0 0.097253\n", - "8.0 0.082846\n", - "11.0 0.067987\n", - "5.0 0.063935\n", - "10.0 0.041873\n", - "6.0 0.038721\n", - "12.0 0.031968\n", - "2.0 0.018460\n", - "1.0 0.017560\n", - "9.0 0.007204\n", - "Name: district, dtype: float64\n", - "4.0 0.283151\n", - "7.0 0.231510\n", - "3.0 0.093654\n", - "8.0 0.079212\n", - "11.0 0.071335\n", - "5.0 0.063020\n", - "10.0 0.056455\n", - "6.0 0.050766\n", - "12.0 0.031072\n", - "1.0 0.017505\n", - "2.0 0.015317\n", - "9.0 0.007002\n", - "Name: district, dtype: float64\n" - ] - } - ], - "source": [ - "compare_col(df1, df2, \"district\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "12a837d8-6849-44c7-9f05-733d78809349", - "metadata": {}, - "outputs": [], - "source": [ - "m1 = pd.merge(\n", - " df1, \n", - " df2,\n", - " on = [\"feed_key\", \"name\", \"route_id\"],\n", - " how = \"outer\",\n", - " validate = \"1:1\",\n", - " indicator=\"compare_categories\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "5482a309-8825-4e14-928c-29bb594d9d60", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "right_only 2061\n", - "left_only 1997\n", - "both 272\n", - "Name: compare_categories, dtype: int64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "m1.compare_categories.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "2ec0413a-3ec0-49bb-aaf6-96866554480e", - "metadata": {}, - "outputs": [], - "source": [ - "in_both = m1[(m1.compare_categories==\"both\")]" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "38359bf6-37d3-4aa1-90f4-ae43720061a2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(272, 8)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "in_both.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "80cc287d-89dc-4563-80f9-a2fc029036ed", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0, 8)" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "in_both[(in_both.category_x != in_both.category_y)].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f6628ea-5f21-46c2-9a57-81d3d7a4041e", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/quarterly_performance_objective/logs/quarterly_performance_pipeline.log b/quarterly_performance_objective/logs/quarterly_performance_pipeline.log deleted file mode 100644 index a69a646b1..000000000 --- a/quarterly_performance_objective/logs/quarterly_performance_pipeline.log +++ /dev/null @@ -1,20 +0,0 @@ -2023-11-08 12:10:16.976 | INFO | __main__::179 - route level service: 2023-10-11 0:00:21.452201 -2023-11-08 12:11:10.785 | INFO | __main__::52 - create intermediate dfs: 2023-10-11 0:00:37.861739 -2023-11-08 12:11:37.530 | INFO | __main__::159 - categorize routes: 2023-10-11 0:00:09.897875 -2023-11-08 17:18:04.683 | INFO | __main__::163 - attach speed: 2023-10-11 0:00:11.320978 -2023-11-08 17:28:35.081 | INFO | __main__::179 - route level service: 2023-07-12 0:00:21.622882 -2023-11-08 17:29:29.484 | INFO | __main__::52 - create intermediate dfs: 2023-07-12 0:00:37.504010 -2023-11-08 17:29:55.583 | INFO | __main__::159 - categorize routes: 2023-07-12 0:00:09.474284 -2023-11-08 17:30:24.241 | INFO | __main__::163 - attach speed: 2023-07-12 0:00:10.915350 -2023-11-08 17:31:11.749 | INFO | __main__::179 - route level service: 2023-04-12 0:00:21.043589 -2023-11-08 17:32:03.341 | INFO | __main__::52 - create intermediate dfs: 2023-04-12 0:00:34.668850 -2023-11-08 17:32:28.409 | INFO | __main__::159 - categorize routes: 2023-04-12 0:00:08.669938 -2023-11-08 17:32:57.670 | INFO | __main__::163 - attach speed: 2023-04-12 0:00:12.054249 -2024-05-08 12:46:39.216 | INFO | __main__::151 - route level service: 2024-01-17 0:00:22.935689 -2024-05-08 12:48:37.229 | INFO | __main__::52 - create intermediate dfs: 2024-01-17 0:00:53.393774 -2024-05-08 12:54:21.710 | INFO | __main__::159 - categorize routes: 2024-01-17 0:00:10.477356 -2024-05-08 13:02:58.552 | INFO | __main__::130 - attach speed: 2024-01-17 0:00:05.890457 -2024-05-08 13:16:55.619 | INFO | __main__::151 - route level service: 2024-04-17 0:00:22.346017 -2024-05-08 13:18:08.149 | INFO | __main__::52 - create intermediate dfs: 2024-04-17 0:00:52.865328 -2024-05-08 13:18:38.545 | INFO | __main__::159 - categorize routes: 2024-04-17 0:00:10.733925 -2024-05-08 13:19:03.130 | INFO | __main__::130 - attach speed: 2024-04-17 0:00:05.055891 From 956fcc63bbf08395d50d523d3c751f6ba70b8625 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 19 Jun 2024 21:06:58 +0000 Subject: [PATCH 02/10] add script to compile monthly data into quarterly and average it --- quarterly_performance_objective/Makefile | 1 + .../compile_time_series.py | 305 ++++++++++++++++++ .../requirements.txt | 4 + .../update_vars.py | 5 +- .../segment_speed_utils/parallel_corridors.py | 64 +++- 5 files changed, 367 insertions(+), 12 deletions(-) create mode 100644 quarterly_performance_objective/compile_time_series.py create mode 100644 quarterly_performance_objective/requirements.txt diff --git a/quarterly_performance_objective/Makefile b/quarterly_performance_objective/Makefile index 422cd4a99..26f478310 100644 --- a/quarterly_performance_objective/Makefile +++ b/quarterly_performance_objective/Makefile @@ -1,4 +1,5 @@ quarterly_performance_report: #cd ../rt_segment_speeds/ && make && pip install -r requirements.txt && cd .. python clean_data.py + python compile_time_series.py #cd ../ && make build_quarterly_performance_metrics -f Makefile \ No newline at end of file diff --git a/quarterly_performance_objective/compile_time_series.py b/quarterly_performance_objective/compile_time_series.py new file mode 100644 index 000000000..96bff811e --- /dev/null +++ b/quarterly_performance_objective/compile_time_series.py @@ -0,0 +1,305 @@ +""" +Get quarterly metrics as a time-series df +to use in report. +""" +import datetime +import geopandas as gpd +import pandas as pd + +from calitp_data_analysis import utils +from segment_speed_utils import time_series_utils +from shared_utils import rt_dates +from update_vars import BUS_SERVICE_GCS + +operator_cols = [ + "name", + "organization_name", "caltrans_district" +] + +category_cols = ["category", "year_quarter"] + +subtotal_categories_dict = { + "shn_subtotal": ["on_shn", "parallel"], + "total": ["on_shn", "parallel", "other"] +} + +def assemble_time_series( + date_list: list +) -> gpd.GeoDataFrame: + """ + Assemble time-series data and add column showing what + year-quarter the service_date belongs to. + We'll aggregate all the available data for each + quarter (which could be 1-3 dates). + """ + df = time_series_utils.concatenate_datasets_across_dates( + BUS_SERVICE_GCS, + f"routes_categorized_with_speed", + date_list, + data_type = "gdf", + ) + + df = df.assign( + year_quarter = (df.service_date.dt.year.astype(str) + + "-Q" + + df.service_date.dt.quarter.astype(str) + ) + ) + + return df + + +def service_hours_aggregation( + df: pd.DataFrame, + group_cols: list +) -> pd.DataFrame: + """ + Aggregate service hours by some grouping of columns + and also add service hours per route. + """ + df2 = ( + df + .groupby(group_cols, + observed=True, group_keys=False) + .agg({"service_hours": "sum", + "route_key": "count", + "service_date": "nunique" + }) + .reset_index() + .rename(columns = { + "route_key": "n_routes", + "service_date": "n_dates" + }) + ) + + df2 = df2.assign( + # if we have multiple days, the n_routes counted will reflect that + service_hours_per_route = df2.service_hours.divide(df2.n_routes).round(2), + daily_service_hours = df2.service_hours.divide(df2.n_dates), + daily_routes = df2.n_routes.divide(df2.n_dates).round(0).astype(int) + ) + + return df2 + + +def speed_aggregation( + df: pd.DataFrame, + group_cols: list +) -> pd.DataFrame: + """ + Aggregate speeds (wherever route averages are available). + """ + df2 = ( + df[df.speed_mph.notna()] + .groupby(group_cols, + observed=True, group_keys=False) + .agg({"speed_mph": "mean", + "route_key": "count", + "service_date": "nunique" + }) + .reset_index() + .rename(columns = { + "route_key": "n_vp_routes", + "service_date": "n_dates" + }) + ) + + df2 = df2.assign( + speed_mph = df2.speed_mph.round(2), + daily_vp_routes = df2.n_vp_routes.divide(df2.n_dates).round(0).astype(int) + ).drop(columns = "n_dates") + + return df2 + + +def aggregated_metrics( + df: gpd.GeoDataFrame, + group_cols: list +) -> pd.DataFrame: + """ + Aggregate metrics by grouping of columns (either + by operator or statewide). + """ + service_hours_agg = service_hours_aggregation(df, group_cols) + speed_agg = speed_aggregation(df, group_cols) + + df2 = pd.merge( + service_hours_agg, + speed_agg, + on = group_cols, + how = "left", + ).fillna( + {"n_vp_routes": 0} + ).astype( + {"n_vp_routes": "int"} + ) + + return df2 + + +def get_dissolved_geometry( + df: pd.DataFrame, + group_cols: list +) -> gpd.GeoDataFrame: + + unique_combos = df[ + group_cols + ["route_id", "geometry"] + ].drop_duplicates( + subset=group_cols + ["route_id"] + ) + + # Simplify geometry for quicker dissolve (25ft) + unique_combos = unique_combos.assign( + geometry = unique_combos.geometry.simplify(tolerance=25) + ) + + route_geom = unique_combos[ + group_cols + ["geometry"] + ].dissolve(by=group_cols).reset_index() + + return route_geom + + +def get_subtotals( + df: pd.DataFrame, + group_cols: list +) -> dict: + """ + Add a row that captures the SHN subtotals + and the total across all categories. + """ + results = {} + + for grp, category_list in subtotal_categories_dict.items(): + subset_df = df.loc[df.category.isin(category_list)].assign( + category = grp + ) + + results[grp] = aggregated_metrics( + subset_df, + group_cols + ) + + return results + + +def assemble_operator_df( + df: gpd.GeoDataFrame +) -> gpd.GeoDataFrame: + """ + Operator df with service hours and speed metrics. + Also add subtotals for on_shn + parallel + and operator totals. + """ + group_cols = operator_cols + category_cols + + by_category = aggregated_metrics( + df, group_cols + ) + + subtotal_dfs = get_subtotals( + df, group_cols + ) + + operator_geom = get_dissolved_geometry( + df, group_cols + ) + + by_category_gdf = pd.merge( + operator_geom, + by_category, + on = group_cols, + how = "inner" + ) + + final_df = pd.concat([ + by_category_gdf, + subtotal_dfs["shn_subtotal"], + subtotal_dfs["total"]], + axis=0, ignore_index=True + ) + + return final_df + + +def assemble_statewide_df( + df: gpd.GeoDataFrame +) -> gpd.GeoDataFrame: + """ + Statewide (aggregate across operators) df with service hours and + speed metrics. + Also add subtotals for on_shn + parallel + and statewide totals. + """ + group_cols = category_cols + + by_category = aggregated_metrics( + df, group_cols + ) + + subtotal_dfs = get_subtotals( + df, group_cols + ) + + final_df = pd.concat([ + by_category, + subtotal_dfs["shn_subtotal"], + subtotal_dfs["total"]], + axis=0, ignore_index=True + ) + + return final_df + + +def category_wrangling( + df: pd.DataFrame, + col: str = "category", + sort_key: list = ["on_shn", "parallel", "other", "shn_subtotal", "total"] +) -> pd.DataFrame: + """ + Custom sort order for categorical variable + https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas + """ + category_values = { + "on_shn": "On SHN", + "parallel": "Intersects SHN", + "other": "Other", + "shn_subtotal": "On or Intersects SHN", + "total": "Total" + } + + df = df.sort_values( + col, key=lambda c: c.map(lambda e: sort_key.index(e)) + ) + + df = df.assign( + category = df.category.map(category_values) + ) + + return df + +if __name__ == "__main__": + + start = datetime.datetime.now() + + all_dates = rt_dates.y2023_dates + rt_dates.y2024_dates + + df = assemble_time_series(all_dates) + + operator_df = assemble_operator_df(df) + + utils.geoparquet_gcs_export( + operator_df, + BUS_SERVICE_GCS, + "quarterly_metrics/operator_time_series" + ) + + statewide_df = assemble_statewide_df(df) + + statewide_df.to_parquet( + f"{BUS_SERVICE_GCS}" + "quarterly_metrics/statewide_time_series.parquet" + ) + + end = datetime.datetime.now() + print(f"quarterly metrics time-series: {end - start}") \ No newline at end of file diff --git a/quarterly_performance_objective/requirements.txt b/quarterly_performance_objective/requirements.txt new file mode 100644 index 000000000..72fb4a000 --- /dev/null +++ b/quarterly_performance_objective/requirements.txt @@ -0,0 +1,4 @@ +great_tables==0.6.1 +polars==0.20.29 +quarto-cli==1.4.554 +quarto==0.1.0 \ No newline at end of file diff --git a/quarterly_performance_objective/update_vars.py b/quarterly_performance_objective/update_vars.py index 3a341821d..b6bf21fa3 100644 --- a/quarterly_performance_objective/update_vars.py +++ b/quarterly_performance_objective/update_vars.py @@ -7,5 +7,6 @@ COMPILED_CACHED_GCS = GTFS_DATA_DICT.gcs_paths.COMPILED_CACHED_VIEWS SEGMENT_GCS = GTFS_DATA_DICT.gcs_paths.SEGMENT_GCS -CURRENT_QUARTER = "Q2_2024" -ANALYSIS_DATE = rt_dates.PMAC[CURRENT_QUARTER] \ No newline at end of file +analysis_date = rt_dates.DATES["jun2024"] +#CURRENT_QUARTER = "Q2_2024" +#ANALYSIS_DATE = rt_dates.PMAC[CURRENT_QUARTER] \ No newline at end of file diff --git a/rt_segment_speeds/segment_speed_utils/parallel_corridors.py b/rt_segment_speeds/segment_speed_utils/parallel_corridors.py index f739ceb8f..6a4cad464 100644 --- a/rt_segment_speeds/segment_speed_utils/parallel_corridors.py +++ b/rt_segment_speeds/segment_speed_utils/parallel_corridors.py @@ -10,19 +10,26 @@ Usually, this can be achieved by merging `trips` and `shapes`. """ import geopandas as gpd +import gcsfs import pandas as pd from calitp_data_analysis import geography_utils, utils from segment_speed_utils import gtfs_schedule_wrangling, helpers from shared_utils import catalog_utils +BUS_SERVICE_GCS = "gs://calitp-analytics-data/data-analyses/bus_service_increase/" +fs = gcsfs.GCSFileSystem() +hwy_group_cols = ["Route", "County", "District", "RouteType"] + def process_transit_routes(analysis_date: str) -> gpd.GeoDataFrame: """ For each route, select the longest shape for each route to overlay with SHN. Also count how many routes there are for each operator. """ - longest_shape = gtfs_schedule_wrangling.longest_shape_by_route_direction(analysis_date) + longest_shape = gtfs_schedule_wrangling.longest_shape_by_route_direction( + analysis_date + ).pipe(helpers.remove_shapes_outside_ca) gdf = longest_shape.assign( total_routes = longest_shape.groupby("feed_key").route_key.transform("nunique") @@ -58,7 +65,8 @@ def process_highways( direction_cols = ["NB", "SB", "EB", "WB"] df = (gpd.read_parquet(SHN_FILE) - .to_crs(geography_utils.CA_StatePlane)) + .to_crs(geography_utils.CA_StatePlane) + ) # Get dummies for direction # Can make data wide instead of long @@ -97,14 +105,17 @@ def overlay_transit_to_highways( Returns: geopandas.GeoDataFrame, with geometry column reflecting the areas of intersection. - """ - hwy_group_cols = ["Route", "County", "District", "RouteType"] - + """ # Can pass a different buffer zone to determine parallel corridors - highways = process_highways( - group_cols = hwy_group_cols, - buffer_feet = hwy_buffer_feet - ) + HWY_FILE = f"{BUS_SERVICE_GCS}highways_buffer{hwy_buffer_feet}.parquet" + + if fs.exists(HWY_FILE): + highways = gpd.read_parquet(HWY_FILE) + else: + highways = process_highways( + group_cols = hwy_group_cols, + buffer_feet = hwy_buffer_feet + ) transit_routes = process_transit_routes(analysis_date) # Overlay @@ -197,4 +208,37 @@ def routes_by_on_shn_parallel_categories( category = df[category_cols].idxmax(axis=1) )[keep_cols] - return df2 \ No newline at end of file + return df2 + + +if __name__ == "__main__": + + SHN_HWY_BUFFER_FEET = 50 + PARALLEL_HWY_BUFFER_FEET = int(geography_utils.FEET_PER_MI * 0.5) + + highways_shn_buffer = process_highways( + group_cols = hwy_group_cols, + buffer_feet = SHN_HWY_BUFFER_FEET + ) + + utils.geoparquet_gcs_export( + highways_shn_buffer, + BUS_SERVICE_GCS, + f"highways_buffer{SHN_HWY_BUFFER_FEET}" + ) + + print(f"exported highways_buffer{SHN_HWY_BUFFER_FEET}") + del highways_shn_buffer + + highways_parallel_buffer = process_highways( + group_cols = hwy_group_cols, + buffer_feet = PARALLEL_HWY_BUFFER_FEET + ) + + utils.geoparquet_gcs_export( + highways_parallel_buffer, + BUS_SERVICE_GCS, + f"highways_buffer{PARALLEL_HWY_BUFFER_FEET}" + ) + + print(f"exported highways_buffer{PARALLEL_HWY_BUFFER_FEET}") \ No newline at end of file From 7e17d4f9d2e7fff0640aa68bd6a1ff3f2bd5c0f9 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 19 Jun 2024 21:07:14 +0000 Subject: [PATCH 03/10] use qmd for new report --- .../new_report.ipynb | 233 ++++++++++++++++++ .../new_report.qmd | 171 +++++++++++++ 2 files changed, 404 insertions(+) create mode 100644 quarterly_performance_objective/new_report.ipynb create mode 100644 quarterly_performance_objective/new_report.qmd diff --git a/quarterly_performance_objective/new_report.ipynb b/quarterly_performance_objective/new_report.ipynb new file mode 100644 index 000000000..3d4aa7c73 --- /dev/null +++ b/quarterly_performance_objective/new_report.ipynb @@ -0,0 +1,233 @@ +{ + "cells": [ + { + "cell_type": "raw", + "metadata": {}, + "source": [ + "---\n", + "title: Quarterly Performance Metrics\n", + "execute:\n", + " echo: false\n", + "format:\n", + " html:\n", + " mainfont: sans-serif\n", + " monofont: sans-serif\n", + " anchor-sections: true\n", + " toc: true\n", + " toc-title: Contents\n", + " toc-depth: 3\n", + " code-links:\n", + " - text: Analysis Products\n", + " icon: bar-chart-fill\n", + " href: 'https://analysis.calitp.org'\n", + " - text: Reach Out!\n", + " icon: envelope\n", + " href: 'mailto:hello@calitp.org'\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import altair as alt\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "import polars as pl\n", + "\n", + "from great_tables import GT, _data_color, loc, md, nanoplot_options, style\n", + "\n", + "from update_vars import BUS_SERVICE_GCS" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "operator_df = gpd.read_parquet(\n", + " f\"{BUS_SERVICE_GCS}\"\n", + " \"quarterly_metrics/operator_time_series.parquet\"\n", + ")\n", + "\n", + "statewide_df = pd.read_parquet(\n", + " f\"{BUS_SERVICE_GCS}\"\n", + " \"quarterly_metrics/statewide_time_series.parquet\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def category_wrangling(\n", + " df: pd.DataFrame, \n", + " col: str = \"category\", \n", + " sort_key: list = [\"on_shn\", \"parallel\", \"other\", \"shn_subtotal\", \"total\"]\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " Custom sort order for categorical variable\n", + " https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas\n", + " \"\"\"\n", + " category_values = {\n", + " \"on_shn\": \"On SHN\", \n", + " \"parallel\": \"Intersects SHN\",\n", + " \"other\": \"Other\",\n", + " \"shn_subtotal\": \"On or Intersects SHN\",\n", + " \"total\": \"Total\"\n", + " }\n", + " \n", + " df = df.sort_values(\n", + " col, key=lambda c: c.map(lambda e: sort_key.index(e))\n", + " ) \n", + " \n", + " df = df.assign(\n", + " category = df.category.map(category_values)\n", + " )\n", + " \n", + " return df\n", + "\n", + "def get_hex(color_name: str) -> str:\n", + " \"\"\"\n", + " Since some of the color names don't pull the hex code, \n", + " we'll grab it here.\n", + " https://github.com/posit-dev/great-tables/blob/main/great_tables/_data_color/constants.py\n", + " \"\"\"\n", + " return _data_color.constants.COLOR_NAME_TO_HEX[color_name]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "current_quarter = \"2024-Q2\"\n", + "\n", + "current_statewide = statewide_df.loc[\n", + " statewide_df.year_quarter==current_quarter\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(GT(pl.from_pandas(current_statewide.pipe(category_wrangling)))\n", + ".fmt_number(\n", + " columns = [\n", + " \"daily_service_hours\", \n", + " \"service_hours_per_route\", \n", + " \"speed_mph\"], decimals=1\n", + ").fmt_integer(\n", + " columns = [\"daily_routes\", \"daily_vp_routes\"]\n", + " ).cols_label(\n", + " category = \"Category\",\n", + " year_quarter = \"Quarter\",\n", + " daily_service_hours = \"Daily Service Hours\",\n", + " service_hours_per_route = \"Service Hours per Route\",\n", + " speed_mph = \"Average Speed\",\n", + " daily_routes = \"# Routes\",\n", + " daily_vp_routes = \"# Routes with Speed\"\n", + " ).tab_header(\n", + " title = \"Service Hours and Speed\",\n", + " subtitle = f\"{current_quarter}\"\n", + " ).tab_spanner(\n", + " label=\"Service\", \n", + " columns=[\"daily_service_hours\", \n", + " \"service_hours_per_route\", \"daily_routes\"]\n", + ").tab_spanner(\n", + " label=\"Speed\",\n", + " columns = [\"speed_mph\", \"daily_vp_routes\"]\n", + ").tab_options(\n", + " container_width = \"100%\",\n", + " table_background_color=\"white\",\n", + " table_body_hlines_style=\"none\",\n", + " table_body_vlines_style=\"none\",\n", + " heading_background_color=\"white\",\n", + " column_labels_background_color=\"white\",\n", + " row_group_background_color=\"white\",\n", + " stub_background_color=\"white\",\n", + " source_notes_background_color=\"white\",\n", + " table_font_size=\"14px\",\n", + " heading_align=\"center\"\n", + ").cols_hide(\n", + " [\"year_quarter\", \"service_hours\", \"n_routes\", \n", + " \"n_dates\", \"n_vp_routes\"]\n", + ").tab_style(\n", + " style=style.text(weight=\"bold\"),\n", + " locations=loc.body(rows=pl.col(\"category\") == \"Total\")\n", + ").tab_style(\n", + " style=style.text(\n", + " weight=\"bold\", style=\"italic\", color=get_hex(\"gray30\")),\n", + " locations=loc.body(rows=pl.col(\"category\") == \"On or Intersects SHN\"),\n", + ").cols_align(align=\"center\")\n", + " .cols_align(align=\"left\", columns=\"category\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(alt.Chart(operator_df.drop(columns = \"geometry\"))\n", + " .mark_bar()\n", + " .encode(\n", + " x=\"year_quarter:O\",\n", + " y=\"sum(service_hours_per_route):Q\",\n", + " column = \"category\",\n", + " tooltip=[\"year_quarter\", \"service_hours_per_route\", \n", + " \"category\", \"service_hours\"]\n", + " ).interactive()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "(alt.Chart(statewide_df)\n", + " .mark_bar()\n", + " .encode(\n", + " x=\"year_quarter:O\",\n", + " y=\"service_hours_per_route:Q\",\n", + " column = \"category\",\n", + " tooltip = [\"year_quarter\", \"service_hours_per_route\", \"category\"]\n", + " ).interactive()\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/quarterly_performance_objective/new_report.qmd b/quarterly_performance_objective/new_report.qmd new file mode 100644 index 000000000..31700a9cc --- /dev/null +++ b/quarterly_performance_objective/new_report.qmd @@ -0,0 +1,171 @@ +--- +title: Quarterly Performance Metrics +execute: + echo: false +format: + html: + mainfont: sans-serif + monofont: sans-serif + anchor-sections: true + toc: true + toc-title: Contents + toc-depth: 3 + code-links: + - text: Analysis Products + icon: bar-chart-fill + href: https://analysis.calitp.org + - text: Reach Out! + icon: envelope + href: mailto:hello@calitp.org +jupyter: python3 +--- + + +```{python} +import altair as alt +import geopandas as gpd +import pandas as pd +import polars as pl + +from great_tables import GT, _data_color, loc, md, nanoplot_options, style + +from update_vars import BUS_SERVICE_GCS +``` + +```{python} +operator_df = gpd.read_parquet( + f"{BUS_SERVICE_GCS}" + "quarterly_metrics/operator_time_series.parquet" +) + +statewide_df = pd.read_parquet( + f"{BUS_SERVICE_GCS}" + "quarterly_metrics/statewide_time_series.parquet" +) +``` + +```{python} +def category_wrangling( + df: pd.DataFrame, + col: str = "category", + sort_key: list = ["on_shn", "parallel", "other", "shn_subtotal", "total"] +) -> pd.DataFrame: + """ + Custom sort order for categorical variable + https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas + """ + category_values = { + "on_shn": "On SHN", + "parallel": "Intersects SHN", + "other": "Other", + "shn_subtotal": "On or Intersects SHN", + "total": "Total" + } + + df = df.sort_values( + col, key=lambda c: c.map(lambda e: sort_key.index(e)) + ) + + df = df.assign( + category = df.category.map(category_values) + ) + + return df + +def get_hex(color_name: str) -> str: + """ + Since some of the color names don't pull the hex code, + we'll grab it here. + https://github.com/posit-dev/great-tables/blob/main/great_tables/_data_color/constants.py + """ + return _data_color.constants.COLOR_NAME_TO_HEX[color_name] +``` + +```{python} +current_quarter = "2024-Q2" + +current_statewide = statewide_df.loc[ + statewide_df.year_quarter==current_quarter +] +``` + +```{python} +(GT(pl.from_pandas(current_statewide.pipe(category_wrangling))) +.fmt_number( + columns = [ + "daily_service_hours", + "service_hours_per_route", + "speed_mph"], decimals=1 +).fmt_integer( + columns = ["daily_routes", "daily_vp_routes"] + ).cols_label( + category = "Category", + year_quarter = "Quarter", + daily_service_hours = "Daily Service Hours", + service_hours_per_route = "Service Hours per Route", + speed_mph = "Average Speed", + daily_routes = "# Routes", + daily_vp_routes = "# Routes with Speed" + ).tab_header( + title = "Service Hours and Speed", + subtitle = f"{current_quarter}" + ).tab_spanner( + label="Service", + columns=["daily_service_hours", + "service_hours_per_route", "daily_routes"] +).tab_spanner( + label="Speed", + columns = ["speed_mph", "daily_vp_routes"] +).tab_options( + container_width = "100%", + table_background_color="white", + table_body_hlines_style="none", + table_body_vlines_style="none", + heading_background_color="white", + column_labels_background_color="white", + row_group_background_color="white", + stub_background_color="white", + source_notes_background_color="white", + table_font_size="14px", + heading_align="center" +).cols_hide( + ["year_quarter", "service_hours", "n_routes", + "n_dates", "n_vp_routes"] +).tab_style( + style=style.text(weight="bold"), + locations=loc.body(rows=pl.col("category") == "Total") +).tab_style( + style=style.text( + weight="bold", style="italic", color=get_hex("gray30")), + locations=loc.body(rows=pl.col("category") == "On or Intersects SHN"), +).cols_align(align="center") + .cols_align(align="left", columns="category") +) +``` + +```{python} +(alt.Chart(operator_df.drop(columns = "geometry")) + .mark_bar() + .encode( + x="year_quarter:O", + y="sum(service_hours_per_route):Q", + column = "category", + tooltip=["year_quarter", "service_hours_per_route", + "category", "service_hours"] + ).interactive() +) +``` + +```{python} +(alt.Chart(statewide_df) + .mark_bar() + .encode( + x="year_quarter:O", + y="service_hours_per_route:Q", + column = "category", + tooltip = ["year_quarter", "service_hours_per_route", "category"] + ).interactive() +) +``` + + From b01348e0877ea3970dc26631de5ec7c154c9da87 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 21 Jun 2024 00:00:43 +0000 Subject: [PATCH 04/10] add nanoplot to report --- .../compile_time_series.py | 189 ++++++------ .../new_report.ipynb | 280 ++++++++++++------ .../new_report.qmd | 256 ++++++++++------ 3 files changed, 437 insertions(+), 288 deletions(-) diff --git a/quarterly_performance_objective/compile_time_series.py b/quarterly_performance_objective/compile_time_series.py index 96bff811e..80c9f5af5 100644 --- a/quarterly_performance_objective/compile_time_series.py +++ b/quarterly_performance_objective/compile_time_series.py @@ -6,7 +6,7 @@ import geopandas as gpd import pandas as pd -from calitp_data_analysis import utils +#from calitp_data_analysis import utils from segment_speed_utils import time_series_utils from shared_utils import rt_dates from update_vars import BUS_SERVICE_GCS @@ -16,12 +16,10 @@ "organization_name", "caltrans_district" ] +district_cols = ["caltrans_district"] + category_cols = ["category", "year_quarter"] -subtotal_categories_dict = { - "shn_subtotal": ["on_shn", "parallel"], - "total": ["on_shn", "parallel", "other"] -} def assemble_time_series( date_list: list @@ -158,125 +156,75 @@ def get_dissolved_geometry( ].dissolve(by=group_cols).reset_index() return route_geom - - -def get_subtotals( - df: pd.DataFrame, - group_cols: list -) -> dict: - """ - Add a row that captures the SHN subtotals - and the total across all categories. - """ - results = {} - - for grp, category_list in subtotal_categories_dict.items(): - subset_df = df.loc[df.category.isin(category_list)].assign( - category = grp - ) - - results[grp] = aggregated_metrics( - subset_df, - group_cols - ) - - return results - - -def assemble_operator_df( - df: gpd.GeoDataFrame -) -> gpd.GeoDataFrame: - """ - Operator df with service hours and speed metrics. - Also add subtotals for on_shn + parallel - and operator totals. - """ - group_cols = operator_cols + category_cols - - by_category = aggregated_metrics( - df, group_cols - ) - - subtotal_dfs = get_subtotals( - df, group_cols - ) - - operator_geom = get_dissolved_geometry( - df, group_cols - ) - - by_category_gdf = pd.merge( - operator_geom, - by_category, - on = group_cols, - how = "inner" - ) - - final_df = pd.concat([ - by_category_gdf, - subtotal_dfs["shn_subtotal"], - subtotal_dfs["total"]], - axis=0, ignore_index=True - ) - - return final_df -def assemble_statewide_df( - df: gpd.GeoDataFrame +def assemble_aggregated_df_with_subtotals( + df: gpd.GeoDataFrame, + group_cols: list ) -> gpd.GeoDataFrame: """ Statewide (aggregate across operators) df with service hours and speed metrics. Also add subtotals for on_shn + parallel and statewide totals. - """ - group_cols = category_cols - + """ by_category = aggregated_metrics( df, group_cols ) - subtotal_dfs = get_subtotals( - df, group_cols + shn_categories = ["on_shn", "parallel"] + + shn_subtotal_df = aggregated_metrics( + df[df.category.isin(shn_categories)].assign( + category = "shn_subtotal"), + group_cols ) + total_df = aggregated_metrics( + df.assign( + category = "total" + ), + group_cols + ) + final_df = pd.concat([ by_category, - subtotal_dfs["shn_subtotal"], - subtotal_dfs["total"]], + shn_subtotal_df, + total_df], axis=0, ignore_index=True ) return final_df -def category_wrangling( - df: pd.DataFrame, - col: str = "category", - sort_key: list = ["on_shn", "parallel", "other", "shn_subtotal", "total"] +def add_time_series_list_columns( + df: pd.DataFrame, + group_cols: list, + time_series_cols: list, ) -> pd.DataFrame: """ - Custom sort order for categorical variable - https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas - """ - category_values = { - "on_shn": "On SHN", - "parallel": "Intersects SHN", - "other": "Other", - "shn_subtotal": "On or Intersects SHN", - "total": "Total" - } - - df = df.sort_values( - col, key=lambda c: c.map(lambda e: sort_key.index(e)) - ) + """ + group_cols2 = [c for c in group_cols if c != "year_quarter"] + + list_aggregation = (df.sort_values("year_quarter") + .groupby(group_cols2) + .agg({ + **{c: lambda x: list(x) + for c in time_series_cols} + }).reset_index() + .rename(columns = { + **{c: f"{c}_ts" for c in time_series_cols} + }) + ) - df = df.assign( - category = df.category.map(category_values) + df2 = pd.merge( + df, + list_aggregation, + on = group_cols2, + how = "inner" ) - return df + return df2 if __name__ == "__main__": @@ -286,17 +234,46 @@ def category_wrangling( df = assemble_time_series(all_dates) - operator_df = assemble_operator_df(df) - - utils.geoparquet_gcs_export( - operator_df, - BUS_SERVICE_GCS, - "quarterly_metrics/operator_time_series" + time_series_cols = ["service_hours_per_route", "speed_mph"] + + operator_df = assemble_aggregated_df_with_subtotals( + df, operator_cols + category_cols) + + operator_df2 = add_time_series_list_columns( + operator_df, + operator_cols + category_cols, + time_series_cols ) - statewide_df = assemble_statewide_df(df) - - statewide_df.to_parquet( + operator_df2.to_parquet( + f"{BUS_SERVICE_GCS}" + "quarterly_metrics/operator_time_series.parquet" + ) + + district_df = assemble_aggregated_df_with_subtotals( + df, district_cols + category_cols) + + district_df2 = add_time_series_list_columns( + district_df, + district_cols + category_cols, + time_series_cols + ) + + district_df2.to_parquet( + f"{BUS_SERVICE_GCS}" + "quarterly_metrics/district_time_series.parquet" + ) + + statewide_df = assemble_aggregated_df_with_subtotals( + df, category_cols) + + statewide_df2 = add_time_series_list_columns( + statewide_df, + category_cols, + time_series_cols + ) + + statewide_df2.to_parquet( f"{BUS_SERVICE_GCS}" "quarterly_metrics/statewide_time_series.parquet" ) diff --git a/quarterly_performance_objective/new_report.ipynb b/quarterly_performance_objective/new_report.ipynb index 3d4aa7c73..5769638b4 100644 --- a/quarterly_performance_objective/new_report.ipynb +++ b/quarterly_performance_objective/new_report.ipynb @@ -42,23 +42,6 @@ "from update_vars import BUS_SERVICE_GCS" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "operator_df = gpd.read_parquet(\n", - " f\"{BUS_SERVICE_GCS}\"\n", - " \"quarterly_metrics/operator_time_series.parquet\"\n", - ")\n", - "\n", - "statewide_df = pd.read_parquet(\n", - " f\"{BUS_SERVICE_GCS}\"\n", - " \"quarterly_metrics/statewide_time_series.parquet\"\n", - ")" - ] - }, { "cell_type": "code", "execution_count": null, @@ -109,9 +92,155 @@ "source": [ "current_quarter = \"2024-Q2\"\n", "\n", - "current_statewide = statewide_df.loc[\n", - " statewide_df.year_quarter==current_quarter\n", - "]" + "operator_df = pd.read_parquet(\n", + " f\"{BUS_SERVICE_GCS}\"\n", + " \"quarterly_metrics/operator_time_series.parquet\",\n", + " filters = [[(\"year_quarter\", \"==\", current_quarter)]]\n", + ").pipe(category_wrangling)\n", + "\n", + "district_df = pd.read_parquet(\n", + " f\"{BUS_SERVICE_GCS}\"\n", + " \"quarterly_metrics/district_time_series.parquet\",\n", + " filters = [[(\"year_quarter\", \"==\", current_quarter)]]\n", + ").pipe(category_wrangling)\n", + "\n", + "statewide_df = pd.read_parquet(\n", + " f\"{BUS_SERVICE_GCS}\"\n", + " \"quarterly_metrics/statewide_time_series.parquet\",\n", + " filters = [[(\"year_quarter\", \"==\", current_quarter)]]\n", + ").pipe(category_wrangling)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def shared_nano_options(\n", + " point_stroke_color: str,\n", + " line_stroke_color: str,\n", + " point_fill_color: str,\n", + " area_fill_color: str\n", + "):\n", + " nano_options = nanoplot_options(\n", + " data_point_radius=6,\n", + " data_point_stroke_color=get_hex(point_stroke_color),\n", + " data_point_fill_color=get_hex(point_fill_color),\n", + " data_point_stroke_width=3,\n", + " data_line_type=\"curved\",\n", + " data_line_stroke_color=get_hex(line_stroke_color),\n", + " data_line_stroke_width=8,\n", + " data_area_fill_color=get_hex(area_fill_color),\n", + " #vertical_guide_stroke_color=None,\n", + " show_y_axis_guide=True,\n", + " #show_vertical_guides=False,\n", + " interactive_data_values = True,\n", + " #reference_line_color=get_hex(\"salmon1\"),\n", + " show_reference_line=False\n", + " )\n", + " \n", + " return nano_options" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_table(df: pd.DataFrame): \n", + " \n", + " MIN_SPEED, MAX_SPEED = df.speed_mph.min(), df.speed_mph.max()\n", + " MIN_SERVICE, MAX_SERVICE = df.service_hours_per_route.min(), df.service_hours_per_route.max()\n", + " \n", + " table = (\n", + " GT(pl.from_pandas(df))\n", + " .fmt_nanoplot(\n", + " columns=\"speed_mph_ts\",\n", + " plot_type=\"line\",\n", + " expand_y=[round(MIN_SPEED, 0), round(MAX_SPEED, 0)],\n", + " options=shared_nano_options(\n", + " point_stroke_color = \"black\",\n", + " line_stroke_color = \"green\",\n", + " point_fill_color = \"white\",\n", + " area_fill_color = \"seagreen2\")\n", + " ).fmt_nanoplot(\n", + " columns=\"service_hours_per_route_ts\",\n", + " plot_type=\"line\",\n", + " expand_y=[round(MIN_SERVICE, 0), round(MAX_SERVICE, 0)],\n", + " options=shared_nano_options(\n", + " point_stroke_color = \"black\", \n", + " line_stroke_color = \"steelblue1\",\n", + " point_fill_color = \"white\",\n", + " area_fill_color = \"lightskyblue2\", \n", + " )\n", + " ).fmt_number(\n", + " columns = [\n", + " \"daily_service_hours\", \n", + " \"service_hours_per_route\", \n", + " \"speed_mph\"], decimals=1\n", + " ).fmt_integer(\n", + " columns = [\"daily_routes\", \"daily_vp_routes\"]\n", + " ).cols_label(\n", + " category = \"Category\",\n", + " daily_service_hours = \"Daily Service Hours\",\n", + " service_hours_per_route = \"Service Hours per Route\",\n", + " speed_mph = \"Average Speed\",\n", + " daily_routes = \"# Routes\",\n", + " daily_vp_routes = \"# Routes\",\n", + " service_hours_per_route_ts = \"Service Hours per Route (time-series)\",\n", + " speed_mph_ts = \"Speed (time-series)\",\n", + " ).tab_header(\n", + " title = \"Service Hours and Speed\",\n", + " subtitle = f\"{current_quarter}\"\n", + " ).tab_spanner(\n", + " label=\"Service\", \n", + " columns=[\"daily_service_hours\", \n", + " \"service_hours_per_route\", \"daily_routes\", \n", + " \"service_hours_per_route_ts\"]\n", + " ).tab_spanner(\n", + " label=\"Speed (mph)\",\n", + " columns = [\"speed_mph\", \"daily_vp_routes\", \"speed_mph_ts\"]\n", + " ).tab_options(\n", + " container_width = \"100%\",\n", + " table_background_color=\"white\",\n", + " table_body_hlines_style=\"none\",\n", + " table_body_vlines_style=\"none\",\n", + " heading_background_color=\"white\",\n", + " column_labels_background_color=\"white\",\n", + " row_group_background_color=\"white\",\n", + " stub_background_color=\"white\",\n", + " source_notes_background_color=\"white\",\n", + " table_font_size=\"14px\",\n", + " heading_align=\"center\"\n", + " ).cols_hide(\n", + " [\"year_quarter\", \"service_hours\", \"n_routes\", \n", + " \"n_dates\", \"n_vp_routes\"]\n", + " ).sub_missing(\n", + " columns = [\"speed_mph\", \"speed_mph_ts\", \"daily_vp_routes\"],\n", + " missing_text = \"\"\n", + " ).tab_style(\n", + " style=style.text(weight=\"bold\"),\n", + " locations=loc.body(rows=pl.col(\"category\") == \"Total\")\n", + " ).tab_style(\n", + " style=style.text(\n", + " weight=\"normal\", style=\"italic\", color=get_hex(\"gray20\")),\n", + " locations=loc.body(\n", + " rows=pl.col(\"category\") == \"On or Intersects SHN\"),\n", + " ).cols_align(align=\"center\")\n", + " .cols_align(align=\"left\", columns=\"category\")\n", + " \n", + " )\n", + " \n", + " return table" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Statewide Metrics" ] }, { @@ -120,57 +249,14 @@ "metadata": {}, "outputs": [], "source": [ - "(GT(pl.from_pandas(current_statewide.pipe(category_wrangling)))\n", - ".fmt_number(\n", - " columns = [\n", - " \"daily_service_hours\", \n", - " \"service_hours_per_route\", \n", - " \"speed_mph\"], decimals=1\n", - ").fmt_integer(\n", - " columns = [\"daily_routes\", \"daily_vp_routes\"]\n", - " ).cols_label(\n", - " category = \"Category\",\n", - " year_quarter = \"Quarter\",\n", - " daily_service_hours = \"Daily Service Hours\",\n", - " service_hours_per_route = \"Service Hours per Route\",\n", - " speed_mph = \"Average Speed\",\n", - " daily_routes = \"# Routes\",\n", - " daily_vp_routes = \"# Routes with Speed\"\n", - " ).tab_header(\n", - " title = \"Service Hours and Speed\",\n", - " subtitle = f\"{current_quarter}\"\n", - " ).tab_spanner(\n", - " label=\"Service\", \n", - " columns=[\"daily_service_hours\", \n", - " \"service_hours_per_route\", \"daily_routes\"]\n", - ").tab_spanner(\n", - " label=\"Speed\",\n", - " columns = [\"speed_mph\", \"daily_vp_routes\"]\n", - ").tab_options(\n", - " container_width = \"100%\",\n", - " table_background_color=\"white\",\n", - " table_body_hlines_style=\"none\",\n", - " table_body_vlines_style=\"none\",\n", - " heading_background_color=\"white\",\n", - " column_labels_background_color=\"white\",\n", - " row_group_background_color=\"white\",\n", - " stub_background_color=\"white\",\n", - " source_notes_background_color=\"white\",\n", - " table_font_size=\"14px\",\n", - " heading_align=\"center\"\n", - ").cols_hide(\n", - " [\"year_quarter\", \"service_hours\", \"n_routes\", \n", - " \"n_dates\", \"n_vp_routes\"]\n", - ").tab_style(\n", - " style=style.text(weight=\"bold\"),\n", - " locations=loc.body(rows=pl.col(\"category\") == \"Total\")\n", - ").tab_style(\n", - " style=style.text(\n", - " weight=\"bold\", style=\"italic\", color=get_hex(\"gray30\")),\n", - " locations=loc.body(rows=pl.col(\"category\") == \"On or Intersects SHN\"),\n", - ").cols_align(align=\"center\")\n", - " .cols_align(align=\"left\", columns=\"category\")\n", - ")" + "plot_table(statewide_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## District Breakdown" ] }, { @@ -179,16 +265,31 @@ "metadata": {}, "outputs": [], "source": [ - "(alt.Chart(operator_df.drop(columns = \"geometry\"))\n", - " .mark_bar()\n", - " .encode(\n", - " x=\"year_quarter:O\",\n", - " y=\"sum(service_hours_per_route):Q\",\n", - " column = \"category\",\n", - " tooltip=[\"year_quarter\", \"service_hours_per_route\", \n", - " \"category\", \"service_hours\"]\n", - " ).interactive()\n", - ")" + "def district_table_specs(table, one_district, one_quarter):\n", + " table2 = (table\n", + " .cols_hide(\"caltrans_district\")\n", + " .tab_header(\n", + " title = f\"District {one_district}\",\n", + " subtitle = f\"Service Hours and Speed {one_quarter}\")\n", + " )\n", + " \n", + " return table2\n", + " \n", + "\n", + "for i in sorted(district_df.caltrans_district.unique()):\n", + " table = plot_table(\n", + " district_df[district_df.caltrans_district==i])\n", + " \n", + " table = district_table_specs(table, i, current_quarter)\n", + "\n", + " display(table)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Operator Breakdown" ] }, { @@ -197,16 +298,15 @@ "metadata": {}, "outputs": [], "source": [ - "(alt.Chart(statewide_df)\n", - " .mark_bar()\n", - " .encode(\n", - " x=\"year_quarter:O\",\n", - " y=\"service_hours_per_route:Q\",\n", - " column = \"category\",\n", - " tooltip = [\"year_quarter\", \"service_hours_per_route\", \"category\"]\n", - " ).interactive()\n", - ")" + "plot_table(operator_df)" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/quarterly_performance_objective/new_report.qmd b/quarterly_performance_objective/new_report.qmd index 31700a9cc..de120c4e0 100644 --- a/quarterly_performance_objective/new_report.qmd +++ b/quarterly_performance_objective/new_report.qmd @@ -11,16 +11,15 @@ format: toc-title: Contents toc-depth: 3 code-links: - - text: Analysis Products - icon: bar-chart-fill - href: https://analysis.calitp.org - - text: Reach Out! - icon: envelope - href: mailto:hello@calitp.org + - text: Analysis Products + icon: bar-chart-fill + href: 'https://analysis.calitp.org' + - text: Reach Out! + icon: envelope + href: 'mailto:hello@calitp.org' jupyter: python3 --- - ```{python} import altair as alt import geopandas as gpd @@ -32,18 +31,6 @@ from great_tables import GT, _data_color, loc, md, nanoplot_options, style from update_vars import BUS_SERVICE_GCS ``` -```{python} -operator_df = gpd.read_parquet( - f"{BUS_SERVICE_GCS}" - "quarterly_metrics/operator_time_series.parquet" -) - -statewide_df = pd.read_parquet( - f"{BUS_SERVICE_GCS}" - "quarterly_metrics/statewide_time_series.parquet" -) -``` - ```{python} def category_wrangling( df: pd.DataFrame, @@ -84,88 +71,173 @@ def get_hex(color_name: str) -> str: ```{python} current_quarter = "2024-Q2" -current_statewide = statewide_df.loc[ - statewide_df.year_quarter==current_quarter -] +operator_df = pd.read_parquet( + f"{BUS_SERVICE_GCS}" + "quarterly_metrics/operator_time_series.parquet", + filters = [[("year_quarter", "==", current_quarter)]] +).pipe(category_wrangling) + +district_df = pd.read_parquet( + f"{BUS_SERVICE_GCS}" + "quarterly_metrics/district_time_series.parquet", + filters = [[("year_quarter", "==", current_quarter)]] +).pipe(category_wrangling) + +statewide_df = pd.read_parquet( + f"{BUS_SERVICE_GCS}" + "quarterly_metrics/statewide_time_series.parquet", + filters = [[("year_quarter", "==", current_quarter)]] +).pipe(category_wrangling) +``` + +```{python} +def shared_nano_options( + point_stroke_color: str, + line_stroke_color: str, + point_fill_color: str, + area_fill_color: str +): + nano_options = nanoplot_options( + data_point_radius=6, + data_point_stroke_color=get_hex(point_stroke_color), + data_point_fill_color=get_hex(point_fill_color), + data_point_stroke_width=3, + data_line_type="curved", + data_line_stroke_color=get_hex(line_stroke_color), + data_line_stroke_width=8, + data_area_fill_color=get_hex(area_fill_color), + #vertical_guide_stroke_color=None, + show_y_axis_guide=True, + #show_vertical_guides=False, + interactive_data_values = True, + #reference_line_color=get_hex("salmon1"), + show_reference_line=False + ) + + return nano_options +``` + +```{python} +def plot_table(df: pd.DataFrame): + + MIN_SPEED, MAX_SPEED = df.speed_mph.min(), df.speed_mph.max() + MIN_SERVICE, MAX_SERVICE = df.service_hours_per_route.min(), df.service_hours_per_route.max() + + table = ( + GT(pl.from_pandas(df)) + .fmt_nanoplot( + columns="speed_mph_ts", + plot_type="line", + expand_y=[round(MIN_SPEED, 0), round(MAX_SPEED, 0)], + options=shared_nano_options( + point_stroke_color = "black", + line_stroke_color = "green", + point_fill_color = "white", + area_fill_color = "seagreen2") + ).fmt_nanoplot( + columns="service_hours_per_route_ts", + plot_type="line", + expand_y=[round(MIN_SERVICE, 0), round(MAX_SERVICE, 0)], + options=shared_nano_options( + point_stroke_color = "black", + line_stroke_color = "steelblue1", + point_fill_color = "white", + area_fill_color = "lightskyblue2", + ) + ).fmt_number( + columns = [ + "daily_service_hours", + "service_hours_per_route", + "speed_mph"], decimals=1 + ).fmt_integer( + columns = ["daily_routes", "daily_vp_routes"] + ).cols_label( + category = "Category", + daily_service_hours = "Daily Service Hours", + service_hours_per_route = "Service Hours per Route", + speed_mph = "Average Speed", + daily_routes = "# Routes", + daily_vp_routes = "# Routes", + service_hours_per_route_ts = "Service Hours per Route (time-series)", + speed_mph_ts = "Speed (time-series)", + ).tab_header( + title = "Service Hours and Speed", + subtitle = f"{current_quarter}" + ).tab_spanner( + label="Service", + columns=["daily_service_hours", + "service_hours_per_route", "daily_routes", + "service_hours_per_route_ts"] + ).tab_spanner( + label="Speed (mph)", + columns = ["speed_mph", "daily_vp_routes", "speed_mph_ts"] + ).tab_options( + container_width = "100%", + table_background_color="white", + table_body_hlines_style="none", + table_body_vlines_style="none", + heading_background_color="white", + column_labels_background_color="white", + row_group_background_color="white", + stub_background_color="white", + source_notes_background_color="white", + table_font_size="14px", + heading_align="center" + ).cols_hide( + ["year_quarter", "service_hours", "n_routes", + "n_dates", "n_vp_routes"] + ).sub_missing( + columns = ["speed_mph", "speed_mph_ts", "daily_vp_routes"], + missing_text = "" + ).tab_style( + style=style.text(weight="bold"), + locations=loc.body(rows=pl.col("category") == "Total") + ).tab_style( + style=style.text( + weight="normal", style="italic", color=get_hex("gray20")), + locations=loc.body( + rows=pl.col("category") == "On or Intersects SHN"), + ).cols_align(align="center") + .cols_align(align="left", columns="category") + + ) + + return table ``` +## Statewide Metrics + ```{python} -(GT(pl.from_pandas(current_statewide.pipe(category_wrangling))) -.fmt_number( - columns = [ - "daily_service_hours", - "service_hours_per_route", - "speed_mph"], decimals=1 -).fmt_integer( - columns = ["daily_routes", "daily_vp_routes"] - ).cols_label( - category = "Category", - year_quarter = "Quarter", - daily_service_hours = "Daily Service Hours", - service_hours_per_route = "Service Hours per Route", - speed_mph = "Average Speed", - daily_routes = "# Routes", - daily_vp_routes = "# Routes with Speed" - ).tab_header( - title = "Service Hours and Speed", - subtitle = f"{current_quarter}" - ).tab_spanner( - label="Service", - columns=["daily_service_hours", - "service_hours_per_route", "daily_routes"] -).tab_spanner( - label="Speed", - columns = ["speed_mph", "daily_vp_routes"] -).tab_options( - container_width = "100%", - table_background_color="white", - table_body_hlines_style="none", - table_body_vlines_style="none", - heading_background_color="white", - column_labels_background_color="white", - row_group_background_color="white", - stub_background_color="white", - source_notes_background_color="white", - table_font_size="14px", - heading_align="center" -).cols_hide( - ["year_quarter", "service_hours", "n_routes", - "n_dates", "n_vp_routes"] -).tab_style( - style=style.text(weight="bold"), - locations=loc.body(rows=pl.col("category") == "Total") -).tab_style( - style=style.text( - weight="bold", style="italic", color=get_hex("gray30")), - locations=loc.body(rows=pl.col("category") == "On or Intersects SHN"), -).cols_align(align="center") - .cols_align(align="left", columns="category") -) +plot_table(statewide_df) ``` +## District Breakdown + ```{python} -(alt.Chart(operator_df.drop(columns = "geometry")) - .mark_bar() - .encode( - x="year_quarter:O", - y="sum(service_hours_per_route):Q", - column = "category", - tooltip=["year_quarter", "service_hours_per_route", - "category", "service_hours"] - ).interactive() -) +def district_table_specs(table, one_district, one_quarter): + table2 = (table + .cols_hide("caltrans_district") + .tab_header( + title = f"District {one_district}", + subtitle = f"Service Hours and Speed {one_quarter}") + ) + + return table2 + + +for i in sorted(district_df.caltrans_district.unique()): + table = plot_table( + district_df[district_df.caltrans_district==i]) + + table = district_table_specs(table, i, current_quarter) + + display(table) ``` +## Operator Breakdown + ```{python} -(alt.Chart(statewide_df) - .mark_bar() - .encode( - x="year_quarter:O", - y="service_hours_per_route:Q", - column = "category", - tooltip = ["year_quarter", "service_hours_per_route", "category"] - ).interactive() -) +plot_table(operator_df) ``` From a5664cd2d6b33f4c3a9619fe9b8a36d92f314196 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 21 Jun 2024 00:04:11 +0000 Subject: [PATCH 05/10] update readme --- quarterly_performance_objective/README.md | 20 ++++----- quarterly_performance_objective/clean_data.py | 41 +++++++++++-------- 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/quarterly_performance_objective/README.md b/quarterly_performance_objective/README.md index 74577a7c2..f577ebbe6 100644 --- a/quarterly_performance_objective/README.md +++ b/quarterly_performance_objective/README.md @@ -1,11 +1,11 @@ # Mass Transit Performance Objectives -## Performance Objective 01: Increase total amount of service on the SHN and reliability of that service by 2024 +## Performance Objective 01: Increase total amount of service on the SHN and reliability (speed) of that service Transit routes along the SHN can be categorized into 3 groups: 1. **On SHN** - where at least 20% of the transit route runs the SHN (within 50 ft) -1. **Intersects SHN** - where at least 35% of the transit route runs within 0.5 mile of the SHN. +1. **Intersects SHN** - where at least 20% of the transit route runs within 0.5 mile of the SHN. 1. **Other** - all other transit routes. @@ -14,18 +14,12 @@ Initially presented for the Planning and Modal Advisory Committee (PMAC). ## Workflow ### Data Generation -1. [Aggregate from shape level to route level](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/A1_scheduled_route_level_df) -1. [Generate processed data for categories and service hours](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/A2_generate_routes_on_shn_data.py) with GTFS schedule data -1. [Categorize routes into 3 groups](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/A3_categorize_routes.py) -1. [Merge service hours and speeds and estimate delay](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/A4_add_route_speeds.py) +1. [Clean and process data each month](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/clean_data.py) +1. [Compile monthly data into time-series](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/compile_time_series.py) -### Helper Scripts for Reports -1. [data prep functions](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/report_metrics.py) -1. [chart functions](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/report_charts.py) +### Helper Functions +[Categorize routes](https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/segment_speed_utils/parallel_corridors.py) into on SHN, parallel / intersects SHN, or other. ### Reports -Create a report of current quarter's snapshot as well as a historical comparison of quarterly metrics report. - -1. [current quarter's snapshot](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/current_quarter_report.ipynb) -1. [historical comparison](https://github.com/cal-itp/data-analyses/blob/main/quarterly_performance_objective/historical_service_hours_v2.ipynb) \ No newline at end of file +Create a report of current quarter's snapshot with a historical comparison. \ No newline at end of file diff --git a/quarterly_performance_objective/clean_data.py b/quarterly_performance_objective/clean_data.py index 21493d6a1..d74b88d3d 100644 --- a/quarterly_performance_objective/clean_data.py +++ b/quarterly_performance_objective/clean_data.py @@ -10,11 +10,9 @@ from segment_speed_utils import (helpers, gtfs_schedule_wrangling, parallel_corridors) from calitp_data_analysis import geography_utils, utils -from update_vars import ( - GTFS_DATA_DICT, SEGMENT_GCS, - BUS_SERVICE_GCS, ANALYSIS_DATE -) - +from shared_utils import rt_dates +from update_vars import (GTFS_DATA_DICT, SEGMENT_GCS, + BUS_SERVICE_GCS) def aggregate_to_route_service_hours(analysis_date: str) -> pd.DataFrame: """ @@ -69,6 +67,9 @@ def get_route_summary_speed(analysis_date: str) -> pd.DataFrame: def process_df(analysis_date: str) -> gpd.GeoDataFrame: """ + Tag routes by on_shn / parallel / other, + and attach average route_speeds, caltrans_district. + Get route-level df for an analysis date to use in report. """ # Get gdf of unique routes tagging them by on_shn/parallel/other df = parallel_corridors.routes_by_on_shn_parallel_categories(analysis_date) @@ -76,8 +77,8 @@ def process_df(analysis_date: str) -> gpd.GeoDataFrame: # Get df of route service hours route_service_hours = aggregate_to_route_service_hours(analysis_date) - # Get crosswalk linking schedule_gtfs_dataset_key to the organization_name (which we use for portfolio) - # and caltrans_district + # Get crosswalk linking schedule_gtfs_dataset_key to the organization_name + # (which we use for portfolio) and caltrans_district crosswalk = helpers.import_schedule_gtfs_key_organization_crosswalk( analysis_date, columns = ["schedule_gtfs_dataset_key", "organization_name", @@ -87,6 +88,7 @@ def process_df(analysis_date: str) -> gpd.GeoDataFrame: # Get route summary speeds route_speeds = get_route_summary_speed(analysis_date) + # Merge route categories with route service hours df2 = pd.merge( df, route_service_hours, @@ -94,6 +96,7 @@ def process_df(analysis_date: str) -> gpd.GeoDataFrame: how = "inner" ) + # Merge in crosswalk for caltrans_district df3 = pd.merge( df2, crosswalk, @@ -108,6 +111,7 @@ def process_df(analysis_date: str) -> gpd.GeoDataFrame: geography_utils.FEET_PER_MI) ).drop(columns = "route_length_feet") + # Merge in route average speeds (left merge because not every operator has RT) df4 = pd.merge( df3, route_speeds, @@ -124,15 +128,20 @@ def process_df(analysis_date: str) -> gpd.GeoDataFrame: if __name__ == "__main__": - start = datetime.datetime.now() + analysis_date_list = [rt_dates.DATES["jun2024"]] + + for d in analysis_date_list: + start = datetime.datetime.now() - gdf = process_df(ANALYSIS_DATE) + gdf = process_df(d) - utils.geoparquet_gcs_export( - gdf, - BUS_SERVICE_GCS, - f"routes_categorized_with_speed_{ANALYSIS_DATE}" - ) + utils.geoparquet_gcs_export( + gdf, + BUS_SERVICE_GCS, + f"routes_categorized_with_speed_{d}" + ) + + del gdf - end = datetime.datetime.now() - print(f"quarterly route df for {ANALYSIS_DATE}: {end - start}") \ No newline at end of file + end = datetime.datetime.now() + print(f"quarterly route df for {d}: {end - start}") \ No newline at end of file From a27103b29e44431a872389f56d71a646ee237b3d Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 21 Jun 2024 00:05:20 +0000 Subject: [PATCH 06/10] remove old report and helper functions --- .../current_quarter_report.ipynb | 511 -------------- .../historical_report.ipynb | 504 -------------- .../historical_service_hours_v2.ipynb | 659 ------------------ .../report_charts.py | 97 --- .../report_metrics.py | 249 ------- 5 files changed, 2020 deletions(-) delete mode 100644 quarterly_performance_objective/current_quarter_report.ipynb delete mode 100644 quarterly_performance_objective/historical_report.ipynb delete mode 100644 quarterly_performance_objective/historical_service_hours_v2.ipynb delete mode 100644 quarterly_performance_objective/report_charts.py delete mode 100644 quarterly_performance_objective/report_metrics.py diff --git a/quarterly_performance_objective/current_quarter_report.ipynb b/quarterly_performance_objective/current_quarter_report.ipynb deleted file mode 100644 index 57eefcee6..000000000 --- a/quarterly_performance_objective/current_quarter_report.ipynb +++ /dev/null @@ -1,511 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "aba3b0aa-73bf-4a8d-b4ee-64ecc98196fc", - "metadata": {}, - "source": [ - "# Current Quarter\n", - "\n", - "01 - Increase total amount of service on the SHN and reliability of that service by 2024\n", - "\n", - "## Routes on the State Highway Network (SHN)\n", - "\n", - "Transit routes along the SHN can be categorized into 3 groups:\n", - "1. **On SHN** - where at least 20% of the transit route runs the SHN (within 50 ft) \n", - "2. **Intersects SHN** - where at least 35% of the transit route runs within 0.5 mile of the SHN.\n", - "3. **Other** - all other transit routes.\n", - "\n", - "### Metrics\n", - "* service hours, service hours per route\n", - "* delay hours, delay hours per route\n", - "* map of route by category (and by mode)\n", - "\n", - "The metrics are shown for for transit routes **on the SHN** and **intersects SHN**." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "792ee3df-ebcb-4bf7-a7f2-f7f903efe126", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "\n", - "import altair as alt\n", - "import calitp_data_analysis.magics\n", - "import geopandas as gpd\n", - "import intake\n", - "import pandas as pd\n", - "\n", - "from IPython.display import HTML\n", - "\n", - "import report_metrics\n", - "import report_charts\n", - "from update_vars import BUS_SERVICE_GCS, CURRENT_QUARTER, ANALYSIS_DATE\n", - "from shared_utils import portfolio_utils, rt_dates\n", - "from calitp_data_analysis import calitp_color_palette as cp\n", - "from calitp_data_analysis import styleguide\n", - "\n", - "hq_catalog = intake.open_catalog(\"../high_quality_transit_areas/*.yml\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a15fcf6a-b0d4-47f0-be78-118ddf43ab91", - "metadata": { - "tags": [ - "parameters" - ] - }, - "outputs": [], - "source": [ - "# parameters cell" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e00a8689-f56e-47d4-bd4a-6795122a1e86", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture_parameters\n", - "QUARTER_CLEANED = CURRENT_QUARTER.replace('_', ' ')\n", - "CURRENT_QUARTER, ANALYSIS_DATE, QUARTER_CLEANED" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "add13293-3d9d-40e0-b1fb-c50b89fe84de", - "metadata": {}, - "outputs": [], - "source": [ - "df = report_metrics.prep_data_for_report(ANALYSIS_DATE)" - ] - }, - { - "cell_type": "markdown", - "id": "c22bad3d-2f2c-4e1c-8aae-edc59377410a", - "metadata": {}, - "source": [ - "## Statewide Stats for {QUARTER_CLEANED} ({ANALYSIS_DATE})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79217ca7-e3d6-46eb-b5f9-4bf79c230f24", - "metadata": {}, - "outputs": [], - "source": [ - "summary = report_metrics.get_service_hours_summary_table(df) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "92e49d50-009f-4b2e-9661-688b81c38694", - "metadata": {}, - "outputs": [], - "source": [ - "all_hours = portfolio_utils.aggregate_by_geography(\n", - " summary.assign(category=\"All\"),\n", - " group_cols = [\"category\"],\n", - " sum_cols = [\"unique_route\", \"service_hours\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5ef150ae-3dda-42ec-af43-13b36df5e231", - "metadata": {}, - "outputs": [], - "source": [ - "STATEWIDE_HOURS = all_hours.service_hours.iloc[0]\n", - "FORMATTED_HOURS = f'{STATEWIDE_HOURS:,}' \n", - "AVG_SERVICE = round(STATEWIDE_HOURS / all_hours.unique_route.iloc[0], 1)\n", - "\n", - "display(\n", - " HTML(\n", - " f\"

{FORMATTED_HOURS} total service hours statewide

\"\n", - " f\"

{AVG_SERVICE} service hours per route statewide

\"\n", - "\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "78da2714-2999-4763-ab71-b041a68c5d68", - "metadata": {}, - "outputs": [], - "source": [ - "# Chart utils\n", - "HEIGHT = 200\n", - "WIDTH = 150\n", - "\n", - "color_dict = {\n", - " \"On SHN\": cp.CALITP_CATEGORY_BRIGHT_COLORS[3],\n", - " \"Intersects SHN\": cp.CALITP_CATEGORY_BRIGHT_COLORS[4],\n", - " \"Other\": cp.CALITP_CATEGORY_BRIGHT_COLORS[1]\n", - "}\n", - "\n", - "def make_bar_by_category(df: pd.DataFrame, x_col: str,\n", - " y_col: str) -> alt.Chart:\n", - " \n", - " base_bar = report_charts.base_bar(df, x_col = x_col)\n", - " legend_sort_order = [\"On SHN\", \"Intersects SHN\", \"Other\"]\n", - " \n", - " y_title = y_col.replace('_', ' ').title()\n", - " \n", - " chart = base_bar.encode(\n", - " x = alt.X(x_col, sort = legend_sort_order),\n", - " y = alt.Y(f\"{y_col}:Q\", title = f\"{y_title}\"),\n", - " color = alt.Color(f\"{x_col}:N\", title = f\"{x_col.title()}\",\n", - " scale = alt.Scale(domain = legend_sort_order,\n", - " range = list(color_dict.values())))\n", - " )\n", - " \n", - " chart = (chart\n", - " .properties(width = WIDTH, height = HEIGHT, \n", - " title= f\"{y_title} by Category\")\n", - " )\n", - " \n", - " return chart\n", - "\n", - "s1 = make_bar_by_category(summary, x_col = \"category\", \n", - " y_col = \"service_hours_per_route\")\n", - "s2 = make_bar_by_category(summary, x_col = \"category\", \n", - " y_col = \"service_hours\")\n", - "\n", - "combined = alt.hconcat(s1, s2)\n", - "combined = styleguide.apply_chart_config(combined)\n", - "\n", - "combined" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "81c94bf7-f55b-43e2-9204-ad70b45f61a2", - "metadata": {}, - "outputs": [], - "source": [ - "service_cols_dict = {\n", - " \"category\": \"Category\",\n", - " \"service_hours\": \"Service Hours\",\n", - " \"pct_service_hours\": \"% Service Hours\",\n", - " \"unique_route\": \"# Routes\",\n", - " \"pct_unique_route\": \"% Routes\",\n", - " \"service_hours_per_route\": \"Service Hours per Route\",\n", - "}\n", - "\n", - "summary_styled = portfolio_utils.style_table(\n", - " summary, \n", - " rename_cols = service_cols_dict, \n", - " integer_cols = [\"Service Hours\", \"# Routes\"],\n", - " one_decimal_cols = [\"Service Hours per Route\"],\n", - " left_align_cols = \"first\",\n", - " center_align_cols = \"all\",\n", - " custom_format_cols = {'{:.1%}': [\"% Service Hours\", \"% Routes\"]},\n", - " display_table = True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "4466056e-6c6e-437f-acec-f9e9323d5153", - "metadata": {}, - "source": [ - "## Reliability (Delay)\n", - "\n", - "Note: Not every route has GTFS Real-Time information, which supplies delay data." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0867cb50-8fdc-43a7-a286-e3c782792cb3", - "metadata": {}, - "outputs": [], - "source": [ - "delay_summary = report_metrics.get_delay_summary_table(df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3e12806f-bf4a-453d-a753-ffe42f2a7b90", - "metadata": {}, - "outputs": [], - "source": [ - "delay_summary" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fdf3b32c-6842-4c16-b076-805c7f1c9f68", - "metadata": {}, - "outputs": [], - "source": [ - "STATEWIDE_DELAY = delay_summary.delay_hours.sum()\n", - "FORMATTED_HOURS = f'{STATEWIDE_DELAY:,g}' \n", - "AVG_DELAY = round(STATEWIDE_DELAY / delay_summary.unique_route.sum(), 2)\n", - "\n", - "\n", - "display(\n", - " HTML(\n", - " f\"

{FORMATTED_HOURS} total delay hours statewide

\"\n", - " f\"

{AVG_DELAY} delay hours per route statewide

\"\n", - " )\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a677fc4c-3156-4ecc-abbb-334e3849cec4", - "metadata": {}, - "outputs": [], - "source": [ - "d1 = make_bar_by_category(delay_summary, x_col = \"category\", \n", - " y_col = \"delay_hours_per_route\")\n", - "d2 = make_bar_by_category(delay_summary, x_col = \"category\", \n", - " y_col = \"delay_hours\")\n", - "\n", - "combined = alt.hconcat(d1, d2)\n", - "combined = styleguide.apply_chart_config(combined)\n", - "\n", - "combined" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6c3fdecd-9b56-4121-9d97-f1b22fe67fab", - "metadata": {}, - "outputs": [], - "source": [ - "delay_cols_dict = {\n", - " \"category\": \"Category\",\n", - " \"delay_hours\": \"Total Delay Hours\",\n", - " \"pct_delay_hours\": \"% Delay Hours\",\n", - " \"unique_route\": \"# Routes\",\n", - " \"pct_unique_route\": \"% Routes\",\n", - " \"delay_hours_per_route\": \"Delay Hours per Route\",\n", - "}\n", - "\n", - "delay_summary_styled = portfolio_utils.style_table(\n", - " delay_summary, \n", - " rename_cols = delay_cols_dict, \n", - " integer_cols = [\"Total Delay Hours\", \"# Routes\"],\n", - " two_decimal_cols = [\"Delay Hours per Route\"],\n", - " left_align_cols = \"first\",\n", - " center_align_cols = \"all\",\n", - " custom_format_cols = {'{:.1%}': [\"% Delay Hours\", \"% Routes\"]},\n", - " display_table = True\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "94f69cf9-4e05-4c49-b19b-81661d394169", - "metadata": {}, - "source": [ - "## By District" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4a74098-6dce-42ec-8393-545e97bd7e04", - "metadata": {}, - "outputs": [], - "source": [ - "# Have some rows where district is missing, \n", - "# but only for intersects_shn and other categories\n", - "# focus on just the on_shn category and do district breakdown\n", - "df[(df.district.isna())].category.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5f948e21-1555-4892-a4c2-1da54a84c268", - "metadata": {}, - "outputs": [], - "source": [ - "# Chart utils\n", - "WIDTH = 300\n", - "HEIGHT = 200\n", - "\n", - "by_district_service = report_metrics.by_district_on_shn_breakdown(\n", - " df, [\"service_hours\", \"unique_route\"])\n", - "\n", - "bar_total = (report_charts.make_district_bar(\n", - " by_district_service, \"service_hours\")\n", - " .properties(width=WIDTH, height=HEIGHT) \n", - " )\n", - "bar_avg = (report_charts.make_district_bar(\n", - " by_district_service, \"avg_service_hours\")\n", - " .properties(width=WIDTH, height=HEIGHT)\n", - " )\n", - "\n", - "service_hours_chart = report_charts.configure_hconcat_charts(\n", - " [bar_avg, bar_total], \n", - " x_scale=\"independent\", \n", - " y_scale=\"independent\", \n", - " chart_title=\"Service Hours by District\")\n", - "\n", - "service_hours_chart" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "24d959ea-e8fe-46fc-a2d2-bb85ada4f627", - "metadata": {}, - "outputs": [], - "source": [ - "by_district_delay = report_metrics.by_district_on_shn_breakdown(\n", - " df, [\"delay_hours\", \"unique_route\"]\n", - ")\n", - "\n", - "bar_total = (report_charts.make_district_bar(\n", - " by_district_delay, \"delay_hours\")\n", - " .properties(width=WIDTH, height=HEIGHT) \n", - " )\n", - "bar_avg = (report_charts.make_district_bar(\n", - " by_district_delay, \"avg_delay_hours\")\n", - " .properties(width=WIDTH, height=HEIGHT)\n", - " )\n", - "\n", - "delay_hours_chart = report_charts.configure_hconcat_charts(\n", - " [bar_avg, bar_total], \n", - " x_scale=\"independent\", \n", - " y_scale=\"independent\", \n", - " chart_title=\"Delay Hours by District\")\n", - "\n", - "delay_hours_chart" - ] - }, - { - "cell_type": "markdown", - "id": "aee65310-bd12-4dfe-9fb4-237d43d0a163", - "metadata": {}, - "source": [ - "## Map of Routes by Category" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "058dd099-90e2-490e-8a26-5858e00022fb", - "metadata": {}, - "outputs": [], - "source": [ - "def data_for_viz(df: gpd.GeoDataFrame):\n", - " gdf = report_metrics.clean_up_category_values(df)\n", - " \n", - " # line must fall within CA\n", - " ca = hq_catalog.ca_boundary.read().to_crs(f\"EPSG: {gdf.crs.to_epsg()}\")\n", - "\n", - " gdf = gpd.sjoin(\n", - " gdf,\n", - " ca,\n", - " how = \"inner\",\n", - " predicate = \"within\",\n", - " ).drop(columns= [\"index_right\"])\n", - " \n", - " # Drop columns that shouldn't get displayed in tooltip\n", - " drop_cols = [\n", - " \"rt_sched_category\", \n", - " \"State\", \"unique_route\"]\n", - " \n", - " gdf2 = gdf.drop(columns = drop_cols)\n", - " \n", - " return gdf2\n", - "\n", - "\n", - "gdf = data_for_viz(df)" - ] - }, - { - "cell_type": "markdown", - "id": "559fbcde-f01d-400f-bb15-b7c770997abb", - "metadata": {}, - "source": [ - "### All Routes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "735ccd97-9c90-47c6-b768-bc09f02b887b", - "metadata": {}, - "outputs": [], - "source": [ - "def make_map(gdf: gpd.GeoDataFrame): \n", - " # category is going to be sorted alphabetically,\n", - " # so sort our color_dict\n", - " sorted_dict = dict(sorted(color_dict.items()))\n", - " \n", - " m = gdf.explore(\n", - " \"category\", categorical=True,\n", - " cmap = list(sorted_dict.values()),\n", - " tiles = \"Carto DB Positron\"\n", - " )\n", - "\n", - " return m\n", - " \n", - "m = make_map(gdf)\n", - "m" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "948cb692-79b3-428a-a91d-e38ed3763d06", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - }, - "widgets": { - "application/vnd.jupyter.widget-state+json": { - "state": {}, - "version_major": 2, - "version_minor": 0 - } - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/quarterly_performance_objective/historical_report.ipynb b/quarterly_performance_objective/historical_report.ipynb deleted file mode 100644 index 5c8809546..000000000 --- a/quarterly_performance_objective/historical_report.ipynb +++ /dev/null @@ -1,504 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0801efda-e075-4826-a167-a8e54a8167fb", - "metadata": {}, - "source": [ - "# Historical Trends\n", - "\n", - "01 - Increase total amount of service on the SHN and reliability of that service by 2024\n", - "\n", - "## Routes on the State Highway Network (SHN)\n", - "Transit routes along the SHN can be categorized into 3 groups:\n", - "1. **On SHN** - where at least 20% of the transit route runs the SHN (within 50 ft) \n", - "2. **Intersects SHN** - where at least 35% of the transit route runs within 0.5 mile of the SHN.\n", - "3. **Other** - all other transit routes.\n", - "\n", - "## Metrics\n", - "* service hours, service hours per route\n", - "* delay hours, delay hours per route\n", - "\n", - "The metrics are shown for for transit routes **on the SHN** and **intersects SHN**." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34d1396e-e0c7-4804-8647-4aae76cc300c", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "\n", - "import altair as alt\n", - "import calitp_data_analysis.magics\n", - "import pandas as pd\n", - "\n", - "import report_metrics\n", - "from shared_utils import rt_dates, portfolio_utils\n", - "from calitp_data_analysis import calitp_color_palette as cp\n", - "from calitp_data_analysis import styleguide\n", - "from bus_service_utils import chart_utils" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5fb6bec-7aa3-4ee2-97a2-21f5713df370", - "metadata": {}, - "outputs": [], - "source": [ - "quarterly_metrics_dict" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc1eeb8f-d2ae-4152-8624-a7a10012d6bf", - "metadata": {}, - "outputs": [], - "source": [ - "quarterly_metrics_dict = {k: v for k, v in rt_dates.PMAC.items() \n", - " if \"2022\" not in k and k != \"Q1_2023\"}\n", - "\n", - "summary_df = report_metrics.concatenate_summary_across_dates(\n", - " quarterly_metrics_dict, summary_dataset = \"summary\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ba2aff18-9a0a-40cc-8bfd-5ef655e64071", - "metadata": {}, - "outputs": [], - "source": [ - "def get_statewide_averages(df: pd.DataFrame) -> pd.DataFrame:\n", - "\n", - " var_list = [\"service_hours\", \"delay_hours\", \n", - " \"unique_route\", \"delay_unique_route\"]\n", - "\n", - " group_cols = [\"year_quarter\", \"service_date\", \"year\", \"quarter\"]\n", - "\n", - " all_routes = portfolio_utils.aggregate_by_geography(\n", - " df[df.variable.isin(var_list)],\n", - " group_cols + [\"variable\"],\n", - " sum_cols = [\"value\"]\n", - " )\n", - " \n", - " # Make wide, to calculate average again\n", - " all_routes2 = pd.pivot(all_routes, \n", - " index = group_cols, \n", - " columns = \"variable\", values = \"value\"\n", - " ).reset_index()\n", - " \n", - " all_routes2 = all_routes2.assign(\n", - " service_hours_per_route = (all_routes2.service_hours.divide(\n", - " all_routes2.unique_route)).round(2), \n", - " delay_hours_per_route = (all_routes2.delay_hours.divide(\n", - " all_routes2.delay_unique_route)).round(2),\n", - " category = \"All\"\n", - " )\n", - " \n", - " #https://stackoverflow.com/questions/55027108/pandas-rename-index\n", - " # Get rid of column name\n", - " all_routes2.columns.name = \"\"\n", - " \n", - " # Wrangle back to long!\n", - " value_vars = [c for c in all_routes2.columns if c != \"category\" and \n", - " c not in group_cols\n", - " ]\n", - "\n", - " all_routes3 = pd.melt(\n", - " all_routes2, \n", - " id_vars = group_cols + [\"category\"],\n", - " var_name = \"variable\",\n", - " value_vars = value_vars \n", - " )\n", - " \n", - " return all_routes3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a65763be-f6e8-4ea4-bcf0-62e0ce3dd811", - "metadata": {}, - "outputs": [], - "source": [ - "statewide_avg = get_statewide_averages(summary_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9b7e294-74b9-451d-b169-6eca6a649d72", - "metadata": {}, - "outputs": [], - "source": [ - "# chart utils\n", - "HEIGHT = 250\n", - "WIDTH = 200\n", - "\n", - "def base_quarterly_bar(df: pd.DataFrame, variable: str,\n", - " x_col: str, y_col: str) -> alt.Chart:\n", - " \n", - " bar = (alt.Chart(df)\n", - " .mark_bar()\n", - " .encode(\n", - " x=alt.X(f\"{x_col}:O\", \n", - " #axis=alt.Axis(format=f'%Y Q%q'), \n", - " title = None),\n", - " y=alt.Y(f\"{y_col}:Q\", title = chart_utils.labeling(variable)),\n", - " )\n", - " )\n", - " \n", - " return bar" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bcf6de1e-b4e2-488b-8310-e9fe7e82184a", - "metadata": {}, - "outputs": [], - "source": [ - "def quarterly_bar_for_category(\n", - " df: pd.DataFrame, \n", - " variable_list: list = [\"service_hours\", \"delay_hours\"], \n", - " category: str = \"On SHN\",\n", - " x_col: str = \"year_quarter\", \n", - " y_col: str = \"value\", \n", - " chart_height: int = 200, chart_width: int = 500,\n", - ") -> alt.Chart: \n", - " \"\"\"\n", - " Plot quarterly metrics within the same category.\n", - " Ex: for all routes on SHN, show service hours, delay hours, \n", - " avg service hours, etc\n", - " \"\"\"\n", - " subset = df[(df.variable.isin(variable_list)) & (df.category==category)]\n", - " \n", - " var1 = variable_list[0]\n", - " df1 = subset[subset.variable==var1]\n", - " var2 = variable_list[1]\n", - " df2 = subset[subset.variable==var2]\n", - "\n", - " category = df1.category.iloc[0]\n", - " \n", - " color_dict = {\n", - " \"On SHN\": cp.CALITP_CATEGORY_BRIGHT_COLORS[4],\n", - " \"Intersects SHN\": cp.CALITP_CATEGORY_BRIGHT_COLORS[3],\n", - " \"All\": cp.CALITP_CATEGORY_BRIGHT_COLORS[0],\n", - " \"service_hours\": cp.CALITP_CATEGORY_BRIGHT_COLORS[4], # light blue\n", - " \"delay_hours\": cp.CALITP_CATEGORY_BRIGHT_COLORS[1], # light orange\n", - " \"service_hours_per_route\": cp.CALITP_CATEGORY_BRIGHT_COLORS[0], # med blue\n", - " \"delay_hours_per_route\": cp.CALITP_CATEGORY_BOLD_COLORS[1], # dark orange\n", - " }\n", - " \n", - " tooltip = ['year', 'quarter', 'year_quarter', \n", - " 'variable', 'category', 'value']\n", - " \n", - " bar1 = (base_quarterly_bar(df1, var1, x_col, y_col)\n", - " .encode(color = alt.value(color_dict[var1]), \n", - " tooltip = tooltip)\n", - " .properties(title={\n", - " \"text\": f\"{chart_utils.labeling(var1)}\",\n", - " \"subtitle\": f\"{category}\"\n", - " }, width = chart_width, height = chart_height)\n", - " .interactive()\n", - " )\n", - " \n", - " bar2 = (base_quarterly_bar(df2, var2, x_col, y_col)\n", - " .encode(color=alt.value(color_dict[var2]), \n", - " tooltip = tooltip)\n", - " .properties(title={\n", - " \"text\": f\"{chart_utils.labeling(var2)}\",\n", - " \"subtitle\": f\"{category}\"\n", - " }, width = chart_width, height = chart_height)\n", - " .interactive()\n", - " )\n", - " \n", - " if var1 == \"service_hours\":\n", - " space = 0\n", - " else:\n", - " space = 25\n", - " combined = (styleguide.apply_chart_config(alt.hconcat(bar1, bar2, \n", - " spacing=space))\n", - " .resolve_scale(y=\"independent\")\n", - " )\n", - "\n", - " return combined" - ] - }, - { - "cell_type": "markdown", - "id": "38b33eff-e35d-45f6-8338-efc20fe77e12", - "metadata": {}, - "source": [ - "## All Routes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a71a4b63-f133-4800-b754-3c5390425960", - "metadata": {}, - "outputs": [], - "source": [ - "category = \"All\"\n", - "var_list = [\"service_hours_per_route\", \"delay_hours_per_route\"]\n", - "\n", - "s1 = quarterly_bar_for_category(\n", - " statewide_avg,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")\n", - "\n", - "\n", - "var_list = [\"service_hours\", \"delay_hours\"]\n", - "\n", - "s2 = quarterly_bar_for_category(\n", - " statewide_avg,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c73c9542-f54d-44ee-92cc-80a751864de8", - "metadata": {}, - "outputs": [], - "source": [ - "s1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3928dcc1-58e7-4a1b-b91f-8e8c5044eb7c", - "metadata": {}, - "outputs": [], - "source": [ - "s2" - ] - }, - { - "cell_type": "markdown", - "id": "aa5564b6-6056-4dbc-8f36-193d7cf9043c", - "metadata": {}, - "source": [ - "## Routes on SHN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3b929dd-f394-457f-b57d-2a292f7f3fd8", - "metadata": {}, - "outputs": [], - "source": [ - "category = \"On SHN\"\n", - "var_list = [\"service_hours_per_route\", \"delay_hours_per_route\"]\n", - "\n", - "o1 = quarterly_bar_for_category(\n", - " summary_df,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")\n", - "\n", - "var_list = [\"service_hours\", \"delay_hours\"]\n", - "\n", - "o2 = quarterly_bar_for_category(\n", - " summary_df,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8990963-9167-439b-a929-e6ef9107256e", - "metadata": {}, - "outputs": [], - "source": [ - "o1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f154b08-d722-4959-be45-32bd93560576", - "metadata": {}, - "outputs": [], - "source": [ - "o2" - ] - }, - { - "cell_type": "markdown", - "id": "e69f82d3-0dd4-454c-b89b-70f290c6f590", - "metadata": {}, - "source": [ - "## Routes Intersecting SHN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9243c9f7-d096-4c2f-bc24-c3947ef2e1aa", - "metadata": {}, - "outputs": [], - "source": [ - "category = \"Intersects SHN\"\n", - "var_list = [\"service_hours_per_route\", \"delay_hours_per_route\"]\n", - "\n", - "i1 = quarterly_bar_for_category(\n", - " summary_df,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")\n", - "\n", - "var_list = [\"service_hours\", \"delay_hours\"]\n", - "\n", - "i2 = quarterly_bar_for_category(\n", - " summary_df,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7b40537-948b-4871-95b7-f73d3cd20410", - "metadata": {}, - "outputs": [], - "source": [ - "i1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91447b75-d247-4bf5-b522-7b9f22a94a17", - "metadata": {}, - "outputs": [], - "source": [ - "i2" - ] - }, - { - "cell_type": "markdown", - "id": "60fc11da-799b-46d6-b682-5158c4705fe9", - "metadata": {}, - "source": [ - "## Routes on SHN by District" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ca1363fb-8fd9-4578-a466-7105c21c4170", - "metadata": {}, - "outputs": [], - "source": [ - "district_df = report_metrics.concatenate_summary_across_dates(\n", - " quarterly_metrics_dict, summary_dataset = \"district\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "497496fb-63d6-4fa8-a4d1-d3e8fd3673dc", - "metadata": {}, - "outputs": [], - "source": [ - "def facet_by_district(df: pd.DataFrame, variable: str) -> alt.Chart:\n", - " subset = df[df.variable == variable]\n", - " \n", - " bar = (alt.Chart(subset)\n", - " .mark_bar()\n", - " .encode(\n", - " x=alt.X(f\"year_quarter:O\"),\n", - " y = alt.Y(\"value:Q\", title=f\"{variable.replace('_', ' '.title())}\"),\n", - " color = alt.Color(\"district:N\", title=None, \n", - " scale = alt.Scale(\n", - " range = cp.CALITP_CATEGORY_BRIGHT_COLORS + \n", - " cp.CALITP_CATEGORY_BOLD_COLORS), legend=None),\n", - " tooltip = [\"district\", \"year_quarter\", \"value\", \"variable\"]\n", - " ).facet(facet=\"district:N\", columns = 1, spacing=10, \n", - " title = f\"{variable.replace('_', ' ').title()}\")\n", - " .interactive()\n", - " )\n", - " \n", - " return bar" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8fa9eef8-ee56-42cf-9110-e445f223025d", - "metadata": {}, - "outputs": [], - "source": [ - "bar1 = facet_by_district(district_df, \"avg_service_hours\")\n", - "bar2 = facet_by_district(district_df, \"avg_delay_hours\")\n", - "\n", - "district_chart = styleguide.apply_chart_config(alt.hconcat(bar1, bar2))\n", - "district_chart" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a0f0c816-03dc-41ca-81b7-8a6c11930c52", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/quarterly_performance_objective/historical_service_hours_v2.ipynb b/quarterly_performance_objective/historical_service_hours_v2.ipynb deleted file mode 100644 index d4440265d..000000000 --- a/quarterly_performance_objective/historical_service_hours_v2.ipynb +++ /dev/null @@ -1,659 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "0801efda-e075-4826-a167-a8e54a8167fb", - "metadata": {}, - "source": [ - "# Historical Trends\n", - "\n", - "01 - Increase total amount of service on the SHN and reliability of that service by 2024\n", - "\n", - "## Routes on the State Highway Network (SHN)\n", - "Transit routes along the SHN can be categorized into 3 groups:\n", - "1. **On SHN** - where at least 20% of the transit route runs the SHN (within 50 ft) \n", - "2. **Intersects SHN** - where at least 35% of the transit route runs within 0.5 mile of the SHN.\n", - "3. **Other** - all other transit routes.\n", - "\n", - "## Metrics\n", - "* service hours, service hours per route\n", - "* delay hours, delay hours per route\n", - "\n", - "The metrics are shown for for transit routes **on the SHN** and **intersects SHN**." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "34d1396e-e0c7-4804-8647-4aae76cc300c", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "import warnings\n", - "warnings.filterwarnings('ignore')\n", - "\n", - "import altair as alt\n", - "import calitp_data_analysis.magics\n", - "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "from calitp_data_analysis import calitp_color_palette as cp\n", - "from calitp_data_analysis import styleguide\n", - "import report_metrics\n", - "from shared_utils import rt_dates, portfolio_utils\n", - "from bus_service_utils import chart_utils\n", - "from update_vars import BUS_SERVICE_GCS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0390d418-6861-4e5c-8775-101bf80e97dd", - "metadata": {}, - "outputs": [], - "source": [ - "def quarterly_summary_long(analysis_date: str) -> pd.DataFrame: \n", - " \"\"\"\n", - " For historical report, get a long df of service hours and delay hours \n", - " summary tables.\n", - " \"\"\"\n", - " #df = report_metrics.prep_data_for_report(analysis_date)\n", - " df = gpd.read_parquet(\n", - " f\"{BUS_SERVICE_GCS}routes_categorized_{analysis_date}.parquet\"\n", - " )\n", - " \n", - " \n", - " service_summary = report_metrics.get_service_hours_summary_table(df) \n", - " '''\n", - " delay_summary = (get_delay_summary_table(df)\n", - " .rename(columns = {\"unique_route\": \"delay_unique_route\"})\n", - " )\n", - " ''' \n", - " # Make long\n", - " service_value_vars = [c for c in service_summary.columns if c != \"category\"]\n", - " #delay_value_vars = [c for c in delay_summary.columns if c != \"category\"]\n", - "\n", - " service_long = pd.melt(\n", - " service_summary,\n", - " id_vars = \"category\",\n", - " value_vars = service_value_vars,\n", - " )\n", - " '''\n", - " delay_long = pd.melt(\n", - " delay_summary, \n", - " id_vars = \"category\", \n", - " value_vars = delay_value_vars\n", - " )\n", - " '''\n", - " # Concatenante\n", - " summary = pd.concat([service_long, \n", - " #delay_long\n", - " ], axis=0)\n", - " summary = summary.assign(\n", - " service_date = analysis_date\n", - " )\n", - " \n", - " return summary" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "35403601-0b56-4c2b-a787-844db8fd5176", - "metadata": {}, - "outputs": [], - "source": [ - "def concatenate_summary_across_dates(\n", - " rt_dates_dict: dict, \n", - " summary_dataset: str) -> pd.DataFrame: \n", - " df = pd.DataFrame()\n", - "\n", - " rt_dates_reversed = {value: key for key, value in rt_dates_dict.items()}\n", - "\n", - " for date, quarter in rt_dates_reversed.items():\n", - " if summary_dataset == \"summary\":\n", - " one_quarter = quarterly_summary_long(date)\n", - " \n", - " elif summary_dataset == \"district\":\n", - " one_quarter = district_breakdown_long(date)\n", - " df = pd.concat([df, one_quarter], axis=0)\n", - "\n", - " df = df.assign(\n", - " year_quarter = df.service_date.map(rt_dates_reversed)\n", - " )\n", - "\n", - " df = df.assign(\n", - " quarter = df.year_quarter.str.split('_', expand=True)[0],\n", - " year = df.year_quarter.str.split('_', expand=True)[1].astype(int),\n", - " )\n", - " \n", - " # Get it to be year first\n", - " df = df.assign(\n", - " year_quarter = df.year.astype(str) + ' ' + df.quarter\n", - " )\n", - " \n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cc1eeb8f-d2ae-4152-8624-a7a10012d6bf", - "metadata": {}, - "outputs": [], - "source": [ - "quarterly_metrics_dict = {k: v for k, v in rt_dates.PMAC.items() \n", - " if k != \"Q1_2022\"}\n", - "\n", - "summary_df = concatenate_summary_across_dates(\n", - " quarterly_metrics_dict, summary_dataset = \"summary\")\n", - "\n", - "#summary_df = report_metrics.concatenate_summary_across_dates(\n", - "# quarterly_metrics_dict, summary_dataset = \"summary\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f83b6045-730d-4082-ab6d-5716e879ce09", - "metadata": {}, - "outputs": [], - "source": [ - "def get_statewide_averages(df: pd.DataFrame) -> pd.DataFrame:\n", - "\n", - " var_list = [\"service_hours\", \n", - " #\"delay_hours\", \n", - " \"unique_route\", \n", - " #\"delay_unique_route\"\n", - " ]\n", - "\n", - " group_cols = [\"year_quarter\", \"service_date\", \"year\", \"quarter\"]\n", - "\n", - " all_routes = portfolio_utils.aggregate_by_geography(\n", - " df[df.variable.isin(var_list)],\n", - " group_cols + [\"variable\"],\n", - " sum_cols = [\"value\"]\n", - " )\n", - " \n", - " # Make wide, to calculate average again\n", - " all_routes2 = pd.pivot(all_routes, \n", - " index = group_cols, \n", - " columns = \"variable\", values = \"value\"\n", - " ).reset_index()\n", - " \n", - " all_routes2 = all_routes2.assign(\n", - " service_hours_per_route = (all_routes2.service_hours.divide(\n", - " all_routes2.unique_route)).round(2), \n", - " #delay_hours_per_route = (all_routes2.delay_hours.divide(\n", - " # all_routes2.delay_unique_route)).round(2),\n", - " category = \"All\"\n", - " )\n", - " \n", - " #https://stackoverflow.com/questions/55027108/pandas-rename-index\n", - " # Get rid of column name\n", - " all_routes2.columns.name = \"\"\n", - " \n", - " # Wrangle back to long!\n", - " value_vars = [c for c in all_routes2.columns if c != \"category\" and \n", - " c not in group_cols\n", - " ]\n", - "\n", - " all_routes3 = pd.melt(\n", - " all_routes2, \n", - " id_vars = group_cols + [\"category\"],\n", - " var_name = \"variable\",\n", - " value_vars = value_vars \n", - " )\n", - " \n", - " return all_routes3" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a65763be-f6e8-4ea4-bcf0-62e0ce3dd811", - "metadata": {}, - "outputs": [], - "source": [ - "statewide_avg = get_statewide_averages(summary_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9b7e294-74b9-451d-b169-6eca6a649d72", - "metadata": {}, - "outputs": [], - "source": [ - "# chart utils\n", - "HEIGHT = 250\n", - "WIDTH = 200\n", - "\n", - "def base_quarterly_bar(df: pd.DataFrame, variable: str,\n", - " x_col: str, y_col: str) -> alt.Chart:\n", - " \n", - " bar = (alt.Chart(df)\n", - " .mark_bar()\n", - " .encode(\n", - " x=alt.X(f\"{x_col}:O\", \n", - " # formatting for quarters is weird, construct our own string\n", - " #axis=alt.Axis(format='Q%q-%Y'), \n", - " title = None),\n", - " y=alt.Y(f\"{y_col}:Q\", title = chart_utils.labeling(variable)),\n", - " )\n", - " )\n", - " \n", - " return bar" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bcf6de1e-b4e2-488b-8310-e9fe7e82184a", - "metadata": {}, - "outputs": [], - "source": [ - "def quarterly_bar_for_category(\n", - " df: pd.DataFrame, \n", - " variable_list: list = [\"service_hours\", \"delay_hours\"], \n", - " category: str = \"On SHN\",\n", - " x_col: str = \"year_quarter\", \n", - " y_col: str = \"value\", \n", - " chart_height: int = 200, chart_width: int = 500,\n", - ") -> alt.Chart: \n", - " \"\"\"\n", - " Plot quarterly metrics within the same category.\n", - " Ex: for all routes on SHN, show service hours, delay hours, \n", - " avg service hours, etc\n", - " \"\"\"\n", - " subset = df[(df.variable.isin(variable_list)) & \n", - " (df.category==category)]\n", - " \n", - " var1 = variable_list[0]\n", - " df1 = subset[subset.variable==var1]\n", - " #var2 = variable_list[1]\n", - " #df2 = subset[subset.variable==var2]\n", - "\n", - " category = df1.category.iloc[0]\n", - " \n", - " color_dict = {\n", - " \"On SHN\": cp.CALITP_CATEGORY_BRIGHT_COLORS[4],\n", - " \"Intersects SHN\": cp.CALITP_CATEGORY_BRIGHT_COLORS[3],\n", - " \"All\": cp.CALITP_CATEGORY_BRIGHT_COLORS[0],\n", - " \"service_hours\": cp.CALITP_CATEGORY_BRIGHT_COLORS[4], # light blue\n", - " \"delay_hours\": cp.CALITP_CATEGORY_BRIGHT_COLORS[1], # light orange\n", - " \"service_hours_per_route\": cp.CALITP_CATEGORY_BRIGHT_COLORS[0], # med blue\n", - " \"delay_hours_per_route\": cp.CALITP_CATEGORY_BOLD_COLORS[1], # dark orange\n", - " }\n", - " \n", - " tooltip = ['year', 'quarter', 'year_quarter', \n", - " 'variable', 'category', 'value']\n", - " \n", - " bar1 = (base_quarterly_bar(df1, var1, x_col, y_col)\n", - " .encode(color = alt.value(color_dict[var1]), \n", - " tooltip = tooltip)\n", - " .properties(title={\n", - " \"text\": f\"{chart_utils.labeling(var1)}\",\n", - " \"subtitle\": f\"{category}\"\n", - " }, width = chart_width, height = chart_height)\n", - " )\n", - " \n", - " '''\n", - " bar2 = (base_quarterly_bar(df2, var2, x_col, y_col)\n", - " .encode(color=alt.value(color_dict[var2]), \n", - " tooltip = tooltip)\n", - " .properties(title={\n", - " \"text\": f\"{chart_utils.labeling(var2)}\",\n", - " \"subtitle\": f\"{category}\"\n", - " }, width = chart_width, height = chart_height)\n", - " )\n", - " '''\n", - " if var1 == \"service_hours\":\n", - " space = 0\n", - " else:\n", - " space = 25\n", - " combined = (styleguide.apply_chart_config(alt.hconcat(bar1, #bar2, \n", - " spacing=space))\n", - " .resolve_scale(y=\"independent\")\n", - " )\n", - "\n", - " return combined" - ] - }, - { - "cell_type": "markdown", - "id": "38b33eff-e35d-45f6-8338-efc20fe77e12", - "metadata": {}, - "source": [ - "## All Routes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a71a4b63-f133-4800-b754-3c5390425960", - "metadata": {}, - "outputs": [], - "source": [ - "category = \"All\"\n", - "var_list = [\"service_hours_per_route\", \n", - " #\"delay_hours_per_route\"\n", - " ]\n", - "\n", - "s1 = quarterly_bar_for_category(\n", - " statewide_avg,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")\n", - "\n", - "\n", - "var_list = [\"service_hours\", \n", - " #\"delay_hours\"\n", - " ]\n", - "\n", - "s2 = quarterly_bar_for_category(\n", - " statewide_avg,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c73c9542-f54d-44ee-92cc-80a751864de8", - "metadata": {}, - "outputs": [], - "source": [ - "s1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3928dcc1-58e7-4a1b-b91f-8e8c5044eb7c", - "metadata": {}, - "outputs": [], - "source": [ - "s2" - ] - }, - { - "cell_type": "markdown", - "id": "aa5564b6-6056-4dbc-8f36-193d7cf9043c", - "metadata": {}, - "source": [ - "## Routes on SHN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3b929dd-f394-457f-b57d-2a292f7f3fd8", - "metadata": {}, - "outputs": [], - "source": [ - "category = \"On SHN\"\n", - "var_list = [\"service_hours_per_route\", \n", - " #\"delay_hours_per_route\"\n", - " ]\n", - "\n", - "o1 = quarterly_bar_for_category(\n", - " summary_df,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")\n", - "\n", - "var_list = [\"service_hours\", \n", - " #\"delay_hours\"\n", - " ]\n", - "\n", - "o2 = quarterly_bar_for_category(\n", - " summary_df,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e8990963-9167-439b-a929-e6ef9107256e", - "metadata": {}, - "outputs": [], - "source": [ - "o1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f154b08-d722-4959-be45-32bd93560576", - "metadata": {}, - "outputs": [], - "source": [ - "o2" - ] - }, - { - "cell_type": "markdown", - "id": "e69f82d3-0dd4-454c-b89b-70f290c6f590", - "metadata": {}, - "source": [ - "## Routes Intersecting SHN" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9243c9f7-d096-4c2f-bc24-c3947ef2e1aa", - "metadata": {}, - "outputs": [], - "source": [ - "category = \"Intersects SHN\"\n", - "var_list = [\"service_hours_per_route\", \n", - " #\"delay_hours_per_route\"\n", - " ]\n", - "\n", - "i1 = quarterly_bar_for_category(\n", - " summary_df,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")\n", - "\n", - "var_list = [\"service_hours\", \n", - " #\"delay_hours\"\n", - " ]\n", - "\n", - "i2 = quarterly_bar_for_category(\n", - " summary_df,\n", - " variable_list = var_list, \n", - " category = category,\n", - " x_col = \"year_quarter\",\n", - " y_col = \"value\",\n", - " chart_height = HEIGHT, chart_width = WIDTH\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7b40537-948b-4871-95b7-f73d3cd20410", - "metadata": {}, - "outputs": [], - "source": [ - "i1" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "91447b75-d247-4bf5-b522-7b9f22a94a17", - "metadata": {}, - "outputs": [], - "source": [ - "i2" - ] - }, - { - "cell_type": "markdown", - "id": "60fc11da-799b-46d6-b682-5158c4705fe9", - "metadata": {}, - "source": [ - "## Routes on SHN by District" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2ebf727b-dccc-4840-babc-fdd55bbba21a", - "metadata": {}, - "outputs": [], - "source": [ - "def district_breakdown_long(analysis_date: str) -> pd.DataFrame: \n", - " \"\"\"\n", - " For historical report, get a long df of service hours and delay hours \n", - " summary tables.\n", - " \"\"\"\n", - " df = gpd.read_parquet(\n", - " f\"{BUS_SERVICE_GCS}routes_categorized_{analysis_date}.parquet\")\n", - " \n", - " by_district_summary = report_metrics.by_district_on_shn_breakdown(\n", - " df, sum_cols = [\"service_hours\", \"unique_route\"])\n", - " '''\n", - " by_district_delay = by_district_on_shn_breakdown(\n", - " df, sum_cols = [\"delay_hours\", \"unique_route\"]\n", - " ).rename(columns = {\"unique_route\": \"delay_unique_route\"})\n", - " ''' \n", - " # Make long\n", - " service_value_vars = [c for c in by_district_summary.columns if c != 'district']\n", - " #delay_value_vars = [c for c in by_district_delay.columns if c != 'district']\n", - "\n", - " service_long = pd.melt(\n", - " by_district_summary,\n", - " id_vars = \"district\",\n", - " value_vars = service_value_vars,\n", - " )\n", - " '''\n", - " delay_long = pd.melt(\n", - " by_district_delay, \n", - " id_vars = \"district\", \n", - " value_vars = delay_value_vars\n", - " )\n", - " '''\n", - " # Concatenante\n", - " summary = pd.concat([service_long, \n", - " #delay_long\n", - " ], axis=0)\n", - " summary = summary.assign(\n", - " service_date = analysis_date\n", - " )\n", - " \n", - " return summary" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "42d89815-5960-4b82-872f-3f2a04ba1c2a", - "metadata": {}, - "outputs": [], - "source": [ - "district_df = concatenate_summary_across_dates(\n", - " quarterly_metrics_dict, summary_dataset=\"district\")\n", - " \n", - "#district_df = report_metrics.concatenate_summary_across_dates(\n", - "# quarterly_metrics_dict, summary_dataset = \"district\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "497496fb-63d6-4fa8-a4d1-d3e8fd3673dc", - "metadata": {}, - "outputs": [], - "source": [ - "def facet_by_district(df: pd.DataFrame, variable: str) -> alt.Chart:\n", - " subset = df[df.variable == variable]\n", - " \n", - " bar = (alt.Chart(subset)\n", - " .mark_bar()\n", - " .encode(\n", - " x=alt.X(f\"year_quarter:O\"),\n", - " y = alt.Y(\"value:Q\", title=f\"{variable.replace('_', ' '.title())}\"),\n", - " color = alt.Color(\"district:N\", title=None, \n", - " scale = alt.Scale(\n", - " range = cp.CALITP_CATEGORY_BRIGHT_COLORS + \n", - " cp.CALITP_CATEGORY_BOLD_COLORS), legend=None),\n", - " tooltip = [\"district\", \"year_quarter\", \"value\", \"variable\"]\n", - " ).facet(facet=\"district:N\", columns = 1, spacing=10, \n", - " title = f\"{variable.replace('_', ' ').title()}\")\n", - " .interactive()\n", - " )\n", - " \n", - " return bar" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8fa9eef8-ee56-42cf-9110-e445f223025d", - "metadata": {}, - "outputs": [], - "source": [ - "bar1 = facet_by_district(district_df, \"avg_service_hours\")\n", - "#bar2 = facet_by_district(district_df, \"avg_delay_hours\")\n", - "\n", - "district_chart = styleguide.apply_chart_config(alt.hconcat(bar1, \n", - " #bar2\n", - " ))\n", - "district_chart" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3444fae3-7ca5-410c-adc3-4def6c830901", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/quarterly_performance_objective/report_charts.py b/quarterly_performance_objective/report_charts.py deleted file mode 100644 index 0e35dfaf6..000000000 --- a/quarterly_performance_objective/report_charts.py +++ /dev/null @@ -1,97 +0,0 @@ -import altair as alt -import pandas as pd - -from typing import List, Literal - -from calitp_data_analysis import styleguide -from calitp_data_analysis import calitp_color_palette as cp - -def base_bar(df: pd.DataFrame, x_col: str) -> alt.Chart: - chart = (alt.Chart(df) - .mark_bar() - .encode( - x=alt.X(f"{x_col}:N", title=f"{x_col.title()}") - ) - ) - return chart - - -def make_district_bar(df: pd.DataFrame, y_col: str) -> alt.Chart: - """ - Make bar chart that's total service hours or - average service hours by district. - """ - y_title = f"{y_col.replace('_', ' ').title()}" - - if y_col == "service_hours": - value_format = ",.0f" - elif y_col == "avg_delay_hours": - value_format = ",.1f" - else: - value_format = ",.1f" - - Y_MAX = df[y_col].max() * 1.1 - - bar = base_bar(df, x_col="district") - - bar = (bar.encode( - y=alt.Y(f"{y_col}:Q", title=f"{y_title}", - scale=alt.Scale(domain=[0, Y_MAX]), - axis=None - ), - color=alt.Color("district:N", - scale=alt.Scale( - range=cp.CALITP_CATEGORY_BRIGHT_COLORS - ), legend=None - ) - ) - ) - #https://stackoverflow.com/questions/54015250/altair-setting-constant-label-color-for-bar-chart - text = (bar - .mark_text(align="center", baseline="bottom", - color="black", dy=-5 - ) - .encode(text=alt.Text(y_col, format=value_format), - # Set color here, because encoding for mark_text gets - # superseded by alt.Color - color=alt.value("black"), - tooltip=["district:N", - alt.Tooltip(f"{y_col}:Q", format=",", - title=f"{y_col.replace('_', ' '.title())}" - )] - - ) - ) - - chart = ((bar + text) - .properties(title = { - "text": f"{y_title} by District", - "subtitle": "Routes on SHN" - }).interactive() - ) - - return chart - - -def configure_hconcat_charts( - my_chart_list: List[alt.Chart], - x_scale: Literal["independent", "shared"] = "independent", - y_scale: Literal["independent", "shared"] = "independent", - chart_title: str = "Title" -) -> alt.Chart: - """ - Horizontally concatenate altair charts, - and also add top-level configurations after hconcat is done - """ - combined = (alt.hconcat(*my_chart_list) - .resolve_scale(x = x_scale, y = y_scale) - ) - - - combined = (styleguide.apply_chart_config(combined) - .properties(title = chart_title) - .configure_axis(grid=False) - .configure_view(strokeWidth=0) - ) - - return combined \ No newline at end of file diff --git a/quarterly_performance_objective/report_metrics.py b/quarterly_performance_objective/report_metrics.py deleted file mode 100644 index 47632b233..000000000 --- a/quarterly_performance_objective/report_metrics.py +++ /dev/null @@ -1,249 +0,0 @@ -""" -Functions to calculate summary stats for report. - -Two reports: current quarter, historical report. -Since we need to the same dataset across notebooks, -generate the various pieces needed in the report too. -""" -import geopandas as gpd -import intake -import pandas as pd - -from typing import Literal - -from shared_utils import portfolio_utils - -catalog = intake.open_catalog("*.yml") - -def aggregate_calculate_percent_and_average( - df: pd.DataFrame, - group_cols: list, - sum_cols: list) -> pd.DataFrame: - """ - Create columns with pct values. - """ - agg_df = portfolio_utils.aggregate_by_geography( - df, - group_cols = group_cols, - sum_cols = sum_cols, - ) - - for c in sum_cols: - new_col = f"pct_{c}" - agg_df[new_col] = (agg_df[c] / agg_df[c].sum()).round(3) - agg_df[c] = agg_df[c].round(0) - - return agg_df - -#https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas -def sort_by_column(df: pd.DataFrame, - col: str = "category", - sort_key: list = ["on_shn", "intersects_shn", "other"] - ) -> pd.DataFrame: - # Custom sort order for categorical variable - df = df.sort_values( - col, key=lambda c: c.map(lambda e: sort_key.index(e))) - return df - - -def clean_up_category_values(df: pd.DataFrame) -> pd.DataFrame: - category_values = { - "on_shn": "On SHN", - "intersects_shn": "Intersects SHN", - "other": "Other" - } - - df = df.assign( - category = df.category.map(category_values) - ) - - return df - - -def prep_data_for_report(analysis_date: str) -> gpd.GeoDataFrame: - # https://stackoverflow.com/questions/69781678/intake-catalogue-level-parameters - - return catalog.routes_categorized_with_speed( - analysis_date = analysis_date).read() - -def get_service_hours_summary_table(df: pd.DataFrame)-> pd.DataFrame: - """ - Aggregate by parallel/on_shn/other category. - Calculate number and pct of service hours, routes. - """ - - summary = aggregate_calculate_percent_and_average( - df, - group_cols = ["category"], - sum_cols = ["service_hours", "unique_route"] - ).astype({"unique_route": int}) - - summary = sort_by_column(summary).pipe(clean_up_category_values) - - summary = summary.assign( - service_hours_per_route = round(summary.service_hours / - summary.unique_route, 2) - ) - - return summary - - -def get_delay_summary_table(df: pd.DataFrame) -> pd.DataFrame: - # Note: merge_delay both narrows down the dataset quite a bit - delay_df = df[df.rt_sched_category=="schedule_and_vp"] - - delay_summary = aggregate_calculate_percent_and_average( - delay_df, - group_cols = ["category"], - sum_cols = ["delay_hours", "unique_route"] - ).astype({"unique_route": int}) - - delay_summary = (sort_by_column(delay_summary) - .pipe(clean_up_category_values) - ) - - delay_summary = delay_summary.assign( - delay_hours_per_route = round(delay_summary.delay_hours / - delay_summary.unique_route, 2) - ) - - return delay_summary - - -def by_district_on_shn_breakdown(df: pd.DataFrame, - sum_cols: list) -> pd.DataFrame: - """ - Get service hours or delay hours by district, and - add in percent and average metrics. - """ - by_district = aggregate_calculate_percent_and_average( - df[df.category=="on_shn"], - group_cols = ["district"], - sum_cols = sum_cols - ).astype(int).sort_values("district").reset_index(drop=True) - - # Calculate average - if "service_hours" in by_district.columns: - numerator_col = "service_hours" - elif "delay_hours" in by_district.columns: - numerator_col = "delay_hours" - - by_district = by_district.assign( - avg = by_district[numerator_col].divide( - by_district.unique_route).round(1) - ).rename(columns = {"avg": f"avg_{numerator_col}"}) - - return by_district - - -def quarterly_summary_long(analysis_date: str) -> pd.DataFrame: - """ - For historical report, get a long df of service hours and delay hours - summary tables. - """ - df = prep_data_for_report(analysis_date) - - service_summary = get_service_hours_summary_table(df) - delay_summary = (get_delay_summary_table(df) - .rename(columns = {"unique_route": "delay_unique_route"}) - ) - - # Make long - service_value_vars = [c for c in service_summary.columns if c != "category"] - delay_value_vars = [c for c in delay_summary.columns if c != "category"] - - service_long = pd.melt( - service_summary, - id_vars = "category", - value_vars = service_value_vars, - ) - - delay_long = pd.melt( - delay_summary, - id_vars = "category", - value_vars = delay_value_vars - ) - - # Concatenante - summary = pd.concat([service_long, delay_long], axis=0) - summary = summary.assign( - service_date = analysis_date - ) - - return summary - - -def district_breakdown_long(analysis_date: str) -> pd.DataFrame: - """ - For historical report, get a long df of service hours and delay hours - summary tables. - """ - df = prep_data_for_report(analysis_date) - - by_district_summary = by_district_on_shn_breakdown( - df, sum_cols = ["service_hours", "unique_route"]) - - by_district_delay = by_district_on_shn_breakdown( - df, sum_cols = ["delay_hours", "unique_route"] - ).rename(columns = {"unique_route": "delay_unique_route"}) - - # Make long - service_value_vars = [c for c in by_district_summary.columns if c != 'district'] - delay_value_vars = [c for c in by_district_delay.columns if c != 'district'] - - service_long = pd.melt( - by_district_summary, - id_vars = "district", - value_vars = service_value_vars, - ) - - delay_long = pd.melt( - by_district_delay, - id_vars = "district", - value_vars = delay_value_vars - ) - - # Concatenante - summary = pd.concat([service_long, delay_long], axis=0) - summary = summary.assign( - service_date = analysis_date - ) - - return summary - - -def concatenate_summary_across_dates( - rt_dates_dict: dict, - summary_dataset: Literal["summary", "district"], -) -> pd.DataFrame: - """ - Loop across dates available for quarterly performance metrics, - and concatenate into 1 long df. - """ - df = pd.DataFrame() - - rt_dates_reversed = {value: key for key, value in rt_dates_dict.items()} - - for date, quarter in rt_dates_reversed.items(): - if summary_dataset == "summary": - one_quarter = quarterly_summary_long(date) - - elif summary_dataset == "district": - one_quarter = district_breakdown_long(date) - df = pd.concat([df, one_quarter], axis=0) - - df = df.assign( - year_quarter = df.service_date.map(rt_dates_reversed) - ) - - df = df.assign( - quarter = df.year_quarter.str.split('_', expand=True)[0], - year = df.year_quarter.str.split('_', expand=True)[1].astype(int), - ) - - # Get it to be year first - df = df.assign( - year_quarter = df.year.astype(str) + ' ' + df.quarter - ) - - return df \ No newline at end of file From bc1e31e7a8b17df2d182f5ea4c8a94b999ee5fc9 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 21 Jun 2024 00:06:10 +0000 Subject: [PATCH 07/10] remove script to backfill, new script selects routes the same way --- .../download_trips_v2_backfill.py | 173 ------------------ 1 file changed, 173 deletions(-) delete mode 100644 quarterly_performance_objective/download_trips_v2_backfill.py diff --git a/quarterly_performance_objective/download_trips_v2_backfill.py b/quarterly_performance_objective/download_trips_v2_backfill.py deleted file mode 100644 index 098dea1cd..000000000 --- a/quarterly_performance_objective/download_trips_v2_backfill.py +++ /dev/null @@ -1,173 +0,0 @@ -""" -Download all trips and shapes for a day for v2. - -Use this to figure out why v1 and v2 aggregate service hours are so different.s -""" -import os -os.environ["CALITP_BQ_MAX_BYTES"] = str(300_000_000_000) - -import datetime as dt -import pandas as pd -import sys - -from calitp.tables import tbls -from siuba import * -from loguru import logger - -from shared_utils import (gtfs_utils_v2, gtfs_utils, - rt_dates) -from calitp_data_analysis import utils, geography_utils -from update_vars import COMPILED_CACHED_GCS - -def scheduled_operators(analysis_date: str): - """ - This is how HQTA data is downloaded...so do the same here for backfill. - """ - all_operators = gtfs_utils_v2.schedule_daily_feed_to_organization( - selected_date = analysis_date, - keep_cols = None, - get_df = True, - feed_option = "use_subfeeds" - ) - - keep_cols = ["feed_key", "name"] - - operators_to_include = all_operators[keep_cols] - - # There shouldn't be any duplicates by name, since we got rid - # of precursor feeds. But, just in case, don't allow dup names. - operators_to_include = (operators_to_include - .drop_duplicates(subset="name") - .reset_index(drop=True) - ) - - return operators_to_include - - -if __name__=="__main__": - logger.add("./logs/download_trips_v2_backfill.log") - logger.add(sys.stderr, - format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", - level="INFO") - - analysis_date = rt_dates.PMAC["Q2_2022"] - VERSION = "v2" - logger.info(f"Analysis date: {analysis_date} warehouse {VERSION}") - - start = dt.datetime.now() - - if VERSION == "v1": - ITP_IDS = (tbls.gtfs_schedule.agency() - >> distinct(_.calitp_itp_id) - >> filter(_.calitp_itp_id != 200) - >> collect() - ).calitp_itp_id.tolist() - - IDS_TO_RUN = sorted(ITP_IDS) - - logger.info(f"# operators to run: {len(IDS_TO_RUN)}") - - dataset = "trips" - logger.info(f"*********** Download {dataset} data ***********") - - - keep_trip_cols = [ - "calitp_itp_id", "calitp_url_number", - "service_date", "trip_key", "trip_id", - "route_id", "direction_id", "shape_id", - "calitp_extracted_at", "calitp_deleted_at", - "trip_first_departure_ts", "trip_last_arrival_ts", - "service_hours" - ] - - trips = gtfs_utils.get_trips( - selected_date = analysis_date, - itp_id_list = IDS_TO_RUN, - trip_cols = keep_trip_cols, - get_df = True, - ) - - trips.to_parquet( - f"{COMPILED_CACHED_GCS}{dataset}_{analysis_date}_{VERSION}.parquet") - - trips = pd.read_parquet( - f"{COMPILED_CACHED_GCS}trips_{analysis_date}_{VERSION}.parquet") - - dataset = "routelines" - logger.info(f"*********** Download {dataset} data ***********") - - routelines = gtfs_utils.get_route_shapes( - selected_date = analysis_date, - itp_id_list = IDS_TO_RUN, - get_df = True, - crs = geography_utils.CA_NAD83Albers, - trip_df = trips - )[["calitp_itp_id", "calitp_url_number", "shape_id", "geometry"]] - - utils.geoparquet_gcs_export( - routelines, - COMPILED_CACHED_GCS, - f"{dataset}_{analysis_date}_{VERSION}.parquet" - ) - - - - elif VERSION == "v2": - operators_df = scheduled_operators(analysis_date) - - FEEDS_TO_RUN = sorted(operators_df.feed_key.unique().tolist()) - - logger.info(f"# operators to run: {len(FEEDS_TO_RUN)}") - - dataset = "trips" - logger.info(f"*********** Download {dataset} data ***********") - - keep_trip_cols = [ - "feed_key", "name", "regional_feed_type", - "service_date", "trip_key", "trip_id", - "route_key", "route_id", "route_type", - "route_short_name", "route_long_name", "route_desc", - "direction_id", - "shape_array_key", "shape_id", - "trip_first_departure_sec", "trip_last_arrival_sec", - "service_hours" - ] - - trips = gtfs_utils_v2.get_trips( - selected_date = analysis_date, - operator_feeds = FEEDS_TO_RUN, - trip_cols = keep_trip_cols, - get_df = True, - ) - - trips.to_parquet( - f"{COMPILED_CACHED_GCS}{dataset}_{analysis_date}_{VERSION}.parquet") - - dataset = "routelines" - logger.info(f"*********** Download {dataset} data ***********") - - keep_shape_cols = [ - "feed_key", - "shape_id", "shape_array_key", - "n_trips", - # n_trips is new column...can help if we want - # to choose between shape_ids - # geometry already returned when get_df is True - ] - - routelines = gtfs_utils_v2.get_shapes( - selected_date = analysis_date, - operator_feeds = FEEDS_TO_RUN, - shape_cols = keep_shape_cols, - get_df = True, - crs = geography_utils.CA_NAD83Albers, - ) - - utils.geoparquet_gcs_export( - routelines, - COMPILED_CACHED_GCS, - f"{dataset}_{analysis_date}_{VERSION}.parquet" - ) - - end = dt.datetime.now() - logger.info(f"execution time: {end - start}") From 7b950bcb7e578a5a2e28606eb35d282222293d98 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 21 Jun 2024 00:06:44 +0000 Subject: [PATCH 08/10] update requirements for quarto, great_tables, polars --- _shared_utils/requirements.txt | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/_shared_utils/requirements.txt b/_shared_utils/requirements.txt index 8b78c73c8..4d32bf763 100644 --- a/_shared_utils/requirements.txt +++ b/_shared_utils/requirements.txt @@ -1,8 +1,10 @@ -e . +altair==5.3.0 +altair-transform==0.2.0 gtfs-segments==0.1.0 pyairtable==2.2.2 -great-tables==0.4.0 -polars==0.20.16 +great_tables==0.6.1 omegaconf==2.3.0 # better yaml configuration -altair==5.3.0 -altair-transform==0.2.0 +polars==0.20.29 +quarto-cli==1.4.554 +quarto==0.1.0 From cff8e100e2a4c05dc035c2fcd5edb36d609d56c9 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 21 Jun 2024 00:06:58 +0000 Subject: [PATCH 09/10] remove requirements, move to shared_utils --- quarterly_performance_objective/requirements.txt | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 quarterly_performance_objective/requirements.txt diff --git a/quarterly_performance_objective/requirements.txt b/quarterly_performance_objective/requirements.txt deleted file mode 100644 index 72fb4a000..000000000 --- a/quarterly_performance_objective/requirements.txt +++ /dev/null @@ -1,4 +0,0 @@ -great_tables==0.6.1 -polars==0.20.29 -quarto-cli==1.4.554 -quarto==0.1.0 \ No newline at end of file From a130209545bf064645fe32bb96d38201d5123682 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 21 Jun 2024 00:20:13 +0000 Subject: [PATCH 10/10] publish report as html --- quarterly_performance_objective/Makefile | 13 +- .../new_report.html | 6743 +++++++++++++++++ .../new_report.ipynb | 333 - .../new_report.qmd | 25 +- 4 files changed, 6778 insertions(+), 336 deletions(-) create mode 100644 quarterly_performance_objective/new_report.html delete mode 100644 quarterly_performance_objective/new_report.ipynb diff --git a/quarterly_performance_objective/Makefile b/quarterly_performance_objective/Makefile index 26f478310..a6232d36a 100644 --- a/quarterly_performance_objective/Makefile +++ b/quarterly_performance_objective/Makefile @@ -2,4 +2,15 @@ quarterly_performance_report: #cd ../rt_segment_speeds/ && make && pip install -r requirements.txt && cd .. python clean_data.py python compile_time_series.py - #cd ../ && make build_quarterly_performance_metrics -f Makefile \ No newline at end of file + #cd ../ && make build_quarterly_performance_metrics -f Makefile + +quarto_report: + # this renders as html + #quarto render report.ipynb --execute + # to convert ipynb to qmd + quarto convert report.ipynb + # to convert qmd to ipynb + quarto convert report.qmd + #https://quarto.org/docs/computations/parameters.html#jupyter couldn't get this to work + #quarto render report.qmd --execute-params params.yml + quarto publish report.qmd \ No newline at end of file diff --git a/quarterly_performance_objective/new_report.html b/quarterly_performance_objective/new_report.html new file mode 100644 index 000000000..0e549b89b --- /dev/null +++ b/quarterly_performance_objective/new_report.html @@ -0,0 +1,6743 @@ + + + + + + + + + +Quarterly Performance Metrics + + + + + + + + + + + + + + + + + + + + + + +
+ +
+ +
+
+

Quarterly Performance Metrics

+
+ + + +
+ + + + +
+ + + +
+ + +
+

Statewide Metrics

+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Service Hours and Speed
2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN10,999.522.6486
+ + + +47.022.322.622.622.422.322.522.6 + +
22.6280
+ + + +23.014.022.322.522.522.122.222.6 + +
Intersects SHN41,127.336.51,128
+ + + +47.023.037.537.336.836.036.236.5 + +
14.0817
+ + + +23.013.414.114.114.013.813.414.0 + +
Other20,060.246.5431
+ + + +47.023.046.246.145.444.645.246.5 + +
14.7308
+ + + +23.014.014.614.614.714.514.214.7 + +
On or Intersects SHN52,126.832.31,614
+ + + +47.023.033.233.032.532.032.232.3 + +
16.21,096
+ + + +23.014.016.016.216.215.915.616.2 + +
Total72,187.035.32,045
+ + + +47.023.035.935.835.234.635.035.3 + +
15.91,404
+ + + +23.014.015.715.815.915.615.315.9 + +
+ + +
+ +
+
+
+
+
+

District Breakdown

+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 01 - Eureka
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN123.110.312
+ + + +19.09.599.719.599.7710.410.210.3 + +
31.212
+ + + +31.216.031.230.629.628.829.431.2 + +
Intersects SHN127.59.813
+ + + +19.09.079.079.089.729.499.699.81 + +
15.912
+ + + +31.015.020.116.317.416.215.016.0 + +
Other56.418.83
+ + + +19.010.015.915.915.416.418.818.8 + +
On or Intersects SHN250.610.025
+ + + +19.09.339.389.339.759.919.9110.0 + +
23.424
+ + + +31.016.025.623.823.622.221.723.4 + +
Total269.410.426
+ + + +19.09.859.899.8510.210.510.710.4 + +
23.424
+ + + +31.016.025.623.823.622.221.723.4 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 02 - Redding
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN240.28.030
+ + + +10.07.707.737.817.727.707.968.01 + +
22.48
+ + + +22.418.020.120.820.419.221.822.4 + +
Intersects SHN109.39.711
+ + + +10.08.009.819.789.789.789.729.65 + +
17.77
+ + + +22.016.918.017.817.817.816.917.7 + +
On or Intersects SHN349.58.541
+ + + +10.08.008.408.448.388.348.488.46 + +
20.215
+ + + +22.018.018.919.319.018.419.320.2 + +
Total349.58.541
+ + + +10.08.008.288.318.298.228.408.46 + +
20.215
+ + + +22.018.018.919.319.018.419.320.2 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 03 - Marysville
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN469.67.265
+ + + +17.07.007.648.048.328.378.387.19 + +
23.727
+ + + +24.014.022.623.023.623.023.323.7 + +
Intersects SHN1,025.215.268
+ + + +17.07.0012.816.416.316.916.015.2 + +
13.939
+ + + +24.013.914.114.314.414.414.013.9 + +
Other440.617.425
+ + + +24.57.0010.822.724.523.521.317.4 + +
17.517
+ + + +24.014.018.817.316.616.916.817.5 + +
On or Intersects SHN1,494.811.2133
+ + + +17.07.0010.112.612.913.212.511.2 + +
17.966
+ + + +24.014.017.618.018.017.517.617.9 + +
Total1,935.412.2158
+ + + +17.07.0010.214.315.015.014.012.2 + +
17.883
+ + + +24.014.017.917.817.717.417.417.8 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 04 - Oakland
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN2,580.634.675
+ + + +38.031.036.036.234.734.835.034.6 + +
17.158
+ + + +18.313.017.618.318.317.917.417.2 + +
Intersects SHN10,239.131.1329
+ + + +38.031.031.833.832.032.132.331.1 + +
13.7268
+ + + +17.013.013.513.813.613.313.013.6 + +
Other2,723.138.072
+ + + +41.831.040.141.840.540.641.338.0 + +
13.161
+ + + +17.012.613.213.513.412.712.613.1 + +
On or Intersects SHN12,819.731.8404
+ + + +38.031.032.634.332.532.632.831.8 + +
14.3326
+ + + +17.013.014.214.614.514.213.814.3 + +
Total15,542.832.7475
+ + + +38.031.033.835.533.733.834.132.7 + +
14.1388
+ + + +17.013.014.014.414.314.013.614.1 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 05 - San Luis Obispo
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN865.117.749
+ + + +18.014.015.214.015.414.315.617.7 + +
24.532
+ + + +24.514.022.622.821.524.123.824.5 + +
Intersects SHN958.213.969
+ + + +18.013.513.714.215.814.113.513.9 + +
14.346
+ + + +24.013.713.713.713.814.013.814.3 + +
Other150.514.111
+ + + +18.011.111.212.211.113.712.814.1 + +
17.86
+ + + +24.014.016.116.816.917.818.617.8 + +
On or Intersects SHN1,823.315.4118
+ + + +18.014.014.214.215.614.214.315.4 + +
18.577
+ + + +24.014.016.716.716.718.217.618.5 + +
Total1,973.815.3129
+ + + +18.014.014.014.015.214.114.215.3 + +
18.483
+ + + +24.014.016.616.716.718.217.718.4 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 06 - Fresno
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN493.111.842
+ + + +34.011.613.012.311.911.611.911.8 + +
30.612
+ + + +31.015.029.830.629.729.530.430.6 + +
Intersects SHN1,308.022.857
+ + + +34.012.028.323.225.625.825.222.8 + +
16.331
+ + + +31.015.015.616.415.415.715.316.3 + +
Other860.234.025
+ + + +45.412.045.433.337.637.438.534.0 + +
15.415
+ + + +31.014.214.215.014.414.514.415.4 + +
On or Intersects SHN1,801.118.299
+ + + +34.012.022.418.620.119.919.818.2 + +
20.343
+ + + +31.015.019.320.319.419.519.520.3 + +
Total2,661.321.4124
+ + + +34.012.028.121.924.224.024.021.4 + +
19.058
+ + + +31.015.017.718.917.818.118.119.0 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 07 - Los Angeles
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN3,241.640.979
+ + + +78.036.638.639.837.837.236.640.9 + +
19.464
+ + + +21.713.021.720.620.619.419.219.4 + +
Intersects SHN17,208.958.5294
+ + + +78.041.060.660.158.458.357.258.5 + +
13.1206
+ + + +19.012.413.613.013.212.912.413.1 + +
Other10,475.478.0134
+ + + +78.041.070.472.571.467.970.678.0 + +
13.1110
+ + + +19.012.813.813.413.412.912.813.1 + +
On or Intersects SHN20,450.554.8373
+ + + +78.041.055.655.553.853.652.754.8 + +
14.6270
+ + + +19.013.015.514.915.014.414.114.6 + +
Total30,925.960.9508
+ + + +78.041.059.860.458.657.757.860.9 + +
14.1380
+ + + +19.013.015.014.514.614.013.714.1 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 08 - San Bernardino
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN1,193.421.456
+ + + +28.019.119.120.722.521.421.121.4 + +
25.627
+ + + +28.617.028.627.427.626.925.825.6 + +
Intersects SHN1,995.128.171
+ + + +28.121.025.626.927.827.127.128.1 + +
17.048
+ + + +26.016.217.817.617.317.016.217.0 + +
Other1,596.423.867
+ + + +28.021.024.424.423.823.423.223.8 + +
19.831
+ + + +26.017.021.721.620.520.019.119.8 + +
On or Intersects SHN3,188.525.2127
+ + + +28.021.023.124.425.524.724.625.2 + +
20.175
+ + + +26.017.021.220.720.620.119.220.1 + +
Total4,784.924.7194
+ + + +28.021.023.524.424.924.224.124.7 + +
20.0106
+ + + +26.017.021.320.920.620.119.220.0 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 09 - Bishop
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN71.69.38
+ + + +10.16.619.507.826.618.7310.19.34 + +
42.45
+ + + +56.312.0NA41.556.343.440.742.4 + +
Intersects SHN33.210.03
+ + + +27.58.0013.314.827.517.010.29.96 + +
11.82
+ + + +42.06.216.2112.013.814.911.511.8 + +
Other7.57.51
+ + + +18.57.5018.57.50 + +
18.31
+ + + +42.012.016.118.3 + +
On or Intersects SHN104.89.511
+ + + +12.68.0011.210.412.610.910.29.53 + +
34.17
+ + + +42.06.216.2124.639.330.231.434.1 + +
Total107.39.511
+ + + +13.58.0011.210.413.510.910.29.47 + +
33.48
+ + + +42.06.216.2124.632.730.231.433.4 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 10 - Stockton
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN570.915.936
+ + + +21.015.315.315.315.816.015.415.9 + +
27.916
+ + + +29.714.029.728.628.328.327.627.9 + +
Intersects SHN1,010.615.665
+ + + +21.015.116.116.416.916.415.115.6 + +
14.132
+ + + +28.013.916.015.114.814.713.914.1 + +
Other715.520.635
+ + + +21.016.016.717.116.617.419.120.6 + +
16.519
+ + + +28.014.014.014.114.215.615.616.5 + +
On or Intersects SHN1,581.615.7101
+ + + +21.015.215.816.016.516.215.215.7 + +
18.648
+ + + +28.014.020.319.519.118.918.418.6 + +
Total2,297.117.0135
+ + + +21.016.016.016.216.516.516.217.0 + +
18.166
+ + + +28.014.019.218.618.118.217.618.0 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 11 - San Diego
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN709.628.425
+ + + +43.024.424.424.725.527.633.128.4 + +
22.815
+ + + +23.514.022.023.523.522.222.022.8 + +
Intersects SHN4,888.342.9114
+ + + +43.828.041.741.742.142.343.842.9 + +
14.1103
+ + + +23.013.414.114.414.214.013.414.1 + +
Other900.233.327
+ + + +43.027.927.928.429.831.832.933.3 + +
15.820
+ + + +23.014.016.115.816.015.815.215.8 + +
On or Intersects SHN5,597.940.3139
+ + + +43.028.038.738.639.139.642.140.3 + +
15.2118
+ + + +23.014.015.015.615.315.014.415.2 + +
Total6,498.139.1166
+ + + +43.028.037.037.037.738.440.539.2 + +
15.3138
+ + + +23.014.015.215.615.415.114.615.3 + +
+ + +
+ +
+
+
+
+
+ + + ++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
District 12 - Irvine
Service Hours and Speed 2024-Q2
CategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
On SHN440.647.29
+ + + +67.047.053.160.761.854.855.747.2 + +
16.15
+ + + +16.614.016.416.616.216.115.816.0 + +
Intersects SHN2,206.066.833
+ + + +11547.011599.380.267.687.166.8 + +
14.321
+ + + +16.013.714.514.414.314.113.714.3 + +
Other2,177.064.734
+ + + +72.247.070.272.271.771.771.664.7 + +
14.129
+ + + +16.013.515.014.414.214.013.514.1 + +
On or Intersects SHN2,646.662.542
+ + + +10447.010492.676.364.880.962.5 + +
14.626
+ + + +16.014.014.914.814.614.514.114.6 + +
Total4,823.563.576
+ + + +89.247.089.283.374.467.777.163.5 + +
14.355
+ + + +16.013.814.914.614.414.213.814.3 + +
+ + +
+ +
+
+
+
+
+

Operator Breakdown

+

Only the SHN subtotal (on SHN and parallel) is shown for each operator.

+
+
+
+
+ + + ++++++++++++ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Service Hours and Speed
2024-Q2
nameorganization_nameCategoryServiceSpeed (mph)
Daily Service HoursService Hours per Route# RoutesService Hours per Route (time-series)Average Speed# RoutesSpeed (time-series)
Mountain Transit GMV ScheduleMountain Area Regional Transit AuthorityOn or Intersects SHN112.616.17
+ + + +1941.0016.1 + +
19.07
+ + + +36819 + +
OCTA ScheduleOrange County Transportation AuthorityOn or Intersects SHN1,395.853.726
+ + + +1941.0051.652.953.753.854.553.7 + +
14.626
+ + + +36.08.0014.914.814.614.514.114.6 + +
Needles ScheduleCity of NeedlesOn or Intersects SHN11.011.01
+ + + +1941111111111111 + +
Nevada County ScheduleNevada CountyOn or Intersects SHN50.88.56
+ + + +1941.009.049.048.858.478.478.47 + +
22.16
+ + + +36.08.0023.522.922.722.522.122.1 + +
North County ScheduleNorth County Transit DistrictOn or Intersects SHN881.024.037
+ + + +1941.0023.223.124.624.624.624.0 + +
17.134
+ + + +36.08.0016.717.417.316.715.617.1 + +
Norwalk Avail ScheduleCity of NorwalkOn or Intersects SHN235.939.36
+ + + +1941.0041.441.441.441.439.339.3 + +
11.86
+ + + +36.08.0014.712.512.011.911.811.8 + +
Mountain Transit ScheduleMountain Area Regional Transit AuthorityOn or Intersects SHN101.514.57
+ + + +1941.0012.412.413.014.014.514.5 + +
OmniTrans ScheduleOmniTransOn or Intersects SHN954.156.117
+ + + +1941.0048.250.754.055.155.856.1 + +
16.113
+ + + +36.08.00NANA16.216.015.616.2 + +
Redding ScheduleShasta CountyOn or Intersects SHN140.18.816
+ + + +1941.007.918.228.388.388.468.76 + +
20.215
+ + + +36.08.0018.919.319.018.419.320.2 + +
Pasadena ScheduleCity of PasadenaOn or Intersects SHN203.022.69
+ + + +1941.0020.319.922.422.622.622.6 + +
13.49
+ + + +36.08.0013.313.613.713.613.513.4 + +
Placer SchedulePlacer CountyOn or Intersects SHN104.913.18
+ + + +1941.0014.314.911.511.013.213.1 + +
Plumas SchedulePlumas Transit SystemsOn or Intersects SHN31.710.63
+ + + +1941.0011.29.979.1611.210.610.6 + +
PresidiGo SchedulePresidio TrustOn or Intersects SHN39.519.72
+ + + +1941.0019.019.019.019.719.719.7 + +
11.42
+ + + +36.06.848.3610.18.546.8410.411.4 + +
Riverside ScheduleRiverside Transit AgencyOn or Intersects SHN978.836.727
+ + + +1941.0036.436.737.635.836.136.7 + +
17.427
+ + + +36.08.0017.717.417.617.317.017.4 + +
Redwood Coast SchedulelRedwood Coast Transit AuthorityOn or Intersects SHN21.53.66
+ + + +1941.003.59 + +
21.26
+ + + +36.08.0021.2 + +
Morro Bay Cal-ITP ScheduleCity of Morro BayOn or Intersects SHN12.312.31
+ + + +1941.0012.312.312.3 + +
Palos Verdes PTA SchedulePalos Verdes Peninsula Transit AuthorityOn or Intersects SHN1.61.61
+ + + +1941.007.797.792.531.651.651.65 + +
Morongo Basin ScheduleBasin TransitOn or Intersects SHN69.911.76
+ + + +1941.0011.711.711.711.711.711.7 + +
La Campana ScheduleCity of BellOn or Intersects SHN9.69.61
+ + + +1941.009.609.609.609.60 + +
Montebello ScheduleCity of MontebelloOn or Intersects SHN244.148.85
+ + + +1941.0059.959.959.961.053.148.8 + +
Rosemead Passio ScheduleCity of RosemeadOn or Intersects SHN27.513.82
+ + + +1941.0013.813.8 + +
LADPW ScheduleLos Angeles CountyOn or Intersects SHN240.015.016
+ + + +1941.0015.015.015.415.015.015.0 + +
LAX FlyAway ScheduleLos Angeles World AirportsOn or Intersects SHN271.868.04
+ + + +1941.0040.840.840.840.856.368.0 + +
Laguna Beach ScheduleCity of Laguna BeachOn or Intersects SHN119.940.03
+ + + +1941.0019.219.245.340.040.040.0 + +
Lake ScheduleLake Transit AuthorityOn or Intersects SHN117.613.19
+ + + +1941.0012.712.712.013.113.113.1 + +
26.19
+ + + +36.08.0027.526.926.824.625.126.1 + +
Lassen ScheduleLassen Transit Service AgencyOn or Intersects SHN30.07.54
+ + + +1941.007.517.517.517.517.517.51 + +
Lawndale Beat GMV ScheduleCity of LawndaleOn or Intersects SHN20.810.42
+ + + +1941.0010.410.410.410.4 + +
10.62
+ + + +36.08.0012.010.69.4810.6 + +
Lawndale ScheduleCity of LawndaleOn or Intersects SHN20.810.42
+ + + +1941.0010.410.410.410.4 + +
Monterey Salinas ScheduleMonterey-Salinas TransitOn or Intersects SHN458.716.428
+ + + +1941.0016.615.916.215.616.216.4 + +
17.428
+ + + +36.08.0017.417.617.117.317.417.4 + +
Lompoc ScheduleCity of LompocOn or Intersects SHN43.58.75
+ + + +1941.008.718.718.688.718.718.71 + +
Lynwood ScheduleCity of LynwoodOn or Intersects SHN39.89.94
+ + + +1941.009.949.949.949.94 + +
Madera County Connection ScheduleMadera CountyOn or Intersects SHN36.89.24
+ + + +1941.009.219.219.859.219.219.21 + +
32.24
+ + + +36.08.0033.032.932.132.931.532.2 + +
Madera Metro ScheduleCity of MaderaOn or Intersects SHN56.414.14
+ + + +1941.0018.618.618.618.615.314.1 + +
Manteca ScheduleCity of MantecaOn or Intersects SHN20.210.12
+ + + +1941.0010.110.110.110.110.110.1 + +
Maywood ScheduleCity of MaywoodOn or Intersects SHN10.010.01
+ + + +1941101010101010 + +
Mendocino ScheduleMendocino Transit AuthorityOn or Intersects SHN88.812.77
+ + + +1941.0011.111.212.912.912.812.7 + +
22.47
+ + + +36.08.00NA23.823.122.422.122.4 + +
Merced GMV ScheduleTransit Joint Powers Authority for Merced CountyOn or Intersects SHN211.017.612
+ + + +1941.0018.218.217.717.617.617.6 + +
Merced ScheduleTransit Joint Powers Authority for Merced CountyOn or Intersects SHN207.818.911
+ + + +1941.0018.818.818.818.918.918.9 + +
Long Beach ScheduleLong Beach TransitOn or Intersects SHN914.348.119
+ + + +1941.0045.145.145.745.847.348.1 + +
11.619
+ + + +36.08.0012.112.011.711.811.611.6 + +
Roseville ScheduleCity of RosevilleOn or Intersects SHN98.44.920
+ + + +1941.005.135.135.064.924.924.92 + +
Visalia ScheduleCity of VisaliaOn or Intersects SHN314.626.212
+ + + +1941.0026.226.226.226.226.226.2 + +
19.612
+ + + +36.08.0016.219.719.918.618.719.6 + +
SBMTD ScheduleSanta Barbara Metropolitan Transit DistrictOn or Intersects SHN425.212.933
+ + + +1941.0012.514.618.612.813.012.9 + +
14.720
+ + + +36.08.0015.515.215.315.314.614.7 + +
Torrance ScheduleCity of TorranceOn or Intersects SHN375.134.111
+ + + +1941.0028.929.028.529.232.134.1 + +
14.811
+ + + +36.08.0016.215.715.114.614.214.8 + +
Tracy ScheduleCity of TracyOn or Intersects SHN49.15.59
+ + + +1941.005.455.45 + +
Trinity ScheduleTrinity CountyOn or Intersects SHN17.74.44
+ + + +1941.003.673.674.023.674.424.42 + +
Tuolumne Remix ScheduleTuolumne County Transit AgencyOn or Intersects SHN25.38.43
+ + + +1941.008.438.43 + +
Turlock ScheduleCity of TurlockOn or Intersects SHN34.08.54
+ + + +1941.009.289.289.568.518.518.51 + +
16.24
+ + + +36.08.0019.418.918.718.115.516.2 + +
UCSC ScheduleUniversity of California, Santa CruzOn or Intersects SHN0.70.71
+ + + +1940.731.000.73 + +
Unitrans ScheduleUniversity of California, DavisOn or Intersects SHN213.213.616
+ + + +1941.0013.614.910.313.012.913.6 + +
10.813
+ + + +36.08.0011.511.512.011.110.710.8 + +
VCTC GMV ScheduleCity of OjaiOn or Intersects SHN559.916.534
+ + + +1941.0017.017.116.816.5 + +
19.332
+ + + +36.08.0019.419.318.819.3 + +
VCTC GMV ScheduleVentura County Transportation CommissionOn or Intersects SHN565.716.434
+ + + +1941.0017.016.616.316.4 + +
18.332
+ + + +36.08.0019.118.517.018.3 + +
Victor Valley GMV ScheduleVictor Valley Transit AuthorityOn or Intersects SHN299.615.020
+ + + +1941.0014.614.614.815.015.015.0 + +
23.620
+ + + +36.08.0024.224.224.323.722.523.6 + +
Victor Valley ScheduleVictor Valley Transit AuthorityOn or Intersects SHN289.615.219
+ + + +1941.0014.814.815.015.315.215.2 + +
LA Metro Rail ScheduleLos Angeles County Metropolitan Transportation AuthorityOn or Intersects SHN970.5194.15
+ + + +1941.00114159173190194 + +
25.05
+ + + +36.08.0025.425.324.023.425.0 + +
YARTS ScheduleYosemite Area Regional Transportation SystemOn or Intersects SHN56.324.12
+ + + +1941.0026.425.623.420.428.824.2 + +
Yolobus ScheduleYolo County Transportation DistrictOn or Intersects SHN196.210.918
+ + + +1941.0010.810.810.810.810.810.9 + +
22.518
+ + + +36.08.00NA22.022.021.922.322.5 + +
Yuba-Sutter ScheduleYuba-Sutter Transit AuthorityOn or Intersects SHN159.711.414
+ + + +1941.0012.012.012.011.811.411.4 + +
Yuma ScheduleYuma County Intergovernmental Public Transportation AuthorityOn or Intersects SHN14.87.42
+ + + +1941.007.407.407.787.407.407.40 + +
eTrans ScheduleCity of EscalonOn or Intersects SHN3.43.41
+ + + +1941.003.403.403.403.403.403.40 + +
18.41
+ + + +36.08.00NA24.523.720.722.118.4 + +
Tehama ScheduleTehama CountyOn or Intersects SHN84.69.49
+ + + +1941.0011.911.911.99.40 + +
Tehama ScheduleSusanville Indian RancheriaOn or Intersects SHN74.89.38
+ + + +1941.0011.911.911.59.35 + +
Tahoe Transportation District ScheduleTahoe Transportation DistrictOn or Intersects SHN31.215.62
+ + + +1941.0015.615.617.315.615.615.6 + +
+ + + +36.08.0012.814.914.715.0NANA + +
Tahoe Transportation District GMV ScheduleTahoe Transportation DistrictOn or Intersects SHN35.117.62
+ + + +1941.0015.615.617.6 + +
15.42
+ + + +36.08.0014.313.515.4 + +
SLO ScheduleCity of San Luis ObispoOn or Intersects SHN88.013.27
+ + + +1941.0012.613.213.212.613.2 + +
SLORTA ScheduleSan Luis Obispo Regional Transit AuthorityOn or Intersects SHN167.811.215
+ + + +1941.0010.610.811.011.211.211.2 + +
24.014
+ + + +36.08.00NANANA24.823.924.0 + +
Sacramento ScheduleCity of Rancho CordovaOn or Intersects SHN974.721.246
+ + + +1941.0021.421.322.021.121.2 + +
15.142
+ + + +36.08.0015.315.915.615.215.1 + +
Sage Stage ScheduleModoc Transportation AgencyOn or Intersects SHN7.87.81
+ + + +1941.007.837.837.837.837.837.83 + +
San Diego ScheduleFlagship Cruises and Events Inc.On or Intersects SHN4,570.251.489
+ + + +1941.0049.549.250.651.351.4 + +
14.686
+ + + +36.08.0014.814.614.314.014.6 + +
San Diego ScheduleSan Diego Metropolitan Transit SystemOn or Intersects SHN4,577.354.584
+ + + +1941.0049.554.5 + +
14.283
+ + + +36.08.0014.414.2 + +
San Joaquin ScheduleSan Joaquin Regional Transit DistrictOn or Intersects SHN406.415.127
+ + + +1941.0011.812.212.812.814.315.0 + +
17.625
+ + + +36.08.0018.217.917.017.717.117.6 + +
Santa Cruz ScheduleSanta Cruz Metropolitan Transit DistrictOn or Intersects SHN506.226.619
+ + + +1941.0022.219.518.820.325.626.6 + +
18.619
+ + + +36.08.0018.017.817.617.817.818.6 + +
Roseville Transit GMV ScheduleCity of RosevilleOn or Intersects SHN107.15.121
+ + + +1941.005.10 + +
Santa Maria ScheduleCity of Santa MariaOn or Intersects SHN157.012.113
+ + + +1941.0012.112.115.812.112.112.1 + +
16.31
+ + + +36.08.0017.416.513.815.314.316.3 + +
Siskiyou ScheduleSiskiyou CountyOn or Intersects SHN44.18.85
+ + + +1941.009.688.047.927.687.678.82 + +
South County Transit Link ScheduleSacramento CountyOn or Intersects SHN42.214.13
+ + + +1941.0014.114.114.114.114.114.1 + +
Spirit Bus Passio ScheduleCity of Monterey ParkOn or Intersects SHN50.58.46
+ + + +1941.008.528.42 + +
StanRTA ScheduleStanislaus Regional Transit AuthorityOn or Intersects SHN700.126.926
+ + + +1941.0025.625.625.626.727.326.9 + +
20.826
+ + + +36.08.0022.721.221.320.920.720.8 + +
Stanford ScheduleStanford UniversityOn or Intersects SHN179.710.617
+ + + +1941.0010.610.610.410.610.610.6 + +
SunLine Avail ScheduleSunLine Transit AgencyOn or Intersects SHN40.210.14
+ + + +1941.004.696.367.827.8210.110.1 + +
TART, North Lake Tahoe ScheduleNorth Lake Tahoe ExpressOn or Intersects SHN91.415.26
+ + + +1941.0012.213.814.515.515.2 + +
TCRTA TripShot ScheduleTulare County Regional Transit AgencyOn or Intersects SHN258.310.325
+ + + +1941.0010.410.410.410.410.3 + +
Santa Ynez Mecatran ScheduleCity of SolvangOn or Intersects SHN32.616.32
+ + + +1941.0016.316.316.316.316.3 + +
21.12
+ + + +36.08.00NA20.520.820.221.1 + +
LA Metro Bus ScheduleLos Angeles County Metropolitan Transportation AuthorityOn or Intersects SHN9,498.6143.966
+ + + +1941.00147146146148148144 + +
13.863
+ + + +36.08.0014.113.914.013.613.213.8 + +
Santa Clarita ScheduleCity of Santa ClaritaOn or Intersects SHN215.310.221
+ + + +1941.0010.110.09.9110.110.210.2 + +
20.721
+ + + +36.08.0021.421.821.020.120.020.7 + +
Kings ScheduleKings County Area Public Transit AgencyOn or Intersects SHN146.212.212
+ + + +1941.0012.012.012.212.112.112.2 + +
22.711
+ + + +36.08.0022.222.221.221.922.422.7 + +
Bay Area 511 Rio Vista Delta Breeze ScheduleCity of Rio VistaOn or Intersects SHN7.37.31
+ + + +1941.007.277.277.277.277.27 + +
Bay Area 511 SFO AirTrain ScheduleSan Francisco International AirportOn or Intersects SHN342.385.64
+ + + +1941.0085.685.685.685.685.685.6 + +
Bay Area 511 SamTrans ScheduleSan Mateo County Transit DistrictOn or Intersects SHN1,475.023.164
+ + + +1941.0021.521.526.622.022.823.0 + +
13.362
+ + + +36.08.0013.413.912.812.912.613.3 + +
Bay Area 511 Santa Clara Transit ScheduleSanta Clara Valley Transportation AuthorityOn or Intersects SHN2,867.752.155
+ + + +1941.0051.054.553.452.053.052.1 + +
15.846
+ + + +36.08.0015.816.616.115.715.015.8 + +
Bay Area 511 Santa Rosa CityBus ScheduleCity of Santa RosaOn or Intersects SHN177.711.815
+ + + +1941.0012.613.713.312.712.411.8 + +
13.914
+ + + +36.08.0013.213.113.713.412.813.9 + +
Bay Area 511 SolTrans ScheduleSolano County TransitOn or Intersects SHN110.311.010
+ + + +1941.0010.410.210.211.0 + +
15.010
+ + + +36.08.0015.815.114.715.0 + +
Bay Area 511 SolTrans ScheduleSolano Transportation AuthorityOn or Intersects SHN112.610.211
+ + + +1941.0010.910.810.210.2 + +
14.711
+ + + +36.08.0015.114.815.214.7 + +
Bay Area 511 Sonoma County Transit ScheduleCloverdale TransitOn or Intersects SHN258.211.223
+ + + +1941.0010.611.011.111.211.2 + +
18.322
+ + + +36.08.0017.518.118.218.018.3 + +
Bay Area 511 Sonoma-Marin Area Rail Transit ScheduleSonoma-Marin Area Rail Transit DistrictOn or Intersects SHN48.848.81
+ + + +1941.0048.848.848.848.848.848.8 + +
36.21
+ + + +36.28.0035.636.235.436.136.136.2 + +
Bay Area 511 South San Francisco Shuttle ScheduleCity of South San FranciscoOn or Intersects SHN31.410.53
+ + + +1941.008.868.868.8610.510.510.5 + +
Bay Area 511 Tri Delta ScheduleEastern Contra Costa Transit AuthorityOn or Intersects SHN427.529.814
+ + + +1941.0030.730.730.229.028.529.8 + +
16.412
+ + + +36.08.0016.016.516.416.415.416.4 + +
Bay Area 511 Tri-Valley Wheels ScheduleLivermore-Amador Valley Transit AuthorityOn or Intersects SHN192.48.423
+ + + +1941.0010.210.211.510.210.28.37 + +
14.923
+ + + +36.08.0014.115.815.114.414.214.9 + +
Bay Area 511 Union City Transit ScheduleCity of Union CityOn or Intersects SHN131.526.35
+ + + +1941.0026.426.426.426.326.326.3 + +
14.25
+ + + +36.08.0014.214.313.013.914.314.2 + +
Bay Area 511 Vacaville City Coach ScheduleCity of VacavilleOn or Intersects SHN64.913.05
+ + + +1941.0012.812.812.913.013.013.0 + +
Bay Area 511 Vine Transit ScheduleNapa Valley Transportation AuthorityOn or Intersects SHN219.121.910
+ + + +1941.0021.821.822.121.921.921.9 + +
18.26
+ + + +36.08.0018.117.617.116.115.718.2 + +
Bay Area 511 WestCAT ScheduleWestern Contra Costa Transit AuthorityOn or Intersects SHN172.017.210
+ + + +1941.0017.917.717.617.717.817.2 + +
20.010
+ + + +36.08.0018.919.819.920.320.820.0 + +
Beach Cities GMV ScheduleCity of Redondo BeachOn or Intersects SHN91.245.62
+ + + +1941.0090.845.645.645.645.645.6 + +
11.62
+ + + +36.08.0011.612.012.011.711.211.6 + +
Bay Area 511 Petaluma ScheduleCity of PetalumaOn or Intersects SHN45.56.57
+ + + +1941.006.047.417.976.006.726.50 + +
14.96
+ + + +36.08.0016.414.714.013.914.014.9 + +
Bay Area 511 Mission Bay ScheduleMission Bay Transportation Management AgencyOn or Intersects SHN60.515.14
+ + + +1941.0015.115.115.115.1 + +
Bay Area 511 Marin ScheduleMarin County Transit DistrictOn or Intersects SHN293.722.613
+ + + +1941.0018.821.226.823.022.622.6 + +
14.813
+ + + +36.08.0015.014.715.415.514.814.8 + +
Bay Area 511 MVGO ScheduleMountain View Transportation Management AssociationOn or Intersects SHN43.95.58
+ + + +1941.005.775.775.7711.75.865.48 + +
12.08
+ + + +36.08.0014.514.0NA13.110.412.0 + +
LA DOT ScheduleCity of Los AngelesOn or Intersects SHN2,003.340.150
+ + + +1941.0040.640.640.640.640.140.1 + +
12.250
+ + + +36.08.0013.112.812.712.211.612.2 + +
Alhambra ScheduleCity of AlhambraOn or Intersects SHN50.425.22
+ + + +1941.0025.225.225.225.225.225.2 + +
Amador ScheduleAmador Regional Transit SystemOn or Intersects SHN39.06.56
+ + + +1941.006.506.506.506.506.506.50 + +
Anaheim Resort ScheduleAnaheim Transportation NetworkOn or Intersects SHN1,130.984.813
+ + + +2451.0024024513894.813484.8 + +
Antelope Valley Transit Authority ScheduleAntelope Valley Transit AuthorityOn or Intersects SHN408.034.012
+ + + +1941.0033.533.533.734.034.034.0 + +
18.38
+ + + +36.08.0022.222.322.821.521.118.3 + +
Arcadia ScheduleCity of ArcadiaOn or Intersects SHN10.810.81
+ + + +1941.0013.613.613.613.614.210.8 + +
Arvin ScheduleCity of ArvinOn or Intersects SHN14.14.73
+ + + +1941.004.694.694.694.69 + +
Auburn ScheduleCity of AuburnOn or Intersects SHN6.76.71
+ + + +1941.006.676.676.676.676.67 + +
Bear ScheduleUniversity of California, BerkeleyOn or Intersects SHN4.24.21
+ + + +1941.004.174.174.174.174.174.17 + +
B-Line ScheduleButte County Association of GovernmentsOn or Intersects SHN177.09.319
+ + + +1941.008.809.158.989.329.329.32 + +
19.419
+ + + +36.08.00NANA19.019.319.419.4 + +
Bay Area 511 AC Transit ScheduleAlameda-Contra Costa Transit DistrictOn or Intersects SHN2,719.237.672
+ + + +1941.0032.240.838.631.931.937.6 + +
13.272
+ + + +36.08.0013.513.413.113.313.113.2 + +
Bay Area 511 BART ScheduleSan Francisco Bay Area Rapid Transit DistrictOn or Intersects SHN219.973.33
+ + + +1941.0025.425.446.875.073.373.3 + +
Bay Area 511 Caltrain SchedulePeninsula Corridor Joint Powers BoardOn or Intersects SHN158.531.75
+ + + +1941.0032.032.033.231.731.731.7 + +
36.25
+ + + +36.78.0032.636.734.736.431.336.2 + +
Bay Area 511 Capitol Corridor ScheduleCapitol Corridor Joint Powers AuthorityOn or Intersects SHN11.811.81
+ + + +1941.0011.811.811.811.811.811.8 + +
Bay Area 511 Commute.org ScheduleCity of Menlo ParkOn or Intersects SHN130.96.919
+ + + +1941.006.89 + +
Bay Area 511 County Connection ScheduleCentral Contra Costa Transit AuthorityOn or Intersects SHN352.811.830
+ + + +1941.0022.020.416.915.411.811.8 + +
15.130
+ + + +36.08.0014.615.615.215.014.815.1 + +
Bay Area 511 Emery Go-Round ScheduleEmeryville Transportation Management AgencyOn or Intersects SHN38.438.41
+ + + +1941.0035.835.835.835.837.538.4 + +
7.91
+ + + +36.07.808.899.439.387.808.017.92 + +
Bay Area 511 Fairfield and Suisun Transit ScheduleCity of FairfieldOn or Intersects SHN68.718.84
+ + + +1941.0019.019.018.417.517.518.8 + +
16.84
+ + + +36.08.0016.517.317.216.716.416.8 + +
Baldwin Park ScheduleCity of Baldwin ParkOn or Intersects SHN64.812.95
+ + + +1941.0020.220.220.220.220.213.0 + +
Beaumont Pass ScheduleCity of BeaumontOn or Intersects SHN62.87.88
+ + + +1941.008.708.588.547.847.847.84 + +
26.08
+ + + +36.08.0025.122.626.524.824.126.0 + +
Bay Area 511 Muni ScheduleCity and County of San FranciscoOn or Intersects SHN5,028.2102.649
+ + + +1941.00103104104103102103 + +
9.249
+ + + +36.08.008.688.688.768.748.919.18 + +
Bellflower Bus ScheduleCity of BellflowerOn or Intersects SHN10.010.01
+ + + +1941101010101010 + +
El Segundo ScheduleCity of El SegundoOn or Intersects SHN2.92.91
+ + + +1941.002.922.922.92 + +
Elk Grove ScheduleCity of Elk GroveOn or Intersects SHN88.67.412
+ + + +1941.007.407.407.397.387.387.38 + +
21.812
+ + + +36.08.0021.521.021.621.321.021.8 + +
Flixbus ScheduleGreyhoundOn or Intersects SHN297.631.99
+ + + +1941.0039.840.037.431.431.9 + +
Foothill ScheduleCity of DuarteOn or Intersects SHN1,910.263.730
+ + + +1941.0060.060.360.964.064.063.7 + +
17.130
+ + + +36.08.0017.817.517.416.817.017.1 + +
Fresno County ScheduleFresno County Rural Transit AgencyOn or Intersects SHN20.85.24
+ + + +1941.005.195.195.195.195.195.19 + +
Fresno ScheduleCity of FresnoOn or Intersects SHN540.067.58
+ + + +1941.0060.460.465.167.567.567.5 + +
14.38
+ + + +36.08.0014.213.313.614.114.114.3 + +
GET ScheduleGolden Empire Transit DistrictOn or Intersects SHN378.134.411
+ + + +1941.0031.132.334.434.434.434.4 + +
17.011
+ + + +36.08.0017.517.417.316.816.817.0 + +
Get Around Town Express ScheduleCity of South GateOn or Intersects SHN13.313.31
+ + + +1941.0012.812.813.213.3 + +
El Monte ScheduleCity of El MonteOn or Intersects SHN63.79.17
+ + + +1941.009.049.049.049.049.069.10 + +
Glendale ScheduleCity of GlendaleOn or Intersects SHN234.924.710
+ + + +1941.0021.824.123.823.824.7 + +
Glenn ScheduleGlenn CountyOn or Intersects SHN19.519.51
+ + + +1941.0019.519.519.519.519.5 + +
Go West ScheduleCity of West CovinaOn or Intersects SHN21.910.92
+ + + +1941.0011.011.011.011.0 + +
Grapeline ScheduleCity of LodiOn or Intersects SHN56.96.39
+ + + +1941.005.965.735.365.415.416.32 + +
Guadalupe Flyer ScheduleCity of GuadalupeOn or Intersects SHN26.013.02
+ + + +19411313131313 + +
Humboldt ScheduleCity of EurekaOn or Intersects SHN89.09.210
+ + + +1941.008.768.889.20 + +
22.09
+ + + +36.08.0019.519.522.0 + +
Imperial Valley Transit ScheduleImperial County Transportation CommissionOn or Intersects SHN129.510.013
+ + + +1941.009.969.969.999.969.969.96 + +
Bell Gardens ScheduleCity of Bell GardensOn or Intersects SHN29.829.81
+ + + +1941.0029.829.829.829.829.829.8 + +
Kern ScheduleKern CountyOn or Intersects SHN154.811.913
+ + + +1941.0013.613.612.412.111.911.9 + +
Glendora ScheduleCity of GlendoraOn or Intersects SHN9.24.62
+ + + +1941.003.324.58 + +
El Dorado ScheduleEl Dorado County Transit AuthorityOn or Intersects SHN99.814.27
+ + + +1941.0013.013.514.214.214.214.2 + +
G Trans ScheduleCity of GardenaOn or Intersects SHN210.170.03
+ + + +1941.0098.798.781.670.270.270.0 + +
15.53
+ + + +36.08.00NANANANA14.415.5 + +
Cerritos on Wheels Website ScheduleCity of CerritosOn or Intersects SHN21.63.66
+ + + +1941.003.603.60 + +
BruinBus ScheduleUniversity of California, Los AngelesOn or Intersects SHN37.012.33
+ + + +1941.007.547.7118.59.2710.112.3 + +
16.41
+ + + +36.06.9316.316.46.9315.913.616.4 + +
Eastern Sierra ScheduleEastern Sierra Transit AuthorityOn or Intersects SHN104.89.511
+ + + +1941.0011.210.412.610.910.29.53 + +
34.17
+ + + +39.36.216.2124.639.330.231.434.1 + +
Burbank ScheduleCity of BurbankOn or Intersects SHN42.442.41
+ + + +1941.0025.341.542.142.442.442.4 + +
11.61
+ + + +36.08.0013.911.811.811.911.011.6 + +
Calabasas ScheduleCity of CalabasasOn or Intersects SHN19.34.84
+ + + +1941.004.634.824.824.82 + +
Calaveras ScheduleCalaveras Transit AgencyOn or Intersects SHN38.719.42
+ + + +1941.0019.419.419.419.419.4 + +
Cerritos on Wheels ScheduleCity of CerritosOn or Intersects SHN21.610.82
+ + + +1941.0010.810.810.810.810.810.8 + +
Clean Air Express ScheduleSanta Barbara County Association of GovernmentsOn or Intersects SHN28.97.24
+ + + +1941.007.356.807.617.247.247.24 + +
Clovis ScheduleCity of ClovisOn or Intersects SHN34.120.42
+ + + +1941.0017.120.425.417.117.120.4 + +
Big Blue Bus ScheduleCity of Santa MonicaOn or Intersects SHN1,129.762.818
+ + + +1941.0056.756.659.561.161.962.8 + +
11.218
+ + + +36.08.0011.911.711.611.210.611.2 + +
Commerce ScheduleCity of CommerceOn or Intersects SHN110.118.36
+ + + +1941.0018.318.318.318.318.318.3 + +
13.56
+ + + +36.08.0013.814.313.713.313.413.5 + +
County Express ScheduleSan Benito County Local Transportation AuthorityOn or Intersects SHN36.922.12
+ + + +1941.0015.215.919.116.14.1322.1 + +
Cudahy ScheduleCity of CudahyOn or Intersects SHN9.29.21
+ + + +1941.009.179.179.179.17 + +
Culver City ScheduleCity of Culver CityOn or Intersects SHN424.742.510
+ + + +1941.0037.937.941.839.239.242.5 + +
11.410
+ + + +36.08.0011.611.411.211.111.211.4 + +
Delano ScheduleCity of DelanoOn or Intersects SHN26.96.74
+ + + +1941.006.836.736.736.736.73 + +
Desert Roadrunner GMV SchedulePalo Verde Valley Transit AgencyOn or Intersects SHN35.27.05
+ + + +1941.007.057.057.057.05 + +
23.73
+ + + +36.08.0021.825.820.723.7 + +
Desert Roadrunner SchedulePalo Verde Valley Transit AgencyOn or Intersects SHN36.27.25
+ + + +1941.007.257.257.347.257.257.25 + +
DowneyLINK GMV ScheduleCity of DowneyOn or Intersects SHN53.710.75
+ + + +1941.0010.710.7 + +
Corona ScheduleCity of CoronaOn or Intersects SHN45.422.72
+ + + +1941.0022.722.722.722.722.722.7 + +
+ + +
+ +
+
+
+
+ +
+ + +
+ + + + + \ No newline at end of file diff --git a/quarterly_performance_objective/new_report.ipynb b/quarterly_performance_objective/new_report.ipynb deleted file mode 100644 index 5769638b4..000000000 --- a/quarterly_performance_objective/new_report.ipynb +++ /dev/null @@ -1,333 +0,0 @@ -{ - "cells": [ - { - "cell_type": "raw", - "metadata": {}, - "source": [ - "---\n", - "title: Quarterly Performance Metrics\n", - "execute:\n", - " echo: false\n", - "format:\n", - " html:\n", - " mainfont: sans-serif\n", - " monofont: sans-serif\n", - " anchor-sections: true\n", - " toc: true\n", - " toc-title: Contents\n", - " toc-depth: 3\n", - " code-links:\n", - " - text: Analysis Products\n", - " icon: bar-chart-fill\n", - " href: 'https://analysis.calitp.org'\n", - " - text: Reach Out!\n", - " icon: envelope\n", - " href: 'mailto:hello@calitp.org'\n", - "---" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import altair as alt\n", - "import geopandas as gpd\n", - "import pandas as pd\n", - "import polars as pl\n", - "\n", - "from great_tables import GT, _data_color, loc, md, nanoplot_options, style\n", - "\n", - "from update_vars import BUS_SERVICE_GCS" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def category_wrangling(\n", - " df: pd.DataFrame, \n", - " col: str = \"category\", \n", - " sort_key: list = [\"on_shn\", \"parallel\", \"other\", \"shn_subtotal\", \"total\"]\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " Custom sort order for categorical variable\n", - " https://stackoverflow.com/questions/23482668/sorting-by-a-custom-list-in-pandas\n", - " \"\"\"\n", - " category_values = {\n", - " \"on_shn\": \"On SHN\", \n", - " \"parallel\": \"Intersects SHN\",\n", - " \"other\": \"Other\",\n", - " \"shn_subtotal\": \"On or Intersects SHN\",\n", - " \"total\": \"Total\"\n", - " }\n", - " \n", - " df = df.sort_values(\n", - " col, key=lambda c: c.map(lambda e: sort_key.index(e))\n", - " ) \n", - " \n", - " df = df.assign(\n", - " category = df.category.map(category_values)\n", - " )\n", - " \n", - " return df\n", - "\n", - "def get_hex(color_name: str) -> str:\n", - " \"\"\"\n", - " Since some of the color names don't pull the hex code, \n", - " we'll grab it here.\n", - " https://github.com/posit-dev/great-tables/blob/main/great_tables/_data_color/constants.py\n", - " \"\"\"\n", - " return _data_color.constants.COLOR_NAME_TO_HEX[color_name]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "current_quarter = \"2024-Q2\"\n", - "\n", - "operator_df = pd.read_parquet(\n", - " f\"{BUS_SERVICE_GCS}\"\n", - " \"quarterly_metrics/operator_time_series.parquet\",\n", - " filters = [[(\"year_quarter\", \"==\", current_quarter)]]\n", - ").pipe(category_wrangling)\n", - "\n", - "district_df = pd.read_parquet(\n", - " f\"{BUS_SERVICE_GCS}\"\n", - " \"quarterly_metrics/district_time_series.parquet\",\n", - " filters = [[(\"year_quarter\", \"==\", current_quarter)]]\n", - ").pipe(category_wrangling)\n", - "\n", - "statewide_df = pd.read_parquet(\n", - " f\"{BUS_SERVICE_GCS}\"\n", - " \"quarterly_metrics/statewide_time_series.parquet\",\n", - " filters = [[(\"year_quarter\", \"==\", current_quarter)]]\n", - ").pipe(category_wrangling)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def shared_nano_options(\n", - " point_stroke_color: str,\n", - " line_stroke_color: str,\n", - " point_fill_color: str,\n", - " area_fill_color: str\n", - "):\n", - " nano_options = nanoplot_options(\n", - " data_point_radius=6,\n", - " data_point_stroke_color=get_hex(point_stroke_color),\n", - " data_point_fill_color=get_hex(point_fill_color),\n", - " data_point_stroke_width=3,\n", - " data_line_type=\"curved\",\n", - " data_line_stroke_color=get_hex(line_stroke_color),\n", - " data_line_stroke_width=8,\n", - " data_area_fill_color=get_hex(area_fill_color),\n", - " #vertical_guide_stroke_color=None,\n", - " show_y_axis_guide=True,\n", - " #show_vertical_guides=False,\n", - " interactive_data_values = True,\n", - " #reference_line_color=get_hex(\"salmon1\"),\n", - " show_reference_line=False\n", - " )\n", - " \n", - " return nano_options" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def plot_table(df: pd.DataFrame): \n", - " \n", - " MIN_SPEED, MAX_SPEED = df.speed_mph.min(), df.speed_mph.max()\n", - " MIN_SERVICE, MAX_SERVICE = df.service_hours_per_route.min(), df.service_hours_per_route.max()\n", - " \n", - " table = (\n", - " GT(pl.from_pandas(df))\n", - " .fmt_nanoplot(\n", - " columns=\"speed_mph_ts\",\n", - " plot_type=\"line\",\n", - " expand_y=[round(MIN_SPEED, 0), round(MAX_SPEED, 0)],\n", - " options=shared_nano_options(\n", - " point_stroke_color = \"black\",\n", - " line_stroke_color = \"green\",\n", - " point_fill_color = \"white\",\n", - " area_fill_color = \"seagreen2\")\n", - " ).fmt_nanoplot(\n", - " columns=\"service_hours_per_route_ts\",\n", - " plot_type=\"line\",\n", - " expand_y=[round(MIN_SERVICE, 0), round(MAX_SERVICE, 0)],\n", - " options=shared_nano_options(\n", - " point_stroke_color = \"black\", \n", - " line_stroke_color = \"steelblue1\",\n", - " point_fill_color = \"white\",\n", - " area_fill_color = \"lightskyblue2\", \n", - " )\n", - " ).fmt_number(\n", - " columns = [\n", - " \"daily_service_hours\", \n", - " \"service_hours_per_route\", \n", - " \"speed_mph\"], decimals=1\n", - " ).fmt_integer(\n", - " columns = [\"daily_routes\", \"daily_vp_routes\"]\n", - " ).cols_label(\n", - " category = \"Category\",\n", - " daily_service_hours = \"Daily Service Hours\",\n", - " service_hours_per_route = \"Service Hours per Route\",\n", - " speed_mph = \"Average Speed\",\n", - " daily_routes = \"# Routes\",\n", - " daily_vp_routes = \"# Routes\",\n", - " service_hours_per_route_ts = \"Service Hours per Route (time-series)\",\n", - " speed_mph_ts = \"Speed (time-series)\",\n", - " ).tab_header(\n", - " title = \"Service Hours and Speed\",\n", - " subtitle = f\"{current_quarter}\"\n", - " ).tab_spanner(\n", - " label=\"Service\", \n", - " columns=[\"daily_service_hours\", \n", - " \"service_hours_per_route\", \"daily_routes\", \n", - " \"service_hours_per_route_ts\"]\n", - " ).tab_spanner(\n", - " label=\"Speed (mph)\",\n", - " columns = [\"speed_mph\", \"daily_vp_routes\", \"speed_mph_ts\"]\n", - " ).tab_options(\n", - " container_width = \"100%\",\n", - " table_background_color=\"white\",\n", - " table_body_hlines_style=\"none\",\n", - " table_body_vlines_style=\"none\",\n", - " heading_background_color=\"white\",\n", - " column_labels_background_color=\"white\",\n", - " row_group_background_color=\"white\",\n", - " stub_background_color=\"white\",\n", - " source_notes_background_color=\"white\",\n", - " table_font_size=\"14px\",\n", - " heading_align=\"center\"\n", - " ).cols_hide(\n", - " [\"year_quarter\", \"service_hours\", \"n_routes\", \n", - " \"n_dates\", \"n_vp_routes\"]\n", - " ).sub_missing(\n", - " columns = [\"speed_mph\", \"speed_mph_ts\", \"daily_vp_routes\"],\n", - " missing_text = \"\"\n", - " ).tab_style(\n", - " style=style.text(weight=\"bold\"),\n", - " locations=loc.body(rows=pl.col(\"category\") == \"Total\")\n", - " ).tab_style(\n", - " style=style.text(\n", - " weight=\"normal\", style=\"italic\", color=get_hex(\"gray20\")),\n", - " locations=loc.body(\n", - " rows=pl.col(\"category\") == \"On or Intersects SHN\"),\n", - " ).cols_align(align=\"center\")\n", - " .cols_align(align=\"left\", columns=\"category\")\n", - " \n", - " )\n", - " \n", - " return table" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Statewide Metrics" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_table(statewide_df)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## District Breakdown" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "def district_table_specs(table, one_district, one_quarter):\n", - " table2 = (table\n", - " .cols_hide(\"caltrans_district\")\n", - " .tab_header(\n", - " title = f\"District {one_district}\",\n", - " subtitle = f\"Service Hours and Speed {one_quarter}\")\n", - " )\n", - " \n", - " return table2\n", - " \n", - "\n", - "for i in sorted(district_df.caltrans_district.unique()):\n", - " table = plot_table(\n", - " district_df[district_df.caltrans_district==i])\n", - " \n", - " table = district_table_specs(table, i, current_quarter)\n", - "\n", - " display(table)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Operator Breakdown" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "plot_table(operator_df)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/quarterly_performance_objective/new_report.qmd b/quarterly_performance_objective/new_report.qmd index de120c4e0..aba74537f 100644 --- a/quarterly_performance_objective/new_report.qmd +++ b/quarterly_performance_objective/new_report.qmd @@ -234,10 +234,31 @@ for i in sorted(district_df.caltrans_district.unique()): display(table) ``` -## Operator Breakdown +## Operator Breakdown + +Only the SHN subtotal (on SHN and parallel) is shown for each operator. ```{python} -plot_table(operator_df) + +def operator_table_specs(table): + table2 = (table + .cols_hide("caltrans_district") + .tab_style( + style=style.text( + weight="normal", style="normal", color=get_hex("black")), + locations=loc.body( + rows=pl.col("category") == "On or Intersects SHN"), + ) + ) + + return table2 + +table = plot_table( + operator_df[operator_df.category=="On or Intersects SHN"] +) + +table = operator_table_specs(table) +display(table) ```