From afb47f050eb284de8319bd9bbc5cb85b8e074dca Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Thu, 14 Nov 2024 01:43:28 +0000 Subject: [PATCH 01/13] generate new speedmap state --- ca_transit_speed_maps/01_new_speedmaps.ipynb | 79 ++++++++++++++++---- 1 file changed, 65 insertions(+), 14 deletions(-) diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index 4cd218620..eb0dbbc4f 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -364,7 +364,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 17, "id": "281c341b-c3cc-48b3-b8d1-e1a698f3d7f4", "metadata": {}, "outputs": [ @@ -384,7 +384,7 @@ " 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJTcGVlZG1hcCBTZWdzIEFNIFBlYWsgMjAyNC0xMC0xNiIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvdGVzdGluZy8xODJfYW1fcGVha19uZXcuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41LCAidG9vbHRpcF9zcGVlZF9rZXkiOiAicDIwX21waCJ9LCAidHlwZSI6ICJzcGVlZG1hcCJ9XSwgImxhdF9sb24iOiBbMzQuMDUzOTM1NzYwOTUzNywgLTExOC4yOTk5NzUyMzAwMzY2OF0sICJ6b29tIjogMTMsICJsZWdlbmRfdXJsIjogImh0dHBzOi8vc3RvcmFnZS5nb29nbGVhcGlzLmNvbS9jYWxpdHAtbWFwLXRpbGVzL3NwZWVkc19sZWdlbmRfY29sb3JfYWNjZXNzLnN2ZyJ9'}" ] }, - "execution_count": 23, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } @@ -393,6 +393,57 @@ "speedmap_state" ] }, + { + "cell_type": "code", + "execution_count": 18, + "id": "126c190b-ea97-4971-8c62-5ee2f2a36f61", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "writing to calitp-map-tiles/testing/182_am_peak_new.geojson.gz\n" + ] + } + ], + "source": [ + "# TODO clone render method from RtFilterMapper, to utils\n", + "new_speedmap_state = shared_utils.rt_utils.set_state_export(\n", + " period_test, 
filename=f'{itp_id}_{time_of_day_lower}_new', map_type='new_speedmap',\n", + " color_col='p20_mph', cmap=cmap, legend_url=url,\n", + " cache_seconds=0, map_title=f'Speedmap Segs {time_of_day} {analysis_date}')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "cff6c1d2-0a15-41b7-a76e-e5e219340545", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'state_dict': {'name': 'null',\n", + " 'layers': [{'name': 'Speedmap Segs AM Peak 2024-10-16',\n", + " 'url': 'https://storage.googleapis.com/calitp-map-tiles/testing/182_am_peak_new.geojson.gz',\n", + " 'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5},\n", + " 'type': 'new_speedmap'}],\n", + " 'lat_lon': (34.0539357609537, -118.29997523003668),\n", + " 'zoom': 13,\n", + " 'legend_url': 'https://storage.googleapis.com/calitp-map-tiles/speeds_legend_color_access.svg'},\n", + " 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJTcGVlZG1hcCBTZWdzIEFNIFBlYWsgMjAyNC0xMC0xNiIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvdGVzdGluZy8xODJfYW1fcGVha19uZXcuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41fSwgInR5cGUiOiAibmV3X3NwZWVkbWFwIn1dLCAibGF0X2xvbiI6IFszNC4wNTM5MzU3NjA5NTM3LCAtMTE4LjI5OTk3NTIzMDAzNjY4XSwgInpvb20iOiAxMywgImxlZ2VuZF91cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc3BlZWRzX2xlZ2VuZF9jb2xvcl9hY2Nlc3Muc3ZnIn0='}" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "new_speedmap_state" + ] + }, { "cell_type": "markdown", "id": "04c8da94-c9e6-4261-b6e6-3cd44a4eaa5d", @@ -403,7 +454,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "464bfe77-8c1a-4a40-bd53-940b7630bf13", "metadata": {}, "outputs": [], @@ -413,7 +464,7 @@ }, { "cell_type": "code", - "execution_count": 18, + 
"execution_count": 19, "id": "16cbe969-8d8f-44a9-b48c-8febaa336620", "metadata": {}, "outputs": [], @@ -423,7 +474,7 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "69ab8ef9-40d7-431a-bb0b-fff8467165b7", "metadata": {}, "outputs": [], @@ -438,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "8f0b7f7a-0150-40c1-8d3c-458955e4ba70", "metadata": {}, "outputs": [], @@ -468,7 +519,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "be49a590-13d0-47d9-a28c-de21729c373e", "metadata": {}, "outputs": [ @@ -477,23 +528,23 @@ "text/html": [ "\n", "\n", - "
\n", + "
\n", "" ], "text/plain": [ "alt.LayerChart(...)" ] }, - "execution_count": 22, + "execution_count": 32, "metadata": {}, "output_type": "execute_result" } From 7971f4683cac4b5d2ff46e6600e1e74f6042c85f Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Thu, 14 Nov 2024 22:58:29 +0000 Subject: [PATCH 04/13] use organization_source_record_id instead of itp_id --- ca_transit_speed_maps/01_new_speedmaps.ipynb | 343 ++++++++++++++++-- .../build_speedmaps_index.py | 68 ---- ca_transit_speed_maps/stage_run_portfolio.py | 2 +- 3 files changed, 319 insertions(+), 94 deletions(-) delete mode 100644 ca_transit_speed_maps/build_speedmaps_index.py diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index ffaf7272f..46f231745 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 67, + "execution_count": 19, "id": "46898b5c-b5cc-4096-ab68-8c3e42fab870", "metadata": {}, "outputs": [], @@ -15,7 +15,7 @@ "from calitp_data_analysis import calitp_color_palette\n", "\n", "# from rt_analysis import rt_filter_map_plot\n", - "import build_speedmaps_index\n", + "import update_vars_index\n", "\n", "from IPython.display import display, Markdown\n", "import pandas as pd\n", @@ -26,12 +26,13 @@ "import shared_utils\n", "import segment_speed_utils\n", "\n", - "import altair as alt" + "import altair as alt\n", + "from calitp_data_analysis.tables import tbls" ] }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 20, "id": "cf3d8814-6f22-4a22-8b03-429ebb63a397", "metadata": {}, "outputs": [], @@ -41,7 +42,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 21, "id": "9f15ed6a-5ab7-4f57-9695-3f762781b74c", "metadata": { "tags": [ @@ -51,23 +52,23 @@ "outputs": [], "source": [ "## parameters cell\n", - "itp_id = 194" + "itp_id = 182" ] }, { "cell_type": "code", - "execution_count": 
12, + "execution_count": 22, "id": "a639fd84-29ca-4678-b568-2b1022701e14", "metadata": {}, "outputs": [], "source": [ - "analysis_date = build_speedmaps_index.ANALYSIS_DATE\n", + "analysis_date = update_vars_index.ANALYSIS_DATE\n", "import datetime as dt" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 23, "id": "b4e52208-fedd-41e7-9888-b62212ff69e5", "metadata": {}, "outputs": [], @@ -77,44 +78,336 @@ }, { "cell_type": "code", - "execution_count": 14, - "id": "eea40f4b-c378-4b70-a80b-bbe79f8441bf", - "metadata": { - "tags": [] - }, + "execution_count": 24, + "id": "d4ccbe96-0c6a-4d04-a94b-1a7c151ad5cf", + "metadata": {}, "outputs": [], "source": [ - "speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet') >> filter(_.organization_itp_id == itp_id)" + "speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet')" ] }, { "cell_type": "code", - "execution_count": 15, - "id": "1b9762c8-0235-4d8f-b7a4-20a3213eb45e", + "execution_count": 27, + "id": "3afe1a90-a360-4d7e-aed1-0e1ae93ee2f1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
organization_namenamebase64_urlcaltrans_districtstatusanalysis_date
1Los Angeles County Metropolitan Transportation...LA Metro Bus ScheduleaHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy...07 - Los Angelesspeedmap_segs_available2024-10-16
2Los Angeles County Metropolitan Transportation...LA Metro Rail ScheduleaHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX3JhaW...07 - Los Angelesspeedmap_segs_available2024-10-16
\n", + "
" + ], + "text/plain": [ + " organization_name name \\\n", + "1 Los Angeles County Metropolitan Transportation... LA Metro Bus Schedule \n", + "2 Los Angeles County Metropolitan Transportation... LA Metro Rail Schedule \n", + "\n", + " base64_url caltrans_district \\\n", + "1 aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy... 07 - Los Angeles \n", + "2 aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX3JhaW... 07 - Los Angeles \n", + "\n", + " status analysis_date \n", + "1 speedmap_segs_available 2024-10-16 \n", + "2 speedmap_segs_available 2024-10-16 " + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "speedmap_index >> filter(_.name.str.contains('LA Metro'))" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "3c990768-aa38-4231-bdfc-710ed7ac61fa", "metadata": {}, "outputs": [], "source": [ - "path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'" + "speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet') >> filter(_.organization_itp_id == itp_id)" ] }, { "cell_type": "code", - "execution_count": 16, - "id": "9302c0ad-1201-4c5a-8267-ec52719cf777", + "execution_count": 12, + "id": "a226dd3e-b1ac-4b78-95a6-0ac92c473cdd", "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
organization_nameorganization_itp_idorganization_source_record_idcaltrans_district_is_currentanalysis_dateschedule_gtfs_dataset_keystatus
42Los Angeles County Metropolitan Transportation...182recPnGkwdpnr8jmHB07 - Los AngelesTrue2024-10-160666caf3ec1ecc96b74f4477ee4bc939speedmap_segs_available
\n", + "
" + ], + "text/plain": [ + " organization_name organization_itp_id \\\n", + "42 Los Angeles County Metropolitan Transportation... 182 \n", + "\n", + " organization_source_record_id caltrans_district _is_current analysis_date \\\n", + "42 recPnGkwdpnr8jmHB 07 - Los Angeles True 2024-10-16 \n", + "\n", + " schedule_gtfs_dataset_key status \n", + "42 0666caf3ec1ecc96b74f4477ee4bc939 speedmap_segs_available " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "speedmap_index" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "516c71f4-f1a7-4477-91c0-cf9b15247ce6", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "speedmap_segs = gpd.read_parquet(path) # aggregated" + "speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet') >> filter(_.organization_itp_id == itp_id)\n", + "path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'\n", + "speedmap_segs = gpd.read_parquet(path) # aggregated\n", + "# speedmap_segs = speedmap_segs >> filter(_.schedule_gtfs_dataset_key == speedmap_index.schedule_gtfs_dataset_key.iloc[0])" ] }, { "cell_type": "code", "execution_count": 17, - "id": "9f3c4c93-a5f5-40d7-9315-63e4fecbd738", + "id": "56ec8386-37ec-43f4-b209-4f77e55e5ddc", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
organization_namecaltrans_district
0Inyo County09 - Bishop
1City of Bishop09 - Bishop
2Bishop Paiute Tribe09 - Bishop
3Timbisha Shoshone Tribe09 - Bishop
4Lone Pine Paiute-Shoshone Tribe09 - Bishop
.........
1284Cambria Community Council05 - San Luis Obispo
1285San Luis Obispo Council of Governments05 - San Luis Obispo
1286San Luis Obispo Regional Transit Authority05 - San Luis Obispo
1287United Cerebral Palsy of San Luis Obispo County05 - San Luis Obispo
1288San Luis Obispo County Community College District05 - San Luis Obispo
\n", + "

1289 rows × 2 columns

\n", + "
" + ], + "text/plain": [ + " organization_name caltrans_district\n", + "0 Inyo County 09 - Bishop\n", + "1 City of Bishop 09 - Bishop\n", + "2 Bishop Paiute Tribe 09 - Bishop\n", + "3 Timbisha Shoshone Tribe 09 - Bishop\n", + "4 Lone Pine Paiute-Shoshone Tribe 09 - Bishop\n", + "... ... ...\n", + "1284 Cambria Community Council 05 - San Luis Obispo\n", + "1285 San Luis Obispo Council of Governments 05 - San Luis Obispo\n", + "1286 San Luis Obispo Regional Transit Authority 05 - San Luis Obispo\n", + "1287 United Cerebral Palsy of San Luis Obispo County 05 - San Luis Obispo\n", + "1288 San Luis Obispo County Community College District 05 - San Luis Obispo\n", + "\n", + "[1289 rows x 2 columns]" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "shared_utils.schedule_rt_utils.filter_dim_county_geography(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "e768174b-aea5-455a-ba26-72182309a2c0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['schedule_gtfs_dataset_key', 'shape_array_key', 'shape_id', 'route_id',\n", + " 'direction_id', 'stop_pair', 'stop_pair_name', 'segment_id',\n", + " 'time_of_day', 'p50_mph', 'n_trips', 'p20_mph', 'p80_mph', 'name',\n", + " 'caltrans_district', 'organization_source_record_id',\n", + " 'organization_name', 'base64_url', 'geometry', 'n_trips_sch',\n", + " 'trips_hr_sch', 'route_short_name'],\n", + " dtype='object')" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "speedmap_segs = speedmap_segs >> filter(_.schedule_gtfs_dataset_key == speedmap_index.schedule_gtfs_dataset_key.iloc[0])" + "speedmap_segs.columns" ] }, { @@ -561,7 +854,7 @@ "id": "04c8da94-c9e6-4261-b6e6-3cd44a4eaa5d", "metadata": {}, "source": [ - "## docs!columns" + "## docs!" 
] }, { diff --git a/ca_transit_speed_maps/build_speedmaps_index.py b/ca_transit_speed_maps/build_speedmaps_index.py deleted file mode 100644 index 20fcc2e0f..000000000 --- a/ca_transit_speed_maps/build_speedmaps_index.py +++ /dev/null @@ -1,68 +0,0 @@ -import os -os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB? -os.environ['USE_PYGEOS'] = '0' - -from siuba import * -import pandas as pd -import geopandas as gpd -import datetime as dt - -from calitp_data_analysis.tables import tbls -from shared_utils import rt_dates, rt_utils - -from segment_speed_utils.project_vars import ( - COMPILED_CACHED_VIEWS, - PROJECT_CRS, - SEGMENT_GCS, -) - -ANALYSIS_DATE = dt.date.fromisoformat(rt_dates.DATES['oct2024']) -PROGRESS_PATH = f'./_rt_progress_{ANALYSIS_DATE}.parquet' - -def build_speedmaps_index(analysis_date: dt.date, how: str = 'new') -> pd.DataFrame: - ''' - An index table for tracking down a given org's schedule/rt feeds. - Note that in limited cases, multiple orgs may share the same datasets - (VCTC combined feeds, SD Airport and SDMTS...) 
- ''' - analysis_dt = dt.datetime.combine(analysis_date, dt.time(0, 0)) - - dim_orgs = (tbls.mart_transit_database.dim_organizations() - >> filter(_._valid_from <= analysis_dt, _._valid_to > analysis_dt) - >> select(_.source_record_id, _.caltrans_district) - ) - - orgs_with_vp = (tbls.mart_transit_database.dim_provider_gtfs_data() - >> filter(_._valid_from <= analysis_dt, _._valid_to > analysis_dt, - _.public_customer_facing_or_regional_subfeed_fixed_route, - _.vehicle_positions_gtfs_dataset_key != None) - >> inner_join(_, dim_orgs, on = {'organization_source_record_id': 'source_record_id'}) - # TODO replace deprecated caltrans_district with via dim_county_geography - >> select(_.organization_itp_id, _.organization_name, _.organization_source_record_id, - _.caltrans_district, _._is_current, _.vehicle_positions_gtfs_dataset_key, - _.schedule_gtfs_dataset_key) - >> collect() - ) - assert (orgs_with_vp >> filter(_.caltrans_district.isna())).empty - orgs_with_vp = orgs_with_vp >> filter(-_.caltrans_district.isna()) - assert not orgs_with_vp.isnull().values.any() - orgs_with_vp['analysis_date'] = analysis_date - orgs_with_vp = orgs_with_vp >> distinct(_.organization_name, - _.organization_itp_id, _.organization_source_record_id, - _.caltrans_district, _._is_current, _.analysis_date, - _.schedule_gtfs_dataset_key - ) - if how == 'new': - speedmap_segs = gpd.read_parquet(f'{SEGMENT_GCS}rollup_singleday/speeds_shape_speedmap_segments_{analysis_date}.parquet') # aggregated - new_ix = (orgs_with_vp >> filter(_.schedule_gtfs_dataset_key.isin(speedmap_segs.schedule_gtfs_dataset_key.unique()))).copy() - new_ix['status'] = 'speedmap_segs_available' - return new_ix - else: - return orgs_with_vp - -if __name__ == "__main__": - - print(f'analysis date from shared_utils/rt_dates: {ANALYSIS_DATE}') - speedmaps_index = build_speedmaps_index(ANALYSIS_DATE, how = 'new') - # speedmaps_index = rt_utils.check_intermediate_data(speedmaps_index) - speedmaps_index.to_parquet(PROGRESS_PATH) \ 
No newline at end of file diff --git a/ca_transit_speed_maps/stage_run_portfolio.py b/ca_transit_speed_maps/stage_run_portfolio.py index 088803c33..e4418096d 100644 --- a/ca_transit_speed_maps/stage_run_portfolio.py +++ b/ca_transit_speed_maps/stage_run_portfolio.py @@ -42,7 +42,7 @@ def make_rt_site_yml(speedmaps_index_joined, chapter_dict['caption'] = f'District {district}' chapter_dict['params'] = {'district': district} chapter_dict['sections'] = \ - [{'itp_id': itp_id} for itp_id in filtered.organization_itp_id.to_list()] + [{'organization_source_record_id': organization_source_record_id} for organization_source_record_id in filtered.organization_source_record_id.to_list()] chapters_list += [chapter_dict] parts_list = [{'chapters': chapters_list}] From d3f8ac5359817f08f73eb4bf8792149d06cc2619 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 00:40:50 +0000 Subject: [PATCH 05/13] wip --- ca_transit_speed_maps/01_new_speedmaps.ipynb | 597 +++---------------- ca_transit_speed_maps/speedmap_utils.py | 91 +++ ca_transit_speed_maps/update_vars_index.py | 45 ++ 3 files changed, 210 insertions(+), 523 deletions(-) create mode 100644 ca_transit_speed_maps/speedmap_utils.py create mode 100644 ca_transit_speed_maps/update_vars_index.py diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index 46f231745..ada43ac8a 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 19, + "execution_count": 47, "id": "46898b5c-b5cc-4096-ab68-8c3e42fab870", "metadata": {}, "outputs": [], @@ -25,6 +25,7 @@ "\n", "import shared_utils\n", "import segment_speed_utils\n", + "import speedmap_utils\n", "\n", "import altair as alt\n", "from calitp_data_analysis.tables import tbls" @@ -42,7 +43,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 32, "id": 
"9f15ed6a-5ab7-4f57-9695-3f762781b74c", "metadata": { "tags": [ @@ -52,12 +53,12 @@ "outputs": [], "source": [ "## parameters cell\n", - "itp_id = 182" + "organization_source_record_id = 'rec5ome04BbA9uf4y'" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 33, "id": "a639fd84-29ca-4678-b568-2b1022701e14", "metadata": {}, "outputs": [], @@ -68,368 +69,126 @@ }, { "cell_type": "code", - "execution_count": 23, - "id": "b4e52208-fedd-41e7-9888-b62212ff69e5", - "metadata": {}, - "outputs": [], - "source": [ - "# speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "d4ccbe96-0c6a-4d04-a94b-1a7c151ad5cf", - "metadata": {}, + "execution_count": 41, + "id": "516c71f4-f1a7-4477-91c0-cf9b15247ce6", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet')" + "%%capture\n", + "path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'\n", + "speedmap_segs = gpd.read_parquet(path, filters=[['organization_source_record_id', '==', organization_source_record_id]]) # aggregated" ] }, { "cell_type": "code", - "execution_count": 27, - "id": "3afe1a90-a360-4d7e-aed1-0e1ae93ee2f1", + "execution_count": 43, + "id": "11303bd3-01bf-4af4-b0fa-63dbc375bd48", "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
organization_namenamebase64_urlcaltrans_districtstatusanalysis_date
1Los Angeles County Metropolitan Transportation...LA Metro Bus ScheduleaHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy...07 - Los Angelesspeedmap_segs_available2024-10-16
2Los Angeles County Metropolitan Transportation...LA Metro Rail ScheduleaHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX3JhaW...07 - Los Angelesspeedmap_segs_available2024-10-16
\n", - "
" - ], - "text/plain": [ - " organization_name name \\\n", - "1 Los Angeles County Metropolitan Transportation... LA Metro Bus Schedule \n", - "2 Los Angeles County Metropolitan Transportation... LA Metro Rail Schedule \n", - "\n", - " base64_url caltrans_district \\\n", - "1 aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX2J1cy... 07 - Los Angeles \n", - "2 aHR0cHM6Ly9naXRsYWIuY29tL0xBQ01UQS9ndGZzX3JhaW... 07 - Los Angeles \n", - "\n", - " status analysis_date \n", - "1 speedmap_segs_available 2024-10-16 \n", - "2 speedmap_segs_available 2024-10-16 " - ] - }, - "execution_count": 27, - "metadata": {}, - "output_type": "execute_result" + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"human_date\": \"October 16 2024 (Wednesday)\"}\n" + ] } ], "source": [ - "speedmap_index >> filter(_.name.str.contains('LA Metro'))" + "%%capture_parameters\n", + "human_date = analysis_date.strftime('%B %d %Y (%A)')\n", + "human_date" ] }, { "cell_type": "code", - "execution_count": 11, - "id": "3c990768-aa38-4231-bdfc-710ed7ac61fa", + "execution_count": 45, + "id": "649d7a14-8b01-4a84-afee-092bd3febbb8", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "{\"organization_name\": \"City of Culver City\"}\n" + ] + } + ], "source": [ - "speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet') >> filter(_.organization_itp_id == itp_id)" + "%%capture_parameters\n", + "organization_name = speedmap_segs.organization_name.iloc[0]\n", + "organization_name" ] }, { "cell_type": "code", - "execution_count": 12, - "id": "a226dd3e-b1ac-4b78-95a6-0ac92c473cdd", + "execution_count": 56, + "id": "aebbea5a-8064-4845-9181-ac2a18f12d20", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
organization_nameorganization_itp_idorganization_source_record_idcaltrans_district_is_currentanalysis_dateschedule_gtfs_dataset_keystatus
42Los Angeles County Metropolitan Transportation...182recPnGkwdpnr8jmHB07 - Los AngelesTrue2024-10-160666caf3ec1ecc96b74f4477ee4bc939speedmap_segs_available
\n", - "
" - ], "text/plain": [ - " organization_name organization_itp_id \\\n", - "42 Los Angeles County Metropolitan Transportation... 182 \n", - "\n", - " organization_source_record_id caltrans_district _is_current analysis_date \\\n", - "42 recPnGkwdpnr8jmHB 07 - Los Angeles True 2024-10-16 \n", - "\n", - " schedule_gtfs_dataset_key status \n", - "42 0666caf3ec1ecc96b74f4477ee4bc939 speedmap_segs_available " + "" ] }, - "execution_count": 12, + "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "speedmap_index" + "import importlib\n", + "importlib.reload(speedmap_utils)" ] }, { "cell_type": "code", - "execution_count": 13, - "id": "516c71f4-f1a7-4477-91c0-cf9b15247ce6", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet') >> filter(_.organization_itp_id == itp_id)\n", - "path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'\n", - "speedmap_segs = gpd.read_parquet(path) # aggregated\n", - "# speedmap_segs = speedmap_segs >> filter(_.schedule_gtfs_dataset_key == speedmap_index.schedule_gtfs_dataset_key.iloc[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "56ec8386-37ec-43f4-b209-4f77e55e5ddc", + "execution_count": 57, + "id": "22e9db57-91b0-43a4-b7df-797058d14e7f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
organization_namecaltrans_district
0Inyo County09 - Bishop
1City of Bishop09 - Bishop
2Bishop Paiute Tribe09 - Bishop
3Timbisha Shoshone Tribe09 - Bishop
4Lone Pine Paiute-Shoshone Tribe09 - Bishop
.........
1284Cambria Community Council05 - San Luis Obispo
1285San Luis Obispo Council of Governments05 - San Luis Obispo
1286San Luis Obispo Regional Transit Authority05 - San Luis Obispo
1287United Cerebral Palsy of San Luis Obispo County05 - San Luis Obispo
1288San Luis Obispo County Community College District05 - San Luis Obispo
\n", - "

1289 rows × 2 columns

\n", - "
" - ], - "text/plain": [ - " organization_name caltrans_district\n", - "0 Inyo County 09 - Bishop\n", - "1 City of Bishop 09 - Bishop\n", - "2 Bishop Paiute Tribe 09 - Bishop\n", - "3 Timbisha Shoshone Tribe 09 - Bishop\n", - "4 Lone Pine Paiute-Shoshone Tribe 09 - Bishop\n", - "... ... ...\n", - "1284 Cambria Community Council 05 - San Luis Obispo\n", - "1285 San Luis Obispo Council of Governments 05 - San Luis Obispo\n", - "1286 San Luis Obispo Regional Transit Authority 05 - San Luis Obispo\n", - "1287 United Cerebral Palsy of San Luis Obispo County 05 - San Luis Obispo\n", - "1288 San Luis Obispo County Community College District 05 - San Luis Obispo\n", - "\n", - "[1289 rows x 2 columns]" - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "shared_utils.schedule_rt_utils.filter_dim_county_geography(analysis_date)" + "speedmap_segs = speedmap_utils.prepare_segment_gdf(speedmap_segs)" ] }, { "cell_type": "code", - "execution_count": 18, - "id": "e768174b-aea5-455a-ba26-72182309a2c0", + "execution_count": 63, + "id": "f4eb76cb-ce2f-496b-a6aa-f6081bacc89f", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['schedule_gtfs_dataset_key', 'shape_array_key', 'shape_id', 'route_id',\n", - " 'direction_id', 'stop_pair', 'stop_pair_name', 'segment_id',\n", - " 'time_of_day', 'p50_mph', 'n_trips', 'p20_mph', 'p80_mph', 'name',\n", - " 'caltrans_district', 'organization_source_record_id',\n", - " 'organization_name', 'base64_url', 'geometry', 'n_trips_sch',\n", - " 'trips_hr_sch', 'route_short_name'],\n", - " dtype='object')" - ] - }, - "execution_count": 18, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "speedmap_segs.columns" + "shn = gpd.read_parquet(shared_utils.rt_utils.SHN_PATH)\n", + "this_shn = shn >> filter(_.District.isin([int(x[:2]) for x in speedmap_segs.caltrans_district.unique()]))" ] }, { "cell_type": "code", - 
"execution_count": 18, + "execution_count": 46, "id": "edc84eda-ce2b-481f-ac78-6b7209cb25f4", "metadata": {}, "outputs": [], "source": [ - "# TODO to utils\n", - "def time_period_and_arrowize(gdf, time_period):\n", - " \n", - " gdf = gdf >> filter(_.time_of_day == time_period)\n", - " gdf = gdf.to_crs(calitp_data_analysis.geography_utils.CA_NAD83Albers)\n", + "# # TODO to utils\n", + "# def prepare_segment_gdf(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", + "# '''\n", + "# Project segment speeds gdf and add column for rich speedmap display\n", + "# '''\n", + "# gdf = gdf.to_crs(calitp_data_analysis.geography_utils.CA_NAD83Albers)\n", + "# # TODO move upstream and investigate\n", + "# gdf['fast_slow_ratio'] = gdf.p80_mph / gdf.p20_mph\n", + "# gdf.fast_slow_ratio = gdf.fast_slow_ratio.replace(np.inf, 3)\n", + "# gdf = gdf.round(1)\n", "\n", - " ## shift to right side of road to display direction\n", - " gdf.geometry = gdf.geometry.apply(shared_utils.rt_utils.try_parallel)\n", - " gdf = gdf.apply(shared_utils.rt_utils.arrowize_by_frequency, axis=1, frequency_col='trips_hr_sch')\n", + "# ## shift to right side of road to display direction\n", + "# gdf.geometry = gdf.geometry.apply(shared_utils.rt_utils.try_parallel)\n", + "# gdf = gdf.apply(shared_utils.rt_utils.arrowize_by_frequency, axis=1, frequency_col='trips_hr_sch')\n", "\n", - " gdf = gdf >> arrange(_.trips_hr_sch)\n", + "# gdf = gdf >> arrange(_.trips_hr_sch)\n", "\n", - " return gdf" + "# return gdf" ] }, { @@ -452,200 +211,6 @@ "period_test = time_period_and_arrowize(speedmap_segs, time_of_day)" ] }, - { - "cell_type": "code", - "execution_count": 51, - "id": "cedc030e-8d19-4a0b-b3a5-5a3d6c0dce23", - "metadata": {}, - "outputs": [], - "source": [ - "cmap = shared_utils.rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE\n", - "url = shared_utils.rt_utils.ACCESS_SPEEDMAP_LEGEND_URL" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "b6bb6f3c-c360-4974-8e72-34d13ef267a2", - "metadata": {}, - "outputs": 
[], - "source": [ - "period_test['fast_slow_ratio'] = period_test.p80_mph / period_test.p20_mph" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "1c35a23b-8254-438a-8864-2e440598a5bd", - "metadata": {}, - "outputs": [], - "source": [ - "period_test.fast_slow_ratio = period_test.fast_slow_ratio.replace(np.inf, 3)" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "6a846d59-2d6b-4f22-81c7-d6acf7140232", - "metadata": {}, - "outputs": [], - "source": [ - "period_test = period_test.round(1)" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "87c7db4f-3da9-4ba4-915b-88432a841b39", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
schedule_gtfs_dataset_keyshape_array_keyshape_idroute_iddirection_idstop_pairstop_pair_namesegment_idtime_of_dayp50_mph...namecaltrans_districtorganization_source_record_idorganization_namebase64_urlgeometryn_trips_schtrips_hr_schroute_short_namefast_slow_ratio
1080015d67d5b75b5cf2b710bbadadfb75f59ce7f6d9232b32ee1401935c947a1e113170.040097__40095Bridgeway & Napa St__Bridgeway & Easterby St40097-40095-1AM Peak13.1...Bay Area 511 Marin Schedule04 - OaklandrecNOb7pqBRlQVG5eMarin County Transit DistrictaHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...POLYGON ((-218717.778 -14345.020, -218718.216 ...10.3171.0
1081015d67d5b75b5cf2b710bbadadfb75f59ce7f6d9232b32ee1401935c947a1e113170.040095__40092Bridgeway & Easterby St__Bridgeway & Nevada St40095-40092-1AM Peak22.8...Bay Area 511 Marin Schedule04 - OaklandrecNOb7pqBRlQVG5eMarin County Transit DistrictaHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW...POLYGON ((-219100.744 -14266.315, -219102.537 ...10.3171.0
\n", - "

2 rows × 23 columns

\n", - "
" - ], - "text/plain": [ - " schedule_gtfs_dataset_key shape_array_key \\\n", - "1080 015d67d5b75b5cf2b710bbadadfb75f5 9ce7f6d9232b32ee1401935c947a1e11 \n", - "1081 015d67d5b75b5cf2b710bbadadfb75f5 9ce7f6d9232b32ee1401935c947a1e11 \n", - "\n", - " shape_id route_id direction_id stop_pair \\\n", - "1080 3 17 0.0 40097__40095 \n", - "1081 3 17 0.0 40095__40092 \n", - "\n", - " stop_pair_name segment_id \\\n", - "1080 Bridgeway & Napa St__Bridgeway & Easterby St 40097-40095-1 \n", - "1081 Bridgeway & Easterby St__Bridgeway & Nevada St 40095-40092-1 \n", - "\n", - " time_of_day p50_mph ... name \\\n", - "1080 AM Peak 13.1 ... Bay Area 511 Marin Schedule \n", - "1081 AM Peak 22.8 ... Bay Area 511 Marin Schedule \n", - "\n", - " caltrans_district organization_source_record_id \\\n", - "1080 04 - Oakland recNOb7pqBRlQVG5e \n", - "1081 04 - Oakland recNOb7pqBRlQVG5e \n", - "\n", - " organization_name \\\n", - "1080 Marin County Transit District \n", - "1081 Marin County Transit District \n", - "\n", - " base64_url \\\n", - "1080 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... \n", - "1081 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZW... \n", - "\n", - " geometry n_trips_sch \\\n", - "1080 POLYGON ((-218717.778 -14345.020, -218718.216 ... 1 \n", - "1081 POLYGON ((-219100.744 -14266.315, -219102.537 ... 
1 \n", - "\n", - " trips_hr_sch route_short_name fast_slow_ratio \n", - "1080 0.3 17 1.0 \n", - "1081 0.3 17 1.0 \n", - "\n", - "[2 rows x 23 columns]" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "period_test >> head(2)" - ] - }, { "cell_type": "markdown", "id": "4e4c26e4-afca-48e1-b9fc-53393858a414", @@ -654,31 +219,17 @@ "## export map" ] }, - { - "cell_type": "code", - "execution_count": 71, - "id": "d483e182-b36c-487e-a194-7a9e48d5b32f", - "metadata": {}, - "outputs": [], - "source": [ - "time_of_day_lower = time_of_day.lower().replace(' ', '_')" - ] - }, { "cell_type": "code", "execution_count": 72, - "id": "51e108ee-19f5-4266-8848-fb5ac57f5108", + "id": "402ffc16-eb23-42e1-ac38-b1468a65f438", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "writing to calitp-map-tiles/testing/194_am_peak_new.geojson.gz\n" - ] - } - ], + "outputs": [], "source": [ + "time_of_day_lower = time_of_day.lower().replace(' ', '_')\n", + "\n", + "gdf = gdf >> filter(_.time_of_day == time_period)\n", + "\n", "# TODO clone render method from RtFilterMapper, to utils\n", "speedmap_state = shared_utils.rt_utils.set_state_export(\n", " period_test, filename=f'{itp_id}_{time_of_day_lower}_new', map_type='speedmap',\n", diff --git a/ca_transit_speed_maps/speedmap_utils.py b/ca_transit_speed_maps/speedmap_utils.py new file mode 100644 index 000000000..ae68f7d3c --- /dev/null +++ b/ca_transit_speed_maps/speedmap_utils.py @@ -0,0 +1,91 @@ +import pandas as pd +from siuba import * +import numpy as np +import geopandas as gpd +import update_vars_index +from shared_utils import rt_utils +from calitp_data_analysis.geography_utils import CA_NAD83Albers + +catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data') + +#def read_detail_segments(itp_id: int) -> gpd.GeoDataFrame: +# ''' +# Read detailed speedmap segments (all times of day including interpolated segs) +# for a 
given itp_id (legacy compatability, may switch to an alternate identifer...) +# ''' +# speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet') >> filter(_.organization_itp_id == itp_id) +# path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet' +# speedmap_segs = gpd.read_parquet(path) # aggregated +# speedmap_segs = speedmap_segs >> filter(_.schedule_gtfs_dataset_key == speedmap_index.schedule_gtfs_dataset_key.iloc[0]) +# +# return speedmap_segs + +def prepare_segment_gdf(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: + ''' + Project segment speeds gdf and add column for rich speedmap display + ''' + gdf = gdf.to_crs(CA_NAD83Albers) + # TODO move upstream and investigate + gdf['fast_slow_ratio'] = gdf.p80_mph / gdf.p20_mph + gdf.fast_slow_ratio = gdf.fast_slow_ratio.replace(np.inf, 3) + gdf = gdf.round(1) + + ## shift to right side of road to display direction + gdf.geometry = gdf.geometry.apply(rt_utils.try_parallel) + gdf = gdf.apply(rt_utils.arrowize_by_frequency, axis=1, frequency_col='trips_hr_sch') + + gdf = gdf >> arrange(_.trips_hr_sch) + + return gdf + +def render_spa_link(spa_map_url: str, text='Full Map') -> None: + + display(Markdown(f'Open {text} in New Tab')) + return + +def display_spa_map(spa_map_url: str, width: int=1000, height: int=650) -> None: + ''' + Display map from external simple web app in the notebook/JupyterBook context via an IFrame. 
+ Will show most recent map set using self.map_gz_export + Width/height defaults are current best option for JupyterBook, don't change for portfolio use + width, height: int (pixels) + ''' + i = IFrame(spa_map_url, width=width, height=height) + display(i) + return + +def map_shn(district_gdf: gpd.GeoDataFrame): + dist = district_gdf.District.iloc[0] + filename = f'{dist}_SHN' + title = f"D{dist} State Highway Network" + + export_result = set_state_export(district_gdf, subfolder = update_vars_index.GEOJSON_SUBFOLDER, filename = filename, + map_type = 'state_highway_network', map_title = title) + spa_map_state = export_result['state_dict'] + return spa_map_state + +#from shared_utils.rt_utils import ACCESS_ZERO_THIRTY_COLORSCALE, , , +def map_time_period(district_gdf: gpd.GeoDataFrame, speedmap_segs: gpd.GeoDataFrame, time_of_day: str, + map_type: str): + ''' + Always add State Highway Network first. + ''' + time_of_day_lower = time_of_day.lower().replace(' ', '_') + gdf = gdf >> filter(_.time_of_day == time_of_day) + color_col = {'new_speedmap': 'p20_mph', 'new_speed_variation': 'fast_slow_ratio'}[map_type] + shn_state = map_shn(district_gdf) + filename = f"{speedmap_segs.organization_source_record_id.iloc[0]}_{map_type}" + + if map_type == 'new_speedmap': + cmap = rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE + legend_url = rt_utils.ACCESS_SPEEDMAP_LEGEND_URL + elif map_type == 'new_speed_variation' + cmap = rt_utils.VARIANCE_FIXED_COLORSCALE + legend_url = rt_utils.VARIANCE_LEGEND_URL + + speedmap_state = rt_utils.set_state_export( + period_test, subfolder = update_vars_index.GEOJSON_SUBFOLDER, filename=filename, + map_type=map_type, + color_col=color_col, cmap=cmap, legend_url=legend_url, + cache_seconds=0, map_title=f'Speedmap Segs {time_of_day} {analysis_date}', + existing_state = shn_state) \ No newline at end of file diff --git a/ca_transit_speed_maps/update_vars_index.py b/ca_transit_speed_maps/update_vars_index.py new file mode 100644 index 000000000..cb1f4f61a 
--- /dev/null +++ b/ca_transit_speed_maps/update_vars_index.py @@ -0,0 +1,45 @@ +import os +os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB? +os.environ['USE_PYGEOS'] = '0' + +from siuba import * +import pandas as pd +import geopandas as gpd +import datetime as dt + +from calitp_data_analysis.tables import tbls +from shared_utils import rt_dates, rt_utils, catalog_utils, schedule_rt_utils + +from segment_speed_utils.project_vars import ( + COMPILED_CACHED_VIEWS, + PROJECT_CRS, + SEGMENT_GCS, +) + +catalog = catalog_utils.get_catalog('gtfs_analytics_data') + +ANALYSIS_DATE = dt.date.fromisoformat(rt_dates.DATES['oct2024']) +PROGRESS_PATH = f'./_rt_progress_{ANALYSIS_DATE}.parquet' +GEOJSON_SUBFOLDER = f'segment_speeds_{ANALYSIS_DATE}/' + +def build_speedmaps_index(analysis_date: dt.date) -> pd.DataFrame: + ''' + An index table for tracking down a given org's schedule/rt feeds. + Note that in limited cases, multiple orgs may share the same datasets + (VCTC combined feeds, SD Airport and SDMTS...) 
+ ''' + path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet' + speedmap_segs = gpd.read_parquet(path) >> distinct(_.organization_name, _.organization_source_record_id, _.name, + _.base64_url) + districts = schedule_rt_utils.filter_dim_county_geography(analysis_date) + new_ix = speedmap_segs >> inner_join(_, districts, on = 'organization_name') + new_ix['status'] = 'speedmap_segs_available' + new_ix['analysis_date'] = analysis_date + return new_ix + +if __name__ == "__main__": + + print(f'analysis date from shared_utils/rt_dates: {ANALYSIS_DATE}') + speedmaps_index = build_speedmaps_index(ANALYSIS_DATE, how = 'new') + # speedmaps_index = rt_utils.check_intermediate_data(speedmaps_index) + speedmaps_index.to_parquet(PROGRESS_PATH) \ No newline at end of file From 7c1d2979e598ebe6d1e7916b234f9f377e84dd16 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 00:54:59 +0000 Subject: [PATCH 06/13] chart function --- ca_transit_speed_maps/01_new_speedmaps.ipynb | 140 ++++++++++++++----- ca_transit_speed_maps/speedmap_utils.py | 67 ++++++--- 2 files changed, 153 insertions(+), 54 deletions(-) diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index ada43ac8a..1131af478 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -123,7 +123,7 @@ }, { "cell_type": "code", - "execution_count": 56, + "execution_count": 66, "id": "aebbea5a-8064-4845-9181-ac2a18f12d20", "metadata": {}, "outputs": [ @@ -133,7 +133,7 @@ "" ] }, - "execution_count": 56, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } @@ -410,46 +410,22 @@ }, { "cell_type": "code", - "execution_count": 28, - "id": "464bfe77-8c1a-4a40-bd53-940b7630bf13", - "metadata": {}, - "outputs": [], - "source": [ - "domain = cmap.index" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": 
"16cbe969-8d8f-44a9-b48c-8febaa336620", - "metadata": {}, - "outputs": [], - "source": [ - "range_ = [cmap.rgb_hex_str(i) for i in cmap.index]" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "69ab8ef9-40d7-431a-bb0b-fff8467165b7", + "execution_count": 31, + "id": "0565942e-a76c-4c8d-afe7-7d0f9c61d5f0", "metadata": {}, "outputs": [], "source": [ + "domain = cmap.index\n", + "\n", + "range_ = [cmap.rgb_hex_str(i) for i in cmap.index]\n", + "\n", "df = speedmap_segs[['time_of_day', 'p50_mph', 'p20_mph', 'p80_mph']]\n", "df = df >> group_by(_.time_of_day) >> summarize(p50_mph = _.p50_mph.quantile(.5),\n", " p20_mph = _.p20_mph.quantile(.5),\n", " p80_mph = _.p80_mph.quantile(.5),)\n", "df['p50 - p20'] = -(df['p50_mph'] - df['p20_mph'])\n", - "df['p80 - p50'] = df['p80_mph'] - df['p50_mph']" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "8f0b7f7a-0150-40c1-8d3c-458955e4ba70", - "metadata": {}, - "outputs": [], - "source": [ + "df['p80 - p50'] = df['p80_mph'] - df['p50_mph']\n", + "\n", "error_bars = alt.Chart(df).mark_errorbar(thickness=5, color='gray', opacity=.6).encode(\n", " y = alt.Y(\"p50_mph:Q\", title='Segment Speed (mph): 20, 50, 80%ile'),\n", " yError=(\"p50 - p20:Q\"),\n", @@ -561,6 +537,102 @@ "source": [ "chart" ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "40979f68-b80b-4879-8892-3271a74efc8b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.LayerChart(...)" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "speedmap_utils.chart_speeds_by_time_period(speedmap_segs)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cb6d466d-6d8e-4144-b1b8-69526553f347", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { diff --git a/ca_transit_speed_maps/speedmap_utils.py b/ca_transit_speed_maps/speedmap_utils.py index ae68f7d3c..6dbc5578c 100644 --- a/ca_transit_speed_maps/speedmap_utils.py +++ b/ca_transit_speed_maps/speedmap_utils.py @@ -5,21 +5,12 @@ import update_vars_index from shared_utils import rt_utils from calitp_data_analysis.geography_utils import CA_NAD83Albers +import datetime as dt +import altair as alt +from IPython.display import display, Markdown, IFrame catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data') -#def read_detail_segments(itp_id: int) -> gpd.GeoDataFrame: -# ''' -# Read detailed speedmap segments (all times of day including interpolated segs) -# for a given itp_id (legacy compatability, may switch to an alternate identifer...) 
-# ''' -# speedmap_index = pd.read_parquet(f'_rt_progress_{analysis_date}.parquet') >> filter(_.organization_itp_id == itp_id) -# path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet' -# speedmap_segs = gpd.read_parquet(path) # aggregated -# speedmap_segs = speedmap_segs >> filter(_.schedule_gtfs_dataset_key == speedmap_index.schedule_gtfs_dataset_key.iloc[0]) -# -# return speedmap_segs - def prepare_segment_gdf(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: ''' Project segment speeds gdf and add column for rich speedmap display @@ -64,9 +55,8 @@ def map_shn(district_gdf: gpd.GeoDataFrame): spa_map_state = export_result['state_dict'] return spa_map_state -#from shared_utils.rt_utils import ACCESS_ZERO_THIRTY_COLORSCALE, , , -def map_time_period(district_gdf: gpd.GeoDataFrame, speedmap_segs: gpd.GeoDataFrame, time_of_day: str, - map_type: str): +def map_time_period(district_gdf: gpd.GeoDataFrame, speedmap_segs: gpd.GeoDataFrame, analysis_date: dt.date, + time_of_day: str, map_type: str): ''' Always add State Highway Network first. 
''' @@ -74,18 +64,55 @@ def map_time_period(district_gdf: gpd.GeoDataFrame, speedmap_segs: gpd.GeoDataFr gdf = gdf >> filter(_.time_of_day == time_of_day) color_col = {'new_speedmap': 'p20_mph', 'new_speed_variation': 'fast_slow_ratio'}[map_type] shn_state = map_shn(district_gdf) - filename = f"{speedmap_segs.organization_source_record_id.iloc[0]}_{map_type}" + display_date = analysis_date.strftime('%B %d %Y (%A)') + filename = f"{analysis_date}_{speedmap_segs.organization_source_record_id.iloc[0]}_{map_type}" + title = f"{speedmap_segs.organization_name.iloc[0]} {display_date} {time_of_day}" if map_type == 'new_speedmap': cmap = rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE legend_url = rt_utils.ACCESS_SPEEDMAP_LEGEND_URL - elif map_type == 'new_speed_variation' + elif map_type == 'new_speed_variation': cmap = rt_utils.VARIANCE_FIXED_COLORSCALE legend_url = rt_utils.VARIANCE_LEGEND_URL - speedmap_state = rt_utils.set_state_export( + export_result = rt_utils.set_state_export( period_test, subfolder = update_vars_index.GEOJSON_SUBFOLDER, filename=filename, map_type=map_type, color_col=color_col, cmap=cmap, legend_url=legend_url, - cache_seconds=0, map_title=f'Speedmap Segs {time_of_day} {analysis_date}', - existing_state = shn_state) \ No newline at end of file + map_title=title, + existing_state = shn_state) + + spa_link = export_result['spa_link'] + return spa_link + +def chart_speeds_by_time_period(speedmap_segs: gpd.GeoDataFrame) -> None: + cmap = rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE + domain = cmap.index + range_ = [cmap.rgb_hex_str(i) for i in cmap.index] + df = speedmap_segs[['time_of_day', 'p50_mph', 'p20_mph', 'p80_mph']] + df = df >> group_by(_.time_of_day) >> summarize(p50_mph = _.p50_mph.quantile(.5), + p20_mph = _.p20_mph.quantile(.5), + p80_mph = _.p80_mph.quantile(.5),) + df['p50 - p20'] = -(df['p50_mph'] - df['p20_mph']) + df['p80 - p50'] = df['p80_mph'] - df['p50_mph'] + error_bars = alt.Chart(df).mark_errorbar(thickness=5, color='gray', 
opacity=.6).encode( + y = alt.Y("p50_mph:Q", title='Segment Speed (mph): 20, 50, 80%ile'), + yError=("p50 - p20:Q"), + yError2=("p80 - p50:Q"), + x = alt.X("time_of_day:N", sort=['Early AM', 'AM Peak', 'Midday', 'PM Peak', 'Evening', 'Owl']), + tooltip=[alt.Tooltip('p20_mph:Q', title="p20 mph"), alt.Tooltip('p50_mph:Q', title="p50 mph"), + alt.Tooltip('p80_mph:Q', title="p80 mph")] + ).properties(width=400) + points = alt.Chart(df).mark_point(filled=True, size = 300, opacity = 1).encode( + alt.Y("p50_mph:Q"), + alt.X("time_of_day:N", sort=['Early AM', 'AM Peak', 'Midday', 'PM Peak', 'Evening', 'Owl'], + title='Time of Day'), + color=alt.Color('p50_mph', title='Median Segment Speed (mph)').scale(domain=domain, range = range_), + tooltip=[alt.Tooltip('p50_mph:Q', title="p50 mph")], + ) + chart = error_bars + points + chart = chart.configure(axis = alt.AxisConfig(labelFontSize=14, titleFontSize=18), + legend = alt.LegendConfig(titleFontSize=14, labelFontSize=14, titleLimit=250, + titleOrient='left', labelOffset=100)) + display(chart) + return \ No newline at end of file From aaa9b98b53c0164a346856c59e4b28a360fc7ab7 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 01:17:55 +0000 Subject: [PATCH 07/13] relatively clean parameterized nb --- ca_transit_speed_maps/01_new_speedmaps.ipynb | 569 +++---------------- ca_transit_speed_maps/speedmap_utils.py | 18 +- 2 files changed, 77 insertions(+), 510 deletions(-) diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index 1131af478..4fb4aa1f7 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 47, + "execution_count": null, "id": "46898b5c-b5cc-4096-ab68-8c3e42fab870", "metadata": {}, "outputs": [], @@ -10,40 +10,21 @@ "%%capture\n", "import warnings\n", "warnings.filterwarnings('ignore')\n", - "\n", "import 
calitp_data_analysis.magics\n", - "from calitp_data_analysis import calitp_color_palette\n", - "\n", - "# from rt_analysis import rt_filter_map_plot\n", "import update_vars_index\n", - "\n", - "from IPython.display import display, Markdown\n", "import pandas as pd\n", - "import numpy as np\n", "import geopandas as gpd\n", "from siuba import *\n", - "\n", - "import shared_utils\n", - "import segment_speed_utils\n", + "from shared_utils import catalog_utils, rt_utils\n", "import speedmap_utils\n", "\n", - "import altair as alt\n", - "from calitp_data_analysis.tables import tbls" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "cf3d8814-6f22-4a22-8b03-429ebb63a397", - "metadata": {}, - "outputs": [], - "source": [ - "catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data')" + "analysis_date = update_vars_index.ANALYSIS_DATE\n", + "catalog = catalog_utils.get_catalog('gtfs_analytics_data')" ] }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "id": "9f15ed6a-5ab7-4f57-9695-3f762781b74c", "metadata": { "tags": [ @@ -58,43 +39,21 @@ }, { "cell_type": "code", - "execution_count": 33, - "id": "a639fd84-29ca-4678-b568-2b1022701e14", + "execution_count": null, + "id": "23f682c8-187e-4586-a0b2-00dd7569e0c4", "metadata": {}, "outputs": [], - "source": [ - "analysis_date = update_vars_index.ANALYSIS_DATE\n", - "import datetime as dt" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "516c71f4-f1a7-4477-91c0-cf9b15247ce6", - "metadata": { - "tags": [] - }, - "outputs": [], "source": [ "%%capture\n", - "path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{analysis_date}.parquet'\n", - "speedmap_segs = gpd.read_parquet(path, filters=[['organization_source_record_id', '==', organization_source_record_id]]) # aggregated" + "speedmap_segs, this_shn = speedmap_utils.read_segments_shn(organization_source_record_id=organization_source_record_id)" ] }, { 
"cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "11303bd3-01bf-4af4-b0fa-63dbc375bd48", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"human_date\": \"October 16 2024 (Wednesday)\"}\n" - ] - } - ], + "outputs": [], "source": [ "%%capture_parameters\n", "human_date = analysis_date.strftime('%B %d %Y (%A)')\n", @@ -103,18 +62,10 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": null, "id": "649d7a14-8b01-4a84-afee-092bd3febbb8", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{\"organization_name\": \"City of Culver City\"}\n" - ] - } - ], + "outputs": [], "source": [ "%%capture_parameters\n", "organization_name = speedmap_segs.organization_name.iloc[0]\n", @@ -123,516 +74,124 @@ }, { "cell_type": "code", - "execution_count": 66, + "execution_count": null, "id": "aebbea5a-8064-4845-9181-ac2a18f12d20", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 66, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import importlib\n", - "importlib.reload(speedmap_utils)" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "22e9db57-91b0-43a4-b7df-797058d14e7f", - "metadata": {}, "outputs": [], "source": [ - "speedmap_segs = speedmap_utils.prepare_segment_gdf(speedmap_segs)" + "import importlib\n", + "importlib.reload(speedmap_utils)\n", + "importlib.reload(update_vars_index)" ] }, { - "cell_type": "code", - "execution_count": 63, - "id": "f4eb76cb-ce2f-496b-a6aa-f6081bacc89f", + "cell_type": "markdown", + "id": "d0b9c978-e6a6-4646-a2c0-e3a603f1dfc3", "metadata": {}, - "outputs": [], "source": [ - "shn = gpd.read_parquet(shared_utils.rt_utils.SHN_PATH)\n", - "this_shn = shn >> filter(_.District.isin([int(x[:2]) for x in speedmap_segs.caltrans_district.unique()]))" + "# {organization_name}" ] }, { - "cell_type": "code", - 
"execution_count": 46, - "id": "edc84eda-ce2b-481f-ac78-6b7209cb25f4", - "metadata": {}, - "outputs": [], + "cell_type": "markdown", + "id": "9470c739-9b57-4a80-86fe-ca62844a9331", + "metadata": { + "tags": [] + }, "source": [ - "# # TODO to utils\n", - "# def prepare_segment_gdf(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:\n", - "# '''\n", - "# Project segment speeds gdf and add column for rich speedmap display\n", - "# '''\n", - "# gdf = gdf.to_crs(calitp_data_analysis.geography_utils.CA_NAD83Albers)\n", - "# # TODO move upstream and investigate\n", - "# gdf['fast_slow_ratio'] = gdf.p80_mph / gdf.p20_mph\n", - "# gdf.fast_slow_ratio = gdf.fast_slow_ratio.replace(np.inf, 3)\n", - "# gdf = gdf.round(1)\n", - "\n", - "# ## shift to right side of road to display direction\n", - "# gdf.geometry = gdf.geometry.apply(shared_utils.rt_utils.try_parallel)\n", - "# gdf = gdf.apply(shared_utils.rt_utils.arrowize_by_frequency, axis=1, frequency_col='trips_hr_sch')\n", + "## About These Maps:\n", "\n", - "# gdf = gdf >> arrange(_.trips_hr_sch)\n", - "\n", - "# return gdf" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "3a9fa4ff-51a4-4e8d-b1c1-a8568025746a", - "metadata": {}, - "outputs": [], - "source": [ - "time_of_day = 'AM Peak'" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "6af2c7fd-b769-4fbf-bbcf-75080972cc67", - "metadata": {}, - "outputs": [], - "source": [ - "period_test = time_period_and_arrowize(speedmap_segs, time_of_day)" + "* Each map shows bus (and rail, if applicable) speeds for {organization_name}, with a map each for the morning peak, midday, and afternoon peak periods on {human_date}.\n", + "* On the map, routes are split into segments corresponding to the distance between two stops, allowing you to focus on specific portions of the route experiencing a slowdown.\n", + "* Route segments are arrow-shaped to indicate direction of travel. 
Thicker segments indicate more frequent transit routes.\n", + "* State Highway Network routes are outlined in gray.\n", + "* Hover over, a segment with your mouse to see the exact speed, route name, and transit service frequency in that segment. \n", + " * Higher-frequency routes (3+ trips per hour) are especially important, since those slowdowns correspond to more vehicles, and riders, delayed in traffic." ] }, { "cell_type": "markdown", - "id": "4e4c26e4-afca-48e1-b9fc-53393858a414", - "metadata": {}, + "id": "db37b33a-6e9f-4a31-9b83-4a2a5a078241", + "metadata": { + "tags": [] + }, "source": [ - "## export map" + "## AM Peak" ] }, { "cell_type": "code", - "execution_count": 72, - "id": "402ffc16-eb23-42e1-ac38-b1468a65f438", + "execution_count": null, + "id": "0a6d0609-3517-46be-a08f-0d04129a62ff", "metadata": {}, "outputs": [], "source": [ - "time_of_day_lower = time_of_day.lower().replace(' ', '_')\n", - "\n", - "gdf = gdf >> filter(_.time_of_day == time_period)\n", - "\n", - "# TODO clone render method from RtFilterMapper, to utils\n", - "speedmap_state = shared_utils.rt_utils.set_state_export(\n", - " period_test, filename=f'{itp_id}_{time_of_day_lower}_new', map_type='speedmap',\n", - " color_col='p20_mph', cmap=cmap, legend_url=url,\n", - " cache_seconds=0, map_title=f'Speedmap Segs {time_of_day} {analysis_date}')" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "id": "281c341b-c3cc-48b3-b8d1-e1a698f3d7f4", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'state_dict': {'name': 'null',\n", - " 'layers': [{'name': 'Speedmap Segs AM Peak 2024-10-16',\n", - " 'url': 'https://storage.googleapis.com/calitp-map-tiles/testing/194_am_peak_new.geojson.gz',\n", - " 'properties': {'stroked': False,\n", - " 'highlight_saturation_multiplier': 0.5,\n", - " 'tooltip_speed_key': 'p20_mph'},\n", - " 'type': 'speedmap'}],\n", - " 'lat_lon': (37.98273520932856, -122.54536343118872),\n", - " 'zoom': 13,\n", - " 'legend_url': 
'https://storage.googleapis.com/calitp-map-tiles/speeds_legend_color_access.svg'},\n", - " 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJTcGVlZG1hcCBTZWdzIEFNIFBlYWsgMjAyNC0xMC0xNiIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvdGVzdGluZy8xOTRfYW1fcGVha19uZXcuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41LCAidG9vbHRpcF9zcGVlZF9rZXkiOiAicDIwX21waCJ9LCAidHlwZSI6ICJzcGVlZG1hcCJ9XSwgImxhdF9sb24iOiBbMzcuOTgyNzM1MjA5MzI4NTYsIC0xMjIuNTQ1MzYzNDMxMTg4NzJdLCAiem9vbSI6IDEzLCAibGVnZW5kX3VybCI6ICJodHRwczovL3N0b3JhZ2UuZ29vZ2xlYXBpcy5jb20vY2FsaXRwLW1hcC10aWxlcy9zcGVlZHNfbGVnZW5kX2NvbG9yX2FjY2Vzcy5zdmcifQ=='}" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "speedmap_state" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "126c190b-ea97-4971-8c62-5ee2f2a36f61", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "writing to calitp-map-tiles/testing/194_am_peak_new.geojson.gz\n" - ] - } - ], - "source": [ - "# TODO clone render method from RtFilterMapper, to utils\n", - "new_speedmap_state = shared_utils.rt_utils.set_state_export(\n", - " period_test, filename=f'{itp_id}_{time_of_day_lower}_new', map_type='new_speedmap',\n", - " color_col='p20_mph', cmap=cmap, legend_url=url,\n", - " cache_seconds=0, map_title=f'Speedmap Segs {time_of_day} {analysis_date}')" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "cff6c1d2-0a15-41b7-a76e-e5e219340545", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'state_dict': {'name': 'null',\n", - " 'layers': [{'name': 'Speedmap Segs AM Peak 2024-10-16',\n", - " 'url': 'https://storage.googleapis.com/calitp-map-tiles/testing/194_am_peak_new.geojson.gz',\n", - " 'properties': {'stroked': False,\n", - " 
'highlight_saturation_multiplier': 0.5,\n", - " 'tooltip_speed_key': 'p20_mph'},\n", - " 'type': 'new_speedmap'}],\n", - " 'lat_lon': (37.98273520932856, -122.54536343118872),\n", - " 'zoom': 13,\n", - " 'legend_url': 'https://storage.googleapis.com/calitp-map-tiles/speeds_legend_color_access.svg'},\n", - " 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJTcGVlZG1hcCBTZWdzIEFNIFBlYWsgMjAyNC0xMC0xNiIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvdGVzdGluZy8xOTRfYW1fcGVha19uZXcuZ2VvanNvbi5neiIsICJwcm9wZXJ0aWVzIjogeyJzdHJva2VkIjogZmFsc2UsICJoaWdobGlnaHRfc2F0dXJhdGlvbl9tdWx0aXBsaWVyIjogMC41LCAidG9vbHRpcF9zcGVlZF9rZXkiOiAicDIwX21waCJ9LCAidHlwZSI6ICJuZXdfc3BlZWRtYXAifV0sICJsYXRfbG9uIjogWzM3Ljk4MjczNTIwOTMyODU2LCAtMTIyLjU0NTM2MzQzMTE4ODcyXSwgInpvb20iOiAxMywgImxlZ2VuZF91cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvc3BlZWRzX2xlZ2VuZF9jb2xvcl9hY2Nlc3Muc3ZnIn0='}" - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_speedmap_state" + "%%capture\n", + "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=analysis_date,\n", + " time_of_day='AM Peak', map_type='new_speedmap')" ] }, { "cell_type": "code", - "execution_count": 80, - "id": "6cf0c367-c44f-482e-a94d-c4cecc91c43a", + "execution_count": null, + "id": "22dea4bc-91c3-43b9-a540-d9b52237f51f", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "writing to calitp-map-tiles/testing/194_am_peak_new_var.geojson.gz\n" - ] - } - ], + "outputs": [], "source": [ - "# TODO clone render method from RtFilterMapper, to utils\n", - "new_speedmap_state_var = shared_utils.rt_utils.set_state_export(\n", - " period_test, filename=f'{itp_id}_{time_of_day_lower}_new_var', map_type='new_speed_variation',\n", - " color_col='fast_slow_ratio', 
cmap=shared_utils.rt_utils.VARIANCE_FIXED_COLORSCALE,\n", - " legend_url=shared_utils.rt_utils.VARIANCE_LEGEND_URL,\n", - " cache_seconds=0, map_title=f'Speedmap Segs {time_of_day} {analysis_date}')" + "speedmap_utils.render_spa_link(link)" ] }, { "cell_type": "code", - "execution_count": 81, - "id": "aa2c5179-3a8b-4e93-a316-b3ded6381eb8", + "execution_count": null, + "id": "7019c47a-0620-458a-a45a-005f288ee805", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'state_dict': {'name': 'null',\n", - " 'layers': [{'name': 'Speedmap Segs AM Peak 2024-10-16',\n", - " 'url': 'https://storage.googleapis.com/calitp-map-tiles/testing/194_am_peak_new_var.geojson.gz',\n", - " 'properties': {'stroked': False, 'highlight_saturation_multiplier': 0.5},\n", - " 'type': 'new_speed_variation'}],\n", - " 'lat_lon': (37.98273520932856, -122.54536343118872),\n", - " 'zoom': 13,\n", - " 'legend_url': 'https://storage.googleapis.com/calitp-map-tiles/variance_legend.svg'},\n", - " 'spa_link': 'https://embeddable-maps.calitp.org/?state=eyJuYW1lIjogIm51bGwiLCAibGF5ZXJzIjogW3sibmFtZSI6ICJTcGVlZG1hcCBTZWdzIEFNIFBlYWsgMjAyNC0xMC0xNiIsICJ1cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvdGVzdGluZy8xOTRfYW1fcGVha19uZXdfdmFyLmdlb2pzb24uZ3oiLCAicHJvcGVydGllcyI6IHsic3Ryb2tlZCI6IGZhbHNlLCAiaGlnaGxpZ2h0X3NhdHVyYXRpb25fbXVsdGlwbGllciI6IDAuNX0sICJ0eXBlIjogIm5ld19zcGVlZF92YXJpYXRpb24ifV0sICJsYXRfbG9uIjogWzM3Ljk4MjczNTIwOTMyODU2LCAtMTIyLjU0NTM2MzQzMTE4ODcyXSwgInpvb20iOiAxMywgImxlZ2VuZF91cmwiOiAiaHR0cHM6Ly9zdG9yYWdlLmdvb2dsZWFwaXMuY29tL2NhbGl0cC1tYXAtdGlsZXMvdmFyaWFuY2VfbGVnZW5kLnN2ZyJ9'}" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "new_speedmap_state_var" + "speedmap_utils.display_spa_map(link)" ] }, { "cell_type": "code", - "execution_count": 78, - "id": "b234655e-1739-4d4a-aaca-747c5e8c3d88", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"Index(['schedule_gtfs_dataset_key', 'shape_array_key', 'shape_id', 'route_id',\n", - " 'direction_id', 'stop_pair', 'stop_pair_name', 'segment_id',\n", - " 'time_of_day', 'p50_mph', 'n_trips', 'p20_mph', 'p80_mph', 'name',\n", - " 'caltrans_district', 'organization_source_record_id',\n", - " 'organization_name', 'base64_url', 'geometry', 'n_trips_sch',\n", - " 'trips_hr_sch', 'route_short_name', 'fast_slow_ratio'],\n", - " dtype='object')" - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "period_test.columns" - ] - }, - { - "cell_type": "markdown", - "id": "04c8da94-c9e6-4261-b6e6-3cd44a4eaa5d", + "execution_count": null, + "id": "82a148cd-1561-4c88-bccf-f552307c2a3d", "metadata": {}, + "outputs": [], "source": [ - "## docs!" + "%%capture\n", + "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=analysis_date,\n", + " time_of_day='AM Peak', map_type='new_speed_variation')" ] }, { "cell_type": "code", - "execution_count": 31, - "id": "0565942e-a76c-4c8d-afe7-7d0f9c61d5f0", + "execution_count": null, + "id": "adf4b569-0029-4295-bc14-70017df6f28e", "metadata": {}, "outputs": [], "source": [ - "domain = cmap.index\n", - "\n", - "range_ = [cmap.rgb_hex_str(i) for i in cmap.index]\n", - "\n", - "df = speedmap_segs[['time_of_day', 'p50_mph', 'p20_mph', 'p80_mph']]\n", - "df = df >> group_by(_.time_of_day) >> summarize(p50_mph = _.p50_mph.quantile(.5),\n", - " p20_mph = _.p20_mph.quantile(.5),\n", - " p80_mph = _.p80_mph.quantile(.5),)\n", - "df['p50 - p20'] = -(df['p50_mph'] - df['p20_mph'])\n", - "df['p80 - p50'] = df['p80_mph'] - df['p50_mph']\n", - "\n", - "error_bars = alt.Chart(df).mark_errorbar(thickness=5, color='gray', opacity=.6).encode(\n", - " y = alt.Y(\"p50_mph:Q\", title='Segment Speed (mph): 20, 50, 80%ile'),\n", - " yError=(\"p50 - p20:Q\"),\n", - " yError2=(\"p80 - p50:Q\"),\n", - " x = alt.X(\"time_of_day:N\", sort=['Early AM', 'AM Peak', 
'Midday', 'PM Peak', 'Evening', 'Owl']),\n", - " tooltip=[alt.Tooltip('p20_mph:Q', title=\"p20 mph\"), alt.Tooltip('p50_mph:Q', title=\"p50 mph\"),\n", - " alt.Tooltip('p80_mph:Q', title=\"p80 mph\")]\n", - ").properties(width=400)\n", - "\n", - "points = alt.Chart(df).mark_point(filled=True, size = 300, opacity = 1).encode(\n", - " alt.Y(\"p50_mph:Q\"),\n", - " alt.X(\"time_of_day:N\", sort=['Early AM', 'AM Peak', 'Midday', 'PM Peak', 'Evening', 'Owl'],\n", - " title='Time of Day'),\n", - " color=alt.Color('p50_mph', title='Median Segment Speed (mph)').scale(domain=domain, range = range_),\n", - " tooltip=[alt.Tooltip('p50_mph:Q', title=\"p50 mph\")],\n", - ")\n", - "\n", - "chart = error_bars + points\n", - "chart = chart.configure(axis = alt.AxisConfig(labelFontSize=14, titleFontSize=18),\n", - " legend = alt.LegendConfig(titleFontSize=14, labelFontSize=14, titleLimit=250,\n", - " titleOrient='left', labelOffset=100))" + "speedmap_utils.render_spa_link(link)" ] }, { "cell_type": "code", - "execution_count": 32, - "id": "be49a590-13d0-47d9-a28c-de21729c373e", + "execution_count": null, + "id": "d497e0bc-a756-4f05-94d7-174fa45a458c", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.LayerChart(...)" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "chart" + "speedmap_utils.display_spa_map(link)" ] }, { "cell_type": "code", - "execution_count": 67, + "execution_count": null, "id": "40979f68-b80b-4879-8892-3271a74efc8b", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "\n", - "\n", - "
\n", - "" - ], - "text/plain": [ - "alt.LayerChart(...)" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "speedmap_utils.chart_speeds_by_time_period(speedmap_segs)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cb6d466d-6d8e-4144-b1b8-69526553f347", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/ca_transit_speed_maps/speedmap_utils.py b/ca_transit_speed_maps/speedmap_utils.py index 6dbc5578c..9d71f8c67 100644 --- a/ca_transit_speed_maps/speedmap_utils.py +++ b/ca_transit_speed_maps/speedmap_utils.py @@ -3,13 +3,21 @@ import numpy as np import geopandas as gpd import update_vars_index -from shared_utils import rt_utils +from shared_utils import rt_utils, catalog_utils from calitp_data_analysis.geography_utils import CA_NAD83Albers import datetime as dt import altair as alt from IPython.display import display, Markdown, IFrame +catalog = catalog_utils.get_catalog('gtfs_analytics_data') -catalog = shared_utils.catalog_utils.get_catalog('gtfs_analytics_data') +def read_segments_shn(organization_source_record_id: str) -> (gpd.GeoDataFrame, gpd.GeoDataFrame): + path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{update_vars_index.ANALYSIS_DATE}.parquet' + speedmap_segs = gpd.read_parquet(path, filters=[['organization_source_record_id', '==', organization_source_record_id]]) # aggregated + speedmap_segs = prepare_segment_gdf(speedmap_segs) + shn = gpd.read_parquet(rt_utils.SHN_PATH) + this_shn = shn >> filter(_.District.isin([int(x[:2]) for x in speedmap_segs.caltrans_district.unique()])) + + return (speedmap_segs, this_shn) def prepare_segment_gdf(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: ''' @@ -50,7 +58,7 @@ def map_shn(district_gdf: gpd.GeoDataFrame): filename = f'{dist}_SHN' title = f"D{dist} State Highway Network" - export_result = set_state_export(district_gdf, subfolder = 
update_vars_index.GEOJSON_SUBFOLDER, filename = filename, + export_result = rt_utils.set_state_export(district_gdf, subfolder = update_vars_index.GEOJSON_SUBFOLDER, filename = filename, map_type = 'state_highway_network', map_title = title) spa_map_state = export_result['state_dict'] return spa_map_state @@ -61,7 +69,7 @@ def map_time_period(district_gdf: gpd.GeoDataFrame, speedmap_segs: gpd.GeoDataFr Always add State Highway Network first. ''' time_of_day_lower = time_of_day.lower().replace(' ', '_') - gdf = gdf >> filter(_.time_of_day == time_of_day) + speedmap_segs = speedmap_segs >> filter(_.time_of_day == time_of_day) color_col = {'new_speedmap': 'p20_mph', 'new_speed_variation': 'fast_slow_ratio'}[map_type] shn_state = map_shn(district_gdf) display_date = analysis_date.strftime('%B %d %Y (%A)') @@ -76,7 +84,7 @@ def map_time_period(district_gdf: gpd.GeoDataFrame, speedmap_segs: gpd.GeoDataFr legend_url = rt_utils.VARIANCE_LEGEND_URL export_result = rt_utils.set_state_export( - period_test, subfolder = update_vars_index.GEOJSON_SUBFOLDER, filename=filename, + speedmap_segs, subfolder = update_vars_index.GEOJSON_SUBFOLDER, filename=filename, map_type=map_type, color_col=color_col, cmap=cmap, legend_url=legend_url, map_title=title, From ddc90ce846ced18c03613b7cceb5708cc52d6e2d Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 01:32:03 +0000 Subject: [PATCH 08/13] new parameterized nb is good to go pending readme/copy updates! 
--- _shared_utils/shared_utils/rt_utils.py | 6 +- ca_transit_speed_maps/01_new_speedmaps.ipynb | 269 ++++++++++++++++-- .../check_stage_intermediate.py | 52 ---- ca_transit_speed_maps/check_test_mapping.py | 56 ---- ca_transit_speed_maps/index_filter_d4.py | 22 -- 5 files changed, 250 insertions(+), 155 deletions(-) delete mode 100644 ca_transit_speed_maps/check_stage_intermediate.py delete mode 100644 ca_transit_speed_maps/check_test_mapping.py delete mode 100644 ca_transit_speed_maps/index_filter_d4.py diff --git a/_shared_utils/shared_utils/rt_utils.py b/_shared_utils/shared_utils/rt_utils.py index 24b5afb9f..58a2f2504 100644 --- a/_shared_utils/shared_utils/rt_utils.py +++ b/_shared_utils/shared_utils/rt_utils.py @@ -828,7 +828,8 @@ def get_operators(analysis_date, operator_list, verbose=False): def spa_map_export_link( - gdf: gpd.GeoDataFrame, path: str, state: dict, site: str = SPA_MAP_SITE, cache_seconds: int = 3600 + gdf: gpd.GeoDataFrame, path: str, state: dict, + site: str = SPA_MAP_SITE, cache_seconds: int = 3600, verbose: bool = False ): """ Called via set_state_export. Handles stream writing of gzipped geojson to GCS bucket, @@ -837,7 +838,8 @@ def spa_map_export_link( assert cache_seconds in range(3601), "cache must be 0-3600 seconds" geojson_str = gdf.to_json() geojson_bytes = geojson_str.encode("utf-8") - print(f"writing to {path}") + if verbose: + print(f"writing to {path}") with fs.open(path, "wb") as writer: # write out to public-facing GCS? 
with gzip.GzipFile(fileobj=writer, mode="w") as gz: gz.write(geojson_bytes) diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index 4fb4aa1f7..70f7b25a2 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -11,15 +11,9 @@ "import warnings\n", "warnings.filterwarnings('ignore')\n", "import calitp_data_analysis.magics\n", - "import update_vars_index\n", - "import pandas as pd\n", - "import geopandas as gpd\n", - "from siuba import *\n", - "from shared_utils import catalog_utils, rt_utils\n", - "import speedmap_utils\n", + "from update_vars_index import ANALYSIS_DATE\n", "\n", - "analysis_date = update_vars_index.ANALYSIS_DATE\n", - "catalog = catalog_utils.get_catalog('gtfs_analytics_data')" + "import speedmap_utils" ] }, { @@ -56,7 +50,7 @@ "outputs": [], "source": [ "%%capture_parameters\n", - "human_date = analysis_date.strftime('%B %d %Y (%A)')\n", + "human_date = ANALYSIS_DATE.strftime('%B %d %Y (%A)')\n", "human_date" ] }, @@ -72,18 +66,6 @@ "organization_name" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "aebbea5a-8064-4845-9181-ac2a18f12d20", - "metadata": {}, - "outputs": [], - "source": [ - "import importlib\n", - "importlib.reload(speedmap_utils)\n", - "importlib.reload(update_vars_index)" - ] - }, { "cell_type": "markdown", "id": "d0b9c978-e6a6-4646-a2c0-e3a603f1dfc3", @@ -119,6 +101,14 @@ "## AM Peak" ] }, + { + "cell_type": "markdown", + "id": "6e2aa3d4-acef-4e64-9fa6-07a95ae5112b", + "metadata": {}, + "source": [ + "### 20th Percentile Speeds by Segment" + ] + }, { "cell_type": "code", "execution_count": null, @@ -127,7 +117,7 @@ "outputs": [], "source": [ "%%capture\n", - "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=analysis_date,\n", + "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=ANALYSIS_DATE,\n", 
" time_of_day='AM Peak', map_type='new_speedmap')" ] }, @@ -151,6 +141,17 @@ "speedmap_utils.display_spa_map(link)" ] }, + { + "cell_type": "markdown", + "id": "02bafcad-7e1a-43f7-86ba-9b3c56135a62", + "metadata": {}, + "source": [ + "### Variation in Speeds by Segment\n", + "\n", + "* This visualization shows variation as the ratio between the 80th percentile and 20th percentile speeds in each segment\n", + "* Segments with high variation in speeds make it difficult for transit operators to set accurate schedules, and can cause inconsistent service for riders" + ] + }, { "cell_type": "code", "execution_count": null, @@ -159,7 +160,7 @@ "outputs": [], "source": [ "%%capture\n", - "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=analysis_date,\n", + "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=ANALYSIS_DATE,\n", " time_of_day='AM Peak', map_type='new_speed_variation')" ] }, @@ -183,6 +184,204 @@ "speedmap_utils.display_spa_map(link)" ] }, + { + "cell_type": "markdown", + "id": "4f60fd2b-66e0-4257-a21c-bdeeece6e8c6", + "metadata": { + "tags": [] + }, + "source": [ + "## Midday" + ] + }, + { + "cell_type": "markdown", + "id": "9fdee73b-a79a-461b-9298-053e4523c41d", + "metadata": {}, + "source": [ + "### 20th Percentile Speeds by Segment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "876ffe14-cc3f-4257-be00-b06ef8363a16", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=ANALYSIS_DATE,\n", + " time_of_day='Midday', map_type='new_speedmap')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d6858d75-4d44-4ab2-8c44-7f5f91149083", + "metadata": {}, + "outputs": [], + "source": [ + "speedmap_utils.render_spa_link(link)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"a1e92ff3-cbe6-45ca-b063-97807caf21c5", + "metadata": {}, + "outputs": [], + "source": [ + "speedmap_utils.display_spa_map(link)" + ] + }, + { + "cell_type": "markdown", + "id": "296f23f8-026d-4a98-9ce1-43ec2c048b2e", + "metadata": {}, + "source": [ + "### Variation in Speeds by Segment\n", + "\n", + "* This visualization shows variation as the ratio between the 80th percentile and 20th percentile speeds in each segment\n", + "* Segments with high variation in speeds make it difficult for transit operators to set accurate schedules, and can cause inconsistent service for riders" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31d820ac-0242-47f0-9968-07f4b7122313", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=ANALYSIS_DATE,\n", + " time_of_day='Midday', map_type='new_speed_variation')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ba8f1e35-3503-41d7-a5ad-60606267cdab", + "metadata": {}, + "outputs": [], + "source": [ + "speedmap_utils.render_spa_link(link)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2633b5c9-3510-4690-8b7b-9abbff45b8e0", + "metadata": {}, + "outputs": [], + "source": [ + "speedmap_utils.display_spa_map(link)" + ] + }, + { + "cell_type": "markdown", + "id": "7f5c3087-8fe4-47fc-bccd-d1eb2f8e092e", + "metadata": { + "tags": [] + }, + "source": [ + "## PM Peak" + ] + }, + { + "cell_type": "markdown", + "id": "15ced50f-7135-4807-a01f-b9768256b27b", + "metadata": {}, + "source": [ + "### 20th Percentile Speeds by Segment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b341b050-4fa9-4f25-a276-886b3c62a586", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=ANALYSIS_DATE,\n", + " time_of_day='PM Peak', 
map_type='new_speedmap')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d4cf9996-0239-4cb4-92c5-ed04d57a7b4c", + "metadata": {}, + "outputs": [], + "source": [ + "speedmap_utils.render_spa_link(link)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c6ea5f19-d416-4bf5-ab9b-3199af92ed90", + "metadata": {}, + "outputs": [], + "source": [ + "speedmap_utils.display_spa_map(link)" + ] + }, + { + "cell_type": "markdown", + "id": "19324dd0-38e2-44b6-9552-2c67f44ffa4a", + "metadata": {}, + "source": [ + "### Variation in Speeds by Segment\n", + "\n", + "* This visualization shows variation as the ratio between the 80th percentile and 20th percentile speeds in each segment\n", + "* Segments with high variation in speeds make it difficult for transit operators to set accurate schedules, and can cause inconsistent service for riders" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb50554c-cb63-464d-9d0f-ed722d24f422", + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=ANALYSIS_DATE,\n", + " time_of_day='PM Peak', map_type='new_speed_variation')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b11b79c6-d87d-4563-9cd9-83a5a7294db6", + "metadata": {}, + "outputs": [], + "source": [ + "speedmap_utils.render_spa_link(link)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "37190c5b-5943-4172-9a91-892bf36c7a2b", + "metadata": {}, + "outputs": [], + "source": [ + "speedmap_utils.display_spa_map(link)" + ] + }, + { + "cell_type": "markdown", + "id": "73f4bc99-f78f-4f37-b050-5f12cbe4ca7a", + "metadata": { + "tags": [] + }, + "source": [ + "## Daily Speed Patterns\n", + "\n", + "* This chart shows the distribution of segment speeds in each time of day. 
It can help find times of day where transit riders experience the slowest overall speeds, or highest variation in speeds." + ] + }, { "cell_type": "code", "execution_count": null, @@ -192,6 +391,30 @@ "source": [ "speedmap_utils.chart_speeds_by_time_period(speedmap_segs)" ] + }, + { + "cell_type": "markdown", + "id": "5dadca19-df7a-4fc8-8955-210b51f4b1c4", + "metadata": {}, + "source": [ + "## Additional Time Periods" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "05570269-2c18-4d49-b879-b3466e735838", + "metadata": {}, + "outputs": [], + "source": [ + "for time_period in [period for period in speedmap_segs.time_of_day.unique() if period not in ['AM Peak', 'Midday', 'PM Peak']]:\n", + " link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=ANALYSIS_DATE,\n", + " time_of_day=time_period, map_type='new_speedmap')\n", + " speedmap_utils.render_spa_link(link, text=f\"{time_period} Speeds\")\n", + " link = speedmap_utils.map_time_period(district_gdf=this_shn, speedmap_segs=speedmap_segs, analysis_date=ANALYSIS_DATE,\n", + " time_of_day=time_period, map_type='new_speed_variation')\n", + " speedmap_utils.render_spa_link(link, text=f\"{time_period} Speed Variation\")" + ] } ], "metadata": { diff --git a/ca_transit_speed_maps/check_stage_intermediate.py b/ca_transit_speed_maps/check_stage_intermediate.py deleted file mode 100644 index 47d5a7450..000000000 --- a/ca_transit_speed_maps/check_stage_intermediate.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB? -os.environ['USE_PYGEOS'] = '0' - -from siuba import * -import pandas as pd -import datetime as dt - -import shared_utils -from rt_analysis import rt_parser -import tqdm -import warnings -from build_speedmaps_index import ANALYSIS_DATE - -def stage_intermediate_data(row: pd.Series, pbar: tqdm.tqdm) -> None: - ''' - Call using pd.apply for convienient iteration. 
- Save progress to parquet after running each agency in case script is interrupted - That progress parquet (when complete) is used in next script, actual output is ignored - ''' - global speedmaps_index_joined - analysis_date = row.analysis_date - progress_path = f'./_rt_progress_{analysis_date}.parquet' - - if row.status != 'already_ran': - try: - rt_day = rt_parser.OperatorDayAnalysis(row.organization_itp_id, - analysis_date, pbar) - rt_day.export_views_gcs() - row.status = 'already_ran' - except Exception as e: - print(f'{row.organization_itp_id} parser failed: {e}') - row.status = 'parser_failed' - speedmaps_index_joined.loc[row.name] = row - speedmaps_index_joined.to_parquet(progress_path) - - return - -if __name__ == "__main__": - - speedmaps_index_joined = shared_utils.rt_utils.check_intermediate_data( - analysis_date = ANALYSIS_DATE) # check if this stage needed - if speedmaps_index_joined.status.isin(['already_ran', 'parser_failed', - 'map_confirmed', 'map_failed']).all(): - print('already attempted to stage all intermediate data:') - else: - pbar = tqdm.tqdm() - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - _ = speedmaps_index_joined.apply(stage_intermediate_data, axis = 1, args=[pbar]) - print() - print('intermediate data stage attempt complete:') diff --git a/ca_transit_speed_maps/check_test_mapping.py b/ca_transit_speed_maps/check_test_mapping.py deleted file mode 100644 index 267d7e41b..000000000 --- a/ca_transit_speed_maps/check_test_mapping.py +++ /dev/null @@ -1,56 +0,0 @@ -import os -os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB? -os.environ['USE_PYGEOS'] = '0' - -from siuba import * -import pandas as pd -import datetime as dt - -from shared_utils import rt_utils -from rt_analysis import rt_filter_map_plot -import tqdm -import warnings -from build_speedmaps_index import ANALYSIS_DATE - - -def check_map_gen(row, pbar): - ''' - Call using pd.apply for convienient iteration. 
- Save progress to parquet after attempting each agency's map in case script is interrupted - That progress parquet (when complete) is used in next script, actual output is ignored - ''' - - global speedmaps_index_joined - analysis_date = row.analysis_date - progress_path = f'./_rt_progress_{analysis_date}.parquet' - - if row.status not in ('parser_failed', 'map_confirmed'): - try: - rt_day = rt_filter_map_plot.from_gcs(row.organization_itp_id, - analysis_date, pbar) - rt_day.set_filter(start_time='06:00', end_time='09:00') - _m = rt_day.segment_speed_map() - row.status = 'map_confirmed' - except Exception as e: - print(f'{row.organization_itp_id} map test failed: {e}') - row.status = 'map_failed' - speedmaps_index_joined.loc[row.name] = row - speedmaps_index_joined.to_parquet(progress_path) - - return - -if __name__ == "__main__": - - speedmaps_index_joined = rt_utils.check_intermediate_data( - analysis_date = ANALYSIS_DATE) - # check if this stage needed - if speedmaps_index_joined.status.isin(['map_confirmed', 'map_failed', 'parser_failed']).all(): - print('already attempted to test all maps:') - else: - pbar = tqdm.tqdm() - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - _ = speedmaps_index_joined.apply(check_map_gen, axis = 1, args=[pbar]) - print() - print('map testing complete:') - print(speedmaps_index_joined.status.value_counts()) \ No newline at end of file diff --git a/ca_transit_speed_maps/index_filter_d4.py b/ca_transit_speed_maps/index_filter_d4.py deleted file mode 100644 index 7a2175194..000000000 --- a/ca_transit_speed_maps/index_filter_d4.py +++ /dev/null @@ -1,22 +0,0 @@ -import os -os.environ["CALITP_BQ_MAX_BYTES"] = str(1_000_000_000_000) ## 1TB? 
-os.environ['USE_PYGEOS'] = '0' - -import pandas as pd -from siuba import * - -import datetime as dt -import os -from shared_utils import rt_dates - -if __name__ == "__main__": - - keys = ['nov2022a', 'nov2022b', 'nov2022c', 'nov2022d'] - dates = [rt_dates.DATES[key] for key in keys] - for date in dates: - path = f'./_rt_progress_{date}.parquet' - if os.path.exists(path): - df = pd.read_parquet(path) - df = df >> filter(_.caltrans_district == '04 - Oakland') - print(f'{path} filtered to d4 only!') - df.to_parquet(path) From 19f48d958f60110c21ed4460136b820c9d6ad43f Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 17:24:17 +0000 Subject: [PATCH 09/13] test generating yml, test la metro --- ca_transit_speed_maps/01_new_speedmaps.ipynb | 2 +- ca_transit_speed_maps/stage_run_portfolio.py | 9 +- portfolio/sites/rt.yml | 192 ++++++++++--------- 3 files changed, 106 insertions(+), 97 deletions(-) diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index 70f7b25a2..c7845260c 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -28,7 +28,7 @@ "outputs": [], "source": [ "## parameters cell\n", - "organization_source_record_id = 'rec5ome04BbA9uf4y'" + "organization_source_record_id = 'recPnGkwdpnr8jmHB'" ] }, { diff --git a/ca_transit_speed_maps/stage_run_portfolio.py b/ca_transit_speed_maps/stage_run_portfolio.py index e4418096d..f791a4f3a 100644 --- a/ca_transit_speed_maps/stage_run_portfolio.py +++ b/ca_transit_speed_maps/stage_run_portfolio.py @@ -18,7 +18,7 @@ import pyaml import yaml -from build_speedmaps_index import ANALYSIS_DATE +from update_vars_index import ANALYSIS_DATE, PROGRESS_PATH def make_rt_site_yml(speedmaps_index_joined, rt_site_path = '../portfolio/sites/rt.yml'): @@ -72,8 +72,7 @@ def deploy_portfolio(): if __name__ == "__main__": - speedmaps_index_joined = rt_utils.check_intermediate_data( - analysis_date = ANALYSIS_DATE) + 
speedmaps_index_joined = pd.read_parquet(PROGRESS_PATH) make_rt_site_yml(speedmaps_index_joined) - stage_portfolio() - deploy_portfolio() \ No newline at end of file + #stage_portfolio() + #deploy_portfolio() \ No newline at end of file diff --git a/portfolio/sites/rt.yml b/portfolio/sites/rt.yml index 4131b2841..0bafa1093 100644 --- a/portfolio/sites/rt.yml +++ b/portfolio/sites/rt.yml @@ -6,140 +6,150 @@ parts: params: district: 01 - Eureka sections: - - itp_id: 18 - - itp_id: 108 - - itp_id: 135 - - itp_id: 159 - - itp_id: 198 - - itp_id: 261 + - organization_source_record_id: recaaoqEDvwhcmIVT + - organization_source_record_id: recynxkqEoo9dJEvw + - organization_source_record_id: recaa3naoNR4a5RsJ + - organization_source_record_id: recPwXKbGLL4aIqXV + - organization_source_record_id: recpWBEjXzLHqCjhE + - organization_source_record_id: recOnKhqF25crJt4q - caption: District 02 - Redding params: district: 02 - Redding sections: - - itp_id: 259 - - itp_id: 334 + - organization_source_record_id: recI1fuNpr306H0hw + - organization_source_record_id: recx4ZG2lvZb7kGAL + - organization_source_record_id: recKN0Q28cOQdsy5L + - organization_source_record_id: recEtov4XTJZDjtuj - caption: District 03 - Marysville params: district: 03 - Marysville sections: - - itp_id: 48 - - itp_id: 105 - - itp_id: 489 - - itp_id: 221 - - itp_id: 273 - - itp_id: 331 - - itp_id: 351 - - itp_id: 372 + - organization_source_record_id: recf7l9tozKXOmqqZ + - organization_source_record_id: recaJnArpFEk5QooE + - organization_source_record_id: rec43oyrfhtPDdRHj + - organization_source_record_id: reczUcQgqgtMpkpKC + - organization_source_record_id: recX9lccSE1jmjsmG + - organization_source_record_id: rec3u4aMplqObcoTR + - organization_source_record_id: recS1JKEz73Hev8pJ + - organization_source_record_id: recH2FdHvrL7nIpHA - caption: District 04 - Oakland params: district: 04 - Oakland sections: - - itp_id: 4 - - itp_id: 56 - - itp_id: 61 - - itp_id: 282 - - itp_id: 110 - - itp_id: 247 - - itp_id: 
301 - - itp_id: 350 - - itp_id: 70 - - itp_id: 336 - - itp_id: 127 - - itp_id: 167 - - itp_id: 194 - - itp_id: 218 - - itp_id: 246 - - itp_id: 257 - - itp_id: 280 - - itp_id: 290 - - itp_id: 294 - - itp_id: 310 - - itp_id: 381 - - itp_id: 314 - - itp_id: 315 - - itp_id: 33 - - itp_id: 368 + - organization_source_record_id: recOZgevYf7Jimm9L + - organization_source_record_id: recvEBkSBc7UxlarC + - organization_source_record_id: recjnaKVDEgulsko3 + - organization_source_record_id: rechaapWbeffO33OX + - organization_source_record_id: recot6qBamlOoLcrM + - organization_source_record_id: recUbGDXhPdx3494b + - organization_source_record_id: recsNvWvLQwGsoxlE + - organization_source_record_id: recmB4uxrVLRXYF3L + - organization_source_record_id: recRM3c9Zfaft4V2B + - organization_source_record_id: recn8zTmGbYZv1qxV + - organization_source_record_id: recEEJVeGrHGoTwgj + - organization_source_record_id: rec75nESVRqpwNiLv + - organization_source_record_id: recoX7qMhlPrgfuz3 + - organization_source_record_id: recANs4M9yDhvDyob + - organization_source_record_id: recNOb7pqBRlQVG5e + - organization_source_record_id: recIeP8mUucOsbvbz + - organization_source_record_id: rec9RGrya9SjWYp2p + - organization_source_record_id: rec2ErIfztLMJ4rgG + - organization_source_record_id: recsBfXgev9ICDCY1 + - organization_source_record_id: recmatCuQAUrNcs8j + - organization_source_record_id: recw3mRsmKDTNnVlT + - organization_source_record_id: recC5CT95EufmQCXr + - organization_source_record_id: reclLbvvKE1V4zsZg + - organization_source_record_id: rec7ShjfgRPLU0yjY + - organization_source_record_id: recDupUxInMUgxeiz + - organization_source_record_id: recUmm4gcNXaqrwpn + - organization_source_record_id: recD9x7GHPrN46v6m + - organization_source_record_id: recIKnsnTdKQ0vsiv - caption: District 05 - San Luis Obispo params: district: 05 - San Luis Obispo sections: - - itp_id: 287 - - itp_id: 298 - - itp_id: 208 - - itp_id: 289 - - itp_id: 293 - - itp_id: 296 + - organization_source_record_id: 
recMM99msxjmc6PPv + - organization_source_record_id: rec9zGMJgNnes75K1 + - organization_source_record_id: reckp33bhAuZlmO1M + - organization_source_record_id: receZJ9sEnP9vy3g0 + - organization_source_record_id: reciakGBN1DP9dK9N + - organization_source_record_id: recswCrw6a6htmXJ4 + - organization_source_record_id: recDFXyxaWQpiY4mw - caption: District 06 - Fresno params: district: 06 - Fresno sections: - - itp_id: 116 - - itp_id: 361 - - itp_id: 126 - - itp_id: 146 - - itp_id: 148 - - itp_id: 474 + - organization_source_record_id: rec0bKUhHAajTNIlf + - organization_source_record_id: rec6gNibVKjDVy4aX + - organization_source_record_id: recIh3vq8jwuuJlvL + - organization_source_record_id: reczIiFqdL5AXTpm1 + - organization_source_record_id: rec5MLJKrAgeM8f6H + - organization_source_record_id: recWLwvtjXhiVWjKt - caption: District 07 - Los Angeles params: district: 07 - Los Angeles sections: - - itp_id: 16 - - itp_id: 45 - - itp_id: 54 - - itp_id: 75 - - itp_id: 87 - - itp_id: 183 - - itp_id: 210 - - itp_id: 228 - - itp_id: 231 - - itp_id: 243 - - itp_id: 260 - - itp_id: 295 - - itp_id: 308 - - itp_id: 337 - - itp_id: 339 - - itp_id: 123 - - itp_id: 170 - - itp_id: 182 - - itp_id: 165 - - itp_id: 380 + - organization_source_record_id: recxsWR0KRrQTdjmg + - organization_source_record_id: rec6z2ivjTxc8Sag3 + - organization_source_record_id: recD4Vzt0EDC3VY7I + - organization_source_record_id: recd0hxcumVMausDU + - organization_source_record_id: rec5ome04BbA9uf4y + - organization_source_record_id: recj8LXdeSurpSRNU + - organization_source_record_id: rec4pgjrmdhCh4z01 + - organization_source_record_id: recojKzQsBzE1hjVu + - organization_source_record_id: recPkUZgzjCjk5GrV + - organization_source_record_id: reckQmUdXUzHFmlVf + - organization_source_record_id: reczF5Y8R9CUJmfSy + - organization_source_record_id: rec8zhnCPETu6qEiH + - organization_source_record_id: recImm5SmW2zq9rMT + - organization_source_record_id: rec1ErIn9gG1Isk5W + - organization_source_record_id: 
recPJULRJk1Yn824N + - organization_source_record_id: recvzE9NXgGMmqcTH + - organization_source_record_id: rec2LbE0IBKBAkvQr + - organization_source_record_id: recS7GnKTcQVX20HE + - organization_source_record_id: rec00qSzZL8KqiXAo + - organization_source_record_id: recPnGkwdpnr8jmHB + - organization_source_record_id: recPnGkwdpnr8jmHB + - organization_source_record_id: recBW9vkDYWIKPIu4 + - organization_source_record_id: rec7EN71rsZxDFxZd - caption: District 08 - San Bernardino params: district: 08 - San Bernardino sections: - - itp_id: 214 - - itp_id: 232 - - itp_id: 238 - - itp_id: 269 - - itp_id: 327 - - itp_id: 360 + - organization_source_record_id: recuGkFhN2WXGK67H + - organization_source_record_id: reciWrBgYsAIm9eKK + - organization_source_record_id: recHbquam1bWEwC3P + - organization_source_record_id: recG5aXxDPI645S86 + - organization_source_record_id: recGcv4NidDjwVSiN + - organization_source_record_id: recYgajd92VLqio1p + - organization_source_record_id: recJcXMNC5MUm2uDe - caption: District 09 - Bishop params: district: 09 - Bishop sections: - - itp_id: 99 + - organization_source_record_id: recRajLNJI206nnAr - caption: District 10 - Stockton params: district: 10 - Stockton sections: - - itp_id: 107 - - itp_id: 349 - - itp_id: 10 - - itp_id: 284 - - itp_id: 484 - - itp_id: 482 + - organization_source_record_id: recNeVOEwWUtYe8xm + - organization_source_record_id: recmQVsPDG6sLtD6j + - organization_source_record_id: recpgYVeU3VePMeWx + - organization_source_record_id: recZgWVXkpix390of + - organization_source_record_id: recSiaaMmBXW7fUZS + - organization_source_record_id: reclbzT9trIiGwjBB - caption: District 11 - San Diego params: district: 11 - San Diego sections: - - itp_id: 226 - - itp_id: 277 - - itp_id: 278 + - organization_source_record_id: rech5YtfjpQvVIBAF + - organization_source_record_id: recRBcrX4ZvTyvSnm + - organization_source_record_id: recfbLFdDnCxgIfAB + - organization_source_record_id: recZALk4vysuoTVjF - caption: District 12 - Irvine 
params: district: 12 - Irvine sections: - - itp_id: 14 - - itp_id: 235 + - organization_source_record_id: recfma7GNR5lQTTTg readme: ./ca_transit_speed_maps/README.md title: California Transit Speed Maps From dcdc178c7491b5db3373d2500d545e0297adc43d Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 17:46:18 +0000 Subject: [PATCH 10/13] new copy --- ca_transit_speed_maps/01_new_speedmaps.ipynb | 16 +++++++++----- ca_transit_speed_maps/README.md | 23 ++++++++++++-------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index c7845260c..ecaf1e026 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -28,7 +28,7 @@ "outputs": [], "source": [ "## parameters cell\n", - "organization_source_record_id = 'recPnGkwdpnr8jmHB'" + "organization_source_record_id = 'recf7l9tozKXOmqqZ'" ] }, { @@ -98,7 +98,7 @@ "tags": [] }, "source": [ - "## AM Peak" + "## AM Peak (07:00-10:00)" ] }, { @@ -191,7 +191,7 @@ "tags": [] }, "source": [ - "## Midday" + "## Midday (10:00-1500)" ] }, { @@ -284,7 +284,7 @@ "tags": [] }, "source": [ - "## PM Peak" + "## PM Peak (1500-2000)" ] }, { @@ -397,7 +397,13 @@ "id": "5dadca19-df7a-4fc8-8955-210b51f4b1c4", "metadata": {}, "source": [ - "## Additional Time Periods" + "## Additional Time Periods\n", + "\n", + "These links provide speed and speed variation maps outside the AM Peak, Midday, and PM Peak periods, if available.\n", + "\n", + "* Evening is 20:00-24:00\n", + "* Owl is 00:00-04:00\n", + "* Early AM is 04:00-07:00" ] }, { diff --git a/ca_transit_speed_maps/README.md b/ca_transit_speed_maps/README.md index 1b346bcee..dbca276c6 100644 --- a/ca_transit_speed_maps/README.md +++ b/ca_transit_speed_maps/README.md @@ -21,25 +21,30 @@ Select source code can be found at: 
[https://github.com/cal-itp/data-analyses/blob/main/_shared_utils/shared_utils/rt_utils.py](https://github.com/cal-itp/data-analyses/blob/main/_shared_utils/shared_utils/rt_utils.py) -## Definitions +## Time Period Definitions -* AM Peak: 0600-0900 -* Midday: 1000-1400 -* PM Peak: 1500-1900 +* AM Peak: 07:00-10:00 +* Midday: 10:00-15:00 +* PM Peak: 15:00-20:00 +* Evening: 20:00-24:00 +* Owl: 00:00-04:00 +* Early AM: 04:00-07:00 ## Methodology -Segment speed is estimated using the time and distance between vehicle positions reports, with distance being measured linearly along the corresponding transit route. These maps show speeds along segments, which are calculated by interpolating the two nearest position reports for each trip in order to estimate speed along each segment, then taking the 20th percentile of speeds for that segment in each period (morning peak, afternoon peak, and midday). This site shows data for a specific day, usually a Wednesday (and not a holiday or other date we believe could be atypical). +Segment speed is estimated using the time and distance between vehicle positions reports, with distance being measured linearly along the corresponding transit route. These maps show speeds along segments, which are calculated by interpolating the two nearest position reports for each trip in order to estimate speed along each segment, then taking the 20th percentile of speeds for that segment in each time period. This site shows data for a specific day, usually a Wednesday (and not a holiday or other date we believe could be atypical). Generally, segments are constructed from one stop to the next, however, if the distance between stops is large we add interpolated segments every kilometer to provide additional resolution for rural and express services. We use the ratio between 80th and 20th percentile speeds in a segment to measure speed variation. -## Frequently Asked Questions +## What's New? -Are colorblind safe speed maps available? 
+We're aligning with segment speeds published via the [open data portal!](https://gis.data.ca.gov/datasets/4937eeb59fdb4e56ae75e64688c7f2c0_0/explore) -_Yes, by following the "Open Colorblind Safe Map in New Tab" links displayed before each speed map. Variation maps already use a colorblind safe scheme._ +While the datasets don't fully match just yet, this site is now based on the same underlying data. This should help us update it more often, enhance accuracy for complex route shapes, and include as many transit operators as possible. + +The site also includes a more informative speed and variation by time of day chart at the bottom of the page. ## Data Sources Archived GTFS-Realtime Vehicle Positions data, plus corresponding GTFS Schedule data. @@ -48,7 +53,7 @@ Each map includes a link to download its geospatial data in a gzip-compressed Ge ## Ongoing Work -The Cal-ITP team is working to share broader speed and delay data using the California Open Data Portal. +The Cal-ITP team is working to transition the site to a rolling average of recent dates, instead of a single day. Questions or feedback? 
Please email hello@calitp.org From b20a214529ea481451be4118cc79a1e6ec8237d4 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 17:47:16 +0000 Subject: [PATCH 11/13] one last edit --- ca_transit_speed_maps/01_new_speedmaps.ipynb | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/ca_transit_speed_maps/01_new_speedmaps.ipynb b/ca_transit_speed_maps/01_new_speedmaps.ipynb index ecaf1e026..45edcaf82 100644 --- a/ca_transit_speed_maps/01_new_speedmaps.ipynb +++ b/ca_transit_speed_maps/01_new_speedmaps.ipynb @@ -83,7 +83,7 @@ "source": [ "## About These Maps:\n", "\n", - "* Each map shows bus (and rail, if applicable) speeds for {organization_name}, with a map each for the morning peak, midday, and afternoon peak periods on {human_date}.\n", + "* Each map shows bus (and rail, if applicable) speeds for {organization_name}, with a map each for the AM Peak, Midday, and PM Peak periods on {human_date}.\n", "* On the map, routes are split into segments corresponding to the distance between two stops, allowing you to focus on specific portions of the route experiencing a slowdown.\n", "* Route segments are arrow-shaped to indicate direction of travel. 
Thicker segments indicate more frequent transit routes.\n", "* State Highway Network routes are outlined in gray.\n", @@ -191,7 +191,7 @@ "tags": [] }, "source": [ - "## Midday (10:00-1500)" + "## Midday (10:00-15:00)" ] }, { @@ -284,7 +284,7 @@ "tags": [] }, "source": [ - "## PM Peak (1500-2000)" + "## PM Peak (15:00-20:00)" ] }, { From 1fdd5fe558d850134a6ff8f3c2865bc897b826c5 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 17:52:55 +0000 Subject: [PATCH 12/13] final tweaks --- ca_transit_speed_maps/Makefile | 14 +------------- ca_transit_speed_maps/speedmap_utils.py | 8 +++++++- ca_transit_speed_maps/stage_run_portfolio.py | 4 ++-- ca_transit_speed_maps/update_vars_index.py | 2 +- 4 files changed, 11 insertions(+), 17 deletions(-) diff --git a/ca_transit_speed_maps/Makefile b/ca_transit_speed_maps/Makefile index 122694c9d..7e94c2eca 100644 --- a/ca_transit_speed_maps/Makefile +++ b/ca_transit_speed_maps/Makefile @@ -1,18 +1,6 @@ generate_speedmaps_quickly: - python build_speedmaps_index.py - python stage_run_portfolio.py - -generate_speedmaps_slowly: - python build_speedmaps_index.py - python check_stage_intermediate.py - python check_test_mapping.py + python update_vars_index.py python stage_run_portfolio.py -generate_d4_data: - python build_speedmaps_index.py - python index_filter_d4.py - python check_stage_intermediate.py - python check_test_mapping.py - clean_speedmap_progress: rm _rt_progress* diff --git a/ca_transit_speed_maps/speedmap_utils.py b/ca_transit_speed_maps/speedmap_utils.py index 9d71f8c67..82142a249 100644 --- a/ca_transit_speed_maps/speedmap_utils.py +++ b/ca_transit_speed_maps/speedmap_utils.py @@ -11,6 +11,9 @@ catalog = catalog_utils.get_catalog('gtfs_analytics_data') def read_segments_shn(organization_source_record_id: str) -> (gpd.GeoDataFrame, gpd.GeoDataFrame): + ''' + Get filtered detailed speedmap segments for an organization, and relevant district SHN. 
+ ''' path = f'{catalog.speedmap_segments.dir}{catalog.speedmap_segments.shape_stop_single_segment_detail}_{update_vars_index.ANALYSIS_DATE}.parquet' speedmap_segs = gpd.read_parquet(path, filters=[['organization_source_record_id', '==', organization_source_record_id]]) # aggregated speedmap_segs = prepare_segment_gdf(speedmap_segs) @@ -45,7 +48,6 @@ def render_spa_link(spa_map_url: str, text='Full Map') -> None: def display_spa_map(spa_map_url: str, width: int=1000, height: int=650) -> None: ''' Display map from external simple web app in the notebook/JupyterBook context via an IFrame. - Will show most recent map set using self.map_gz_export Width/height defaults are current best option for JupyterBook, don't change for portfolio use width, height: int (pixels) ''' @@ -94,6 +96,10 @@ def map_time_period(district_gdf: gpd.GeoDataFrame, speedmap_segs: gpd.GeoDataFr return spa_link def chart_speeds_by_time_period(speedmap_segs: gpd.GeoDataFrame) -> None: + ''' + Use Altair to chart p20,p50,p80 speeds by time of day. + Match speedmap colorscale. 
+ ''' cmap = rt_utils.ACCESS_ZERO_THIRTY_COLORSCALE domain = cmap.index range_ = [cmap.rgb_hex_str(i) for i in cmap.index] diff --git a/ca_transit_speed_maps/stage_run_portfolio.py b/ca_transit_speed_maps/stage_run_portfolio.py index f791a4f3a..bbe91d881 100644 --- a/ca_transit_speed_maps/stage_run_portfolio.py +++ b/ca_transit_speed_maps/stage_run_portfolio.py @@ -74,5 +74,5 @@ def deploy_portfolio(): speedmaps_index_joined = pd.read_parquet(PROGRESS_PATH) make_rt_site_yml(speedmaps_index_joined) - #stage_portfolio() - #deploy_portfolio() \ No newline at end of file + stage_portfolio() + deploy_portfolio() \ No newline at end of file diff --git a/ca_transit_speed_maps/update_vars_index.py b/ca_transit_speed_maps/update_vars_index.py index cb1f4f61a..49f8459ce 100644 --- a/ca_transit_speed_maps/update_vars_index.py +++ b/ca_transit_speed_maps/update_vars_index.py @@ -40,6 +40,6 @@ def build_speedmaps_index(analysis_date: dt.date) -> pd.DataFrame: if __name__ == "__main__": print(f'analysis date from shared_utils/rt_dates: {ANALYSIS_DATE}') - speedmaps_index = build_speedmaps_index(ANALYSIS_DATE, how = 'new') + speedmaps_index = build_speedmaps_index(ANALYSIS_DATE) # speedmaps_index = rt_utils.check_intermediate_data(speedmaps_index) speedmaps_index.to_parquet(PROGRESS_PATH) \ No newline at end of file From ec8c1008eaae128f997c047e1036af7ac4dde043 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Fri, 15 Nov 2024 17:53:29 +0000 Subject: [PATCH 13/13] format --- _shared_utils/shared_utils/rt_utils.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/_shared_utils/shared_utils/rt_utils.py b/_shared_utils/shared_utils/rt_utils.py index 58a2f2504..d36348062 100644 --- a/_shared_utils/shared_utils/rt_utils.py +++ b/_shared_utils/shared_utils/rt_utils.py @@ -828,8 +828,12 @@ def get_operators(analysis_date, operator_list, verbose=False): def spa_map_export_link( - gdf: gpd.GeoDataFrame, path: str, state: dict, - site: str = 
SPA_MAP_SITE, cache_seconds: int = 3600, verbose: bool = False + gdf: gpd.GeoDataFrame, + path: str, + state: dict, + site: str = SPA_MAP_SITE, + cache_seconds: int = 3600, + verbose: bool = False, ): """ Called via set_state_export. Handles stream writing of gzipped geojson to GCS bucket, @@ -855,9 +859,14 @@ def set_state_export( bucket: str = SPA_MAP_BUCKET, subfolder: str = "testing/", filename: str = "test2", - map_type: Literal["speedmap", "speed_variation", - "new_speedmap", "new_speed_variation" - "hqta_areas", "hqta_stops", "state_highway_network"] = None, + map_type: Literal[ + "speedmap", + "speed_variation", + "new_speedmap", + "new_speed_variation" "hqta_areas", + "hqta_stops", + "state_highway_network", + ] = None, map_title: str = "Map", cmap: branca.colormap.ColorMap = None, color_col: str = None,