diff --git a/conveyal_update/conveyal_vars.py b/conveyal_update/conveyal_vars.py index e06f901ad..3ed9cb2c9 100644 --- a/conveyal_update/conveyal_vars.py +++ b/conveyal_update/conveyal_vars.py @@ -1,14 +1,17 @@ import datetime as dt gcs_path = 'gs://calitp-analytics-data/data-analyses/conveyal_update/' -target_date = dt.date(2023, 10, 18) +target_date = dt.date(2024, 2, 14) osm_file = 'us-west-latest.osm.pbf' # http://download.geofabrik.de/north-america/us-west-latest.osm.pbf # first download with wget... conveyal_regions = {} -# boundaries correspond to Conveyal Analysis regions +# boundaries correspond to Conveyal Analysis regions conveyal_regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158} conveyal_regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789} conveyal_regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759} -conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043} \ No newline at end of file +conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043} + +# # special region for one-off Centennial Corridor +# conveyal_regions['bakersfield'] = {'north': 36.81, 'south': 34.13, 'east': -117.12, 'west': -120.65} \ No newline at end of file diff --git a/conveyal_update/match_feeds_regions.py b/conveyal_update/match_feeds_regions.py index f269bade2..318fc8e8b 100644 --- a/conveyal_update/match_feeds_regions.py +++ b/conveyal_update/match_feeds_regions.py @@ -1,7 +1,8 @@ import os os.environ['USE_PYGEOS'] = '0' os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) -from shared_utils import gtfs_utils_v2, geography_utils +from shared_utils import gtfs_utils_v2 +from calitp_data_analysis import geography_utils import pandas as pd from siuba import * diff --git a/sb125_analyses/vmt_transit_sketch/_utils.py 
b/sb125_analyses/vmt_transit_sketch/_utils.py index 1a5a7f181..fd5c44c72 100644 --- a/sb125_analyses/vmt_transit_sketch/_utils.py +++ b/sb125_analyses/vmt_transit_sketch/_utils.py @@ -1,7 +1,7 @@ import pygris import geopandas as gpd from siuba import * -from shared_utils.geography_utils import CA_NAD83Albers +from calitp_data_analysis.geography_utils import CA_NAD83Albers GCS_PATH = 'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/' diff --git a/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb new file mode 100644 index 000000000..a5984a904 --- /dev/null +++ b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb @@ -0,0 +1,1368 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "from siuba import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81cddca7-bea4-494d-b0cb-02508d52b380", + "metadata": {}, + "outputs": [], + "source": [ + "import zipfile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15475e21-db2a-4f64-a2bc-38f2b76b9a4f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ! 
pip install pygris" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "682268e8-78fc-4609-afc5-294f3c650b5e", + "metadata": {}, + "outputs": [], + "source": [ + "import _utils\n", + "import importlib\n", + "importlib.reload(_utils)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e269a698-e8bf-4af7-a53c-45265ca7e5ad", + "metadata": {}, + "outputs": [], + "source": [ + "import shared_utils\n", + "from calitp_data_analysis import geography_utils, utils" + ] + }, + { + "cell_type": "markdown", + "id": "0664b67b-b63d-4357-b855-20a33bc8d6d7", + "metadata": {}, + "source": [ + "# Selecting Corridors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "684770aa-d035-45d3-a3a4-ada6c51c2692", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date = '2023-04-15'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1424118-5e7c-4e00-9eee-1630d251cb14", + "metadata": {}, + "outputs": [], + "source": [ + "feeds = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "081ae78d-9f96-4bca-b181-c6c40f3f8e76", + "metadata": {}, + "outputs": [], + "source": [ + "tracts = _utils.get_tract_geoms()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53cb95cd-7e8c-455d-ab47-fe3968582541", + "metadata": {}, + "outputs": [], + "source": [ + "def trips_to_stops(trip_df, feed_list):\n", + " st = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, feed_list, trip_df=trip_df)\n", + " st = st >> distinct(_.stop_id, _.stop_sequence) >> collect()\n", + " st = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st, on='stop_id')\n", + " return st" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ed8886d-6403-438e-b5c6-b8dee2c61290", + "metadata": {}, + "outputs": [], + "source": [ + "def sjoin_tracts(stops_gdf, tracts_gdf, 
buffer_m):\n", + " \n", + " stops_gdf = stops_gdf.to_crs(geography_utils.CA_NAD83Albers)\n", + " assert stops_gdf.crs == tracts_gdf.crs\n", + " \n", + " stops_gdf.geometry = stops_gdf.buffer(buffer_m)\n", + " tracts_sjoined = gpd.sjoin(tracts_gdf, stops_gdf) >> distinct(_.GEOID, _keep_all=True)\n", + " \n", + " return tracts_sjoined" + ] + }, + { + "cell_type": "markdown", + "id": "1ca1517e-876d-49ae-82c1-973e02116745", + "metadata": { + "tags": [] + }, + "source": [ + "## Wilshire" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed2d3a3a-3473-4174-91b7-56c4e6759e9d", + "metadata": {}, + "outputs": [], + "source": [ + "metro = feeds >> filter(_.name.str.contains('LA Metro Bus'))\n", + "metro" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "030ebc15-6824-47a6-b781-11628b74d848", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1e324d3-7891-40f2-bd7b-2389fa36ee26", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "384bc785-79ed-4ca8-be5d-14a0480e98cf", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b6fde5e-1394-4350-b483-1c1e6ad1dabb", + "metadata": {}, + "outputs": [], + "source": [ + "trips_720 = trips >> filter(_.route_short_name.isin(['720']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fed54ea9-8157-425a-889a-1069d77265ca", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_720 = trips_to_stops(trips_720, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38b16a39-5d16-45bd-a9e4-71c22cacdb7f", + 
"metadata": {}, + "outputs": [], + "source": [ + "# only keep W of Wil/Wstn\n", + "st_720 = st_720 >> filter(_.stop_sequence <= 11)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e04bc76-99c8-4930-a754-fc4c2c0f3597", + "metadata": {}, + "outputs": [], + "source": [ + "# st_720.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfb4f847-c5a0-43e2-b8d9-c11c4469c954", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire = sjoin_tracts(st_720, tracts, 804)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "600ff9a4-5bbe-4078-9829-721842084f89", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# wilshire.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be46c69b-c2fa-4a37-ab0a-fff6ccd63cf2", + "metadata": {}, + "outputs": [], + "source": [ + "# wilshire.to_file('wilshire.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18a68442-9ab6-4973-868f-b22ace3ea90d", + "metadata": {}, + "outputs": [], + "source": [ + "# includes non-corridor vmt...\n", + "\n", + "# trips_all = gpd.read_parquet('outputs/new_trips_with_uza.parquet')\n", + "\n", + "# trips_all >> filter(_.GEOID.isin(wilshire_results.GEOID))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c4e8f0e-941b-4bdd-b5ad-1a7e02602ad6", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results = gpd.read_parquet('outputs/wilshire_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8714149d-fd48-4c11-bd13-c3ec1af6ef2a", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0db59945-15cc-4800-8cd6-0efd01a21bfa", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "611272a6-9add-4218-90c9-ea3004d5e829", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e882b73-446a-4b31-8a5d-e569fa4016ac", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "645f8132-8062-4a3f-8b97-df79483b44df", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "5cf215ef-e257-4405-b4a6-c1c0eab26116", + "metadata": { + "tags": [] + }, + "source": [ + "## Fresno Route 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ac39af3-5a44-44b5-a084-ce24fb115874", + "metadata": {}, + "outputs": [], + "source": [ + "fresno = feeds >> filter(_.name.str.contains('Fresno Sch'))\n", + "fresno" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1bde719-a199-41c9-b152-487c95a43732", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38f2c41e-88d5-4e2e-a3ee-7857746f6b78", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3d2981b-4b1e-439d-b9bc-748e76e1db6b", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7128fd7b-ab96-47bf-999e-33d7cd586546", + "metadata": {}, + "outputs": [], + "source": [ + "trips_1 = trips >> filter(_.route_short_name.isin(['01']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, 
+ "id": "e3fd0090-1cfe-4215-941f-4c627ce9b470", + "metadata": {}, + "outputs": [], + "source": [ + "trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9d4ad6d-7a4a-48f2-91e1-7559c621015a", + "metadata": {}, + "outputs": [], + "source": [ + "st_1 = trips_to_stops(trips_1, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2431c3e0-9338-4693-a02b-93a17962e196", + "metadata": {}, + "outputs": [], + "source": [ + "st_1 = st_1 >> filter(_.stop_sequence < 20) # vertical portion only" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31156153-36c0-4d32-b834-553e8f8a95c6", + "metadata": {}, + "outputs": [], + "source": [ + "# st_1.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35393a07-ba6e-4c67-a427-a8bc07fa0345", + "metadata": {}, + "outputs": [], + "source": [ + "fresno = sjoin_tracts(st_1, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea4f36ad-72f2-4959-846b-21baeab21a83", + "metadata": {}, + "outputs": [], + "source": [ + "# fresno.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dc9d26c-5743-4e2c-b107-0992cce1023c", + "metadata": {}, + "outputs": [], + "source": [ + "fresno.to_file('fresno.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f5dd373-bf14-4e1a-b47d-81f85eb3231f", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results = gpd.read_parquet('outputs/fresno_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82878ff2-007f-41b3-a378-2b808b05f5c0", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "203d5e11-c54f-42ea-8c19-d9fe36bc2643", + 
"metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e2a3a9c-5b72-47da-bfcb-73bf05b955b4", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "294f0011-d722-4cd1-b3e4-88c3de380b21", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef648be9-0c5a-40fa-8351-591175802794", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "0378b6eb-ab6c-40f3-94a4-3aec913d6a3d", + "metadata": { + "tags": [] + }, + "source": [ + "## San Pablo Ave" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e8a3159-40a9-4763-afa2-a325679d9ff2", + "metadata": {}, + "outputs": [], + "source": [ + "ac = feeds >> filter(_.name.str.contains('AC Transit'))\n", + "ac" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6aec2329-d7f7-463e-86f0-12b609048529", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, ac.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c960435-da71-4074-aaea-15a8f12b18ca", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, ac.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85ba91ca-e882-47b8-a343-ac5eab3b0a4e", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcbd0633-395a-4bea-8c64-b21cb5ecd64f", + "metadata": {}, + "outputs": [], + "source": [ + "trips_72r = trips >> 
filter(_.route_short_name.isin(['72R']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1359516-eddf-4cde-ba35-32dd8f7e5535", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_72r = trips_to_stops(trips_72r, ac.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f7cee73-690e-45bd-9d09-203f031e53e4", + "metadata": {}, + "outputs": [], + "source": [ + "# st_72r.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71962307-67d1-4670-b3e2-14dea1c0770f", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo = sjoin_tracts(st_72r, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27970e7d-d3a2-44c9-9c5b-aa839cf6c4dc", + "metadata": {}, + "outputs": [], + "source": [ + "# san_pablo.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4dd4b619-1bbb-4880-b5fb-0450c848b779", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo.to_file('san_pablo.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edfc5475-af4a-42a9-81ea-f041f7135938", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo_results = gpd.read_parquet('outputs/sanpablo_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2eb69c0d-5508-487b-bbbf-a70a478ce164", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dff30ab0-80d7-43f8-b276-ad4f8e877f26", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2eb977c-6779-4916-8576-385c6808e21a", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo_results.explore(column = 
'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5dea582-473e-4157-98bb-5a3572631b42", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "369166fe-baf8-4bc2-86f8-c637b86c23bd", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "73b906dd-19e4-496e-b468-bd47fb3082be", + "metadata": { + "tags": [] + }, + "source": [ + "## Eureka H Street/Purple Route" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b248c72-9b00-4451-9843-02cb2c80c39a", + "metadata": {}, + "outputs": [], + "source": [ + "eureka = feeds >> filter(_.name.str.contains('Humboldt Schedule'))\n", + "eureka" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2136f99f-3c01-406e-8c60-9b3bba2f9920", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, eureka.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b75f1b9-184c-4667-81a4-261b1105249e", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, eureka.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3030e4cc-b573-4083-9e2c-ee8c947160ef", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4750a733-a639-4b7f-a78f-da1683c6a594", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_long_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19722dc1-21ad-4186-93d3-36b6522ba246", + "metadata": {}, + "outputs": [], + "source": [ + "trips_rainbow = trips >> filter(_.route_long_name.isin(['Rainbow Route']), _.direction_id == 0)" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "24226a59-fc6c-4f75-ab46-70d561a2d20e", + "metadata": {}, + "outputs": [], + "source": [ + "tr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c1b2637-2446-4efd-b61a-19c08d534292", + "metadata": {}, + "outputs": [], + "source": [ + "# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93a000d8-6e6b-47aa-9a82-98932989ba7b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_rainbow = trips_to_stops(trips_purple, eureka.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeaeaafc-819c-4727-9768-2a904a6437df", + "metadata": {}, + "outputs": [], + "source": [ + "st_rainbow = st_rainbow >> filter(_.stop_sequence >= 35)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb39f64d-adaf-4391-b205-1b12ef6a1760", + "metadata": {}, + "outputs": [], + "source": [ + "# st_rainbow.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "370fca77-e994-471f-8888-010b52738bec", + "metadata": {}, + "outputs": [], + "source": [ + "eureka = sjoin_tracts(st_rainbow, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f2386c4-bc1b-4b3b-bb80-16b04fe30112", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "eureka.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e604495b-b0bf-46ce-8e14-ccac930dafbc", + "metadata": {}, + "outputs": [], + "source": [ + "eureka.to_file('eureka.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef087597-674c-4256-8ce4-6ca7c0617665", + "metadata": {}, + "outputs": [], + "source": [ + "eureka_results = gpd.read_parquet('outputs/eureka_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"74c2a66f-ff8e-482f-9c57-da742fbe5f42", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7299e91-893e-4b81-979a-f700196c6a96", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ff25cc5-b2aa-4e71-a5dc-2d70396c3805", + "metadata": {}, + "outputs": [], + "source": [ + "eureka_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3020e29d-a035-4248-b990-efc0947d02dd", + "metadata": {}, + "outputs": [], + "source": [ + "eureka_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a4e5ef0-f5d0-43ba-9895-d5542989383c", + "metadata": {}, + "outputs": [], + "source": [ + "eureka_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "6d4fdb29-2b3b-4055-ada9-b5b149db9f6c", + "metadata": {}, + "source": [ + "# All Corridors Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98680b32-440f-4fc2-867c-ede1a1967393", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results['corridor'] = 'Fresno'\n", + "san_pablo_results['corridor'] = 'San Pablo Ave'\n", + "wilshire_results['corridor'] = 'Wilshire'\n", + "eureka_results['corridor'] = 'Eureka'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "263af4e4-07fe-4f74-ba78-b54829905a40", + "metadata": {}, + "outputs": [], + "source": [ + "all_results = pd.concat([fresno_results, san_pablo_results, wilshire_results, eureka_results])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3249b0a-2d67-4dba-bb58-7ab50b930185", + "metadata": {}, + "outputs": [], + "source": [ + "(all_results >> group_by(_.corridor)\n", + " >> 
summarize(total_new_transit_trips = _.projected_new_transit_trips.sum(),\n", + " total_population = _.total_pop.sum(),\n", + " total_vmt = _.total_mi_auto.sum(),\n", + " p50_auto_trip_mi = _.p50_mi_auto.quantile(.5),\n", + " total_auto_trips = _.total_trips_auto.sum()\n", + " )\n", + "\n", + ").to_csv('vmt_transit_corridors.csv')" + ] + }, + { + "cell_type": "markdown", + "id": "3a01d280-a612-4e72-8b06-e98aae3426d6", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Redding Route 4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2fd7bc1-a575-4b5b-adb3-63c7866cacc2", + "metadata": {}, + "outputs": [], + "source": [ + "redding = feeds >> filter(_.name.str.contains('Redding'))\n", + "redding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "909cd786-51d5-40fb-b997-532e67378fe7", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, redding.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "334d3d45-0f38-4fb8-ae58-814d6429eee0", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, redding.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70b8f4d5-c70e-4320-9756-c13d9c919a58", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cece935-15cf-4eab-a58f-311a0927d8ae", + "metadata": {}, + "outputs": [], + "source": [ + "trips_4 = trips >> filter(_.route_short_name.isin(['4']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b56d6acc-d992-4d48-a5e4-affb42e3605f", + "metadata": {}, + "outputs": [], + "source": [ + "# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "1b27cec5-ea22-4265-b7cb-6898794ae577", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_4 = trips_to_stops(trips_4, redding.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e239235-82d3-42ed-963d-d2d196fb1d8a", + "metadata": {}, + "outputs": [], + "source": [ + "# st_4.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af4426e8-4d86-4ed9-9cd4-096d4df9eeaa", + "metadata": {}, + "outputs": [], + "source": [ + "redding = sjoin_tracts(st_4, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c325c88e-7ddf-4f24-85f5-b2e49ea88dd6", + "metadata": {}, + "outputs": [], + "source": [ + "# redding.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5387c9f7-9b77-49ef-b934-dc4f56836e28", + "metadata": {}, + "outputs": [], + "source": [ + "redding.to_file('redding.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cf3dfc7-2893-4cde-bae7-8e9fcdb751ca", + "metadata": {}, + "outputs": [], + "source": [ + "redding_results = gpd.read_parquet('outputs/redding_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e947b18-2710-4d71-89c4-0d256524e774", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4c33a16-fb03-4d87-bd02-1a957b17be51", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cd7c5d1-499b-4109-b942-d1e3f93e6c97", + "metadata": {}, + "outputs": [], + "source": [ + "redding_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"6e439a7b-5a7b-4aa7-bb80-5626f24600c6", + "metadata": {}, + "outputs": [], + "source": [ + "redding_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f883ae3-9dbd-4a39-935e-9e521159e7b3", + "metadata": {}, + "outputs": [], + "source": [ + "redding_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "086c18f7-c7cf-41a2-9147-727740f781e5", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## MST (table)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f958167-a291-4aba-8566-111c5e713be9", + "metadata": {}, + "outputs": [], + "source": [ + "mst = feeds >> filter(_.name.str.contains('Monterey'))\n", + "mst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d68764b0-476a-4af8-8560-b931a8afba51", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, mst.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bbf4ee7-7bd2-4ae2-968b-b17a5ef2514d", + "metadata": {}, + "outputs": [], + "source": [ + "stops.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f07ef246-b92d-4e00-989b-b1800744a694", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, mst.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fd8b773-d935-4c13-8f9e-84a8cba153c9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "trips.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fe0c258-f084-4030-94e6-b0a44f5f5498", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6763fd7-c9cc-44e1-b7e9-8c8c919e3f91", + "metadata": {}, + "outputs": [], + "source": [ + "ab_trips = trips >> filter(_.route_short_name.isin(['A', 
'B']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9151728-3f87-45b0-a799-eb2b126ea434", + "metadata": {}, + "outputs": [], + "source": [ + "trips_20 = trips >> filter(_.route_short_name == '20', _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44c2f67b-74b7-4551-82b9-167f2744081b", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, mst.feed_key, trip_df=trips_20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc5f463c-b3c6-4f80-86d4-9835c650eebb", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = st_20 >> distinct(_.stop_id, _.stop_sequence) >> collect()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23f3a7b3-cd32-480f-ab1f-cc616c02e77a", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st_20, on='stop_id')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b71bbe9-26d0-42ec-8b30-a7cfdee2236e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_20 = trips_to_stops(trips_20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99b2afea-3280-422c-b3b7-6e1c5ff54b5d", + "metadata": {}, + "outputs": [], + "source": [ + "# SURF BRT area for joins...\n", + "st_20 = st_20 >> filter(_.stop_sequence <= 27)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3da388c-807d-424b-9e69-51588401ef2a", + "metadata": {}, + "outputs": [], + "source": [ + "# st_20.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0bfacea0-f958-4a65-90f3-2fec0ab04fd6", + "metadata": {}, + "outputs": [], + "source": [ + "st_ab = trips_to_stops(ab_trips)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25ab4e55-c0cf-4919-b9ef-41e665c9a136", + "metadata": {}, + "outputs": [], + 
"source": [ + "# st_ab.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27a834eb-34b0-46a4-9f0d-75534812a336", + "metadata": {}, + "outputs": [], + "source": [ + "surf_corridor = pd.concat([st_20, st_ab])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72671c73-1331-4888-a6e4-5a8cc3a34a16", + "metadata": {}, + "outputs": [], + "source": [ + "surf_corridor.explore()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb index c04a89e70..2cc71e981 100644 --- a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb +++ b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb @@ -2,19 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", - "from siuba import *" + "from siuba import *\n", + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "81cddca7-bea4-494d-b0cb-02508d52b380", "metadata": {}, "outputs": [], @@ -22,6 +23,60 @@ "import zipfile" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4", + "metadata": {}, + "outputs": [], + "source": [ + "from calitp_data_analysis import get_fs" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 4, + "id": "80324d88-ea3a-45a6-9362-933a2395ed31", + "metadata": {}, + "outputs": [], + "source": [ + "fs = get_fs()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3dcaf650-43fe-4532-9060-442b067ef173", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ! pip install pygris" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "682268e8-78fc-4609-afc5-294f3c650b5e", + "metadata": {}, + "outputs": [], + "source": [ + "import _utils\n", + "# import importlib\n", + "# importlib.reload(_utils)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5aeca438-6285-4cca-b375-ab8aa3849e42", + "metadata": {}, + "outputs": [], + "source": [ + "GCS_PATH = 'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/'" + ] + }, { "cell_type": "markdown", "id": "b076a21f-5a53-4b75-b140-0e4947099e42", @@ -34,289 +89,266 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "85a89737-f90d-488f-9310-ca83557e476c", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def read_group_replica(zip_path):\n", " '''\n", - " zip_path: path to zip file containing a Replica trips export\n", + " zip_path: path to zip file containing a Replica trips export csv \n", " '''\n", - " replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'\n", - " with zipfile.ZipFile(zip_path) as z:\n", - " with z.open(replica_filename) as f:\n", + " def parse_csv(zipfile):\n", + " csvs = [f for f in z.namelist() if f[-3:] == 'csv']\n", + " assert len(csvs) == 1\n", + " with z.open(csvs[0]) as f:\n", " df = pd.read_csv(f)\n", + " return df\n", + " \n", + " if zip_path[:3] == 'gs:':\n", + " with fs.open(zip_path) as f:\n", + " with zipfile.ZipFile(f) as z:\n", + " df = parse_csv(z)\n", + " else:\n", + " with zipfile.ZipFile(f) as z:\n", + " df = parse_csv(z)\n", + " \n", + "\n", " df = (df >> 
filter(_.primary_mode.isin(['private_auto', 'auto_passenger', 'on_demand_auto', 'public_transit']))\n", " >> select(-_.origin_trct_2020, -_.activity_id)\n", " )\n", " df['is_auto'] = df.primary_mode.str.contains('auto')\n", + " return df\n", " grouped = (df >> group_by(_.origin_trct_fips_2020, _.is_auto)\n", " >> summarize(n = _.shape[0], p50_distance = _.trip_distance_miles.quantile(.5),\n", " p75_distance = _.trip_distance_miles.quantile(.75),\n", " p90_distance = _.trip_distance_miles.quantile(.9),\n", - " total_miles = _.trip_distance_miles.sum(),\n", + " total_miles = _.trip_distance_miles.sum(), \n", " )\n", " )\n", - " # parquet_path = f'./intermediate/{zip_path.split(\".zip\")[0]}.parquet'\n", - " # grouped.to_parquet(parquet_path)\n", - " # print(f'grouped data -> {parquet_path}')\n", + "\n", " return grouped" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "ec3470ac-5f0e-49a0-9000-f371f952bc74", + "execution_count": 9, + "id": "f0df73e2-7ebe-431f-b533-6139cc9b79c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/replica_raw/corridors/replica-fresno-trips_dataset.zip'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "zip_path = f'replica_raw/replica-la_north-trips_dataset.zip'" + "f'{GCS_PATH}replica_raw/corridors/replica-fresno-trips_dataset.zip'" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "868a0ae4-b076-4775-beba-fdc9ba764b27", + "execution_count": 10, + "id": "c506f859-1ffc-4e36-ba03-1c4393ba4d9e", "metadata": {}, "outputs": [], "source": [ - "replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'" + "fresno_raw = read_group_replica(f'{GCS_PATH}replica_raw/corridors/replica-fresno-trips_dataset.zip')" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "b4c08637-9bbc-4727-af6d-14dc1c66b4a1", + "execution_count": 11, + "id": 
"51c4c2e4-91d8-47ef-b2a2-ca30c6e2b84f", "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_472/3605967939.py:3: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] + "data": { + "text/html": [ + "
\n", + " | trip_duration_minutes | \n", + "trip_distance_miles | \n", + "origin_trct_fips_2020 | \n", + "
---|---|---|---|
count | \n", + "156196.000000 | \n", + "156196.000000 | \n", + "1.561960e+05 | \n", + "
mean | \n", + "8.027331 | \n", + "2.489494 | \n", + "6.019003e+09 | \n", + "
std | \n", + "6.041116 | \n", + "2.305581 | \n", + "1.731910e+03 | \n", + "
min | \n", + "0.000000 | \n", + "0.100000 | \n", + "6.019000e+09 | \n", + "
25% | \n", + "3.000000 | \n", + "0.800000 | \n", + "6.019002e+09 | \n", + "
50% | \n", + "7.000000 | \n", + "1.600000 | \n", + "6.019004e+09 | \n", + "
75% | \n", + "11.000000 | \n", + "3.500000 | \n", + "6.019005e+09 | \n", + "
max | \n", + "93.000000 | \n", + "17.800000 | \n", + "6.019005e+09 | \n", + "