diff --git a/conveyal_update/conveyal_vars.py b/conveyal_update/conveyal_vars.py index e06f901ad..3ed9cb2c9 100644 --- a/conveyal_update/conveyal_vars.py +++ b/conveyal_update/conveyal_vars.py @@ -1,14 +1,17 @@ import datetime as dt gcs_path = 'gs://calitp-analytics-data/data-analyses/conveyal_update/' -target_date = dt.date(2023, 10, 18) +target_date = dt.date(2024, 2, 14) osm_file = 'us-west-latest.osm.pbf' # http://download.geofabrik.de/north-america/us-west-latest.osm.pbf # first download with wget... conveyal_regions = {} -# boundaries correspond to Conveyal Analysis regions +# boundaries correspond to Conveyal Analysis regions conveyal_regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158} conveyal_regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789} conveyal_regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759} -conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043} \ No newline at end of file +conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043} + +# # special region for one-off Centennial Corridor +# conveyal_regions['bakersfield'] = {'north': 36.81, 'south': 34.13, 'east': -117.12, 'west': -120.65} \ No newline at end of file diff --git a/conveyal_update/match_feeds_regions.py b/conveyal_update/match_feeds_regions.py index f269bade2..318fc8e8b 100644 --- a/conveyal_update/match_feeds_regions.py +++ b/conveyal_update/match_feeds_regions.py @@ -1,7 +1,8 @@ import os os.environ['USE_PYGEOS'] = '0' os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) -from shared_utils import gtfs_utils_v2, geography_utils +from shared_utils import gtfs_utils_v2 +from calitp_data_analysis import geography_utils import pandas as pd from siuba import * diff --git a/sb125_analyses/vmt_transit_sketch/_utils.py 
b/sb125_analyses/vmt_transit_sketch/_utils.py index 1a5a7f181..fd5c44c72 100644 --- a/sb125_analyses/vmt_transit_sketch/_utils.py +++ b/sb125_analyses/vmt_transit_sketch/_utils.py @@ -1,7 +1,7 @@ import pygris import geopandas as gpd from siuba import * -from shared_utils.geography_utils import CA_NAD83Albers +from calitp_data_analysis.geography_utils import CA_NAD83Albers GCS_PATH = 'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/' diff --git a/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb new file mode 100644 index 000000000..a5984a904 --- /dev/null +++ b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb @@ -0,0 +1,1368 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "from siuba import *" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "81cddca7-bea4-494d-b0cb-02508d52b380", + "metadata": {}, + "outputs": [], + "source": [ + "import zipfile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "15475e21-db2a-4f64-a2bc-38f2b76b9a4f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ! 
pip install pygris" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "682268e8-78fc-4609-afc5-294f3c650b5e", + "metadata": {}, + "outputs": [], + "source": [ + "import _utils\n", + "import importlib\n", + "importlib.reload(_utils)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e269a698-e8bf-4af7-a53c-45265ca7e5ad", + "metadata": {}, + "outputs": [], + "source": [ + "import shared_utils\n", + "from calitp_data_analysis import geography_utils, utils" + ] + }, + { + "cell_type": "markdown", + "id": "0664b67b-b63d-4357-b855-20a33bc8d6d7", + "metadata": {}, + "source": [ + "# Selecting Corridors" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "684770aa-d035-45d3-a3a4-ada6c51c2692", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date = '2023-04-15'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1424118-5e7c-4e00-9eee-1630d251cb14", + "metadata": {}, + "outputs": [], + "source": [ + "feeds = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "081ae78d-9f96-4bca-b181-c6c40f3f8e76", + "metadata": {}, + "outputs": [], + "source": [ + "tracts = _utils.get_tract_geoms()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53cb95cd-7e8c-455d-ab47-fe3968582541", + "metadata": {}, + "outputs": [], + "source": [ + "def trips_to_stops(trip_df, feed_list):\n", + " st = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, feed_list, trip_df=trip_df)\n", + " st = st >> distinct(_.stop_id, _.stop_sequence) >> collect()\n", + " st = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st, on='stop_id')\n", + " return st" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ed8886d-6403-438e-b5c6-b8dee2c61290", + "metadata": {}, + "outputs": [], + "source": [ + "def sjoin_tracts(stops_gdf, tracts_gdf, 
buffer_m):\n", + " \n", + " stops_gdf = stops_gdf.to_crs(geography_utils.CA_NAD83Albers)\n", + " assert stops_gdf.crs == tracts_gdf.crs\n", + " \n", + " stops_gdf.geometry = stops_gdf.buffer(buffer_m)\n", + " tracts_sjoined = gpd.sjoin(tracts_gdf, stops_gdf) >> distinct(_.GEOID, _keep_all=True)\n", + " \n", + " return tracts_sjoined" + ] + }, + { + "cell_type": "markdown", + "id": "1ca1517e-876d-49ae-82c1-973e02116745", + "metadata": { + "tags": [] + }, + "source": [ + "## Wilshire" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed2d3a3a-3473-4174-91b7-56c4e6759e9d", + "metadata": {}, + "outputs": [], + "source": [ + "metro = feeds >> filter(_.name.str.contains('LA Metro Bus'))\n", + "metro" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "030ebc15-6824-47a6-b781-11628b74d848", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1e324d3-7891-40f2-bd7b-2389fa36ee26", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "384bc785-79ed-4ca8-be5d-14a0480e98cf", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b6fde5e-1394-4350-b483-1c1e6ad1dabb", + "metadata": {}, + "outputs": [], + "source": [ + "trips_720 = trips >> filter(_.route_short_name.isin(['720']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fed54ea9-8157-425a-889a-1069d77265ca", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_720 = trips_to_stops(trips_720, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38b16a39-5d16-45bd-a9e4-71c22cacdb7f", + 
"metadata": {}, + "outputs": [], + "source": [ + "# only keep W of Wil/Wstn\n", + "st_720 = st_720 >> filter(_.stop_sequence <= 11)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e04bc76-99c8-4930-a754-fc4c2c0f3597", + "metadata": {}, + "outputs": [], + "source": [ + "# st_720.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfb4f847-c5a0-43e2-b8d9-c11c4469c954", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire = sjoin_tracts(st_720, tracts, 804)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "600ff9a4-5bbe-4078-9829-721842084f89", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# wilshire.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be46c69b-c2fa-4a37-ab0a-fff6ccd63cf2", + "metadata": {}, + "outputs": [], + "source": [ + "# wilshire.to_file('wilshire.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18a68442-9ab6-4973-868f-b22ace3ea90d", + "metadata": {}, + "outputs": [], + "source": [ + "# includes non-corridor vmt...\n", + "\n", + "# trips_all = gpd.read_parquet('outputs/new_trips_with_uza.parquet')\n", + "\n", + "# trips_all >> filter(_.GEOID.isin(wilshire_results.GEOID))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c4e8f0e-941b-4bdd-b5ad-1a7e02602ad6", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results = gpd.read_parquet('outputs/wilshire_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8714149d-fd48-4c11-bd13-c3ec1af6ef2a", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0db59945-15cc-4800-8cd6-0efd01a21bfa", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + 
"cell_type": "code", + "execution_count": null, + "id": "611272a6-9add-4218-90c9-ea3004d5e829", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e882b73-446a-4b31-8a5d-e569fa4016ac", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "645f8132-8062-4a3f-8b97-df79483b44df", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "5cf215ef-e257-4405-b4a6-c1c0eab26116", + "metadata": { + "tags": [] + }, + "source": [ + "## Fresno Route 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ac39af3-5a44-44b5-a084-ce24fb115874", + "metadata": {}, + "outputs": [], + "source": [ + "fresno = feeds >> filter(_.name.str.contains('Fresno Sch'))\n", + "fresno" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1bde719-a199-41c9-b152-487c95a43732", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38f2c41e-88d5-4e2e-a3ee-7857746f6b78", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3d2981b-4b1e-439d-b9bc-748e76e1db6b", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7128fd7b-ab96-47bf-999e-33d7cd586546", + "metadata": {}, + "outputs": [], + "source": [ + "trips_1 = trips >> filter(_.route_short_name.isin(['01']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, 
+ "id": "e3fd0090-1cfe-4215-941f-4c627ce9b470", + "metadata": {}, + "outputs": [], + "source": [ + "trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9d4ad6d-7a4a-48f2-91e1-7559c621015a", + "metadata": {}, + "outputs": [], + "source": [ + "st_1 = trips_to_stops(trips_1, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2431c3e0-9338-4693-a02b-93a17962e196", + "metadata": {}, + "outputs": [], + "source": [ + "st_1 = st_1 >> filter(_.stop_sequence < 20) # vertical portion only" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31156153-36c0-4d32-b834-553e8f8a95c6", + "metadata": {}, + "outputs": [], + "source": [ + "# st_1.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35393a07-ba6e-4c67-a427-a8bc07fa0345", + "metadata": {}, + "outputs": [], + "source": [ + "fresno = sjoin_tracts(st_1, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea4f36ad-72f2-4959-846b-21baeab21a83", + "metadata": {}, + "outputs": [], + "source": [ + "# fresno.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dc9d26c-5743-4e2c-b107-0992cce1023c", + "metadata": {}, + "outputs": [], + "source": [ + "fresno.to_file('fresno.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f5dd373-bf14-4e1a-b47d-81f85eb3231f", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results = gpd.read_parquet('outputs/fresno_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82878ff2-007f-41b3-a378-2b808b05f5c0", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "203d5e11-c54f-42ea-8c19-d9fe36bc2643", + 
"metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e2a3a9c-5b72-47da-bfcb-73bf05b955b4", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "294f0011-d722-4cd1-b3e4-88c3de380b21", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef648be9-0c5a-40fa-8351-591175802794", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "0378b6eb-ab6c-40f3-94a4-3aec913d6a3d", + "metadata": { + "tags": [] + }, + "source": [ + "## San Pablo Ave" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8e8a3159-40a9-4763-afa2-a325679d9ff2", + "metadata": {}, + "outputs": [], + "source": [ + "ac = feeds >> filter(_.name.str.contains('AC Transit'))\n", + "ac" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6aec2329-d7f7-463e-86f0-12b609048529", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, ac.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5c960435-da71-4074-aaea-15a8f12b18ca", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, ac.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85ba91ca-e882-47b8-a343-ac5eab3b0a4e", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcbd0633-395a-4bea-8c64-b21cb5ecd64f", + "metadata": {}, + "outputs": [], + "source": [ + "trips_72r = trips >> 
filter(_.route_short_name.isin(['72R']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1359516-eddf-4cde-ba35-32dd8f7e5535", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_72r = trips_to_stops(trips_72r, ac.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3f7cee73-690e-45bd-9d09-203f031e53e4", + "metadata": {}, + "outputs": [], + "source": [ + "# st_72r.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "71962307-67d1-4670-b3e2-14dea1c0770f", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo = sjoin_tracts(st_72r, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27970e7d-d3a2-44c9-9c5b-aa839cf6c4dc", + "metadata": {}, + "outputs": [], + "source": [ + "# san_pablo.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4dd4b619-1bbb-4880-b5fb-0450c848b779", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo.to_file('san_pablo.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "edfc5475-af4a-42a9-81ea-f041f7135938", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo_results = gpd.read_parquet('outputs/sanpablo_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2eb69c0d-5508-487b-bbbf-a70a478ce164", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "dff30ab0-80d7-43f8-b276-ad4f8e877f26", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2eb977c-6779-4916-8576-385c6808e21a", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo_results.explore(column = 
'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d5dea582-473e-4157-98bb-5a3572631b42", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "369166fe-baf8-4bc2-86f8-c637b86c23bd", + "metadata": {}, + "outputs": [], + "source": [ + "san_pablo_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "73b906dd-19e4-496e-b468-bd47fb3082be", + "metadata": { + "tags": [] + }, + "source": [ + "## Eureka H Street/Purple Route" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b248c72-9b00-4451-9843-02cb2c80c39a", + "metadata": {}, + "outputs": [], + "source": [ + "eureka = feeds >> filter(_.name.str.contains('Humboldt Schedule'))\n", + "eureka" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2136f99f-3c01-406e-8c60-9b3bba2f9920", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, eureka.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3b75f1b9-184c-4667-81a4-261b1105249e", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, eureka.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3030e4cc-b573-4083-9e2c-ee8c947160ef", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4750a733-a639-4b7f-a78f-da1683c6a594", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_long_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "19722dc1-21ad-4186-93d3-36b6522ba246", + "metadata": {}, + "outputs": [], + "source": [ + "trips_rainbow = trips >> filter(_.route_long_name.isin(['Rainbow Route']), _.direction_id == 0)" + ] + }, + 
{ + "cell_type": "code", + "execution_count": null, + "id": "24226a59-fc6c-4f75-ab46-70d561a2d20e", + "metadata": {}, + "outputs": [], + "source": [ + "tr" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8c1b2637-2446-4efd-b61a-19c08d534292", + "metadata": {}, + "outputs": [], + "source": [ + "# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93a000d8-6e6b-47aa-9a82-98932989ba7b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_rainbow = trips_to_stops(trips_purple, eureka.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeaeaafc-819c-4727-9768-2a904a6437df", + "metadata": {}, + "outputs": [], + "source": [ + "st_rainbow = st_rainbow >> filter(_.stop_sequence >= 35)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eb39f64d-adaf-4391-b205-1b12ef6a1760", + "metadata": {}, + "outputs": [], + "source": [ + "# st_rainbow.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "370fca77-e994-471f-8888-010b52738bec", + "metadata": {}, + "outputs": [], + "source": [ + "eureka = sjoin_tracts(st_rainbow, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9f2386c4-bc1b-4b3b-bb80-16b04fe30112", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "eureka.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e604495b-b0bf-46ce-8e14-ccac930dafbc", + "metadata": {}, + "outputs": [], + "source": [ + "eureka.to_file('eureka.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef087597-674c-4256-8ce4-6ca7c0617665", + "metadata": {}, + "outputs": [], + "source": [ + "eureka_results = gpd.read_parquet('outputs/eureka_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"74c2a66f-ff8e-482f-9c57-da742fbe5f42", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e7299e91-893e-4b81-979a-f700196c6a96", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6ff25cc5-b2aa-4e71-a5dc-2d70396c3805", + "metadata": {}, + "outputs": [], + "source": [ + "eureka_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3020e29d-a035-4248-b990-efc0947d02dd", + "metadata": {}, + "outputs": [], + "source": [ + "eureka_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a4e5ef0-f5d0-43ba-9895-d5542989383c", + "metadata": {}, + "outputs": [], + "source": [ + "eureka_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "6d4fdb29-2b3b-4055-ada9-b5b149db9f6c", + "metadata": {}, + "source": [ + "# All Corridors Summary" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "98680b32-440f-4fc2-867c-ede1a1967393", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results['corridor'] = 'Fresno'\n", + "san_pablo_results['corridor'] = 'San Pablo Ave'\n", + "wilshire_results['corridor'] = 'Wilshire'\n", + "eureka_results['corridor'] = 'Eureka'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "263af4e4-07fe-4f74-ba78-b54829905a40", + "metadata": {}, + "outputs": [], + "source": [ + "all_results = pd.concat([fresno_results, san_pablo_results, wilshire_results, eureka_results])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d3249b0a-2d67-4dba-bb58-7ab50b930185", + "metadata": {}, + "outputs": [], + "source": [ + "(all_results >> group_by(_.corridor)\n", + " >> 
summarize(total_new_transit_trips = _.projected_new_transit_trips.sum(),\n", + " total_population = _.total_pop.sum(),\n", + " total_vmt = _.total_mi_auto.sum(),\n", + " p50_auto_trip_mi = _.p50_mi_auto.quantile(.5),\n", + " total_auto_trips = _.total_trips_auto.sum()\n", + " )\n", + "\n", + ").to_csv('vmt_transit_corridors.csv')" + ] + }, + { + "cell_type": "markdown", + "id": "3a01d280-a612-4e72-8b06-e98aae3426d6", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Redding Route 4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2fd7bc1-a575-4b5b-adb3-63c7866cacc2", + "metadata": {}, + "outputs": [], + "source": [ + "redding = feeds >> filter(_.name.str.contains('Redding'))\n", + "redding" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "909cd786-51d5-40fb-b997-532e67378fe7", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, redding.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "334d3d45-0f38-4fb8-ae58-814d6429eee0", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, redding.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "70b8f4d5-c70e-4320-9756-c13d9c919a58", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8cece935-15cf-4eab-a58f-311a0927d8ae", + "metadata": {}, + "outputs": [], + "source": [ + "trips_4 = trips >> filter(_.route_short_name.isin(['4']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b56d6acc-d992-4d48-a5e4-affb42e3605f", + "metadata": {}, + "outputs": [], + "source": [ + "# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + ] + }, + { + "cell_type": "code", + 
"execution_count": null, + "id": "1b27cec5-ea22-4265-b7cb-6898794ae577", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_4 = trips_to_stops(trips_4, redding.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e239235-82d3-42ed-963d-d2d196fb1d8a", + "metadata": {}, + "outputs": [], + "source": [ + "# st_4.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "af4426e8-4d86-4ed9-9cd4-096d4df9eeaa", + "metadata": {}, + "outputs": [], + "source": [ + "redding = sjoin_tracts(st_4, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c325c88e-7ddf-4f24-85f5-b2e49ea88dd6", + "metadata": {}, + "outputs": [], + "source": [ + "# redding.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5387c9f7-9b77-49ef-b934-dc4f56836e28", + "metadata": {}, + "outputs": [], + "source": [ + "redding.to_file('redding.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5cf3dfc7-2893-4cde-bae7-8e9fcdb751ca", + "metadata": {}, + "outputs": [], + "source": [ + "redding_results = gpd.read_parquet('outputs/redding_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6e947b18-2710-4d71-89c4-0d256524e774", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b4c33a16-fb03-4d87-bd02-1a957b17be51", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0cd7c5d1-499b-4109-b942-d1e3f93e6c97", + "metadata": {}, + "outputs": [], + "source": [ + "redding_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": 
"6e439a7b-5a7b-4aa7-bb80-5626f24600c6", + "metadata": {}, + "outputs": [], + "source": [ + "redding_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4f883ae3-9dbd-4a39-935e-9e521159e7b3", + "metadata": {}, + "outputs": [], + "source": [ + "redding_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "086c18f7-c7cf-41a2-9147-727740f781e5", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## MST (table)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f958167-a291-4aba-8566-111c5e713be9", + "metadata": {}, + "outputs": [], + "source": [ + "mst = feeds >> filter(_.name.str.contains('Monterey'))\n", + "mst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d68764b0-476a-4af8-8560-b931a8afba51", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, mst.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bbf4ee7-7bd2-4ae2-968b-b17a5ef2514d", + "metadata": {}, + "outputs": [], + "source": [ + "stops.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f07ef246-b92d-4e00-989b-b1800744a694", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, mst.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fd8b773-d935-4c13-8f9e-84a8cba153c9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "trips.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fe0c258-f084-4030-94e6-b0a44f5f5498", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6763fd7-c9cc-44e1-b7e9-8c8c919e3f91", + "metadata": {}, + "outputs": [], + "source": [ + "ab_trips = trips >> filter(_.route_short_name.isin(['A', 
'B']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9151728-3f87-45b0-a799-eb2b126ea434", + "metadata": {}, + "outputs": [], + "source": [ + "trips_20 = trips >> filter(_.route_short_name == '20', _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44c2f67b-74b7-4551-82b9-167f2744081b", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, mst.feed_key, trip_df=trips_20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc5f463c-b3c6-4f80-86d4-9835c650eebb", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = st_20 >> distinct(_.stop_id, _.stop_sequence) >> collect()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23f3a7b3-cd32-480f-ab1f-cc616c02e77a", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st_20, on='stop_id')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b71bbe9-26d0-42ec-8b30-a7cfdee2236e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_20 = trips_to_stops(trips_20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99b2afea-3280-422c-b3b7-6e1c5ff54b5d", + "metadata": {}, + "outputs": [], + "source": [ + "# SURF BRT area for joins...\n", + "st_20 = st_20 >> filter(_.stop_sequence <= 27)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3da388c-807d-424b-9e69-51588401ef2a", + "metadata": {}, + "outputs": [], + "source": [ + "# st_20.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0bfacea0-f958-4a65-90f3-2fec0ab04fd6", + "metadata": {}, + "outputs": [], + "source": [ + "st_ab = trips_to_stops(ab_trips)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25ab4e55-c0cf-4919-b9ef-41e665c9a136", + "metadata": {}, + "outputs": [], + 
"source": [ + "# st_ab.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27a834eb-34b0-46a4-9f0d-75534812a336", + "metadata": {}, + "outputs": [], + "source": [ + "surf_corridor = pd.concat([st_20, st_ab])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72671c73-1331-4888-a6e4-5a8cc3a34a16", + "metadata": {}, + "outputs": [], + "source": [ + "surf_corridor.explore()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb index c04a89e70..2cc71e981 100644 --- a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb +++ b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb @@ -2,19 +2,20 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3", "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import geopandas as gpd\n", - "from siuba import *" + "from siuba import *\n", + "import numpy as np" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 2, "id": "81cddca7-bea4-494d-b0cb-02508d52b380", "metadata": {}, "outputs": [], @@ -22,6 +23,60 @@ "import zipfile" ] }, + { + "cell_type": "code", + "execution_count": 3, + "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4", + "metadata": {}, + "outputs": [], + "source": [ + "from calitp_data_analysis import get_fs" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 4, + "id": "80324d88-ea3a-45a6-9362-933a2395ed31", + "metadata": {}, + "outputs": [], + "source": [ + "fs = get_fs()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3dcaf650-43fe-4532-9060-442b067ef173", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ! pip install pygris" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "682268e8-78fc-4609-afc5-294f3c650b5e", + "metadata": {}, + "outputs": [], + "source": [ + "import _utils\n", + "# import importlib\n", + "# importlib.reload(_utils)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5aeca438-6285-4cca-b375-ab8aa3849e42", + "metadata": {}, + "outputs": [], + "source": [ + "GCS_PATH = 'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/'" + ] + }, { "cell_type": "markdown", "id": "b076a21f-5a53-4b75-b140-0e4947099e42", @@ -34,289 +89,266 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 8, "id": "85a89737-f90d-488f-9310-ca83557e476c", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def read_group_replica(zip_path):\n", " '''\n", - " zip_path: path to zip file containing a Replica trips export\n", + " zip_path: path to zip file containing a Replica trips export csv \n", " '''\n", - " replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'\n", - " with zipfile.ZipFile(zip_path) as z:\n", - " with z.open(replica_filename) as f:\n", + " def parse_csv(zipfile):\n", + " csvs = [f for f in z.namelist() if f[-3:] == 'csv']\n", + " assert len(csvs) == 1\n", + " with z.open(csvs[0]) as f:\n", " df = pd.read_csv(f)\n", + " return df\n", + " \n", + " if zip_path[:3] == 'gs:':\n", + " with fs.open(zip_path) as f:\n", + " with zipfile.ZipFile(f) as z:\n", + " df = parse_csv(z)\n", + " else:\n", + " with zipfile.ZipFile(f) as z:\n", + " df = parse_csv(z)\n", + " \n", + "\n", " df = (df >> 
filter(_.primary_mode.isin(['private_auto', 'auto_passenger', 'on_demand_auto', 'public_transit']))\n", " >> select(-_.origin_trct_2020, -_.activity_id)\n", " )\n", " df['is_auto'] = df.primary_mode.str.contains('auto')\n", + " return df\n", " grouped = (df >> group_by(_.origin_trct_fips_2020, _.is_auto)\n", " >> summarize(n = _.shape[0], p50_distance = _.trip_distance_miles.quantile(.5),\n", " p75_distance = _.trip_distance_miles.quantile(.75),\n", " p90_distance = _.trip_distance_miles.quantile(.9),\n", - " total_miles = _.trip_distance_miles.sum(),\n", + " total_miles = _.trip_distance_miles.sum(), \n", " )\n", " )\n", - " # parquet_path = f'./intermediate/{zip_path.split(\".zip\")[0]}.parquet'\n", - " # grouped.to_parquet(parquet_path)\n", - " # print(f'grouped data -> {parquet_path}')\n", + "\n", " return grouped" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "ec3470ac-5f0e-49a0-9000-f371f952bc74", + "execution_count": 9, + "id": "f0df73e2-7ebe-431f-b533-6139cc9b79c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/replica_raw/corridors/replica-fresno-trips_dataset.zip'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "zip_path = f'replica_raw/replica-la_north-trips_dataset.zip'" + "f'{GCS_PATH}replica_raw/corridors/replica-fresno-trips_dataset.zip'" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "868a0ae4-b076-4775-beba-fdc9ba764b27", + "execution_count": 10, + "id": "c506f859-1ffc-4e36-ba03-1c4393ba4d9e", "metadata": {}, "outputs": [], "source": [ - "replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'" + "fresno_raw = read_group_replica(f'{GCS_PATH}replica_raw/corridors/replica-fresno-trips_dataset.zip')" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "b4c08637-9bbc-4727-af6d-14dc1c66b4a1", + "execution_count": 11, + "id": 
"51c4c2e4-91d8-47ef-b2a2-ca30c6e2b84f", "metadata": {}, "outputs": [ { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_472/3605967939.py:3: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_duration_minutestrip_distance_milesorigin_trct_fips_2020
count156196.000000156196.0000001.561960e+05
mean8.0273312.4894946.019003e+09
std6.0411162.3055811.731910e+03
min0.0000000.1000006.019000e+09
25%3.0000000.8000006.019002e+09
50%7.0000001.6000006.019004e+09
75%11.0000003.5000006.019005e+09
max93.00000017.8000006.019005e+09
\n", + "
" + ], + "text/plain": [ + " trip_duration_minutes trip_distance_miles origin_trct_fips_2020\n", + "count 156196.000000 156196.000000 1.561960e+05\n", + "mean 8.027331 2.489494 6.019003e+09\n", + "std 6.041116 2.305581 1.731910e+03\n", + "min 0.000000 0.100000 6.019000e+09\n", + "25% 3.000000 0.800000 6.019002e+09\n", + "50% 7.000000 1.600000 6.019004e+09\n", + "75% 11.000000 3.500000 6.019005e+09\n", + "max 93.000000 17.800000 6.019005e+09" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "with zipfile.ZipFile(zip_path) as z:\n", - " with z.open(replica_filename) as f:\n", - " df = pd.read_csv(f)" + "fresno_raw.describe()" ] }, { "cell_type": "code", "execution_count": 12, - "id": "5875f7f5-2c1f-40f6-95fa-0a8d106b1e7a", + "id": "7122b15b-574a-4cf1-84ab-0af08f703e57", "metadata": {}, "outputs": [], "source": [ - "miles_all = df.trip_distance_miles.sum()" + "# (wilshire_raw >> filter(_.is_auto)).trip_distance_miles.hist()" ] }, { "cell_type": "code", "execution_count": 13, - "id": "01a823a1-de52-4d08-8b80-a024c1397f95", + "id": "9423782c-0b5a-474a-bf5a-99ae2e6812b3", "metadata": {}, "outputs": [], "source": [ - "shorter = (df >> filter(_.trip_distance_miles < _.trip_distance_miles.quantile(.95))).trip_distance_miles.sum()" + "# (wilshire_raw >> filter(_.is_auto, _.trip_distance_miles < 4)).trip_distance_miles.hist()" ] }, { - "cell_type": "code", - "execution_count": 14, - "id": "132b4a03-9f73-4978-8a35-ae5c130f7f73", + "cell_type": "markdown", + "id": "412218ad-4f25-49de-92df-a00cc6becc70", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.5640656816072517" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], "source": [ - "shorter / miles_all" + "## grouping" ] }, { "cell_type": "code", - "execution_count": 4, - "id": "6c80cc92-a452-4d92-bf2f-d1f40a83442d", + "execution_count": 14, + "id": "dae2c9bb-5121-4966-85ed-111bd525c924", "metadata": {}, 
"outputs": [], "source": [ - "# zip_path = 'replica-la_north-trips_dataset.zip'\n", - "\n", - "# replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'\n", - "# with zipfile.ZipFile(zip_path) as z:\n", - "# with z.open(replica_filename) as f:\n", - "# df = pd.read_csv(f)" + "all_regions = ['central_a', 'central_b', 'north', 'la_north',\n", + " 'la_south', 'sandiego', 'socal_a', 'socal_b']" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "0b50f321-feee-4789-8f17-eee4f4a59f3f", + "execution_count": 15, + "id": "15544ecf-1140-426f-be93-5ce53e2b5f7e", "metadata": {}, "outputs": [], "source": [ - "# df >> head(10)" + "# grouped = pd.DataFrame()\n", + "# for region in ['eureka']:\n", + "# print(region)\n", + "# # note replica filename includes date of download...\n", + "# df = read_group_replica(f'replica_raw/corridors/replica-{region}-trips_dataset.zip')\n", + "# grouped = pd.concat([grouped, df])" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "dae2c9bb-5121-4966-85ed-111bd525c924", + "execution_count": 16, + "id": "1878b238-eec9-450c-bf3e-359016485495", "metadata": {}, "outputs": [], "source": [ - "all_regions = ['central_a', 'central_b', 'north', 'la_north',\n", - " 'la_south', 'sandiego', 'socal_a', 'socal_b']" + "# grouped.to_parquet('intermediate/eureka_grouped.parquet')" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 17, "id": "9fb929ae-0c66-4dfb-9a0e-604bfedef078", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "central_a\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "central_b\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "north\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "la_north\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "la_south\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sandiego\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "socal_a\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. 
Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "socal_b\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - } - ], + "metadata": { + "tags": [] + }, + "outputs": [], "source": [ - "grouped = pd.DataFrame()\n", - "for region in all_regions:\n", - " print(region)\n", - " df = read_group_replica(f'replica_raw/replica-{region}-trips_dataset.zip')\n", - " grouped = pd.concat([grouped, df])\n", + "# grouped = pd.DataFrame()\n", + "# for region in all_regions:\n", + "# print(region)\n", + "# df = read_group_replica(f'replica_raw/replica-{region}-trips_dataset.zip')\n", + "# grouped = pd.concat([grouped, df])\n", "\n", - "grouped.to_parquet('intermediate/replica_grouped.parquet')" + "# grouped.to_parquet('intermediate/replica_grouped.parquet')" ] }, { @@ -326,44 +358,85 @@ "source": [ "# Read back in grouped data\n", "\n", - "* number of trips, median distance, and total miles travelled by auto yes/no and Census tract" + "* number of trips, median distance, and total miles travelled by auto yes/no and Census tract\n", + "* TODO non-manual regions :)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 18, "id": "240ddd8f-6a36-44e1-a1c0-32e6f50c3cac", "metadata": {}, "outputs": [], "source": [ "# grouped = pd.DataFrame()\n", "# for region in all_regions:\n", - "# grouped = pd.concat([grouped, pd.read_parquet(f'intermediate/replica-{region}-trips_dataset.parquet')])" + "# grouped = pd.concat([grouped, pd.read_parquet(f'{GCS_PATH}intermediate/replica-{region}-trips_dataset.parquet')])" ] }, { "cell_type": "code", - "execution_count": 33, + "execution_count": 19, + "id": "0de2a7b3-f1f1-43df-8e7e-2cf4b416378c", + "metadata": {}, + "outputs": [], 
+ "source": [ + "# grouped" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "f1867025-ad72-4ff5-931c-30982a65f0af", + "metadata": {}, + "outputs": [], + "source": [ + "from calitp_data_analysis import utils" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "79dafb24-bebd-4ae6-91d4-8d8e80983c76", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.geoparquet_gcs_export?" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "56e5f48c-649f-4e8f-8ef2-49ce8521cf2c", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.geoparquet_gcs_export(grouped, f'{GCS_PATH}intermediate/', 'replica_grouped')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "id": "7596b001-2c1c-488f-b089-c375c0ddff4f", "metadata": {}, "outputs": [], "source": [ - "replica_grouped = pd.read_parquet('intermediate/replica_grouped.parquet')" + "replica_grouped = pd.read_parquet(f'{GCS_PATH}intermediate/replica_grouped.parquet')" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": 24, "id": "23b9ae5b-bd02-4978-b7d1-3e4b49de53b5", "metadata": {}, "outputs": [], "source": [ - "tracts_feeds = gpd.read_parquet('intermediate/feeds_tract_geo.parquet')" + "tracts_feeds = gpd.read_parquet(f'{GCS_PATH}intermediate/feeds_tract_geo.parquet')" ] }, { "cell_type": "code", - "execution_count": 35, + "execution_count": 25, "id": "73088513-9710-4c9a-a205-d531c3807345", "metadata": {}, "outputs": [], @@ -373,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": 26, "id": "61ce4f74-45e9-4c85-ad34-d2ac9c5cfc32", "metadata": {}, "outputs": [], @@ -383,7 +456,48 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 27, + "id": "3f9512da-00e9-4768-a79c-8570c4a0bec2", + "metadata": {}, + "outputs": [], + "source": [ + "# replica_grouped = pd.read_parquet(f'intermediate/wilshire_grouped.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": 
"240b6b96-ea68-49f2-8509-883af8f0c36b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "11288.75" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "18062 / 1.6" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "fa0e75af-11d9-4051-8b0f-3a8d95c166be", + "metadata": {}, + "outputs": [], + "source": [ + "# (replica_grouped >> filter(_.is_auto))" + ] + }, + { + "cell_type": "code", + "execution_count": 30, "id": "06f007da-ad8d-4801-8595-85c5d44fe921", "metadata": {}, "outputs": [], @@ -393,20 +507,27 @@ " replica_df: df from read_group_replica\n", " tracts_feeds_df: gdf from stops_by_tract_agency\n", " '''\n", + " auto_trip_counts = (replica_df >> filter(_.is_auto)\n", + " >> group_by(_.origin_trct_fips_2020)\n", + " >> summarize(auto_trips = _.n.sum())\n", + " )\n", " replica_df.is_auto = replica_df.is_auto.map(lambda x: 'yes' if x else 'no')\n", " \n", " df2 = replica_df >> spread('is_auto', 'p50_distance') >> select(-_.n, -_.total_miles)\n", " df2 = df2.rename(columns={'no': 'p50_mi_transit', 'yes': 'p50_mi_auto'})\n", - " \n", - " df3 = replica_df >> spread('is_auto', 'total_miles') >> select(-_.p50_distance, -_.n)\n", + " df3 = replica_df >> spread('is_auto', 'total_miles') >> select(-_.n, -_.p50_distance)\n", " df3 = df3.rename(columns={'no': 'total_mi_transit', 'yes': 'total_mi_auto'})\n", - " \n", " df2 = df2 >> inner_join(_, df3, on = 'origin_trct_fips_2020')\n", + " df2 = df2 >> inner_join(_, auto_trip_counts, on = 'origin_trct_fips_2020')\n", " \n", + " if 'p50_mi_transit' not in df2.columns:\n", + " df2['p50_mi_transit'] = np.nan\n", + " df2['total_mi_transit'] = np.nan\n", " df2 = (df2 >> group_by(_.origin_trct_fips_2020)\n", " >> summarize(p50_mi_transit = _.p50_mi_transit.max(), p50_mi_auto = _.p50_mi_auto.max(),\n", " total_mi_transit = _.total_mi_transit.max(),\n", - " total_mi_auto = _.total_mi_auto.max()\n", + " total_mi_auto = 
_.total_mi_auto.max(),\n", + " total_trips_auto = _.auto_trips.sum()\n", " )\n", " )\n", " \n", @@ -431,7 +552,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 31, "id": "d661657b-cac7-4e9a-806d-48579c0516f9", "metadata": { "tags": [] @@ -446,12 +567,12 @@ } ], "source": [ - "df2 = process_grouped_data(replica_grouped, tracts_feeds)" + "processed_df = process_grouped_data(replica_grouped, tracts_feeds)" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 32, "id": "50f04f86-6f13-4654-952e-58dc07a23faa", "metadata": {}, "outputs": [ @@ -481,6 +602,7 @@ " p50_mi_auto\n", " total_mi_transit\n", " total_mi_auto\n", + " total_trips_auto\n", " p50_transit_longer\n", " no_transit_replica\n", " no_transit\n", @@ -497,6 +619,7 @@ " 6.9\n", " 5185.3\n", " 165410.8\n", + " 53636\n", " False\n", " False\n", " False\n", @@ -511,6 +634,7 @@ " 3.4\n", " 3638.1\n", " 58763.4\n", + " 28600\n", " True\n", " False\n", " False\n", @@ -525,6 +649,7 @@ " 3.7\n", " 6130.7\n", " 122736.5\n", + " 57740\n", " True\n", " False\n", " False\n", @@ -542,29 +667,29 @@ "1 6001400200 4.1 3.4 3638.1 \n", "2 6001400300 4.7 3.7 6130.7 \n", "\n", - " total_mi_auto p50_transit_longer no_transit_replica no_transit \\\n", - "0 165410.8 False False False \n", - "1 58763.4 True False False \n", - "2 122736.5 True False False \n", + " total_mi_auto total_trips_auto p50_transit_longer no_transit_replica \\\n", + "0 165410.8 53636 False False \n", + "1 58763.4 28600 True False \n", + "2 122736.5 57740 True False \n", "\n", - " total_mi new_transit_mi projected_new_transit_trips \n", - "0 170596.1 45956.698096 10942.0 \n", - "1 62401.5 16326.454094 3982.0 \n", - "2 128867.2 34100.338526 7255.0 " + " no_transit total_mi new_transit_mi projected_new_transit_trips \n", + "0 False 170596.1 45956.698096 10942.0 \n", + "1 False 62401.5 16326.454094 3982.0 \n", + "2 False 128867.2 34100.338526 7255.0 " ] }, - "execution_count": 39, + "execution_count": 32, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ - "df2 >> head(3)" + "processed_df >> head(3)" ] }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 33, "id": "8169ca83-e540-48f4-af93-076a535f00c6", "metadata": {}, "outputs": [ @@ -614,41 +739,30 @@ "1 True 1.294277e+08" ] }, - "execution_count": 40, + "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "no_transit = df2 >> group_by(_.no_transit) >> summarize(total_mi_auto = _.total_mi_auto.sum())\n", + "no_transit = processed_df >> group_by(_.no_transit) >> summarize(total_mi_auto = _.total_mi_auto.sum())\n", "no_transit" ] }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 34, "id": "c7175031-a66c-4f90-8dca-193198b9d932", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'11.0 percent of VMT in tracts with no transit per GTFS Warehouse stops'" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "no_transit_pct = no_transit.iloc[1, 1] / no_transit.iloc[0, 1]\n", - "f'{round(no_transit_pct*100, 0)} percent of VMT in tracts with no transit per GTFS Warehouse stops'" + "# no_transit_pct = no_transit.iloc[1, 1] / no_transit.iloc[0, 1]\n", + "# f'{round(no_transit_pct*100, 0)} percent of VMT in tracts with no transit per GTFS Warehouse stops'" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 35, "id": "6476da44-36ac-4604-89d5-76966eb9411d", "metadata": {}, "outputs": [ @@ -660,42 +774,19 @@ "Name: p50_transit_longer, dtype: int64" ] }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2.p50_transit_longer.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "682268e8-78fc-4609-afc5-294f3c650b5e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 43, + "execution_count": 35, "metadata": 
{}, "output_type": "execute_result" } ], "source": [ - "import _utils\n", - "import importlib\n", - "importlib.reload(_utils)" + "processed_df.p50_transit_longer.value_counts()" ] }, { "cell_type": "code", - "execution_count": 44, - "id": "e83400ec-c295-40b3-91d6-3c3bd1ffb5f4", + "execution_count": 36, + "id": "70bdaf9e-397b-46d4-bc0a-a73722cf8591", "metadata": {}, "outputs": [ { @@ -712,357 +803,169 @@ }, { "cell_type": "code", - "execution_count": 45, - "id": "76766b3c-efc1-4ad7-9d47-3f465c616abf", - "metadata": {}, - "outputs": [], - "source": [ - "# tract_geo = gpd.read_file('./tl_2020_06_tract.zip') >> select(_.GEOID, _.geometry)" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "876be135-baf9-41bb-880f-22cf9df2c4a9", - "metadata": {}, - "outputs": [], - "source": [ - "tract_geo.GEOID = tract_geo.GEOID.astype('int64')" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "03ecd820-d9e5-4c08-82ae-0929638e3af4", - "metadata": {}, - "outputs": [], - "source": [ - "gdf = tract_geo >> inner_join(_, df2, on = {'GEOID': 'origin_trct_fips_2020'}) >> select(-_.origin_trct_fips_2020)" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "3a830524-3c8f-4915-8768-3062ddbf7db0", - "metadata": {}, - "outputs": [], - "source": [ - "ca_uzas = gpd.read_parquet('intermediate/ca_uza.parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "d00b301f-ec03-4525-b415-53f9f65a77c2", + "execution_count": 37, + "id": "c10b8a4f-b3de-4494-81b7-942c2b230c30", "metadata": {}, "outputs": [], "source": [ - "uza_joined = gpd.sjoin(gdf, ca_uzas, how = 'left')" + "ca_uzas = gpd.read_parquet(f'{GCS_PATH}intermediate/ca_uza.parquet')" ] }, { "cell_type": "code", - "execution_count": 50, - "id": "4cb23dd6-94b8-4b36-952b-054a06c04192", + "execution_count": 59, + "id": "8d2f3dec-892d-4ffc-85fb-8e8866a51f42", "metadata": {}, "outputs": [], "source": [ - "# TODO to util, other source?\n", - "\n", - "census_pop = 
gpd.read_file('census_ntd/DECENNIALPL2020.P1_2024-02-01T163251.zip')\n", - "\n", - "tract_pop = census_pop[['GEO_ID', 'P1_001N']].iloc[2:,:]\n", + "# census_pop = gpd.read_file(f'./census_ntd/DECENNIALPL2020.P1_2024-02-01T163251.zip')\n", "\n", - "tract_pop.GEO_ID = tract_pop.GEO_ID.map(lambda x: x.split('US')[1])\n", + "# census_cleaned = census_pop.iloc[2:,:][['GEO_ID', 'P1_001N']]\n", "\n", - "tract_pop.GEO_ID = tract_pop.GEO_ID.astype('int64')\n", - "\n", - "tract_pop = tract_pop >> select(_.total_pop == _.P1_001N, _.GEOID == _.GEO_ID)" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "31623fcf-efd7-4863-8f12-71a04ac45410", - "metadata": {}, - "outputs": [], - "source": [ - "uza_joined = uza_joined >> inner_join(_, tract_pop, on = 'GEOID')" + "# census_cleaned.to_parquet(f'{GCS_PATH}census_ntd/DECENNIALPL2020.parquet')" ] }, { "cell_type": "code", - "execution_count": 52, - "id": "fb733ab6-007a-4747-91dd-ec40688a066f", + "execution_count": 60, + "id": "d7b17b29-29b2-40f3-9038-c3b0b75f39f8", "metadata": {}, "outputs": [], "source": [ - "uza_joined.total_pop = uza_joined.total_pop.astype('int64')\n", - "uza_joined['new_trips_per_capita'] = uza_joined.projected_new_transit_trips / uza_joined.total_pop" + "census_cleaned = pd.read_parquet(f'{GCS_PATH}census_ntd/DECENNIALPL2020.parquet')" ] }, { "cell_type": "code", - "execution_count": 53, - "id": "addb6ac5-5f4d-41dc-9a7a-0d6644bb699c", + "execution_count": 61, + "id": "8995132b-5eba-48e8-bfdb-585f1ff1e9d8", "metadata": {}, "outputs": [], "source": [ - "uza_joined.to_parquet('outputs/new_trips_with_uza.parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "666ebaa0-7d29-4f56-a3fd-5ad7391b15c8", - "metadata": {}, - "outputs": [], - "source": [ - "# uza_joined = uza_joined >> filter(_.P1_001N != 0) # remove tracts where nobody lives" - ] - }, - { - "cell_type": "markdown", - "id": "019d1eaf-c3a7-4b71-a0b9-f9b041a04280", - "metadata": {}, - "source": [ - "# \"What if VMT 
decreased by 25% per the CARB target, and all those trips were on (existing) transit instead?\"\n", - "\n", - "## VMT is a spatial phenomenon, our analysis should be spatial too\n", - "\n", - "* Start with \"big data\" weekday residential VMT per Census tract via Replica\n", - "* Per target, future VMT should be 25% less\n", - "* Assume tripmaking remains constant, and that transit entirely replaces that VMT\n", - " * optional: find tracts with no transit service, hold their VMT constant and redistribute missed target among remaining tracts (30% reduction instead of 25% perhaps?)\n", + "def attach_tracts_pop(processed_df, tract_geo, ca_uzas, census_cleaned):\n", " \n", - "## From reduced VMT to transit trips\n", - "\n", - "* Replica gives transit trip lengths but it may not be reliable (\"good for auto, less so for transit\")\n", - " * It's generally showing the median transit trip as longer than the median auto trip, which seems questionable\n", - " * We have plenty of good spatial data on transit service _provision_, but not ridership (generally agency-level only)\n", - " * May need to refer to research/default to a fixed \"median transit trip\" length based on population density\n", - "* Regardless, get a rough estimate by dividing reduced VMT in each tract by median transit trip distance\n", - "* Reality check using derived modeshare number?\n", - "\n", - "## Connecting our estimate to California's transit provider landscape\n", - "\n", - "* Proportionally assign new trips per census tract to transit operators\n", - " * ~By number of stops in tract? OK for bus but will dramatically undercount rail~\n", - " * By each operator's proportion of regional ridership (from NTD)? Will overcount in tracts on the edge of large operator service areas, but perhaps preferable\n", - "* Can then create operator-level estimates of increased ridership and service hour provision\n", - " * This is where we have the best estimates of existing ridership..." 
- ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "2db21010-14d2-47d1-b8be-9fd270cca949", - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd" - ] - }, - { - "cell_type": "markdown", - "id": "345b6bfd-f569-442f-9641-4f5f5dd4c40c", - "metadata": {}, - "source": [ - "## Mapping..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b84ee354-9bc8-4808-b89e-39de27d24b71", - "metadata": {}, - "outputs": [], - "source": [ - "gdf.explore(column = 'total_mi_transit', scheme = 'NaturalBreaks')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2bde07bb-30d9-48b7-86ae-1055c15c4aac", - "metadata": {}, - "outputs": [], - "source": [ - "gdf.explore(column = 'total_mi_auto', scheme = 'NaturalBreaks')" + " tract_geo.GEOID = tract_geo.GEOID.astype('int64')\n", + " gdf = (tract_geo >> inner_join(_, processed_df, on = {'GEOID': 'origin_trct_fips_2020'})\n", + " >> select(-_.origin_trct_fips_2020))\n", + " uza_joined = gpd.sjoin(gdf, ca_uzas, how = 'left')\n", + " \n", + " tract_pop = census_cleaned\n", + " tract_pop.GEO_ID = tract_pop.GEO_ID.map(lambda x: x.split('US')[1])\n", + " tract_pop.GEO_ID = tract_pop.GEO_ID.astype('int64')\n", + " tract_pop = tract_pop >> select(_.total_pop == _.P1_001N, _.GEOID == _.GEO_ID)\n", + " uza_joined = uza_joined >> inner_join(_, tract_pop, on = 'GEOID')\n", + " uza_joined.total_pop = uza_joined.total_pop.astype('int64')\n", + " uza_joined['new_trips_per_capita'] = uza_joined.projected_new_transit_trips / uza_joined.total_pop\n", + " \n", + " return uza_joined" ] }, { "cell_type": "code", - "execution_count": null, - "id": "0a9c55c5-b17e-4e4f-b0ec-198d2cd89b14", + "execution_count": 63, + "id": "81f381a4-bb80-4fb7-97fa-853111914712", "metadata": {}, "outputs": [], "source": [ - "(gdf >> filter(_.no_transit)).explore()" - ] - }, - { - "cell_type": "markdown", - "id": "76a258eb-cd26-4d2e-8e0c-8ac50c5c1449", - "metadata": {}, - "source": [ - "## New transit trips" + 
"uza_joined = attach_tracts_pop(processed_df, tract_geo, ca_uzas, census_cleaned)" ] }, { "cell_type": "code", "execution_count": null, - "id": "ea9768db-f59e-4d73-a21c-c903736b4421", - "metadata": {}, + "id": "e0740b97-48ce-4543-a560-9936c559e4df", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "# gdf.explore(column = 'new_transit_mi', scheme = 'NaturalBreaks')" + "# for region in ['sanpablo', 'eureka', 'fresno', 'wilshire']:\n", + "# replica_grouped = pd.read_parquet(f'intermediate/{region}_grouped.parquet')\n", + "# processed_df = process_grouped_data(replica_grouped, tracts_feeds)\n", + "# uza_joined = attach_tracts_pop(processed_df)\n", + "# display(uza_joined >> head(3))\n", + "# uza_joined.to_parquet(f'outputs/{region}_trips_with_uza.parquet')" ] }, { "cell_type": "code", "execution_count": null, - "id": "a6d4647f-d6a3-4935-ab1f-613738ffecc5", + "id": "666ebaa0-7d29-4f56-a3fd-5ad7391b15c8", "metadata": {}, "outputs": [], "source": [ - "gdf = gdf >> filter(_.new_trips_per_capita < _.new_trips_per_capita.quantile(.99))" + "# uza_joined = uza_joined >> filter(_.P1_001N != 0) # remove tracts where nobody lives" ] }, { "cell_type": "code", - "execution_count": null, - "id": "d560c555-ced9-43e5-bfb7-923a2c6a65ca", + "execution_count": 68, + "id": "6ed72b61-2c14-4eb2-a188-00384f52082a", "metadata": {}, "outputs": [], "source": [ - "gdf.explore(column = 'new_trips_per_capita', scheme = 'Quantiles')" + "uza_joined = (uza_joined\n", + " >> select(-_.index_right)\n", + " )" ] }, { "cell_type": "code", - "execution_count": null, - "id": "8fb69137-fb4e-4140-9600-1155712585fc", + "execution_count": 71, + "id": "029334e4-be41-4f93-b95d-67687bd2d70e", "metadata": {}, "outputs": [], "source": [ - "gdf.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "718a534e-e8f5-4a2f-8698-f87c298d7ba0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"17797968.0" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf.projected_new_transit_trips.sum()" - ] - }, - { - "cell_type": "markdown", - "id": "db1e79d4-2958-4483-bee9-7ea58ecd4406", - "metadata": {}, - "source": [ - "About 18 million new daily trips across LA/Orange/San Diego/Imperial Counties. For reference, LA Metro's daily ridership is around 1 million. Current regional transit modeshare is only about 5%..." - ] - }, - { - "cell_type": "markdown", - "id": "6b708ffd-6d83-45e3-86c1-39d30ec2f5a8", - "metadata": {}, - "source": [ - "## Next Steps\n", - "\n", - "* caveat: other strategies (land use, active modes...)\n", - "* caveat: induced travel\n", - "* stratify into \"good transit, not riding\", \"bad transit\"\n", - "* LODES o/d data? Replica? -> Conveyal transit o/d find that \"good transit but not riding it\"\n", - " * find what doesn't show up in aggregate accessibility...\n", - "* https://walker-data.com/pygris/" + "uza_joined['vmt_quantile'] = pd.qcut(uza_joined.total_mi_auto, 4, labels = ['p25', 'p50', 'p75', 'p100'])" ] }, { "cell_type": "code", - "execution_count": 99, - "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4", + "execution_count": 72, + "id": "8f1b132b-8f81-4923-9efe-92f3162ee64c", "metadata": {}, "outputs": [], "source": [ - "from calitp_data_analysis import get_fs" + "!mkdir export" ] }, { "cell_type": "code", - "execution_count": 100, - "id": "b0572b17-a9c2-4128-ab67-fa650c87fda0", + "execution_count": 75, + "id": "ec2cc1b4-002b-498a-b1f0-055f5252fb70", "metadata": {}, "outputs": [], "source": [ - "fs = get_fs()" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "76539b31-f757-4703-9f7a-2eea60834d06", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/'" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - 
"_utils.GCS_PATH" + "uza_joined.vmt_quantile = uza_joined.vmt_quantile.astype(str)" ] }, { "cell_type": "code", - "execution_count": 111, - "id": "1518eea5-d2f1-4807-b392-4a4f2624439d", + "execution_count": 79, + "id": "354a6bea-fbc5-49d7-b531-2597f240510e", "metadata": {}, "outputs": [], "source": [ - "lpath = 'replica_raw/'" + "utils.geoparquet_gcs_export(uza_joined, f'{GCS_PATH}outputs/', 'new_trips_with_uza')" ] }, { "cell_type": "code", - "execution_count": 112, - "id": "e0048f15-f124-431d-9fae-35aa7ef3dd72", + "execution_count": 77, + "id": "30983d03-5c50-4480-85db-ed6666e34bd8", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "[None, None, None, None, None, None, None, None, None, None]" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_532/1657757500.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + " uza_joined.to_file('./export/vmt_with_quantiles.shp')\n" + ] } ], "source": [ - "fs.put(lpath, _utils.GCS_PATH + lpath, recursive=True)" + "uza_joined.to_file('./export/vmt_with_quantiles.shp')" ] } ],