From 4811956ccea8bc81c69439f3eb698379ba7f3c2c Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Thu, 15 Feb 2024 17:06:07 +0000 Subject: [PATCH 1/5] update imports, bakersfield osm for conveyal --- conveyal_update/conveyal_vars.py | 9 ++++++--- conveyal_update/match_feeds_regions.py | 3 ++- sb125_analyses/vmt_transit_sketch/_utils.py | 2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/conveyal_update/conveyal_vars.py b/conveyal_update/conveyal_vars.py index e06f901ad..3ed9cb2c9 100644 --- a/conveyal_update/conveyal_vars.py +++ b/conveyal_update/conveyal_vars.py @@ -1,14 +1,17 @@ import datetime as dt gcs_path = 'gs://calitp-analytics-data/data-analyses/conveyal_update/' -target_date = dt.date(2023, 10, 18) +target_date = dt.date(2024, 2, 14) osm_file = 'us-west-latest.osm.pbf' # http://download.geofabrik.de/north-america/us-west-latest.osm.pbf # first download with wget... conveyal_regions = {} -# boundaries correspond to Conveyal Analysis regions + boundaries correspond to Conveyal Analysis regions conveyal_regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158} conveyal_regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789} conveyal_regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759} -conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043} \ No newline at end of file +conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043} + +# # special region for one-off Centennial Corridor +# conveyal_regions['bakersfield'] = {'north': 36.81, 'south': 34.13, 'east': -117.12, 'west': -120.65} \ No newline at end of file diff --git a/conveyal_update/match_feeds_regions.py b/conveyal_update/match_feeds_regions.py index f269bade2..318fc8e8b 100644 --- a/conveyal_update/match_feeds_regions.py +++ b/conveyal_update/match_feeds_regions.py @@ -1,7 +1,8 @@ import os os.environ['USE_PYGEOS'] = '0' os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000) -from shared_utils import gtfs_utils_v2, geography_utils +from shared_utils import gtfs_utils_v2 +from calitp_data_analysis import geography_utils import pandas as pd from siuba import * diff --git a/sb125_analyses/vmt_transit_sketch/_utils.py b/sb125_analyses/vmt_transit_sketch/_utils.py index 1a5a7f181..fd5c44c72 100644 --- a/sb125_analyses/vmt_transit_sketch/_utils.py +++ b/sb125_analyses/vmt_transit_sketch/_utils.py @@ -1,7 +1,7 @@ import pygris import geopandas as gpd from siuba import * -from shared_utils.geography_utils import CA_NAD83Albers +from calitp_data_analysis.geography_utils import CA_NAD83Albers GCS_PATH = 'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/' From 583f8210e2279bbceaf8412052663f241995c10e Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Thu, 15 Feb 2024 18:19:54 +0000 Subject: [PATCH 2/5] start reworking corridors --- .../corridor_selection.ipynb | 623 ++++++++++++++++++ .../read_process_data.ipynb | 264 ++------ 2 files changed, 686 insertions(+), 201 deletions(-) create mode 100644 sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb diff --git a/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb new file mode 100644 index 000000000..88ad8aa98 --- /dev/null +++ b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb @@ -0,0 +1,623 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3", + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import geopandas as gpd\n", + "from siuba import *" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "81cddca7-bea4-494d-b0cb-02508d52b380", + "metadata": {}, + "outputs": [], + "source": [ + "import zipfile" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "15475e21-db2a-4f64-a2bc-38f2b76b9a4f", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ! pip install pygris" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "682268e8-78fc-4609-afc5-294f3c650b5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import _utils\n", + "import importlib\n", + "importlib.reload(_utils)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "e269a698-e8bf-4af7-a53c-45265ca7e5ad", + "metadata": {}, + "outputs": [], + "source": [ + "import shared_utils\n", + "from calitp_data_analysis import geography_utils" + ] + }, + { + "cell_type": "markdown", + "id": "0664b67b-b63d-4357-b855-20a33bc8d6d7", + "metadata": {}, + "source": [ + "# Selecting Corridors" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "684770aa-d035-45d3-a3a4-ada6c51c2692", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date = '2023-04-15'" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1424118-5e7c-4e00-9eee-1630d251cb14", + "metadata": {}, + "outputs": [], + "source": [ + "feeds = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "081ae78d-9f96-4bca-b181-c6c40f3f8e76", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using FIPS code '06' for input 'CA'\n" + ] + } + ], + "source": [ + "tracts = _utils.get_tract_geoms()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "53cb95cd-7e8c-455d-ab47-fe3968582541", + "metadata": {}, + "outputs": [], + "source": [ + "def trips_to_stops(trip_df, feed_list):\n", + " st = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, feed_list, trip_df=trip_df)\n", + " st = st >> distinct(_.stop_id, _.stop_sequence) >> collect()\n", + " st = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st, on='stop_id')\n", + " return st" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1ed8886d-6403-438e-b5c6-b8dee2c61290", + "metadata": {}, + "outputs": [], + "source": [ + "def sjoin_tracts(stops_gdf, tracts_gdf, buffer_m):\n", + " \n", + " stops_gdf = stops_gdf.to_crs(geography_utils.CA_NAD83Albers)\n", + " assert stops_gdf.crs == tracts_gdf.crs\n", + " \n", + " stops_gdf.geometry = stops_gdf.buffer(buffer_m)\n", + " tracts_sjoined = gpd.sjoin(tracts_gdf, stops_gdf) >> distinct(_.GEOID, _keep_all=True)\n", + " \n", + " return tracts_sjoined" + ] + }, + { + "cell_type": "markdown", + "id": "086c18f7-c7cf-41a2-9147-727740f781e5", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## MST (table)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f958167-a291-4aba-8566-111c5e713be9", + "metadata": {}, + "outputs": [], + "source": [ + "mst = feeds >> filter(_.name.str.contains('Monterey'))\n", + "mst" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d68764b0-476a-4af8-8560-b931a8afba51", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, mst.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bbf4ee7-7bd2-4ae2-968b-b17a5ef2514d", + "metadata": {}, + "outputs": [], + "source": [ + "stops.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f07ef246-b92d-4e00-989b-b1800744a694", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, mst.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fd8b773-d935-4c13-8f9e-84a8cba153c9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "trips.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8fe0c258-f084-4030-94e6-b0a44f5f5498", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6763fd7-c9cc-44e1-b7e9-8c8c919e3f91", + "metadata": {}, + "outputs": [], + "source": [ + "ab_trips = trips >> filter(_.route_short_name.isin(['A', 'B']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f9151728-3f87-45b0-a799-eb2b126ea434", + "metadata": {}, + "outputs": [], + "source": [ + "trips_20 = trips >> filter(_.route_short_name == '20', _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "44c2f67b-74b7-4551-82b9-167f2744081b", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, mst.feed_key, trip_df=trips_20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fc5f463c-b3c6-4f80-86d4-9835c650eebb", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = st_20 >> distinct(_.stop_id, _.stop_sequence) >> collect()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "23f3a7b3-cd32-480f-ab1f-cc616c02e77a", + "metadata": {}, + "outputs": [], + "source": [ + "st_20 = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st_20, on='stop_id')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b71bbe9-26d0-42ec-8b30-a7cfdee2236e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_20 = trips_to_stops(trips_20)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "99b2afea-3280-422c-b3b7-6e1c5ff54b5d", + "metadata": {}, + "outputs": [], + "source": [ + "# SURF BRT area for joins...\n", + "st_20 = st_20 >> filter(_.stop_sequence <= 27)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c3da388c-807d-424b-9e69-51588401ef2a", + "metadata": {}, + "outputs": [], + "source": [ + "# st_20.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0bfacea0-f958-4a65-90f3-2fec0ab04fd6", + "metadata": {}, + "outputs": [], + "source": [ + "st_ab = trips_to_stops(ab_trips)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "25ab4e55-c0cf-4919-b9ef-41e665c9a136", + "metadata": {}, + "outputs": [], + "source": [ + "# st_ab.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "27a834eb-34b0-46a4-9f0d-75534812a336", + "metadata": {}, + "outputs": [], + "source": [ + "surf_corridor = pd.concat([st_20, st_ab])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "72671c73-1331-4888-a6e4-5a8cc3a34a16", + "metadata": {}, + "outputs": [], + "source": [ + "surf_corridor.explore()" + ] + }, + { + "cell_type": "markdown", + "id": "1ca1517e-876d-49ae-82c1-973e02116745", + "metadata": {}, + "source": [ + "## Wilshire" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed2d3a3a-3473-4174-91b7-56c4e6759e9d", + "metadata": {}, + "outputs": [], + "source": [ + "metro = feeds >> filter(_.name.str.contains('LA Metro Bus'))\n", + "metro" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "030ebc15-6824-47a6-b781-11628b74d848", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1e324d3-7891-40f2-bd7b-2389fa36ee26", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "384bc785-79ed-4ca8-be5d-14a0480e98cf", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b6fde5e-1394-4350-b483-1c1e6ad1dabb", + "metadata": {}, + "outputs": [], + "source": [ + "trips_720 = trips >> filter(_.route_short_name.isin(['720']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fed54ea9-8157-425a-889a-1069d77265ca", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_720 = trips_to_stops(trips_720, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38b16a39-5d16-45bd-a9e4-71c22cacdb7f", + "metadata": {}, + "outputs": [], + "source": [ + "# only keep W of Wil/Wstn\n", + "st_720 = st_720 >> filter(_.stop_sequence <= 11)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e04bc76-99c8-4930-a754-fc4c2c0f3597", + "metadata": {}, + "outputs": [], + "source": [ + "st_720.explore()" + ] + }, + { + "cell_type": "markdown", + "id": "5cf215ef-e257-4405-b4a6-c1c0eab26116", + "metadata": {}, + "source": [ + "## Fresno Route 1" + ] + }, + { + "cell_type": "markdown", + "id": "0378b6eb-ab6c-40f3-94a4-3aec913d6a3d", + "metadata": {}, + "source": [ + "## San Pablo Ave" + ] + }, + { + "cell_type": "markdown", + "id": "3a01d280-a612-4e72-8b06-e98aae3426d6", + "metadata": {}, + "source": [ + "## Eureka Route TBD" + ] + }, + { + "cell_type": "markdown", + "id": "c5d6a929-e7e4-4074-9467-2e7aaed57ddb", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Sjoin and calculate" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a3551c17-c263-41b6-9f81-632afc134988", + "metadata": {}, + "outputs": [], + "source": [ + "surf_corridor = surf_corridor.to_crs(geography_utils.CA_NAD83Albers)\n", + "st_720 = st_720.to_crs(geography_utils.CA_NAD83Albers)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2e3caf3a-42b1-4520-be55-1f5c59655607", + "metadata": {}, + "outputs": [], + "source": [ + "# half-mile buffer stop areas, corridors should be broader...\n", + "surf_corridor.geometry = surf_corridor.buffer(804) \n", + "st_720.geometry = st_720.buffer(804)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "52860ef0-afdf-478a-98c8-6a2eed4d44f1", + "metadata": {}, + "outputs": [], + "source": [ + "gdf = gdf >> select(-_.index_left, -_.index_right)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e6c6cbc-c388-43c3-8340-25d7b1519152", + "metadata": {}, + "outputs": [], + "source": [ + "surf = gpd.sjoin(gdf, surf_corridor) >> distinct(_.GEOID, _keep_all=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "884d4536-874d-432b-8e0e-fbe71c9160fb", + "metadata": {}, + "outputs": [], + "source": [ + "surf.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8821c382-1d27-4dd3-994b-6d73e9b384b1", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire = gpd.sjoin(gdf, st_720) >> distinct(_.GEOID, _keep_all=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b6541ddd-66b0-4462-abc0-d9542bf1e082", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4088fd65-a36c-4a54-a859-4bb954c5ce4c", + "metadata": {}, + "outputs": [], + "source": [ + "surf.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a2169877-c50e-48f1-9729-7d37a7841e85", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "00959a08-aa6f-4a64-8f38-f90636a84d8a", + "metadata": {}, + "outputs": [], + "source": [ + "surf.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "481d4803-9e48-4cee-9fe9-181e3caa33f9", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b693889c-093a-46b4-8160-1c58f5512551", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + }, + "widgets": { + "application/vnd.jupyter.widget-state+json": { + "state": {}, + "version_major": 2, + "version_minor": 0 + } + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb index c04a89e70..7c50f7b3b 100644 --- a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb +++ b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb @@ -22,6 +22,39 @@ "import zipfile" ] }, + { + "cell_type": "code", + "execution_count": 99, + "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4", + "metadata": {}, + "outputs": [], + "source": [ + "from calitp_data_analysis import get_fs" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "682268e8-78fc-4609-afc5-294f3c650b5e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 43, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import _utils\n", + "# import importlib\n", + "# importlib.reload(_utils)" + ] + }, { "cell_type": "markdown", "id": "b076a21f-5a53-4b75-b140-0e4947099e42", @@ -36,7 +69,9 @@ "cell_type": "code", "execution_count": 3, "id": "85a89737-f90d-488f-9310-ca83557e476c", - "metadata": {}, + "metadata": { + "tags": [] + }, "outputs": [], "source": [ "def read_group_replica(zip_path):\n", @@ -186,7 +221,13 @@ "cell_type": "code", "execution_count": 7, "id": "9fb929ae-0c66-4dfb-9a0e-604bfedef078", - "metadata": {}, + "metadata": { + "collapsed": true, + "jupyter": { + "outputs_hidden": true + }, + "tags": [] + }, "outputs": [ { "name": "stdout", @@ -669,29 +710,6 @@ "df2.p50_transit_longer.value_counts()" ] }, - { - "cell_type": "code", - "execution_count": 43, - "id": "682268e8-78fc-4609-afc5-294f3c650b5e", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "import _utils\n", - "import importlib\n", - "importlib.reload(_utils)" - ] - }, { "cell_type": "code", "execution_count": 44, @@ -823,184 +841,10 @@ }, { "cell_type": "markdown", - "id": "019d1eaf-c3a7-4b71-a0b9-f9b041a04280", - "metadata": {}, - "source": [ - "# \"What if VMT decreased by 25% per the CARB target, and all those trips were on (existing) transit instead?\"\n", - "\n", - "## VMT is a spatial phenomenon, our analysis should be spatial too\n", - "\n", - "* Start with \"big data\" weekday residential VMT per Census tract via Replica\n", - "* Per target, future VMT should be 25% less\n", - "* Assume tripmaking remains constant, and that transit entirely replaces that VMT\n", - " * optional: find tracts with no transit service, hold their VMT constant and redistribute missed target among remaining tracts (30% reduction instead of 25% perhaps?)\n", - " \n", - "## From reduced VMT to transit trips\n", - "\n", - "* Replica gives transit trip lengths but it may not be reliable (\"good for auto, less so for transit\")\n", - " * It's generally showing the median transit trip as longer than the median auto trip, which seems questionable\n", - " * We have plenty of good spatial data on transit service _provision_, but not ridership (generally agency-level only)\n", - " * May need to refer to research/default to a fixed \"median transit trip\" length based on population density\n", - "* Regardless, get a rough estimate by dividing reduced VMT in each tract by median transit trip distance\n", - "* Reality check using derived modeshare number?\n", - "\n", - "## Connecting our estimate to California's transit provider landscape\n", - "\n", - "* Proportionally assign new trips per census tract to transit operators\n", - " * ~By number of stops in tract? OK for bus but will dramatically undercount rail~\n", - " * By each operator's proportion of regional ridership (from NTD)? Will overcount in tracts on the edge of large operator service areas, but perhaps preferable\n", - "* Can then create operator-level estimates of increased ridership and service hour provision\n", - " * This is where we have the best estimates of existing ridership..." - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "2db21010-14d2-47d1-b8be-9fd270cca949", - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd" - ] - }, - { - "cell_type": "markdown", - "id": "345b6bfd-f569-442f-9641-4f5f5dd4c40c", - "metadata": {}, - "source": [ - "## Mapping..." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b84ee354-9bc8-4808-b89e-39de27d24b71", - "metadata": {}, - "outputs": [], - "source": [ - "gdf.explore(column = 'total_mi_transit', scheme = 'NaturalBreaks')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2bde07bb-30d9-48b7-86ae-1055c15c4aac", - "metadata": {}, - "outputs": [], - "source": [ - "gdf.explore(column = 'total_mi_auto', scheme = 'NaturalBreaks')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0a9c55c5-b17e-4e4f-b0ec-198d2cd89b14", - "metadata": {}, - "outputs": [], - "source": [ - "(gdf >> filter(_.no_transit)).explore()" - ] - }, - { - "cell_type": "markdown", - "id": "76a258eb-cd26-4d2e-8e0c-8ac50c5c1449", + "id": "63e712e1-900d-430e-962e-72ed8dd30bbf", "metadata": {}, "source": [ - "## New transit trips" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ea9768db-f59e-4d73-a21c-c903736b4421", - "metadata": {}, - "outputs": [], - "source": [ - "# gdf.explore(column = 'new_transit_mi', scheme = 'NaturalBreaks')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a6d4647f-d6a3-4935-ab1f-613738ffecc5", - "metadata": {}, - "outputs": [], - "source": [ - "gdf = gdf >> filter(_.new_trips_per_capita < _.new_trips_per_capita.quantile(.99))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d560c555-ced9-43e5-bfb7-923a2c6a65ca", - "metadata": {}, - "outputs": [], - "source": [ - "gdf.explore(column = 'new_trips_per_capita', scheme = 'Quantiles')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8fb69137-fb4e-4140-9600-1155712585fc", - "metadata": {}, - "outputs": [], - "source": [ - "gdf.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "id": "718a534e-e8f5-4a2f-8698-f87c298d7ba0", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "17797968.0" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "gdf.projected_new_transit_trips.sum()" - ] - }, - { - "cell_type": "markdown", - "id": "db1e79d4-2958-4483-bee9-7ea58ecd4406", - "metadata": {}, - "source": [ - "About 18 million new daily trips across LA/Orange/San Diego/Imperial Counties. For reference, LA Metro's daily ridership is around 1 million. Current regional transit modeshare is only about 5%..." - ] - }, - { - "cell_type": "markdown", - "id": "6b708ffd-6d83-45e3-86c1-39d30ec2f5a8", - "metadata": {}, - "source": [ - "## Next Steps\n", - "\n", - "* caveat: other strategies (land use, active modes...)\n", - "* caveat: induced travel\n", - "* stratify into \"good transit, not riding\", \"bad transit\"\n", - "* LODES o/d data? Replica? -> Conveyal transit o/d find that \"good transit but not riding it\"\n", - " * find what doesn't show up in aggregate accessibility...\n", - "* https://walker-data.com/pygris/" - ] - }, - { - "cell_type": "code", - "execution_count": 99, - "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4", - "metadata": {}, - "outputs": [], - "source": [ - "from calitp_data_analysis import get_fs" + "## Quick GCS Upload" ] }, { @@ -1064,6 +908,24 @@ "source": [ "fs.put(lpath, _utils.GCS_PATH + lpath, recursive=True)" ] + }, + { + "cell_type": "markdown", + "id": "b27a8e5c-9d03-435c-bd5c-8a5de83a6d88", + "metadata": {}, + "source": [ + "## Pulling _corridor_ level data\n", + "\n", + "* First, get corridor geoms" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1a5d9b48-6f90-4e85-87e8-aa4c94d01aa3", + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { From e277b9c69b4b6da421474bd8fcd0128d7d743249 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Tue, 20 Feb 2024 20:01:18 +0000 Subject: [PATCH 3/5] add fresno 1 corridor --- .../corridor_selection.ipynb | 374 +++++++++- .../read_process_data.ipynb | 653 ++++-------------- 2 files changed, 468 insertions(+), 559 deletions(-) diff --git a/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb index 88ad8aa98..1a5de903d 100644 --- a/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb +++ b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "81cddca7-bea4-494d-b0cb-02508d52b380", "metadata": {}, "outputs": [], @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "15475e21-db2a-4f64-a2bc-38f2b76b9a4f", "metadata": { "tags": [] @@ -36,21 +36,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "682268e8-78fc-4609-afc5-294f3c650b5e", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import _utils\n", "import importlib\n", @@ -59,13 +48,13 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "id": "e269a698-e8bf-4af7-a53c-45265ca7e5ad", "metadata": {}, "outputs": [], "source": [ "import shared_utils\n", - "from calitp_data_analysis import geography_utils" + "from calitp_data_analysis import geography_utils, utils" ] }, { @@ -78,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "684770aa-d035-45d3-a3a4-ada6c51c2692", "metadata": {}, "outputs": [], @@ -98,18 +87,10 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "id": "081ae78d-9f96-4bca-b181-c6c40f3f8e76", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using FIPS code '06' for input 'CA'\n" - ] - } - ], + "outputs": [], "source": [ "tracts = _utils.get_tract_geoms()" ] @@ -346,7 +327,9 @@ { "cell_type": "markdown", "id": "1ca1517e-876d-49ae-82c1-973e02116745", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Wilshire" ] @@ -432,17 +415,296 @@ "metadata": {}, "outputs": [], "source": [ - "st_720.explore()" + "# st_720.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfb4f847-c5a0-43e2-b8d9-c11c4469c954", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire = sjoin_tracts(st_720, tracts, 804)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "600ff9a4-5bbe-4078-9829-721842084f89", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# wilshire.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be46c69b-c2fa-4a37-ab0a-fff6ccd63cf2", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire.to_file('wilshire.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18a68442-9ab6-4973-868f-b22ace3ea90d", + "metadata": {}, + "outputs": [], + "source": [ + "# includes non-corridor vmt...\n", + "\n", + "# trips_all = gpd.read_parquet('outputs/new_trips_with_uza.parquet')\n", + "\n", + "# trips_all >> filter(_.GEOID.isin(wilshire_results.GEOID))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c4e8f0e-941b-4bdd-b5ad-1a7e02602ad6", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results = gpd.read_parquet('outputs/wilshire_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8714149d-fd48-4c11-bd13-c3ec1af6ef2a", + "metadata": {}, + "outputs": [], + "source": [ + "(wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0db59945-15cc-4800-8cd6-0efd01a21bfa", + "metadata": {}, + "outputs": [], + "source": [ + "utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "611272a6-9add-4218-90c9-ea3004d5e829", + "metadata": {}, + "outputs": [], + "source": [ + "# wilshire_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e882b73-446a-4b31-8a5d-e569fa4016ac", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.sum()" ] }, { "cell_type": "markdown", "id": "5cf215ef-e257-4405-b4a6-c1c0eab26116", - "metadata": {}, + "metadata": { + "tags": [] + }, "source": [ "## Fresno Route 1" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ac39af3-5a44-44b5-a084-ce24fb115874", + "metadata": {}, + "outputs": [], + "source": [ + "fresno = feeds >> filter(_.name.str.contains('Fresno Sch'))\n", + "fresno" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1bde719-a199-41c9-b152-487c95a43732", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38f2c41e-88d5-4e2e-a3ee-7857746f6b78", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3d2981b-4b1e-439d-b9bc-748e76e1db6b", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7128fd7b-ab96-47bf-999e-33d7cd586546", + "metadata": {}, + "outputs": [], + "source": [ + "trips_1 = trips >> filter(_.route_short_name.isin(['01']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3fd0090-1cfe-4215-941f-4c627ce9b470", + "metadata": {}, + "outputs": [], + "source": [ + "trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9d4ad6d-7a4a-48f2-91e1-7559c621015a", + "metadata": {}, + "outputs": [], + "source": [ + "st_1 = trips_to_stops(trips_1, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2431c3e0-9338-4693-a02b-93a17962e196", + "metadata": {}, + "outputs": [], + "source": [ + "st_1 = st_1 >> filter(_.stop_sequence < 20) # vertical portion only" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31156153-36c0-4d32-b834-553e8f8a95c6", + "metadata": {}, + "outputs": [], + "source": [ + "# st_1.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35393a07-ba6e-4c67-a427-a8bc07fa0345", + "metadata": {}, + "outputs": [], + "source": [ + "fresno = sjoin_tracts(st_1, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea4f36ad-72f2-4959-846b-21baeab21a83", + "metadata": {}, + "outputs": [], + "source": [ + "# fresno.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dc9d26c-5743-4e2c-b107-0992cce1023c", + "metadata": {}, + "outputs": [], + "source": [ + "fresno.to_file('fresno.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f5dd373-bf14-4e1a-b47d-81f85eb3231f", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results = gpd.read_parquet('outputs/fresno_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82878ff2-007f-41b3-a378-2b808b05f5c0", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "203d5e11-c54f-42ea-8c19-d9fe36bc2643", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e2a3a9c-5b72-47da-bfcb-73bf05b955b4", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "294f0011-d722-4cd1-b3e4-88c3de380b21", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef648be9-0c5a-40fa-8351-591175802794", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.describe()" + ] + }, { "cell_type": "markdown", "id": "0378b6eb-ab6c-40f3-94a4-3aec913d6a3d", @@ -463,7 +725,6 @@ "cell_type": "markdown", "id": "c5d6a929-e7e4-4074-9467-2e7aaed57ddb", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ @@ -583,13 +844,56 @@ "wilshire.sum()" ] }, + { + "cell_type": "markdown", + "id": "a9ae4745-dedf-4365-8869-9996558e2270", + "metadata": {}, + "source": [ + "# BART" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "56865daf-a6e5-41e5-a038-0cb0f2f6f79c", + "metadata": {}, + "outputs": [], + "source": [ + "bart = feeds >> filter(_.name.str.contains('BART'))\n", + "bart" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6bcbf87f-b0d4-420a-b32c-6a5ecb82f781", + "metadata": {}, + "outputs": [], + "source": [ + "shapes = shared_utils.gtfs_utils_v2.get_shapes(analysis_date, bart.feed_key)" + ] + }, { "cell_type": "code", "execution_count": null, - "id": "b693889c-093a-46b4-8160-1c58f5512551", + "id": "bf94d274-264e-4120-ad0c-7a1cf4f2f703", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "shapes.to_file('../../../csis-metrics/project_prioritization/accessibility/refactor/BART.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11af10fe-820f-483f-b6d0-3b91ef70b7fd", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)" + ] } ], "metadata": { diff --git a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb index 7c50f7b3b..157a099e7 100644 --- a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb +++ b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3", "metadata": {}, "outputs": [], @@ -14,7 +14,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "81cddca7-bea4-494d-b0cb-02508d52b380", "metadata": {}, "outputs": [], @@ -24,7 +24,7 @@ }, { "cell_type": "code", - "execution_count": 99, + "execution_count": null, "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4", "metadata": {}, "outputs": [], @@ -34,21 +34,10 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": null, "id": "682268e8-78fc-4609-afc5-294f3c650b5e", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "import _utils\n", "# import importlib\n", @@ -67,18 +56,17 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "id": "85a89737-f90d-488f-9310-ca83557e476c", "metadata": { "tags": [] }, "outputs": [], "source": [ - "def read_group_replica(zip_path):\n", + "def read_group_replica(zip_path, replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'):\n", " '''\n", " zip_path: path to zip file containing a Replica trips export\n", " '''\n", - " replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'\n", " with zipfile.ZipFile(zip_path) as z:\n", " with z.open(replica_filename) as f:\n", " df = pd.read_csv(f)\n", @@ -99,9 +87,20 @@ " return grouped" ] }, + { + "cell_type": "markdown", + "id": "a48da885-0c7c-41a6-be13-8364e7fdc48c", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## quick vmt" + ] + }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "ec3470ac-5f0e-49a0-9000-f371f952bc74", "metadata": {}, "outputs": [], @@ -111,7 +110,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "id": "868a0ae4-b076-4775-beba-fdc9ba764b27", "metadata": {}, "outputs": [], @@ -121,19 +120,10 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "id": "b4c08637-9bbc-4727-af6d-14dc1c66b4a1", "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_472/3605967939.py:3: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - } - ], + "outputs": [], "source": [ "with zipfile.ZipFile(zip_path) as z:\n", " with z.open(replica_filename) as f:\n", @@ -142,7 +132,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": null, "id": "5875f7f5-2c1f-40f6-95fa-0a8d106b1e7a", "metadata": {}, "outputs": [], @@ -152,7 +142,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "id": "01a823a1-de52-4d08-8b80-a024c1397f95", "metadata": {}, "outputs": [], @@ -162,202 +152,75 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": null, "id": "132b4a03-9f73-4978-8a35-ae5c130f7f73", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.5640656816072517" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "shorter / miles_all" ] }, + { + "cell_type": "markdown", + "id": "412218ad-4f25-49de-92df-a00cc6becc70", + "metadata": {}, + "source": [ + "## grouping" + ] + }, { "cell_type": "code", - "execution_count": 4, - "id": "6c80cc92-a452-4d92-bf2f-d1f40a83442d", + "execution_count": null, + "id": "dae2c9bb-5121-4966-85ed-111bd525c924", "metadata": {}, "outputs": [], "source": [ - "# zip_path = 'replica-la_north-trips_dataset.zip'\n", - "\n", - "# replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'\n", - "# with zipfile.ZipFile(zip_path) as z:\n", - "# with z.open(replica_filename) as f:\n", - "# df = pd.read_csv(f)" + "all_regions = ['central_a', 'central_b', 'north', 'la_north',\n", + " 'la_south', 'sandiego', 'socal_a', 'socal_b']" ] }, { "cell_type": "code", - "execution_count": 5, - "id": "0b50f321-feee-4789-8f17-eee4f4a59f3f", + "execution_count": null, + "id": "15544ecf-1140-426f-be93-5ce53e2b5f7e", "metadata": {}, "outputs": [], "source": [ - "# df >> head(10)" + "grouped = pd.DataFrame()\n", + "for region in ['fresno']:\n", + " print(region)\n", + " # note replica filename includes date of download...\n", + " df = read_group_replica(f'replica_raw/corridors/replica-{region}-trips_dataset.zip',\n", + " replica_filename='replica-mode_split_test-02_20_24-trips_dataset.csv')\n", + " grouped = pd.concat([grouped, df])" ] }, { "cell_type": "code", - "execution_count": 6, - "id": "dae2c9bb-5121-4966-85ed-111bd525c924", + "execution_count": null, + "id": "1878b238-eec9-450c-bf3e-359016485495", "metadata": {}, "outputs": [], "source": [ - "all_regions = ['central_a', 'central_b', 'north', 'la_north',\n", - " 'la_south', 'sandiego', 'socal_a', 'socal_b']" + "grouped.to_parquet('intermediate/fresno_grouped.parquet')" ] }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "id": "9fb929ae-0c66-4dfb-9a0e-604bfedef078", "metadata": { - "collapsed": true, - "jupyter": { - "outputs_hidden": true - }, "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "central_a\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "central_b\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "north\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "la_north\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "la_south\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "sandiego\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "socal_a\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "socal_b\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n", - " df = pd.read_csv(f)\n" - ] - } - ], + "outputs": [], "source": [ - "grouped = pd.DataFrame()\n", - "for region in all_regions:\n", - " print(region)\n", - " df = read_group_replica(f'replica_raw/replica-{region}-trips_dataset.zip')\n", - " grouped = pd.concat([grouped, df])\n", + "# grouped = pd.DataFrame()\n", + "# for region in all_regions:\n", + "# print(region)\n", + "# df = read_group_replica(f'replica_raw/replica-{region}-trips_dataset.zip')\n", + "# grouped = pd.concat([grouped, df])\n", "\n", - "grouped.to_parquet('intermediate/replica_grouped.parquet')" + "# grouped.to_parquet('intermediate/replica_grouped.parquet')" ] }, { @@ -367,12 +230,13 @@ "source": [ "# Read back in grouped data\n", "\n", - "* number of trips, median distance, and total miles travelled by auto yes/no and Census tract" + "* number of trips, median distance, and total miles travelled by auto yes/no and Census tract\n", + "* TODO non-manual regions :)" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "id": "240ddd8f-6a36-44e1-a1c0-32e6f50c3cac", "metadata": {}, "outputs": [], @@ -384,17 +248,27 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "id": "7596b001-2c1c-488f-b089-c375c0ddff4f", "metadata": {}, "outputs": [], "source": [ - "replica_grouped = pd.read_parquet('intermediate/replica_grouped.parquet')" + "# replica_grouped = pd.read_parquet('intermediate/replica_grouped.parquet')" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, + "id": "f4b313d7-62e5-46b4-ab93-6b25d176bdeb", + "metadata": {}, + "outputs": [], + "source": [ + "replica_grouped = pd.read_parquet('intermediate/fresno_grouped.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, "id": "23b9ae5b-bd02-4978-b7d1-3e4b49de53b5", "metadata": {}, "outputs": [], @@ -404,7 +278,7 @@ }, { "cell_type": "code", - "execution_count": 35, + "execution_count": null, "id": "73088513-9710-4c9a-a205-d531c3807345", "metadata": {}, "outputs": [], @@ -414,7 +288,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "61ce4f74-45e9-4c85-ad34-d2ac9c5cfc32", "metadata": {}, "outputs": [], @@ -424,7 +298,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "06f007da-ad8d-4801-8595-85c5d44fe921", "metadata": {}, "outputs": [], @@ -472,366 +346,119 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "id": "d661657b-cac7-4e9a-806d-48579c0516f9", "metadata": { "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.2778337212327877\n" - ] - } - ], - "source": [ - "df2 = process_grouped_data(replica_grouped, tracts_feeds)" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "50f04f86-6f13-4654-952e-58dc07a23faa", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
origin_trct_fips_2020p50_mi_transitp50_mi_autototal_mi_transittotal_mi_autop50_transit_longerno_transit_replicano_transittotal_minew_transit_miprojected_new_transit_trips
060014001004.26.95185.3165410.8FalseFalseFalse170596.145956.69809610942.0
160014002004.13.43638.158763.4TrueFalseFalse62401.516326.4540943982.0
260014003004.73.76130.7122736.5TrueFalseFalse128867.234100.3385267255.0
\n", - "
" - ], - "text/plain": [ - " origin_trct_fips_2020 p50_mi_transit p50_mi_auto total_mi_transit \\\n", - "0 6001400100 4.2 6.9 5185.3 \n", - "1 6001400200 4.1 3.4 3638.1 \n", - "2 6001400300 4.7 3.7 6130.7 \n", - "\n", - " total_mi_auto p50_transit_longer no_transit_replica no_transit \\\n", - "0 165410.8 False False False \n", - "1 58763.4 True False False \n", - "2 122736.5 True False False \n", - "\n", - " total_mi new_transit_mi projected_new_transit_trips \n", - "0 170596.1 45956.698096 10942.0 \n", - "1 62401.5 16326.454094 3982.0 \n", - "2 128867.2 34100.338526 7255.0 " - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2 >> head(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "8169ca83-e540-48f4-af93-076a535f00c6", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
no_transittotal_mi_auto
0False1.162508e+09
1True1.294277e+08
\n", - "
" - ], - "text/plain": [ - " no_transit total_mi_auto\n", - "0 False 1.162508e+09\n", - "1 True 1.294277e+08" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "no_transit = df2 >> group_by(_.no_transit) >> summarize(total_mi_auto = _.total_mi_auto.sum())\n", - "no_transit" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "c7175031-a66c-4f90-8dca-193198b9d932", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'11.0 percent of VMT in tracts with no transit per GTFS Warehouse stops'" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "no_transit_pct = no_transit.iloc[1, 1] / no_transit.iloc[0, 1]\n", - "f'{round(no_transit_pct*100, 0)} percent of VMT in tracts with no transit per GTFS Warehouse stops'" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "6476da44-36ac-4604-89d5-76966eb9411d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True 6571\n", - "False 2534\n", - "Name: p50_transit_longer, dtype: int64" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df2.p50_transit_longer.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "e83400ec-c295-40b3-91d6-3c3bd1ffb5f4", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using FIPS code '06' for input 'CA'\n" - ] - } - ], - "source": [ - "tract_geo = _utils.get_tract_geoms()" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "76766b3c-efc1-4ad7-9d47-3f465c616abf", - "metadata": {}, "outputs": [], "source": [ - "# tract_geo = gpd.read_file('./tl_2020_06_tract.zip') >> select(_.GEOID, _.geometry)" + "processed_df = process_grouped_data(replica_grouped, tracts_feeds)" ] }, { "cell_type": "code", - "execution_count": 46, - "id": "876be135-baf9-41bb-880f-22cf9df2c4a9", + "execution_count": null, + "id": "50f04f86-6f13-4654-952e-58dc07a23faa", "metadata": {}, "outputs": [], "source": [ - "tract_geo.GEOID = tract_geo.GEOID.astype('int64')" + "processed_df >> head(3)" ] }, { "cell_type": "code", - "execution_count": 47, - "id": "03ecd820-d9e5-4c08-82ae-0929638e3af4", + "execution_count": null, + "id": "8169ca83-e540-48f4-af93-076a535f00c6", "metadata": {}, "outputs": [], "source": [ - "gdf = tract_geo >> inner_join(_, df2, on = {'GEOID': 'origin_trct_fips_2020'}) >> select(-_.origin_trct_fips_2020)" + "no_transit = processed_df >> group_by(_.no_transit) >> summarize(total_mi_auto = _.total_mi_auto.sum())\n", + "no_transit" ] }, { "cell_type": "code", - "execution_count": 48, - "id": "3a830524-3c8f-4915-8768-3062ddbf7db0", + "execution_count": null, + "id": "c7175031-a66c-4f90-8dca-193198b9d932", "metadata": {}, "outputs": [], "source": [ - "ca_uzas = gpd.read_parquet('intermediate/ca_uza.parquet')" + "no_transit_pct = no_transit.iloc[1, 1] / no_transit.iloc[0, 1]\n", + "f'{round(no_transit_pct*100, 0)} percent of VMT in tracts with no transit per GTFS Warehouse stops'" ] }, { "cell_type": "code", - "execution_count": 49, - "id": "d00b301f-ec03-4525-b415-53f9f65a77c2", + "execution_count": null, + "id": "6476da44-36ac-4604-89d5-76966eb9411d", "metadata": {}, "outputs": [], "source": [ - "uza_joined = gpd.sjoin(gdf, ca_uzas, how = 'left')" + "processed_df.p50_transit_longer.value_counts()" ] }, { "cell_type": "code", - "execution_count": 50, - "id": "4cb23dd6-94b8-4b36-952b-054a06c04192", + "execution_count": null, + "id": "8995132b-5eba-48e8-bfdb-585f1ff1e9d8", "metadata": {}, "outputs": [], "source": [ - "# TODO to util, other source?\n", - "\n", - "census_pop = gpd.read_file('census_ntd/DECENNIALPL2020.P1_2024-02-01T163251.zip')\n", - "\n", - "tract_pop = census_pop[['GEO_ID', 'P1_001N']].iloc[2:,:]\n", - "\n", - "tract_pop.GEO_ID = tract_pop.GEO_ID.map(lambda x: x.split('US')[1])\n", - "\n", - "tract_pop.GEO_ID = tract_pop.GEO_ID.astype('int64')\n", - "\n", - "tract_pop = tract_pop >> select(_.total_pop == _.P1_001N, _.GEOID == _.GEO_ID)" + "def attach_tracts_pop(processed_df):\n", + " \n", + " tract_geo = _utils.get_tract_geoms()\n", + " tract_geo.GEOID = tract_geo.GEOID.astype('int64')\n", + " gdf = (tract_geo >> inner_join(_, processed_df, on = {'GEOID': 'origin_trct_fips_2020'})\n", + " >> select(-_.origin_trct_fips_2020))\n", + " ca_uzas = gpd.read_parquet('intermediate/ca_uza.parquet')\n", + " uza_joined = gpd.sjoin(gdf, ca_uzas, how = 'left')\n", + " \n", + " census_pop = gpd.read_file('census_ntd/DECENNIALPL2020.P1_2024-02-01T163251.zip')\n", + " tract_pop = census_pop[['GEO_ID', 'P1_001N']].iloc[2:,:]\n", + " tract_pop.GEO_ID = tract_pop.GEO_ID.map(lambda x: x.split('US')[1])\n", + " tract_pop.GEO_ID = tract_pop.GEO_ID.astype('int64')\n", + " tract_pop = tract_pop >> select(_.total_pop == _.P1_001N, _.GEOID == _.GEO_ID)\n", + " uza_joined = uza_joined >> inner_join(_, tract_pop, on = 'GEOID')\n", + " uza_joined.total_pop = uza_joined.total_pop.astype('int64')\n", + " uza_joined['new_trips_per_capita'] = uza_joined.projected_new_transit_trips / uza_joined.total_pop\n", + " \n", + " return uza_joined" ] }, { "cell_type": "code", - "execution_count": 51, - "id": "31623fcf-efd7-4863-8f12-71a04ac45410", + "execution_count": null, + "id": "81f381a4-bb80-4fb7-97fa-853111914712", "metadata": {}, "outputs": [], "source": [ - "uza_joined = uza_joined >> inner_join(_, tract_pop, on = 'GEOID')" + "uza_joined = attach_tracts_pop(processed_df)" ] }, { "cell_type": "code", - "execution_count": 52, - "id": "fb733ab6-007a-4747-91dd-ec40688a066f", + "execution_count": null, + "id": "10cb6149-4676-4b64-9ae7-b53f621f815b", "metadata": {}, "outputs": [], "source": [ - "uza_joined.total_pop = uza_joined.total_pop.astype('int64')\n", - "uza_joined['new_trips_per_capita'] = uza_joined.projected_new_transit_trips / uza_joined.total_pop" + "uza_joined.to_parquet('outputs/fresno_trips_with_uza.parquet')" ] }, { "cell_type": "code", - "execution_count": 53, + "execution_count": null, "id": "addb6ac5-5f4d-41dc-9a7a-0d6644bb699c", "metadata": {}, "outputs": [], "source": [ - "uza_joined.to_parquet('outputs/new_trips_with_uza.parquet')" + "# uza_joined.to_parquet('outputs/new_trips_with_uza.parquet')" ] }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "666ebaa0-7d29-4f56-a3fd-5ad7391b15c8", "metadata": {}, "outputs": [], @@ -849,7 +476,7 @@ }, { "cell_type": "code", - "execution_count": 100, + "execution_count": null, "id": "b0572b17-a9c2-4128-ab67-fa650c87fda0", "metadata": {}, "outputs": [], @@ -859,28 +486,17 @@ }, { "cell_type": "code", - "execution_count": 102, + "execution_count": null, "id": "76539b31-f757-4703-9f7a-2eea60834d06", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/'" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "_utils.GCS_PATH" ] }, { "cell_type": "code", - "execution_count": 111, + "execution_count": null, "id": "1518eea5-d2f1-4807-b392-4a4f2624439d", "metadata": {}, "outputs": [], @@ -890,21 +506,10 @@ }, { "cell_type": "code", - "execution_count": 112, + "execution_count": null, "id": "e0048f15-f124-431d-9fae-35aa7ef3dd72", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[None, None, None, None, None, None, None, None, None, None]" - ] - }, - "execution_count": 112, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "fs.put(lpath, _utils.GCS_PATH + lpath, recursive=True)" ] From ebac98d6635e2b088b2df669668c0b60be283b13 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Wed, 21 Feb 2024 22:21:53 +0000 Subject: [PATCH 4/5] wip --- .../corridor_selection.ipynb | 811 ++++++++++++++---- .../read_process_data.ipynb | 145 +++- 2 files changed, 741 insertions(+), 215 deletions(-) diff --git a/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb index 1a5de903d..a5984a904 100644 --- a/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb +++ b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb @@ -129,770 +129,1211 @@ }, { "cell_type": "markdown", - "id": "086c18f7-c7cf-41a2-9147-727740f781e5", + "id": "1ca1517e-876d-49ae-82c1-973e02116745", + "metadata": { + "tags": [] + }, + "source": [ + "## Wilshire" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ed2d3a3a-3473-4174-91b7-56c4e6759e9d", + "metadata": {}, + "outputs": [], + "source": [ + "metro = feeds >> filter(_.name.str.contains('LA Metro Bus'))\n", + "metro" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "030ebc15-6824-47a6-b781-11628b74d848", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c1e324d3-7891-40f2-bd7b-2389fa36ee26", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "384bc785-79ed-4ca8-be5d-14a0480e98cf", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1b6fde5e-1394-4350-b483-1c1e6ad1dabb", + "metadata": {}, + "outputs": [], + "source": [ + "trips_720 = trips >> filter(_.route_short_name.isin(['720']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fed54ea9-8157-425a-889a-1069d77265ca", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_720 = trips_to_stops(trips_720, metro.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38b16a39-5d16-45bd-a9e4-71c22cacdb7f", + "metadata": {}, + "outputs": [], + "source": [ + "# only keep W of Wil/Wstn\n", + "st_720 = st_720 >> filter(_.stop_sequence <= 11)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e04bc76-99c8-4930-a754-fc4c2c0f3597", + "metadata": {}, + "outputs": [], + "source": [ + "# st_720.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cfb4f847-c5a0-43e2-b8d9-c11c4469c954", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire = sjoin_tracts(st_720, tracts, 804)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "600ff9a4-5bbe-4078-9829-721842084f89", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# wilshire.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "be46c69b-c2fa-4a37-ab0a-fff6ccd63cf2", + "metadata": {}, + "outputs": [], + "source": [ + "# wilshire.to_file('wilshire.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "18a68442-9ab6-4973-868f-b22ace3ea90d", + "metadata": {}, + "outputs": [], + "source": [ + "# includes non-corridor vmt...\n", + "\n", + "# trips_all = gpd.read_parquet('outputs/new_trips_with_uza.parquet')\n", + "\n", + "# trips_all >> filter(_.GEOID.isin(wilshire_results.GEOID))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6c4e8f0e-941b-4bdd-b5ad-1a7e02602ad6", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results = gpd.read_parquet('outputs/wilshire_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8714149d-fd48-4c11-bd13-c3ec1af6ef2a", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0db59945-15cc-4800-8cd6-0efd01a21bfa", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "611272a6-9add-4218-90c9-ea3004d5e829", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3e882b73-446a-4b31-8a5d-e569fa4016ac", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "645f8132-8062-4a3f-8b97-df79483b44df", + "metadata": {}, + "outputs": [], + "source": [ + "wilshire_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "5cf215ef-e257-4405-b4a6-c1c0eab26116", + "metadata": { + "tags": [] + }, + "source": [ + "## Fresno Route 1" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ac39af3-5a44-44b5-a084-ce24fb115874", + "metadata": {}, + "outputs": [], + "source": [ + "fresno = feeds >> filter(_.name.str.contains('Fresno Sch'))\n", + "fresno" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b1bde719-a199-41c9-b152-487c95a43732", + "metadata": {}, + "outputs": [], + "source": [ + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "38f2c41e-88d5-4e2e-a3ee-7857746f6b78", + "metadata": {}, + "outputs": [], + "source": [ + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b3d2981b-4b1e-439d-b9bc-748e76e1db6b", + "metadata": {}, + "outputs": [], + "source": [ + "trips.route_short_name.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7128fd7b-ab96-47bf-999e-33d7cd586546", + "metadata": {}, + "outputs": [], + "source": [ + "trips_1 = trips >> filter(_.route_short_name.isin(['01']), _.direction_id == 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e3fd0090-1cfe-4215-941f-4c627ce9b470", + "metadata": {}, + "outputs": [], + "source": [ + "trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a9d4ad6d-7a4a-48f2-91e1-7559c621015a", + "metadata": {}, + "outputs": [], + "source": [ + "st_1 = trips_to_stops(trips_1, fresno.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2431c3e0-9338-4693-a02b-93a17962e196", + "metadata": {}, + "outputs": [], + "source": [ + "st_1 = st_1 >> filter(_.stop_sequence < 20) # vertical portion only" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "31156153-36c0-4d32-b834-553e8f8a95c6", + "metadata": {}, + "outputs": [], + "source": [ + "# st_1.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "35393a07-ba6e-4c67-a427-a8bc07fa0345", + "metadata": {}, + "outputs": [], + "source": [ + "fresno = sjoin_tracts(st_1, tracts, 804) # half-mile" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ea4f36ad-72f2-4959-846b-21baeab21a83", + "metadata": {}, + "outputs": [], + "source": [ + "# fresno.explore()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9dc9d26c-5743-4e2c-b107-0992cce1023c", + "metadata": {}, + "outputs": [], + "source": [ + "fresno.to_file('fresno.geojson')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f5dd373-bf14-4e1a-b47d-81f85eb3231f", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results = gpd.read_parquet('outputs/fresno_trips_with_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "82878ff2-007f-41b3-a378-2b808b05f5c0", + "metadata": {}, + "outputs": [], + "source": [ + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "203d5e11-c54f-42ea-8c19-d9fe36bc2643", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9e2a3a9c-5b72-47da-bfcb-73bf05b955b4", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "294f0011-d722-4cd1-b3e4-88c3de380b21", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ef648be9-0c5a-40fa-8351-591175802794", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "0378b6eb-ab6c-40f3-94a4-3aec913d6a3d", "metadata": { - "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## MST (table)" + "## San Pablo Ave" ] }, { "cell_type": "code", "execution_count": null, - "id": "5f958167-a291-4aba-8566-111c5e713be9", + "id": "8e8a3159-40a9-4763-afa2-a325679d9ff2", "metadata": {}, "outputs": [], "source": [ - "mst = feeds >> filter(_.name.str.contains('Monterey'))\n", - "mst" + "ac = feeds >> filter(_.name.str.contains('AC Transit'))\n", + "ac" ] }, { "cell_type": "code", "execution_count": null, - "id": "d68764b0-476a-4af8-8560-b931a8afba51", + "id": "6aec2329-d7f7-463e-86f0-12b609048529", "metadata": {}, "outputs": [], "source": [ - "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, mst.feed_key)" + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, ac.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "3bbf4ee7-7bd2-4ae2-968b-b17a5ef2514d", + "id": "5c960435-da71-4074-aaea-15a8f12b18ca", "metadata": {}, "outputs": [], "source": [ - "stops.columns" + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, ac.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "f07ef246-b92d-4e00-989b-b1800744a694", + "id": "85ba91ca-e882-47b8-a343-ac5eab3b0a4e", "metadata": {}, "outputs": [], "source": [ - "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, mst.feed_key)" + "trips.route_short_name.unique()" ] }, { "cell_type": "code", "execution_count": null, - "id": "2fd8b773-d935-4c13-8f9e-84a8cba153c9", - "metadata": { - "tags": [] - }, + "id": "fcbd0633-395a-4bea-8c64-b21cb5ecd64f", + "metadata": {}, "outputs": [], "source": [ - "trips.columns" + "trips_72r = trips >> filter(_.route_short_name.isin(['72R']), _.direction_id == 0)" ] }, { "cell_type": "code", "execution_count": null, - "id": "8fe0c258-f084-4030-94e6-b0a44f5f5498", - "metadata": {}, + "id": "e1359516-eddf-4cde-ba35-32dd8f7e5535", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "trips.route_short_name.unique()" + "st_72r = trips_to_stops(trips_72r, ac.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "a6763fd7-c9cc-44e1-b7e9-8c8c919e3f91", + "id": "3f7cee73-690e-45bd-9d09-203f031e53e4", "metadata": {}, "outputs": [], "source": [ - "ab_trips = trips >> filter(_.route_short_name.isin(['A', 'B']), _.direction_id == 0)" + "# st_72r.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "f9151728-3f87-45b0-a799-eb2b126ea434", + "id": "71962307-67d1-4670-b3e2-14dea1c0770f", "metadata": {}, "outputs": [], "source": [ - "trips_20 = trips >> filter(_.route_short_name == '20', _.direction_id == 0)" + "san_pablo = sjoin_tracts(st_72r, tracts, 804) # half-mile" ] }, { "cell_type": "code", "execution_count": null, - "id": "44c2f67b-74b7-4551-82b9-167f2744081b", + "id": "27970e7d-d3a2-44c9-9c5b-aa839cf6c4dc", "metadata": {}, "outputs": [], "source": [ - "st_20 = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, mst.feed_key, trip_df=trips_20)" + "# san_pablo.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "fc5f463c-b3c6-4f80-86d4-9835c650eebb", + "id": "4dd4b619-1bbb-4880-b5fb-0450c848b779", "metadata": {}, "outputs": [], "source": [ - "st_20 = st_20 >> distinct(_.stop_id, _.stop_sequence) >> collect()" + "san_pablo.to_file('san_pablo.geojson')" ] }, { "cell_type": "code", "execution_count": null, - "id": "23f3a7b3-cd32-480f-ab1f-cc616c02e77a", + "id": "edfc5475-af4a-42a9-81ea-f041f7135938", "metadata": {}, "outputs": [], "source": [ - "st_20 = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st_20, on='stop_id')" + "san_pablo_results = gpd.read_parquet('outputs/sanpablo_trips_with_uza.parquet')" ] }, { "cell_type": "code", "execution_count": null, - "id": "8b71bbe9-26d0-42ec-8b30-a7cfdee2236e", - "metadata": { - "tags": [] - }, + "id": "2eb69c0d-5508-487b-bbbf-a70a478ce164", + "metadata": {}, "outputs": [], "source": [ - "st_20 = trips_to_stops(trips_20)" + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" ] }, { "cell_type": "code", "execution_count": null, - "id": "99b2afea-3280-422c-b3b7-6e1c5ff54b5d", + "id": "dff30ab0-80d7-43f8-b276-ad4f8e877f26", "metadata": {}, "outputs": [], "source": [ - "# SURF BRT area for joins...\n", - "st_20 = st_20 >> filter(_.stop_sequence <= 27)" + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" ] }, { "cell_type": "code", "execution_count": null, - "id": "c3da388c-807d-424b-9e69-51588401ef2a", + "id": "a2eb977c-6779-4916-8576-385c6808e21a", "metadata": {}, "outputs": [], "source": [ - "# st_20.explore()" + "san_pablo_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" ] }, { "cell_type": "code", "execution_count": null, - "id": "0bfacea0-f958-4a65-90f3-2fec0ab04fd6", + "id": "d5dea582-473e-4157-98bb-5a3572631b42", "metadata": {}, "outputs": [], "source": [ - "st_ab = trips_to_stops(ab_trips)" + "san_pablo_results.sum()" ] }, { "cell_type": "code", "execution_count": null, - "id": "25ab4e55-c0cf-4919-b9ef-41e665c9a136", + "id": "369166fe-baf8-4bc2-86f8-c637b86c23bd", "metadata": {}, "outputs": [], "source": [ - "# st_ab.explore()" + "san_pablo_results.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "73b906dd-19e4-496e-b468-bd47fb3082be", + "metadata": { + "tags": [] + }, + "source": [ + "## Eureka H Street/Purple Route" ] }, { "cell_type": "code", "execution_count": null, - "id": "27a834eb-34b0-46a4-9f0d-75534812a336", + "id": "7b248c72-9b00-4451-9843-02cb2c80c39a", "metadata": {}, "outputs": [], "source": [ - "surf_corridor = pd.concat([st_20, st_ab])" + "eureka = feeds >> filter(_.name.str.contains('Humboldt Schedule'))\n", + "eureka" ] }, { "cell_type": "code", "execution_count": null, - "id": "72671c73-1331-4888-a6e4-5a8cc3a34a16", + "id": "2136f99f-3c01-406e-8c60-9b3bba2f9920", "metadata": {}, "outputs": [], "source": [ - "surf_corridor.explore()" + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, eureka.feed_key)" ] }, { - "cell_type": "markdown", - "id": "1ca1517e-876d-49ae-82c1-973e02116745", - "metadata": { - "tags": [] - }, + "cell_type": "code", + "execution_count": null, + "id": "3b75f1b9-184c-4667-81a4-261b1105249e", + "metadata": {}, + "outputs": [], "source": [ - "## Wilshire" + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, eureka.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "ed2d3a3a-3473-4174-91b7-56c4e6759e9d", + "id": "3030e4cc-b573-4083-9e2c-ee8c947160ef", "metadata": {}, "outputs": [], "source": [ - "metro = feeds >> filter(_.name.str.contains('LA Metro Bus'))\n", - "metro" + "trips.route_short_name.unique()" ] }, { "cell_type": "code", "execution_count": null, - "id": "030ebc15-6824-47a6-b781-11628b74d848", + "id": "4750a733-a639-4b7f-a78f-da1683c6a594", "metadata": {}, "outputs": [], "source": [ - "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)" + "trips.route_long_name.unique()" ] }, { "cell_type": "code", "execution_count": null, - "id": "c1e324d3-7891-40f2-bd7b-2389fa36ee26", + "id": "19722dc1-21ad-4186-93d3-36b6522ba246", "metadata": {}, "outputs": [], "source": [ - "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)" + "trips_rainbow = trips >> filter(_.route_long_name.isin(['Rainbow Route']), _.direction_id == 0)" ] }, { "cell_type": "code", "execution_count": null, - "id": "384bc785-79ed-4ca8-be5d-14a0480e98cf", + "id": "24226a59-fc6c-4f75-ab46-70d561a2d20e", "metadata": {}, "outputs": [], "source": [ - "trips.route_short_name.unique()" + "tr" ] }, { "cell_type": "code", "execution_count": null, - "id": "1b6fde5e-1394-4350-b483-1c1e6ad1dabb", + "id": "8c1b2637-2446-4efd-b61a-19c08d534292", "metadata": {}, "outputs": [], "source": [ - "trips_720 = trips >> filter(_.route_short_name.isin(['720']), _.direction_id == 0)" + "# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" ] }, { "cell_type": "code", "execution_count": null, - "id": "fed54ea9-8157-425a-889a-1069d77265ca", + "id": "93a000d8-6e6b-47aa-9a82-98932989ba7b", "metadata": { "tags": [] }, "outputs": [], "source": [ - "st_720 = trips_to_stops(trips_720, metro.feed_key)" + "st_rainbow = trips_to_stops(trips_purple, eureka.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "38b16a39-5d16-45bd-a9e4-71c22cacdb7f", + "id": "eeaeaafc-819c-4727-9768-2a904a6437df", "metadata": {}, "outputs": [], "source": [ - "# only keep W of Wil/Wstn\n", - "st_720 = st_720 >> filter(_.stop_sequence <= 11)" + "st_rainbow = st_rainbow >> filter(_.stop_sequence >= 35)" ] }, { "cell_type": "code", "execution_count": null, - "id": "3e04bc76-99c8-4930-a754-fc4c2c0f3597", + "id": "eb39f64d-adaf-4391-b205-1b12ef6a1760", "metadata": {}, "outputs": [], "source": [ - "# st_720.explore()" + "# st_rainbow.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "cfb4f847-c5a0-43e2-b8d9-c11c4469c954", + "id": "370fca77-e994-471f-8888-010b52738bec", "metadata": {}, "outputs": [], "source": [ - "wilshire = sjoin_tracts(st_720, tracts, 804)" + "eureka = sjoin_tracts(st_rainbow, tracts, 804) # half-mile" ] }, { "cell_type": "code", "execution_count": null, - "id": "600ff9a4-5bbe-4078-9829-721842084f89", + "id": "9f2386c4-bc1b-4b3b-bb80-16b04fe30112", "metadata": { "tags": [] }, "outputs": [], "source": [ - "# wilshire.explore()" + "eureka.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "be46c69b-c2fa-4a37-ab0a-fff6ccd63cf2", + "id": "e604495b-b0bf-46ce-8e14-ccac930dafbc", "metadata": {}, "outputs": [], "source": [ - "wilshire.to_file('wilshire.geojson')" + "eureka.to_file('eureka.geojson')" ] }, { "cell_type": "code", "execution_count": null, - "id": "18a68442-9ab6-4973-868f-b22ace3ea90d", + "id": "ef087597-674c-4256-8ce4-6ca7c0617665", "metadata": {}, "outputs": [], "source": [ - "# includes non-corridor vmt...\n", - "\n", - "# trips_all = gpd.read_parquet('outputs/new_trips_with_uza.parquet')\n", - "\n", - "# trips_all >> filter(_.GEOID.isin(wilshire_results.GEOID))" + "eureka_results = gpd.read_parquet('outputs/eureka_trips_with_uza.parquet')" ] }, { "cell_type": "code", "execution_count": null, - "id": "6c4e8f0e-941b-4bdd-b5ad-1a7e02602ad6", + "id": "74c2a66f-ff8e-482f-9c57-da742fbe5f42", "metadata": {}, "outputs": [], "source": [ - "wilshire_results = gpd.read_parquet('outputs/wilshire_trips_with_uza.parquet')" + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" ] }, { "cell_type": "code", "execution_count": null, - "id": "8714149d-fd48-4c11-bd13-c3ec1af6ef2a", + "id": "e7299e91-893e-4b81-979a-f700196c6a96", "metadata": {}, "outputs": [], "source": [ - "(wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" ] }, { "cell_type": "code", "execution_count": null, - "id": "0db59945-15cc-4800-8cd6-0efd01a21bfa", + "id": "6ff25cc5-b2aa-4e71-a5dc-2d70396c3805", "metadata": {}, "outputs": [], "source": [ - "utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + "eureka_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" ] }, { "cell_type": "code", "execution_count": null, - "id": "611272a6-9add-4218-90c9-ea3004d5e829", + "id": "3020e29d-a035-4248-b990-efc0947d02dd", "metadata": {}, "outputs": [], "source": [ - "# wilshire_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + "eureka_results.sum()" ] }, { "cell_type": "code", "execution_count": null, - "id": "3e882b73-446a-4b31-8a5d-e569fa4016ac", + "id": "3a4e5ef0-f5d0-43ba-9895-d5542989383c", "metadata": {}, "outputs": [], "source": [ - "wilshire_results.sum()" + "eureka_results.describe()" ] }, { "cell_type": "markdown", - "id": "5cf215ef-e257-4405-b4a6-c1c0eab26116", - "metadata": { - "tags": [] - }, + "id": "6d4fdb29-2b3b-4055-ada9-b5b149db9f6c", + "metadata": {}, "source": [ - "## Fresno Route 1" + "# All Corridors Summary" ] }, { "cell_type": "code", "execution_count": null, - "id": "7ac39af3-5a44-44b5-a084-ce24fb115874", + "id": "98680b32-440f-4fc2-867c-ede1a1967393", "metadata": {}, "outputs": [], "source": [ - "fresno = feeds >> filter(_.name.str.contains('Fresno Sch'))\n", - "fresno" + "fresno_results['corridor'] = 'Fresno'\n", + "san_pablo_results['corridor'] = 'San Pablo Ave'\n", + "wilshire_results['corridor'] = 'Wilshire'\n", + "eureka_results['corridor'] = 'Eureka'" ] }, { "cell_type": "code", "execution_count": null, - "id": "b1bde719-a199-41c9-b152-487c95a43732", + "id": "263af4e4-07fe-4f74-ba78-b54829905a40", "metadata": {}, "outputs": [], "source": [ - "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, fresno.feed_key)" + "all_results = pd.concat([fresno_results, san_pablo_results, wilshire_results, eureka_results])" ] }, { "cell_type": "code", "execution_count": null, - "id": "38f2c41e-88d5-4e2e-a3ee-7857746f6b78", + "id": "d3249b0a-2d67-4dba-bb58-7ab50b930185", "metadata": {}, "outputs": [], "source": [ - "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, fresno.feed_key)" + "(all_results >> group_by(_.corridor)\n", + " >> summarize(total_new_transit_trips = _.projected_new_transit_trips.sum(),\n", + " total_population = _.total_pop.sum(),\n", + " total_vmt = _.total_mi_auto.sum(),\n", + " p50_auto_trip_mi = _.p50_mi_auto.quantile(.5),\n", + " total_auto_trips = _.total_trips_auto.sum()\n", + " )\n", + "\n", + ").to_csv('vmt_transit_corridors.csv')" + ] + }, + { + "cell_type": "markdown", + "id": "3a01d280-a612-4e72-8b06-e98aae3426d6", + "metadata": { + "jp-MarkdownHeadingCollapsed": true, + "tags": [] + }, + "source": [ + "## Redding Route 4" ] }, { "cell_type": "code", "execution_count": null, - "id": "b3d2981b-4b1e-439d-b9bc-748e76e1db6b", + "id": "c2fd7bc1-a575-4b5b-adb3-63c7866cacc2", "metadata": {}, "outputs": [], "source": [ - "trips.route_short_name.unique()" + "redding = feeds >> filter(_.name.str.contains('Redding'))\n", + "redding" ] }, { "cell_type": "code", "execution_count": null, - "id": "7128fd7b-ab96-47bf-999e-33d7cd586546", + "id": "909cd786-51d5-40fb-b997-532e67378fe7", "metadata": {}, "outputs": [], "source": [ - "trips_1 = trips >> filter(_.route_short_name.isin(['01']), _.direction_id == 0)" + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, redding.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "e3fd0090-1cfe-4215-941f-4c627ce9b470", + "id": "334d3d45-0f38-4fb8-ae58-814d6429eee0", "metadata": {}, "outputs": [], "source": [ - "trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, redding.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "a9d4ad6d-7a4a-48f2-91e1-7559c621015a", + "id": "70b8f4d5-c70e-4320-9756-c13d9c919a58", "metadata": {}, "outputs": [], "source": [ - "st_1 = trips_to_stops(trips_1, fresno.feed_key)" + "trips.route_short_name.unique()" ] }, { "cell_type": "code", "execution_count": null, - "id": "2431c3e0-9338-4693-a02b-93a17962e196", + "id": "8cece935-15cf-4eab-a58f-311a0927d8ae", "metadata": {}, "outputs": [], "source": [ - "st_1 = st_1 >> filter(_.stop_sequence < 20) # vertical portion only" + "trips_4 = trips >> filter(_.route_short_name.isin(['4']), _.direction_id == 0)" ] }, { "cell_type": "code", "execution_count": null, - "id": "31156153-36c0-4d32-b834-553e8f8a95c6", + "id": "b56d6acc-d992-4d48-a5e4-affb42e3605f", "metadata": {}, "outputs": [], "source": [ - "# st_1.explore()" + "# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')" ] }, { "cell_type": "code", "execution_count": null, - "id": "35393a07-ba6e-4c67-a427-a8bc07fa0345", - "metadata": {}, + "id": "1b27cec5-ea22-4265-b7cb-6898794ae577", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "fresno = sjoin_tracts(st_1, tracts, 804) # half-mile" + "st_4 = trips_to_stops(trips_4, redding.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "ea4f36ad-72f2-4959-846b-21baeab21a83", + "id": "2e239235-82d3-42ed-963d-d2d196fb1d8a", "metadata": {}, "outputs": [], "source": [ - "# fresno.explore()" + "# st_4.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "9dc9d26c-5743-4e2c-b107-0992cce1023c", + "id": "af4426e8-4d86-4ed9-9cd4-096d4df9eeaa", "metadata": {}, "outputs": [], "source": [ - "fresno.to_file('fresno.geojson')" + "redding = sjoin_tracts(st_4, tracts, 804) # half-mile" ] }, { "cell_type": "code", "execution_count": null, - "id": "7f5dd373-bf14-4e1a-b47d-81f85eb3231f", + "id": "c325c88e-7ddf-4f24-85f5-b2e49ea88dd6", "metadata": {}, "outputs": [], "source": [ - "fresno_results = gpd.read_parquet('outputs/fresno_trips_with_uza.parquet')" + "# redding.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "82878ff2-007f-41b3-a378-2b808b05f5c0", + "id": "5387c9f7-9b77-49ef-b934-dc4f56836e28", "metadata": {}, "outputs": [], "source": [ - "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" + "redding.to_file('redding.geojson')" ] }, { "cell_type": "code", "execution_count": null, - "id": "203d5e11-c54f-42ea-8c19-d9fe36bc2643", + "id": "5cf3dfc7-2893-4cde-bae7-8e9fcdb751ca", "metadata": {}, "outputs": [], "source": [ - "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" + "redding_results = gpd.read_parquet('outputs/redding_trips_with_uza.parquet')" ] }, { "cell_type": "code", "execution_count": null, - "id": "9e2a3a9c-5b72-47da-bfcb-73bf05b955b4", + "id": "6e947b18-2710-4d71-89c4-0d256524e774", "metadata": {}, "outputs": [], "source": [ - "fresno_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')" ] }, { "cell_type": "code", "execution_count": null, - "id": "294f0011-d722-4cd1-b3e4-88c3de380b21", + "id": "b4c33a16-fb03-4d87-bd02-1a957b17be51", "metadata": {}, "outputs": [], "source": [ - "fresno_results.sum()" + "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')" ] }, { "cell_type": "code", "execution_count": null, - "id": "ef648be9-0c5a-40fa-8351-591175802794", + "id": "0cd7c5d1-499b-4109-b942-d1e3f93e6c97", "metadata": {}, "outputs": [], "source": [ - "fresno_results.describe()" + "redding_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" ] }, { - "cell_type": "markdown", - "id": "0378b6eb-ab6c-40f3-94a4-3aec913d6a3d", + "cell_type": "code", + "execution_count": null, + "id": "6e439a7b-5a7b-4aa7-bb80-5626f24600c6", "metadata": {}, + "outputs": [], "source": [ - "## San Pablo Ave" + "redding_results.sum()" ] }, { - "cell_type": "markdown", - "id": "3a01d280-a612-4e72-8b06-e98aae3426d6", + "cell_type": "code", + "execution_count": null, + "id": "4f883ae3-9dbd-4a39-935e-9e521159e7b3", "metadata": {}, + "outputs": [], "source": [ - "## Eureka Route TBD" + "redding_results.describe()" ] }, { "cell_type": "markdown", - "id": "c5d6a929-e7e4-4074-9467-2e7aaed57ddb", + "id": "086c18f7-c7cf-41a2-9147-727740f781e5", "metadata": { + "jp-MarkdownHeadingCollapsed": true, "tags": [] }, "source": [ - "## Sjoin and calculate" + "## MST (table)" ] }, { "cell_type": "code", "execution_count": null, - "id": "a3551c17-c263-41b6-9f81-632afc134988", + "id": "5f958167-a291-4aba-8566-111c5e713be9", "metadata": {}, "outputs": [], "source": [ - "surf_corridor = surf_corridor.to_crs(geography_utils.CA_NAD83Albers)\n", - "st_720 = st_720.to_crs(geography_utils.CA_NAD83Albers)" + "mst = feeds >> filter(_.name.str.contains('Monterey'))\n", + "mst" ] }, { "cell_type": "code", "execution_count": null, - "id": "2e3caf3a-42b1-4520-be55-1f5c59655607", + "id": "d68764b0-476a-4af8-8560-b931a8afba51", "metadata": {}, "outputs": [], "source": [ - "# half-mile buffer stop areas, corridors should be broader...\n", - "surf_corridor.geometry = surf_corridor.buffer(804) \n", - "st_720.geometry = st_720.buffer(804)" + "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, mst.feed_key)" ] }, { "cell_type": "code", "execution_count": null, - "id": "52860ef0-afdf-478a-98c8-6a2eed4d44f1", + "id": "3bbf4ee7-7bd2-4ae2-968b-b17a5ef2514d", "metadata": {}, "outputs": [], "source": [ - "gdf = gdf >> select(-_.index_left, -_.index_right)" + "stops.columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "1e6c6cbc-c388-43c3-8340-25d7b1519152", + "id": "f07ef246-b92d-4e00-989b-b1800744a694", "metadata": {}, "outputs": [], "source": [ - "surf = gpd.sjoin(gdf, surf_corridor) >> distinct(_.GEOID, _keep_all=True)" + "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, mst.feed_key)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2fd8b773-d935-4c13-8f9e-84a8cba153c9", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "trips.columns" ] }, { "cell_type": "code", "execution_count": null, - "id": "884d4536-874d-432b-8e0e-fbe71c9160fb", + "id": "8fe0c258-f084-4030-94e6-b0a44f5f5498", "metadata": {}, "outputs": [], "source": [ - "surf.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + "trips.route_short_name.unique()" ] }, { "cell_type": "code", "execution_count": null, - "id": "8821c382-1d27-4dd3-994b-6d73e9b384b1", + "id": "a6763fd7-c9cc-44e1-b7e9-8c8c919e3f91", "metadata": {}, "outputs": [], "source": [ - "wilshire = gpd.sjoin(gdf, st_720) >> distinct(_.GEOID, _keep_all=True)" + "ab_trips = trips >> filter(_.route_short_name.isin(['A', 'B']), _.direction_id == 0)" ] }, { "cell_type": "code", "execution_count": null, - "id": "b6541ddd-66b0-4462-abc0-d9542bf1e082", + "id": "f9151728-3f87-45b0-a799-eb2b126ea434", "metadata": {}, "outputs": [], "source": [ - "wilshire.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')" + "trips_20 = trips >> filter(_.route_short_name == '20', _.direction_id == 0)" ] }, { "cell_type": "code", "execution_count": null, - "id": "4088fd65-a36c-4a54-a859-4bb954c5ce4c", + "id": "44c2f67b-74b7-4551-82b9-167f2744081b", "metadata": {}, "outputs": [], "source": [ - "surf.describe()" + "st_20 = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, mst.feed_key, trip_df=trips_20)" ] }, { "cell_type": "code", "execution_count": null, - "id": "a2169877-c50e-48f1-9729-7d37a7841e85", + "id": "fc5f463c-b3c6-4f80-86d4-9835c650eebb", "metadata": {}, "outputs": [], "source": [ - "wilshire.describe()" + "st_20 = st_20 >> distinct(_.stop_id, _.stop_sequence) >> collect()" ] }, { "cell_type": "code", "execution_count": null, - "id": "00959a08-aa6f-4a64-8f38-f90636a84d8a", + "id": "23f3a7b3-cd32-480f-ab1f-cc616c02e77a", "metadata": {}, "outputs": [], "source": [ - "surf.sum()" + "st_20 = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st_20, on='stop_id')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8b71bbe9-26d0-42ec-8b30-a7cfdee2236e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "st_20 = trips_to_stops(trips_20)" ] }, { "cell_type": "code", "execution_count": null, - "id": "481d4803-9e48-4cee-9fe9-181e3caa33f9", + "id": "99b2afea-3280-422c-b3b7-6e1c5ff54b5d", "metadata": {}, "outputs": [], "source": [ - "wilshire.sum()" + "# SURF BRT area for joins...\n", + "st_20 = st_20 >> filter(_.stop_sequence <= 27)" ] }, { - "cell_type": "markdown", - "id": "a9ae4745-dedf-4365-8869-9996558e2270", + "cell_type": "code", + "execution_count": null, + "id": "c3da388c-807d-424b-9e69-51588401ef2a", "metadata": {}, + "outputs": [], "source": [ - "# BART" + "# st_20.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "56865daf-a6e5-41e5-a038-0cb0f2f6f79c", + "id": "0bfacea0-f958-4a65-90f3-2fec0ab04fd6", "metadata": {}, "outputs": [], "source": [ - "bart = feeds >> filter(_.name.str.contains('BART'))\n", - "bart" + "st_ab = trips_to_stops(ab_trips)" ] }, { "cell_type": "code", "execution_count": null, - "id": "6bcbf87f-b0d4-420a-b32c-6a5ecb82f781", + "id": "25ab4e55-c0cf-4919-b9ef-41e665c9a136", "metadata": {}, "outputs": [], "source": [ - "shapes = shared_utils.gtfs_utils_v2.get_shapes(analysis_date, bart.feed_key)" + "# st_ab.explore()" ] }, { "cell_type": "code", "execution_count": null, - "id": "bf94d274-264e-4120-ad0c-7a1cf4f2f703", - "metadata": { - "tags": [] - }, + "id": "27a834eb-34b0-46a4-9f0d-75534812a336", + "metadata": {}, "outputs": [], "source": [ - "shapes.to_file('../../../csis-metrics/project_prioritization/accessibility/refactor/BART.geojson')" + "surf_corridor = pd.concat([st_20, st_ab])" ] }, { "cell_type": "code", "execution_count": null, - "id": "11af10fe-820f-483f-b6d0-3b91ef70b7fd", + "id": "72671c73-1331-4888-a6e4-5a8cc3a34a16", "metadata": {}, "outputs": [], "source": [ - "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)" + "surf_corridor.explore()" ] } ], diff --git a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb index 157a099e7..634f5c544 100644 --- a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb +++ b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb @@ -9,7 +9,8 @@ "source": [ "import pandas as pd\n", "import geopandas as gpd\n", - "from siuba import *" + "from siuba import *\n", + "import numpy as np" ] }, { @@ -63,30 +64,71 @@ }, "outputs": [], "source": [ - "def read_group_replica(zip_path, replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'):\n", + "def read_group_replica(zip_path):\n", " '''\n", - " zip_path: path to zip file containing a Replica trips export\n", + " zip_path: path to zip file containing a Replica trips export csv \n", " '''\n", " with zipfile.ZipFile(zip_path) as z:\n", - " with z.open(replica_filename) as f:\n", + " csvs = [f for f in z.namelist() if f[-3:] == 'csv']\n", + " assert len(csvs) == 1\n", + " with z.open(csvs[0]) as f:\n", " df = pd.read_csv(f)\n", " df = (df >> filter(_.primary_mode.isin(['private_auto', 'auto_passenger', 'on_demand_auto', 'public_transit']))\n", " >> select(-_.origin_trct_2020, -_.activity_id)\n", " )\n", " df['is_auto'] = df.primary_mode.str.contains('auto')\n", + " return df\n", " grouped = (df >> group_by(_.origin_trct_fips_2020, _.is_auto)\n", " >> summarize(n = _.shape[0], p50_distance = _.trip_distance_miles.quantile(.5),\n", " p75_distance = _.trip_distance_miles.quantile(.75),\n", " p90_distance = _.trip_distance_miles.quantile(.9),\n", - " total_miles = _.trip_distance_miles.sum(),\n", + " total_miles = _.trip_distance_miles.sum(), \n", " )\n", " )\n", - " # parquet_path = f'./intermediate/{zip_path.split(\".zip\")[0]}.parquet'\n", - " # grouped.to_parquet(parquet_path)\n", - " # print(f'grouped data -> {parquet_path}')\n", + "\n", " return grouped" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "c506f859-1ffc-4e36-ba03-1c4393ba4d9e", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_raw = read_group_replica(f'replica_raw/corridors/replica-fresno-trips_dataset.zip')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "51c4c2e4-91d8-47ef-b2a2-ca30c6e2b84f", + "metadata": {}, + "outputs": [], + "source": [ + "fresno_raw.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7122b15b-574a-4cf1-84ab-0af08f703e57", + "metadata": {}, + "outputs": [], + "source": [ + "(wilshire_raw >> filter(_.is_auto)).trip_distance_miles.hist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9423782c-0b5a-474a-bf5a-99ae2e6812b3", + "metadata": {}, + "outputs": [], + "source": [ + "(wilshire_raw >> filter(_.is_auto, _.trip_distance_miles < 4)).trip_distance_miles.hist()" + ] + }, { "cell_type": "markdown", "id": "a48da885-0c7c-41a6-be13-8364e7fdc48c", @@ -187,11 +229,10 @@ "outputs": [], "source": [ "grouped = pd.DataFrame()\n", - "for region in ['fresno']:\n", + "for region in ['eureka']:\n", " print(region)\n", " # note replica filename includes date of download...\n", - " df = read_group_replica(f'replica_raw/corridors/replica-{region}-trips_dataset.zip',\n", - " replica_filename='replica-mode_split_test-02_20_24-trips_dataset.csv')\n", + " df = read_group_replica(f'replica_raw/corridors/replica-{region}-trips_dataset.zip')\n", " grouped = pd.concat([grouped, df])" ] }, @@ -202,7 +243,7 @@ "metadata": {}, "outputs": [], "source": [ - "grouped.to_parquet('intermediate/fresno_grouped.parquet')" + "grouped.to_parquet('intermediate/eureka_grouped.parquet')" ] }, { @@ -259,41 +300,61 @@ { "cell_type": "code", "execution_count": null, - "id": "f4b313d7-62e5-46b4-ab93-6b25d176bdeb", + "id": "23b9ae5b-bd02-4978-b7d1-3e4b49de53b5", "metadata": {}, "outputs": [], "source": [ - "replica_grouped = pd.read_parquet('intermediate/fresno_grouped.parquet')" + "tracts_feeds = gpd.read_parquet('intermediate/feeds_tract_geo.parquet')" ] }, { "cell_type": "code", "execution_count": null, - "id": "23b9ae5b-bd02-4978-b7d1-3e4b49de53b5", + "id": "73088513-9710-4c9a-a205-d531c3807345", "metadata": {}, "outputs": [], "source": [ - "tracts_feeds = gpd.read_parquet('intermediate/feeds_tract_geo.parquet')" + "tracts_feeds.GEOID = tracts_feeds.GEOID.astype('int64')" ] }, { "cell_type": "code", "execution_count": null, - "id": "73088513-9710-4c9a-a205-d531c3807345", + "id": "61ce4f74-45e9-4c85-ad34-d2ac9c5cfc32", "metadata": {}, "outputs": [], "source": [ - "tracts_feeds.GEOID = tracts_feeds.GEOID.astype('int64')" + "tracts_feeds = tracts_feeds >> distinct(_.GEOID, _.geometry)" ] }, { "cell_type": "code", "execution_count": null, - "id": "61ce4f74-45e9-4c85-ad34-d2ac9c5cfc32", + "id": "3f9512da-00e9-4768-a79c-8570c4a0bec2", "metadata": {}, "outputs": [], "source": [ - "tracts_feeds = tracts_feeds >> distinct(_.GEOID, _.geometry)" + "replica_grouped = pd.read_parquet(f'intermediate/wilshire_grouped.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "240b6b96-ea68-49f2-8509-883af8f0c36b", + "metadata": {}, + "outputs": [], + "source": [ + "18062 / 1.6" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fa0e75af-11d9-4051-8b0f-3a8d95c166be", + "metadata": {}, + "outputs": [], + "source": [ + "(replica_grouped >> filter(_.is_auto))." ] }, { @@ -308,20 +369,27 @@ " replica_df: df from read_group_replica\n", " tracts_feeds_df: gdf from stops_by_tract_agency\n", " '''\n", + " auto_trip_counts = (replica_df >> filter(_.is_auto)\n", + " >> group_by(_.origin_trct_fips_2020)\n", + " >> summarize(auto_trips = _.n.sum())\n", + " )\n", " replica_df.is_auto = replica_df.is_auto.map(lambda x: 'yes' if x else 'no')\n", " \n", " df2 = replica_df >> spread('is_auto', 'p50_distance') >> select(-_.n, -_.total_miles)\n", " df2 = df2.rename(columns={'no': 'p50_mi_transit', 'yes': 'p50_mi_auto'})\n", - " \n", - " df3 = replica_df >> spread('is_auto', 'total_miles') >> select(-_.p50_distance, -_.n)\n", + " df3 = replica_df >> spread('is_auto', 'total_miles') >> select(-_.n, -_.p50_distance)\n", " df3 = df3.rename(columns={'no': 'total_mi_transit', 'yes': 'total_mi_auto'})\n", - " \n", " df2 = df2 >> inner_join(_, df3, on = 'origin_trct_fips_2020')\n", + " df2 = df2 >> inner_join(_, auto_trip_counts, on = 'origin_trct_fips_2020')\n", " \n", + " if 'p50_mi_transit' not in df2.columns:\n", + " df2['p50_mi_transit'] = np.nan\n", + " df2['total_mi_transit'] = np.nan\n", " df2 = (df2 >> group_by(_.origin_trct_fips_2020)\n", " >> summarize(p50_mi_transit = _.p50_mi_transit.max(), p50_mi_auto = _.p50_mi_auto.max(),\n", " total_mi_transit = _.total_mi_transit.max(),\n", - " total_mi_auto = _.total_mi_auto.max()\n", + " total_mi_auto = _.total_mi_auto.max(),\n", + " total_trips_auto = _.auto_trips.sum()\n", " )\n", " )\n", " \n", @@ -344,6 +412,16 @@ " return df2" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "f4b313d7-62e5-46b4-ab93-6b25d176bdeb", + "metadata": {}, + "outputs": [], + "source": [ + "# replica_grouped = pd.read_parquet('intermediate/wilshire_grouped.parquet')" + ] + }, { "cell_type": "code", "execution_count": null, @@ -353,7 +431,7 @@ }, "outputs": [], "source": [ - "processed_df = process_grouped_data(replica_grouped, tracts_feeds)" + "# processed_df = process_grouped_data(replica_grouped, tracts_feeds)" ] }, { @@ -384,8 +462,8 @@ "metadata": {}, "outputs": [], "source": [ - "no_transit_pct = no_transit.iloc[1, 1] / no_transit.iloc[0, 1]\n", - "f'{round(no_transit_pct*100, 0)} percent of VMT in tracts with no transit per GTFS Warehouse stops'" + "# no_transit_pct = no_transit.iloc[1, 1] / no_transit.iloc[0, 1]\n", + "# f'{round(no_transit_pct*100, 0)} percent of VMT in tracts with no transit per GTFS Warehouse stops'" ] }, { @@ -439,11 +517,18 @@ { "cell_type": "code", "execution_count": null, - "id": "10cb6149-4676-4b64-9ae7-b53f621f815b", - "metadata": {}, + "id": "e0740b97-48ce-4543-a560-9936c559e4df", + "metadata": { + "tags": [] + }, "outputs": [], "source": [ - "uza_joined.to_parquet('outputs/fresno_trips_with_uza.parquet')" + "for region in ['sanpablo', 'eureka', 'fresno', 'wilshire']:\n", + " replica_grouped = pd.read_parquet(f'intermediate/{region}_grouped.parquet')\n", + " processed_df = process_grouped_data(replica_grouped, tracts_feeds)\n", + " uza_joined = attach_tracts_pop(processed_df)\n", + " display(uza_joined >> head(3))\n", + " uza_joined.to_parquet(f'outputs/{region}_trips_with_uza.parquet')" ] }, { From 1fef9b0cf6561e9ef9d5198fbc429278f78913a6 Mon Sep 17 00:00:00 2001 From: Eric Dasmalchi Date: Mon, 6 May 2024 22:12:00 +0000 Subject: [PATCH 5/5] slight tidy, use gcs --- .../read_process_data.ipynb | 719 +++++++++++++----- 1 file changed, 535 insertions(+), 184 deletions(-) diff --git a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb index 634f5c544..2cc71e981 100644 --- a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb +++ b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3", "metadata": {}, "outputs": [], @@ -15,7 +15,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "id": "81cddca7-bea4-494d-b0cb-02508d52b380", "metadata": {}, "outputs": [], @@ -25,7 +25,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4", "metadata": {}, "outputs": [], @@ -35,7 +35,29 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, + "id": "80324d88-ea3a-45a6-9362-933a2395ed31", + "metadata": {}, + "outputs": [], + "source": [ + "fs = get_fs()" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3dcaf650-43fe-4532-9060-442b067ef173", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# ! pip install pygris" + ] + }, + { + "cell_type": "code", + "execution_count": 6, "id": "682268e8-78fc-4609-afc5-294f3c650b5e", "metadata": {}, "outputs": [], @@ -45,6 +67,16 @@ "# importlib.reload(_utils)" ] }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5aeca438-6285-4cca-b375-ab8aa3849e42", + "metadata": {}, + "outputs": [], + "source": [ + "GCS_PATH = 'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/'" + ] + }, { "cell_type": "markdown", "id": "b076a21f-5a53-4b75-b140-0e4947099e42", @@ -57,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "id": "85a89737-f90d-488f-9310-ca83557e476c", "metadata": { "tags": [] @@ -68,11 +100,22 @@ " '''\n", " zip_path: path to zip file containing a Replica trips export csv \n", " '''\n", - " with zipfile.ZipFile(zip_path) as z:\n", + " def parse_csv(zipfile):\n", " csvs = [f for f in z.namelist() if f[-3:] == 'csv']\n", " assert len(csvs) == 1\n", " with z.open(csvs[0]) as f:\n", " df = pd.read_csv(f)\n", + " return df\n", + " \n", + " if zip_path[:3] == 'gs:':\n", + " with fs.open(zip_path) as f:\n", + " with zipfile.ZipFile(f) as z:\n", + " df = parse_csv(z)\n", + " else:\n", + " with zipfile.ZipFile(f) as z:\n", + " df = parse_csv(z)\n", + " \n", + "\n", " df = (df >> filter(_.primary_mode.isin(['private_auto', 'auto_passenger', 'on_demand_auto', 'public_transit']))\n", " >> select(-_.origin_trct_2020, -_.activity_id)\n", " )\n", @@ -91,115 +134,159 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "c506f859-1ffc-4e36-ba03-1c4393ba4d9e", + "execution_count": 9, + "id": "f0df73e2-7ebe-431f-b533-6139cc9b79c0", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/replica_raw/corridors/replica-fresno-trips_dataset.zip'" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "fresno_raw = read_group_replica(f'replica_raw/corridors/replica-fresno-trips_dataset.zip')" + "f'{GCS_PATH}replica_raw/corridors/replica-fresno-trips_dataset.zip'" ] }, { "cell_type": "code", - "execution_count": null, - "id": "51c4c2e4-91d8-47ef-b2a2-ca30c6e2b84f", - "metadata": {}, - "outputs": [], - "source": [ - "fresno_raw.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7122b15b-574a-4cf1-84ab-0af08f703e57", - "metadata": {}, - "outputs": [], - "source": [ - "(wilshire_raw >> filter(_.is_auto)).trip_distance_miles.hist()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9423782c-0b5a-474a-bf5a-99ae2e6812b3", - "metadata": {}, - "outputs": [], - "source": [ - "(wilshire_raw >> filter(_.is_auto, _.trip_distance_miles < 4)).trip_distance_miles.hist()" - ] - }, - { - "cell_type": "markdown", - "id": "a48da885-0c7c-41a6-be13-8364e7fdc48c", - "metadata": { - "jp-MarkdownHeadingCollapsed": true, - "tags": [] - }, - "source": [ - "## quick vmt" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ec3470ac-5f0e-49a0-9000-f371f952bc74", - "metadata": {}, - "outputs": [], - "source": [ - "zip_path = f'replica_raw/replica-la_north-trips_dataset.zip'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "868a0ae4-b076-4775-beba-fdc9ba764b27", - "metadata": {}, - "outputs": [], - "source": [ - "replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b4c08637-9bbc-4727-af6d-14dc1c66b4a1", + "execution_count": 10, + "id": "c506f859-1ffc-4e36-ba03-1c4393ba4d9e", "metadata": {}, "outputs": [], "source": [ - "with zipfile.ZipFile(zip_path) as z:\n", - " with z.open(replica_filename) as f:\n", - " df = pd.read_csv(f)" + "fresno_raw = read_group_replica(f'{GCS_PATH}replica_raw/corridors/replica-fresno-trips_dataset.zip')" ] }, { "cell_type": "code", - "execution_count": null, - "id": "5875f7f5-2c1f-40f6-95fa-0a8d106b1e7a", + "execution_count": 11, + "id": "51c4c2e4-91d8-47ef-b2a2-ca30c6e2b84f", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_duration_minutestrip_distance_milesorigin_trct_fips_2020
count156196.000000156196.0000001.561960e+05
mean8.0273312.4894946.019003e+09
std6.0411162.3055811.731910e+03
min0.0000000.1000006.019000e+09
25%3.0000000.8000006.019002e+09
50%7.0000001.6000006.019004e+09
75%11.0000003.5000006.019005e+09
max93.00000017.8000006.019005e+09
\n", + "
" + ], + "text/plain": [ + " trip_duration_minutes trip_distance_miles origin_trct_fips_2020\n", + "count 156196.000000 156196.000000 1.561960e+05\n", + "mean 8.027331 2.489494 6.019003e+09\n", + "std 6.041116 2.305581 1.731910e+03\n", + "min 0.000000 0.100000 6.019000e+09\n", + "25% 3.000000 0.800000 6.019002e+09\n", + "50% 7.000000 1.600000 6.019004e+09\n", + "75% 11.000000 3.500000 6.019005e+09\n", + "max 93.000000 17.800000 6.019005e+09" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "miles_all = df.trip_distance_miles.sum()" + "fresno_raw.describe()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "01a823a1-de52-4d08-8b80-a024c1397f95", + "execution_count": 12, + "id": "7122b15b-574a-4cf1-84ab-0af08f703e57", "metadata": {}, "outputs": [], "source": [ - "shorter = (df >> filter(_.trip_distance_miles < _.trip_distance_miles.quantile(.95))).trip_distance_miles.sum()" + "# (wilshire_raw >> filter(_.is_auto)).trip_distance_miles.hist()" ] }, { "cell_type": "code", - "execution_count": null, - "id": "132b4a03-9f73-4978-8a35-ae5c130f7f73", + "execution_count": 13, + "id": "9423782c-0b5a-474a-bf5a-99ae2e6812b3", "metadata": {}, "outputs": [], "source": [ - "shorter / miles_all" + "# (wilshire_raw >> filter(_.is_auto, _.trip_distance_miles < 4)).trip_distance_miles.hist()" ] }, { @@ -212,7 +299,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "id": "dae2c9bb-5121-4966-85ed-111bd525c924", "metadata": {}, "outputs": [], @@ -223,32 +310,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "id": "15544ecf-1140-426f-be93-5ce53e2b5f7e", "metadata": {}, "outputs": [], "source": [ - "grouped = pd.DataFrame()\n", - "for region in ['eureka']:\n", - " print(region)\n", - " # note replica filename includes date of download...\n", - " df = read_group_replica(f'replica_raw/corridors/replica-{region}-trips_dataset.zip')\n", - " grouped = pd.concat([grouped, df])" + "# grouped = pd.DataFrame()\n", + "# for region in ['eureka']:\n", + "# print(region)\n", + "# # note replica filename includes date of download...\n", + "# df = read_group_replica(f'replica_raw/corridors/replica-{region}-trips_dataset.zip')\n", + "# grouped = pd.concat([grouped, df])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "id": "1878b238-eec9-450c-bf3e-359016485495", "metadata": {}, "outputs": [], "source": [ - "grouped.to_parquet('intermediate/eureka_grouped.parquet')" + "# grouped.to_parquet('intermediate/eureka_grouped.parquet')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "id": "9fb929ae-0c66-4dfb-9a0e-604bfedef078", "metadata": { "tags": [] @@ -277,39 +364,79 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "id": "240ddd8f-6a36-44e1-a1c0-32e6f50c3cac", "metadata": {}, "outputs": [], "source": [ "# grouped = pd.DataFrame()\n", "# for region in all_regions:\n", - "# grouped = pd.concat([grouped, pd.read_parquet(f'intermediate/replica-{region}-trips_dataset.parquet')])" + "# grouped = pd.concat([grouped, pd.read_parquet(f'{GCS_PATH}intermediate/replica-{region}-trips_dataset.parquet')])" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, + "id": "0de2a7b3-f1f1-43df-8e7e-2cf4b416378c", + "metadata": {}, + "outputs": [], + "source": [ + "# grouped" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "f1867025-ad72-4ff5-931c-30982a65f0af", + "metadata": {}, + "outputs": [], + "source": [ + "from calitp_data_analysis import utils" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "79dafb24-bebd-4ae6-91d4-8d8e80983c76", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.geoparquet_gcs_export?" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "56e5f48c-649f-4e8f-8ef2-49ce8521cf2c", + "metadata": {}, + "outputs": [], + "source": [ + "# utils.geoparquet_gcs_export(grouped, f'{GCS_PATH}intermediate/', 'replica_grouped')" + ] + }, + { + "cell_type": "code", + "execution_count": 23, "id": "7596b001-2c1c-488f-b089-c375c0ddff4f", "metadata": {}, "outputs": [], "source": [ - "# replica_grouped = pd.read_parquet('intermediate/replica_grouped.parquet')" + "replica_grouped = pd.read_parquet(f'{GCS_PATH}intermediate/replica_grouped.parquet')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "id": "23b9ae5b-bd02-4978-b7d1-3e4b49de53b5", "metadata": {}, "outputs": [], "source": [ - "tracts_feeds = gpd.read_parquet('intermediate/feeds_tract_geo.parquet')" + "tracts_feeds = gpd.read_parquet(f'{GCS_PATH}intermediate/feeds_tract_geo.parquet')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "id": "73088513-9710-4c9a-a205-d531c3807345", "metadata": {}, "outputs": [], @@ -319,7 +446,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "id": "61ce4f74-45e9-4c85-ad34-d2ac9c5cfc32", "metadata": {}, "outputs": [], @@ -329,37 +456,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "id": "3f9512da-00e9-4768-a79c-8570c4a0bec2", "metadata": {}, "outputs": [], "source": [ - "replica_grouped = pd.read_parquet(f'intermediate/wilshire_grouped.parquet')" + "# replica_grouped = pd.read_parquet(f'intermediate/wilshire_grouped.parquet')" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "id": "240b6b96-ea68-49f2-8509-883af8f0c36b", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "11288.75" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "18062 / 1.6" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "id": "fa0e75af-11d9-4051-8b0f-3a8d95c166be", "metadata": {}, "outputs": [], "source": [ - "(replica_grouped >> filter(_.is_auto))." + "# (replica_grouped >> filter(_.is_auto))" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 30, "id": "06f007da-ad8d-4801-8595-85c5d44fe921", "metadata": {}, "outputs": [], @@ -414,42 +552,198 @@ }, { "cell_type": "code", - "execution_count": null, - "id": "f4b313d7-62e5-46b4-ab93-6b25d176bdeb", - "metadata": {}, - "outputs": [], - "source": [ - "# replica_grouped = pd.read_parquet('intermediate/wilshire_grouped.parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": null, + "execution_count": 31, "id": "d661657b-cac7-4e9a-806d-48579c0516f9", "metadata": { "tags": [] }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "0.2778337212327877\n" + ] + } + ], "source": [ - "# processed_df = process_grouped_data(replica_grouped, tracts_feeds)" + "processed_df = process_grouped_data(replica_grouped, tracts_feeds)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 32, "id": "50f04f86-6f13-4654-952e-58dc07a23faa", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
origin_trct_fips_2020p50_mi_transitp50_mi_autototal_mi_transittotal_mi_autototal_trips_autop50_transit_longerno_transit_replicano_transittotal_minew_transit_miprojected_new_transit_trips
060014001004.26.95185.3165410.853636FalseFalseFalse170596.145956.69809610942.0
160014002004.13.43638.158763.428600TrueFalseFalse62401.516326.4540943982.0
260014003004.73.76130.7122736.557740TrueFalseFalse128867.234100.3385267255.0
\n", + "
" + ], + "text/plain": [ + " origin_trct_fips_2020 p50_mi_transit p50_mi_auto total_mi_transit \\\n", + "0 6001400100 4.2 6.9 5185.3 \n", + "1 6001400200 4.1 3.4 3638.1 \n", + "2 6001400300 4.7 3.7 6130.7 \n", + "\n", + " total_mi_auto total_trips_auto p50_transit_longer no_transit_replica \\\n", + "0 165410.8 53636 False False \n", + "1 58763.4 28600 True False \n", + "2 122736.5 57740 True False \n", + "\n", + " no_transit total_mi new_transit_mi projected_new_transit_trips \n", + "0 False 170596.1 45956.698096 10942.0 \n", + "1 False 62401.5 16326.454094 3982.0 \n", + "2 False 128867.2 34100.338526 7255.0 " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "processed_df >> head(3)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 33, "id": "8169ca83-e540-48f4-af93-076a535f00c6", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
no_transittotal_mi_auto
0False1.162508e+09
1True1.294277e+08
\n", + "
" + ], + "text/plain": [ + " no_transit total_mi_auto\n", + "0 False 1.162508e+09\n", + "1 True 1.294277e+08" + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "no_transit = processed_df >> group_by(_.no_transit) >> summarize(total_mi_auto = _.total_mi_auto.sum())\n", "no_transit" @@ -457,7 +751,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 34, "id": "c7175031-a66c-4f90-8dca-193198b9d932", "metadata": {}, "outputs": [], @@ -468,32 +762,94 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 35, "id": "6476da44-36ac-4604-89d5-76966eb9411d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True 6571\n", + "False 2534\n", + "Name: p50_transit_longer, dtype: int64" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "processed_df.p50_transit_longer.value_counts()" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 36, + "id": "70bdaf9e-397b-46d4-bc0a-a73722cf8591", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using FIPS code '06' for input 'CA'\n" + ] + } + ], + "source": [ + "tract_geo = _utils.get_tract_geoms()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "c10b8a4f-b3de-4494-81b7-942c2b230c30", + "metadata": {}, + "outputs": [], + "source": [ + "ca_uzas = gpd.read_parquet(f'{GCS_PATH}intermediate/ca_uza.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "8d2f3dec-892d-4ffc-85fb-8e8866a51f42", + "metadata": {}, + "outputs": [], + "source": [ + "# census_pop = gpd.read_file(f'./census_ntd/DECENNIALPL2020.P1_2024-02-01T163251.zip')\n", + "\n", + "# census_cleaned = census_pop.iloc[2:,:][['GEO_ID', 'P1_001N']]\n", + "\n", + "# census_cleaned.to_parquet(f'{GCS_PATH}census_ntd/DECENNIALPL2020.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "d7b17b29-29b2-40f3-9038-c3b0b75f39f8", + "metadata": {}, + "outputs": [], + "source": [ + "census_cleaned = pd.read_parquet(f'{GCS_PATH}census_ntd/DECENNIALPL2020.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 61, "id": "8995132b-5eba-48e8-bfdb-585f1ff1e9d8", "metadata": {}, "outputs": [], "source": [ - "def attach_tracts_pop(processed_df):\n", + "def attach_tracts_pop(processed_df, tract_geo, ca_uzas, census_cleaned):\n", " \n", - " tract_geo = _utils.get_tract_geoms()\n", " tract_geo.GEOID = tract_geo.GEOID.astype('int64')\n", " gdf = (tract_geo >> inner_join(_, processed_df, on = {'GEOID': 'origin_trct_fips_2020'})\n", " >> select(-_.origin_trct_fips_2020))\n", - " ca_uzas = gpd.read_parquet('intermediate/ca_uza.parquet')\n", " uza_joined = gpd.sjoin(gdf, ca_uzas, how = 'left')\n", " \n", - " census_pop = gpd.read_file('census_ntd/DECENNIALPL2020.P1_2024-02-01T163251.zip')\n", - " tract_pop = census_pop[['GEO_ID', 'P1_001N']].iloc[2:,:]\n", + " tract_pop = census_cleaned\n", " tract_pop.GEO_ID = tract_pop.GEO_ID.map(lambda x: x.split('US')[1])\n", " tract_pop.GEO_ID = tract_pop.GEO_ID.astype('int64')\n", " tract_pop = tract_pop >> select(_.total_pop == _.P1_001N, _.GEOID == _.GEO_ID)\n", @@ -506,12 +862,12 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 63, "id": "81f381a4-bb80-4fb7-97fa-853111914712", "metadata": {}, "outputs": [], "source": [ - "uza_joined = attach_tracts_pop(processed_df)" + "uza_joined = attach_tracts_pop(processed_df, tract_geo, ca_uzas, census_cleaned)" ] }, { @@ -523,22 +879,12 @@ }, "outputs": [], "source": [ - "for region in ['sanpablo', 'eureka', 'fresno', 'wilshire']:\n", - " replica_grouped = pd.read_parquet(f'intermediate/{region}_grouped.parquet')\n", - " processed_df = process_grouped_data(replica_grouped, tracts_feeds)\n", - " uza_joined = attach_tracts_pop(processed_df)\n", - " display(uza_joined >> head(3))\n", - " uza_joined.to_parquet(f'outputs/{region}_trips_with_uza.parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "addb6ac5-5f4d-41dc-9a7a-0d6644bb699c", - "metadata": {}, - "outputs": [], - "source": [ - "# uza_joined.to_parquet('outputs/new_trips_with_uza.parquet')" + "# for region in ['sanpablo', 'eureka', 'fresno', 'wilshire']:\n", + "# replica_grouped = pd.read_parquet(f'intermediate/{region}_grouped.parquet')\n", + "# processed_df = process_grouped_data(replica_grouped, tracts_feeds)\n", + "# uza_joined = attach_tracts_pop(processed_df)\n", + "# display(uza_joined >> head(3))\n", + "# uza_joined.to_parquet(f'outputs/{region}_trips_with_uza.parquet')" ] }, { @@ -552,70 +898,75 @@ ] }, { - "cell_type": "markdown", - "id": "63e712e1-900d-430e-962e-72ed8dd30bbf", + "cell_type": "code", + "execution_count": 68, + "id": "6ed72b61-2c14-4eb2-a188-00384f52082a", "metadata": {}, + "outputs": [], "source": [ - "## Quick GCS Upload" + "uza_joined = (uza_joined\n", + " >> select(-_.index_right)\n", + " )" ] }, { "cell_type": "code", - "execution_count": null, - "id": "b0572b17-a9c2-4128-ab67-fa650c87fda0", + "execution_count": 71, + "id": "029334e4-be41-4f93-b95d-67687bd2d70e", "metadata": {}, "outputs": [], "source": [ - "fs = get_fs()" + "uza_joined['vmt_quantile'] = pd.qcut(uza_joined.total_mi_auto, 4, labels = ['p25', 'p50', 'p75', 'p100'])" ] }, { "cell_type": "code", - "execution_count": null, - "id": "76539b31-f757-4703-9f7a-2eea60834d06", + "execution_count": 72, + "id": "8f1b132b-8f81-4923-9efe-92f3162ee64c", "metadata": {}, "outputs": [], "source": [ - "_utils.GCS_PATH" + "!mkdir export" ] }, { "cell_type": "code", - "execution_count": null, - "id": "1518eea5-d2f1-4807-b392-4a4f2624439d", + "execution_count": 75, + "id": "ec2cc1b4-002b-498a-b1f0-055f5252fb70", "metadata": {}, "outputs": [], "source": [ - "lpath = 'replica_raw/'" + "uza_joined.vmt_quantile = uza_joined.vmt_quantile.astype(str)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "e0048f15-f124-431d-9fae-35aa7ef3dd72", + "execution_count": 79, + "id": "354a6bea-fbc5-49d7-b531-2597f240510e", "metadata": {}, "outputs": [], "source": [ - "fs.put(lpath, _utils.GCS_PATH + lpath, recursive=True)" + "utils.geoparquet_gcs_export(uza_joined, f'{GCS_PATH}outputs/', 'new_trips_with_uza')" ] }, { - "cell_type": "markdown", - "id": "b27a8e5c-9d03-435c-bd5c-8a5de83a6d88", + "cell_type": "code", + "execution_count": 77, + "id": "30983d03-5c50-4480-85db-ed6666e34bd8", "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_532/1657757500.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n", + " uza_joined.to_file('./export/vmt_with_quantiles.shp')\n" + ] + } + ], "source": [ - "## Pulling _corridor_ level data\n", - "\n", - "* First, get corridor geoms" + "uza_joined.to_file('./export/vmt_with_quantiles.shp')" ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1a5d9b48-6f90-4e85-87e8-aa4c94d01aa3", - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": {