diff --git a/conveyal_update/conveyal_vars.py b/conveyal_update/conveyal_vars.py
index e06f901ad..3ed9cb2c9 100644
--- a/conveyal_update/conveyal_vars.py
+++ b/conveyal_update/conveyal_vars.py
@@ -1,14 +1,17 @@
 import datetime as dt
 
 gcs_path = 'gs://calitp-analytics-data/data-analyses/conveyal_update/'
-target_date = dt.date(2023, 10, 18)
+target_date = dt.date(2024, 2, 14)
 osm_file = 'us-west-latest.osm.pbf'
 #  http://download.geofabrik.de/north-america/us-west-latest.osm.pbf
 #  first download with wget...
 
 conveyal_regions = {}
-#  boundaries correspond to Conveyal Analysis regions
+#  boundaries correspond to Conveyal Analysis regions
 conveyal_regions['norcal'] = {'north': 42.03909, 'south': 39.07038, 'east': -119.60541, 'west': -124.49158}
 conveyal_regions['central'] = {'north': 39.64165, 'south': 35.87347, 'east': -117.53174, 'west': -123.83789}
 conveyal_regions['socal'] = {'north': 35.8935, 'south': 32.5005, 'east': -114.13121, 'west': -121.46759}
-conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043}
\ No newline at end of file
+conveyal_regions['mojave'] = {'north': 37.81629, 'south': 34.89945, 'east': -114.59015, 'west': -118.38043}
+
+# #  special region for one-off Centennial Corridor
+# conveyal_regions['bakersfield'] = {'north': 36.81, 'south': 34.13, 'east': -117.12, 'west': -120.65}
\ No newline at end of file
diff --git a/conveyal_update/match_feeds_regions.py b/conveyal_update/match_feeds_regions.py
index f269bade2..318fc8e8b 100644
--- a/conveyal_update/match_feeds_regions.py
+++ b/conveyal_update/match_feeds_regions.py
@@ -1,7 +1,8 @@
 import os
 os.environ['USE_PYGEOS'] = '0'
 os.environ["CALITP_BQ_MAX_BYTES"] = str(800_000_000_000)
-from shared_utils import gtfs_utils_v2, geography_utils
+from shared_utils import gtfs_utils_v2
+from calitp_data_analysis import geography_utils
 
 import pandas as pd
 from siuba import *
diff --git a/sb125_analyses/vmt_transit_sketch/_utils.py b/sb125_analyses/vmt_transit_sketch/_utils.py
index 1a5a7f181..fd5c44c72 100644
--- a/sb125_analyses/vmt_transit_sketch/_utils.py
+++ b/sb125_analyses/vmt_transit_sketch/_utils.py
@@ -1,7 +1,7 @@
 import pygris
 import geopandas as gpd
 from siuba import *
-from shared_utils.geography_utils import CA_NAD83Albers
+from calitp_data_analysis.geography_utils import CA_NAD83Albers
 
 GCS_PATH = 'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/'
 
diff --git a/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb
new file mode 100644
index 000000000..a5984a904
--- /dev/null
+++ b/sb125_analyses/vmt_transit_sketch/corridor_selection.ipynb
@@ -0,0 +1,1368 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import geopandas as gpd\n",
+    "from siuba import *"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "81cddca7-bea4-494d-b0cb-02508d52b380",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import zipfile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "15475e21-db2a-4f64-a2bc-38f2b76b9a4f",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ! pip install pygris"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "682268e8-78fc-4609-afc5-294f3c650b5e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import _utils\n",
+    "import importlib\n",
+    "importlib.reload(_utils)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e269a698-e8bf-4af7-a53c-45265ca7e5ad",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import shared_utils\n",
+    "from calitp_data_analysis import geography_utils, utils"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0664b67b-b63d-4357-b855-20a33bc8d6d7",
+   "metadata": {},
+   "source": [
+    "# Selecting Corridors"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "684770aa-d035-45d3-a3a4-ada6c51c2692",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "analysis_date = '2023-04-15'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1424118-5e7c-4e00-9eee-1630d251cb14",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "feeds = shared_utils.gtfs_utils_v2.schedule_daily_feed_to_gtfs_dataset_name(selected_date=analysis_date)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "081ae78d-9f96-4bca-b181-c6c40f3f8e76",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "tracts = _utils.get_tract_geoms()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "53cb95cd-7e8c-455d-ab47-fe3968582541",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def trips_to_stops(trip_df, feed_list):\n",
+    "    \"\"\"Fetch stop times for trip_df/feed_list, keep distinct (stop_id, stop_sequence) pairs, and join geometry from the notebook-level `stops`.\"\"\"\n",
+    "    stop_times = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, feed_list, trip_df=trip_df)\n",
+    "    stop_seqs = stop_times >> distinct(_.stop_id, _.stop_sequence) >> collect()\n",
+    "    return stops >> select(_.stop_id, _.geometry) >> inner_join(_, stop_seqs, on='stop_id')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1ed8886d-6403-438e-b5c6-b8dee2c61290",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def sjoin_tracts(stops_gdf, tracts_gdf, buffer_m):\n",
+    "    \"\"\"Return tracts_gdf rows that spatially join to stops_gdf buffered by buffer_m, keeping one row per GEOID.\"\"\"\n",
+    "    projected = stops_gdf.to_crs(geography_utils.CA_NAD83Albers)\n",
+    "    assert projected.crs == tracts_gdf.crs\n",
+    "\n",
+    "    #  buffer is applied after reprojecting, so buffer_m is in CA_NAD83Albers units\n",
+    "    projected.geometry = projected.buffer(buffer_m)\n",
+    "    joined = gpd.sjoin(tracts_gdf, projected)\n",
+    "    return joined >> distinct(_.GEOID, _keep_all=True)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "1ca1517e-876d-49ae-82c1-973e02116745",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Wilshire"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ed2d3a3a-3473-4174-91b7-56c4e6759e9d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "metro = feeds >> filter(_.name.str.contains('LA Metro Bus'))\n",
+    "metro"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "030ebc15-6824-47a6-b781-11628b74d848",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, metro.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c1e324d3-7891-40f2-bd7b-2389fa36ee26",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, metro.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "384bc785-79ed-4ca8-be5d-14a0480e98cf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips.route_short_name.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b6fde5e-1394-4350-b483-1c1e6ad1dabb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_720 = trips >> filter(_.route_short_name.isin(['720']), _.direction_id == 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fed54ea9-8157-425a-889a-1069d77265ca",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "st_720 = trips_to_stops(trips_720, metro.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "38b16a39-5d16-45bd-a9e4-71c22cacdb7f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#  only keep W of Wil/Wstn\n",
+    "st_720 = st_720 >> filter(_.stop_sequence <= 11)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e04bc76-99c8-4930-a754-fc4c2c0f3597",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# st_720.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "cfb4f847-c5a0-43e2-b8d9-c11c4469c954",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wilshire = sjoin_tracts(st_720, tracts, 804)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "600ff9a4-5bbe-4078-9829-721842084f89",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# wilshire.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "be46c69b-c2fa-4a37-ab0a-fff6ccd63cf2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# wilshire.to_file('wilshire.geojson')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "18a68442-9ab6-4973-868f-b22ace3ea90d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#  includes non-corridor vmt...\n",
+    "\n",
+    "# trips_all = gpd.read_parquet('outputs/new_trips_with_uza.parquet')\n",
+    "\n",
+    "# trips_all >> filter(_.GEOID.isin(wilshire_results.GEOID))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6c4e8f0e-941b-4bdd-b5ad-1a7e02602ad6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wilshire_results = gpd.read_parquet('outputs/wilshire_trips_with_uza.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8714149d-fd48-4c11-bd13-c3ec1af6ef2a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0db59945-15cc-4800-8cd6-0efd01a21bfa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "611272a6-9add-4218-90c9-ea3004d5e829",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wilshire_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3e882b73-446a-4b31-8a5d-e569fa4016ac",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wilshire_results.sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "645f8132-8062-4a3f-8b97-df79483b44df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "wilshire_results.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "5cf215ef-e257-4405-b4a6-c1c0eab26116",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Fresno Route 1"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7ac39af3-5a44-44b5-a084-ce24fb115874",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fresno = feeds >> filter(_.name.str.contains('Fresno Sch'))\n",
+    "fresno"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b1bde719-a199-41c9-b152-487c95a43732",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, fresno.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "38f2c41e-88d5-4e2e-a3ee-7857746f6b78",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, fresno.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b3d2981b-4b1e-439d-b9bc-748e76e1db6b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips.route_short_name.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7128fd7b-ab96-47bf-999e-33d7cd586546",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_1 = trips >> filter(_.route_short_name.isin(['01']), _.direction_id == 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e3fd0090-1cfe-4215-941f-4c627ce9b470",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a9d4ad6d-7a4a-48f2-91e1-7559c621015a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "st_1 = trips_to_stops(trips_1, fresno.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2431c3e0-9338-4693-a02b-93a17962e196",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "st_1 = st_1 >> filter(_.stop_sequence < 20) #  vertical portion only"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "31156153-36c0-4d32-b834-553e8f8a95c6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# st_1.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "35393a07-ba6e-4c67-a427-a8bc07fa0345",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fresno = sjoin_tracts(st_1, tracts, 804) #  half-mile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ea4f36ad-72f2-4959-846b-21baeab21a83",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# fresno.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9dc9d26c-5743-4e2c-b107-0992cce1023c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fresno.to_file('fresno.geojson')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7f5dd373-bf14-4e1a-b47d-81f85eb3231f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fresno_results = gpd.read_parquet('outputs/fresno_trips_with_uza.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "82878ff2-007f-41b3-a378-2b808b05f5c0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "203d5e11-c54f-42ea-8c19-d9fe36bc2643",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9e2a3a9c-5b72-47da-bfcb-73bf05b955b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fresno_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "294f0011-d722-4cd1-b3e4-88c3de380b21",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fresno_results.sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef648be9-0c5a-40fa-8351-591175802794",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fresno_results.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "0378b6eb-ab6c-40f3-94a4-3aec913d6a3d",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## San Pablo Ave"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8e8a3159-40a9-4763-afa2-a325679d9ff2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ac = feeds >> filter(_.name.str.contains('AC Transit'))\n",
+    "ac"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6aec2329-d7f7-463e-86f0-12b609048529",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, ac.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5c960435-da71-4074-aaea-15a8f12b18ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, ac.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "85ba91ca-e882-47b8-a343-ac5eab3b0a4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips.route_short_name.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fcbd0633-395a-4bea-8c64-b21cb5ecd64f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_72r = trips >> filter(_.route_short_name.isin(['72R']), _.direction_id == 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e1359516-eddf-4cde-ba35-32dd8f7e5535",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "st_72r = trips_to_stops(trips_72r, ac.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3f7cee73-690e-45bd-9d09-203f031e53e4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# st_72r.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "71962307-67d1-4670-b3e2-14dea1c0770f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "san_pablo = sjoin_tracts(st_72r, tracts, 804) #  half-mile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "27970e7d-d3a2-44c9-9c5b-aa839cf6c4dc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# san_pablo.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4dd4b619-1bbb-4880-b5fb-0450c848b779",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "san_pablo.to_file('san_pablo.geojson')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "edfc5475-af4a-42a9-81ea-f041f7135938",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "san_pablo_results = gpd.read_parquet('outputs/sanpablo_trips_with_uza.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2eb69c0d-5508-487b-bbbf-a70a478ce164",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dff30ab0-80d7-43f8-b276-ad4f8e877f26",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a2eb977c-6779-4916-8576-385c6808e21a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "san_pablo_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d5dea582-473e-4157-98bb-5a3572631b42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "san_pablo_results.sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "369166fe-baf8-4bc2-86f8-c637b86c23bd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "san_pablo_results.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "73b906dd-19e4-496e-b468-bd47fb3082be",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "## Eureka H Street/Rainbow Route"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7b248c72-9b00-4451-9843-02cb2c80c39a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eureka = feeds >> filter(_.name.str.contains('Humboldt Schedule'))\n",
+    "eureka"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2136f99f-3c01-406e-8c60-9b3bba2f9920",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, eureka.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3b75f1b9-184c-4667-81a4-261b1105249e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, eureka.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3030e4cc-b573-4083-9e2c-ee8c947160ef",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips.route_short_name.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4750a733-a639-4b7f-a78f-da1683c6a594",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips.route_long_name.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "19722dc1-21ad-4186-93d3-36b6522ba246",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_rainbow = trips >> filter(_.route_long_name.isin(['Rainbow Route']), _.direction_id == 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "24226a59-fc6c-4f75-ab46-70d561a2d20e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_rainbow"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c1b2637-2446-4efd-b61a-19c08d534292",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "93a000d8-6e6b-47aa-9a82-98932989ba7b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "st_rainbow = trips_to_stops(trips_rainbow, eureka.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eeaeaafc-819c-4727-9768-2a904a6437df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "st_rainbow = st_rainbow >> filter(_.stop_sequence >= 35)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb39f64d-adaf-4391-b205-1b12ef6a1760",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# st_rainbow.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "370fca77-e994-471f-8888-010b52738bec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eureka = sjoin_tracts(st_rainbow, tracts, 804) #  half-mile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9f2386c4-bc1b-4b3b-bb80-16b04fe30112",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "eureka.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e604495b-b0bf-46ce-8e14-ccac930dafbc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eureka.to_file('eureka.geojson')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ef087597-674c-4256-8ce4-6ca7c0617665",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eureka_results = gpd.read_parquet('outputs/eureka_trips_with_uza.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "74c2a66f-ff8e-482f-9c57-da742fbe5f42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e7299e91-893e-4b81-979a-f700196c6a96",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ff25cc5-b2aa-4e71-a5dc-2d70396c3805",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eureka_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3020e29d-a035-4248-b990-efc0947d02dd",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eureka_results.sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3a4e5ef0-f5d0-43ba-9895-d5542989383c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "eureka_results.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6d4fdb29-2b3b-4055-ada9-b5b149db9f6c",
+   "metadata": {},
+   "source": [
+    "# All Corridors Summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "98680b32-440f-4fc2-867c-ede1a1967393",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fresno_results['corridor'] = 'Fresno'\n",
+    "san_pablo_results['corridor'] = 'San Pablo Ave'\n",
+    "wilshire_results['corridor'] = 'Wilshire'\n",
+    "eureka_results['corridor'] = 'Eureka'"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "263af4e4-07fe-4f74-ba78-b54829905a40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_results = pd.concat([fresno_results, san_pablo_results, wilshire_results, eureka_results])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d3249b0a-2d67-4dba-bb58-7ab50b930185",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "(all_results >> group_by(_.corridor)\n",
+    "             >> summarize(total_new_transit_trips = _.projected_new_transit_trips.sum(),\n",
+    "                          total_population = _.total_pop.sum(),\n",
+    "                          total_vmt = _.total_mi_auto.sum(),\n",
+    "                          p50_auto_trip_mi = _.p50_mi_auto.quantile(.5),\n",
+    "                          total_auto_trips = _.total_trips_auto.sum()\n",
+    "                         )\n",
+    "\n",
+    ").to_csv('vmt_transit_corridors.csv')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "3a01d280-a612-4e72-8b06-e98aae3426d6",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "## Redding Route 4"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c2fd7bc1-a575-4b5b-adb3-63c7866cacc2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "redding = feeds >> filter(_.name.str.contains('Redding'))\n",
+    "redding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "909cd786-51d5-40fb-b997-532e67378fe7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, redding.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "334d3d45-0f38-4fb8-ae58-814d6429eee0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, redding.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "70b8f4d5-c70e-4320-9756-c13d9c919a58",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips.route_short_name.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8cece935-15cf-4eab-a58f-311a0927d8ae",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_4 = trips >> filter(_.route_short_name.isin(['4']), _.direction_id == 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b56d6acc-d992-4d48-a5e4-affb42e3605f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# trips_1 = trips_1 >> filter(_.trip_instance_key == 'db65a5adda0fc0a2744580354516ac68')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "1b27cec5-ea22-4265-b7cb-6898794ae577",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "st_4 = trips_to_stops(trips_4, redding.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2e239235-82d3-42ed-963d-d2d196fb1d8a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# st_4.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "af4426e8-4d86-4ed9-9cd4-096d4df9eeaa",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "redding = sjoin_tracts(st_4, tracts, 804) #  half-mile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c325c88e-7ddf-4f24-85f5-b2e49ea88dd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# redding.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5387c9f7-9b77-49ef-b934-dc4f56836e28",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "redding.to_file('redding.geojson')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5cf3dfc7-2893-4cde-bae7-8e9fcdb751ca",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "redding_results = gpd.read_parquet('outputs/redding_trips_with_uza.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e947b18-2710-4d71-89c4-0d256524e774",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (wilshire_results >> select(-_.geometry)).to_csv('wilshire.csv')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b4c33a16-fb03-4d87-bd02-1a957b17be51",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# utils.make_zipped_shapefile(wilshire_results, 'wilsh')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0cd7c5d1-499b-4109-b942-d1e3f93e6c97",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "redding_results.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6e439a7b-5a7b-4aa7-bb80-5626f24600c6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "redding_results.sum()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "4f883ae3-9dbd-4a39-935e-9e521159e7b3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "redding_results.describe()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "086c18f7-c7cf-41a2-9147-727740f781e5",
+   "metadata": {
+    "jp-MarkdownHeadingCollapsed": true,
+    "tags": []
+   },
+   "source": [
+    "## MST (table)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "5f958167-a291-4aba-8566-111c5e713be9",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "mst = feeds >> filter(_.name.str.contains('Monterey'))\n",
+    "mst"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d68764b0-476a-4af8-8560-b931a8afba51",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stops = shared_utils.gtfs_utils_v2.get_stops(analysis_date, mst.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3bbf4ee7-7bd2-4ae2-968b-b17a5ef2514d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "stops.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f07ef246-b92d-4e00-989b-b1800744a694",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips = shared_utils.gtfs_utils_v2.get_trips(analysis_date, mst.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "2fd8b773-d935-4c13-8f9e-84a8cba153c9",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "trips.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8fe0c258-f084-4030-94e6-b0a44f5f5498",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips.route_short_name.unique()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a6763fd7-c9cc-44e1-b7e9-8c8c919e3f91",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "ab_trips = trips >> filter(_.route_short_name.isin(['A', 'B']), _.direction_id == 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "f9151728-3f87-45b0-a799-eb2b126ea434",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "trips_20 = trips >> filter(_.route_short_name == '20', _.direction_id == 0)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "44c2f67b-74b7-4551-82b9-167f2744081b",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "st_20 = shared_utils.gtfs_utils_v2.get_stop_times(analysis_date, mst.feed_key, trip_df=trips_20)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fc5f463c-b3c6-4f80-86d4-9835c650eebb",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "st_20 = st_20 >> distinct(_.stop_id, _.stop_sequence) >> collect()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "23f3a7b3-cd32-480f-ab1f-cc616c02e77a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "st_20 = stops >> select(_.stop_id, _.geometry) >> inner_join(_, st_20, on='stop_id')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8b71bbe9-26d0-42ec-8b30-a7cfdee2236e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "st_20 = trips_to_stops(trips_20, mst.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "99b2afea-3280-422c-b3b7-6e1c5ff54b5d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#  SURF BRT area for joins...\n",
+    "st_20 = st_20 >> filter(_.stop_sequence <= 27)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "c3da388c-807d-424b-9e69-51588401ef2a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# st_20.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "0bfacea0-f958-4a65-90f3-2fec0ab04fd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "st_ab = trips_to_stops(ab_trips, mst.feed_key)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "25ab4e55-c0cf-4919-b9ef-41e665c9a136",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# st_ab.explore()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "27a834eb-34b0-46a4-9f0d-75534812a336",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "surf_corridor = pd.concat([st_20, st_ab])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72671c73-1331-4888-a6e4-5a8cc3a34a16",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "surf_corridor.explore()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.13"
+  },
+  "widgets": {
+   "application/vnd.jupyter.widget-state+json": {
+    "state": {},
+    "version_major": 2,
+    "version_minor": 0
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb
index c04a89e70..2cc71e981 100644
--- a/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb
+++ b/sb125_analyses/vmt_transit_sketch/read_process_data.ipynb
@@ -2,19 +2,20 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 1,
    "id": "9e8158c2-a7f9-4b3c-a518-037132adf0c3",
    "metadata": {},
    "outputs": [],
    "source": [
     "import pandas as pd\n",
     "import geopandas as gpd\n",
-    "from siuba import *"
+    "from siuba import *\n",
+    "import numpy as np"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 2,
    "id": "81cddca7-bea4-494d-b0cb-02508d52b380",
    "metadata": {},
    "outputs": [],
@@ -22,6 +23,60 @@
     "import zipfile"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from calitp_data_analysis import get_fs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "80324d88-ea3a-45a6-9362-933a2395ed31",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "fs = get_fs()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "3dcaf650-43fe-4532-9060-442b067ef173",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "# ! pip install pygris"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "682268e8-78fc-4609-afc5-294f3c650b5e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import _utils\n",
+    "# import importlib\n",
+    "# importlib.reload(_utils)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5aeca438-6285-4cca-b375-ab8aa3849e42",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "GCS_PATH = 'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/'"
+   ]
+  },
   {
    "cell_type": "markdown",
    "id": "b076a21f-5a53-4b75-b140-0e4947099e42",
@@ -34,289 +89,266 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 8,
    "id": "85a89737-f90d-488f-9310-ca83557e476c",
-   "metadata": {},
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
     "def read_group_replica(zip_path):\n",
     "    '''\n",
-    "    zip_path: path to zip file containing a Replica trips export\n",
+    "    zip_path: local or gs:// path to a zip file containing exactly one Replica trips export csv\n",
     "    '''\n",
-    "    replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'\n",
-    "    with zipfile.ZipFile(zip_path) as z:\n",
-    "        with z.open(replica_filename) as f:\n",
+    "    def parse_csv(z):\n",
+    "        csvs = [f for f in z.namelist() if f[-3:] == 'csv']\n",
+    "        assert len(csvs) == 1\n",
+    "        with z.open(csvs[0]) as f:\n",
     "            df = pd.read_csv(f)\n",
+    "        return df\n",
+    "    \n",
+    "    if zip_path[:3] == 'gs:':\n",
+    "        with fs.open(zip_path) as f:\n",
+    "            with zipfile.ZipFile(f) as z:\n",
+    "                df = parse_csv(z)\n",
+    "    else:\n",
+    "        with zipfile.ZipFile(zip_path) as z:\n",
+    "            df = parse_csv(z)\n",
+    "            \n",
+    "\n",
     "    df = (df >> filter(_.primary_mode.isin(['private_auto', 'auto_passenger', 'on_demand_auto', 'public_transit']))\n",
     "     >> select(-_.origin_trct_2020, -_.activity_id)\n",
     "         )\n",
     "    df['is_auto'] = df.primary_mode.str.contains('auto')\n",
+    "    return df  # NOTE(review): early return yields the filtered trip-level rows; the grouping below is unreachable\n",
     "    grouped = (df >> group_by(_.origin_trct_fips_2020, _.is_auto)\n",
     "                  >> summarize(n = _.shape[0], p50_distance = _.trip_distance_miles.quantile(.5),\n",
     "                               p75_distance = _.trip_distance_miles.quantile(.75),\n",
     "                               p90_distance = _.trip_distance_miles.quantile(.9),\n",
-    "                               total_miles = _.trip_distance_miles.sum(),\n",
+    "                               total_miles = _.trip_distance_miles.sum(),                               \n",
     "                              )\n",
     "        )\n",
-    "    # parquet_path = f'./intermediate/{zip_path.split(\".zip\")[0]}.parquet'\n",
-    "    # grouped.to_parquet(parquet_path)\n",
-    "    # print(f'grouped data -> {parquet_path}')\n",
+    "\n",
     "    return grouped"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "ec3470ac-5f0e-49a0-9000-f371f952bc74",
+   "execution_count": 9,
+   "id": "f0df73e2-7ebe-431f-b533-6139cc9b79c0",
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/replica_raw/corridors/replica-fresno-trips_dataset.zip'"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
-    "zip_path = f'replica_raw/replica-la_north-trips_dataset.zip'"
+    "f'{GCS_PATH}replica_raw/corridors/replica-fresno-trips_dataset.zip'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "868a0ae4-b076-4775-beba-fdc9ba764b27",
+   "execution_count": 10,
+   "id": "c506f859-1ffc-4e36-ba03-1c4393ba4d9e",
    "metadata": {},
    "outputs": [],
    "source": [
-    "replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'"
+    "fresno_raw = read_group_replica(f'{GCS_PATH}replica_raw/corridors/replica-fresno-trips_dataset.zip')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "b4c08637-9bbc-4727-af6d-14dc1c66b4a1",
+   "execution_count": 11,
+   "id": "51c4c2e4-91d8-47ef-b2a2-ca30c6e2b84f",
    "metadata": {},
    "outputs": [
     {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_472/3605967939.py:3: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>trip_duration_minutes</th>\n",
+       "      <th>trip_distance_miles</th>\n",
+       "      <th>origin_trct_fips_2020</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>count</th>\n",
+       "      <td>156196.000000</td>\n",
+       "      <td>156196.000000</td>\n",
+       "      <td>1.561960e+05</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>mean</th>\n",
+       "      <td>8.027331</td>\n",
+       "      <td>2.489494</td>\n",
+       "      <td>6.019003e+09</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>std</th>\n",
+       "      <td>6.041116</td>\n",
+       "      <td>2.305581</td>\n",
+       "      <td>1.731910e+03</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>min</th>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>0.100000</td>\n",
+       "      <td>6.019000e+09</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25%</th>\n",
+       "      <td>3.000000</td>\n",
+       "      <td>0.800000</td>\n",
+       "      <td>6.019002e+09</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>50%</th>\n",
+       "      <td>7.000000</td>\n",
+       "      <td>1.600000</td>\n",
+       "      <td>6.019004e+09</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>75%</th>\n",
+       "      <td>11.000000</td>\n",
+       "      <td>3.500000</td>\n",
+       "      <td>6.019005e+09</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>max</th>\n",
+       "      <td>93.000000</td>\n",
+       "      <td>17.800000</td>\n",
+       "      <td>6.019005e+09</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "       trip_duration_minutes  trip_distance_miles  origin_trct_fips_2020\n",
+       "count          156196.000000        156196.000000           1.561960e+05\n",
+       "mean                8.027331             2.489494           6.019003e+09\n",
+       "std                 6.041116             2.305581           1.731910e+03\n",
+       "min                 0.000000             0.100000           6.019000e+09\n",
+       "25%                 3.000000             0.800000           6.019002e+09\n",
+       "50%                 7.000000             1.600000           6.019004e+09\n",
+       "75%                11.000000             3.500000           6.019005e+09\n",
+       "max                93.000000            17.800000           6.019005e+09"
+      ]
+     },
+     "execution_count": 11,
+     "metadata": {},
+     "output_type": "execute_result"
     }
    ],
    "source": [
-    "with zipfile.ZipFile(zip_path) as z:\n",
-    "    with z.open(replica_filename) as f:\n",
-    "        df = pd.read_csv(f)"
+    "fresno_raw.describe()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 12,
-   "id": "5875f7f5-2c1f-40f6-95fa-0a8d106b1e7a",
+   "id": "7122b15b-574a-4cf1-84ab-0af08f703e57",
    "metadata": {},
    "outputs": [],
    "source": [
-    "miles_all = df.trip_distance_miles.sum()"
+    "# (wilshire_raw >> filter(_.is_auto)).trip_distance_miles.hist()"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 13,
-   "id": "01a823a1-de52-4d08-8b80-a024c1397f95",
+   "id": "9423782c-0b5a-474a-bf5a-99ae2e6812b3",
    "metadata": {},
    "outputs": [],
    "source": [
-    "shorter = (df >> filter(_.trip_distance_miles < _.trip_distance_miles.quantile(.95))).trip_distance_miles.sum()"
+    "# (wilshire_raw >> filter(_.is_auto, _.trip_distance_miles < 4)).trip_distance_miles.hist()"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": 14,
-   "id": "132b4a03-9f73-4978-8a35-ae5c130f7f73",
+   "cell_type": "markdown",
+   "id": "412218ad-4f25-49de-92df-a00cc6becc70",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "0.5640656816072517"
-      ]
-     },
-     "execution_count": 14,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
    "source": [
-    "shorter / miles_all"
+    "## grouping"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
-   "id": "6c80cc92-a452-4d92-bf2f-d1f40a83442d",
+   "execution_count": 14,
+   "id": "dae2c9bb-5121-4966-85ed-111bd525c924",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# zip_path = 'replica-la_north-trips_dataset.zip'\n",
-    "\n",
-    "# replica_filename = 'replica-mode_split_test-02_01_24-trips_dataset.csv'\n",
-    "# with zipfile.ZipFile(zip_path) as z:\n",
-    "#     with z.open(replica_filename) as f:\n",
-    "#         df = pd.read_csv(f)"
+    "all_regions = ['central_a', 'central_b', 'north', 'la_north',\n",
+    "              'la_south', 'sandiego', 'socal_a', 'socal_b']"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
-   "id": "0b50f321-feee-4789-8f17-eee4f4a59f3f",
+   "execution_count": 15,
+   "id": "15544ecf-1140-426f-be93-5ce53e2b5f7e",
    "metadata": {},
    "outputs": [],
    "source": [
-    "# df >> head(10)"
+    "# grouped = pd.DataFrame()\n",
+    "# for region in ['eureka']:\n",
+    "#     print(region)\n",
+    "#     #  note replica filename includes date of download...\n",
+    "#     df = read_group_replica(f'replica_raw/corridors/replica-{region}-trips_dataset.zip')\n",
+    "#     grouped = pd.concat([grouped, df])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
-   "id": "dae2c9bb-5121-4966-85ed-111bd525c924",
+   "execution_count": 16,
+   "id": "1878b238-eec9-450c-bf3e-359016485495",
    "metadata": {},
    "outputs": [],
    "source": [
-    "all_regions = ['central_a', 'central_b', 'north', 'la_north',\n",
-    "              'la_south', 'sandiego', 'socal_a', 'socal_b']"
+    "# grouped.to_parquet('intermediate/eureka_grouped.parquet')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 17,
    "id": "9fb929ae-0c66-4dfb-9a0e-604bfedef078",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "central_a\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "central_b\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "north\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "la_north\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "la_south\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "sandiego\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "socal_a\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "socal_b\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/tmp/ipykernel_444/3995037246.py:8: DtypeWarning: Columns (6,7,8) have mixed types. Specify dtype option on import or set low_memory=False.\n",
-      "  df = pd.read_csv(f)\n"
-     ]
-    }
-   ],
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
    "source": [
-    "grouped = pd.DataFrame()\n",
-    "for region in all_regions:\n",
-    "    print(region)\n",
-    "    df = read_group_replica(f'replica_raw/replica-{region}-trips_dataset.zip')\n",
-    "    grouped = pd.concat([grouped, df])\n",
+    "# grouped = pd.DataFrame()\n",
+    "# for region in all_regions:\n",
+    "#     print(region)\n",
+    "#     df = read_group_replica(f'replica_raw/replica-{region}-trips_dataset.zip')\n",
+    "#     grouped = pd.concat([grouped, df])\n",
     "\n",
-    "grouped.to_parquet('intermediate/replica_grouped.parquet')"
+    "# grouped.to_parquet('intermediate/replica_grouped.parquet')"
    ]
   },
   {
@@ -326,44 +358,85 @@
    "source": [
     "# Read back in grouped data\n",
     "\n",
-    "* number of trips, median distance, and total miles travelled by auto yes/no and Census tract"
+    "* number of trips, median distance, and total miles travelled by auto yes/no and Census tract\n",
+    "* TODO non-manual regions :)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 18,
    "id": "240ddd8f-6a36-44e1-a1c0-32e6f50c3cac",
    "metadata": {},
    "outputs": [],
    "source": [
     "# grouped = pd.DataFrame()\n",
     "# for region in all_regions:\n",
-    "#     grouped = pd.concat([grouped, pd.read_parquet(f'intermediate/replica-{region}-trips_dataset.parquet')])"
+    "#     grouped = pd.concat([grouped, pd.read_parquet(f'{GCS_PATH}intermediate/replica-{region}-trips_dataset.parquet')])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 33,
+   "execution_count": 19,
+   "id": "0de2a7b3-f1f1-43df-8e7e-2cf4b416378c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# grouped"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "f1867025-ad72-4ff5-931c-30982a65f0af",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from calitp_data_analysis import utils"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "79dafb24-bebd-4ae6-91d4-8d8e80983c76",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# utils.geoparquet_gcs_export?"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "56e5f48c-649f-4e8f-8ef2-49ce8521cf2c",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# utils.geoparquet_gcs_export(grouped, f'{GCS_PATH}intermediate/', 'replica_grouped')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 23,
    "id": "7596b001-2c1c-488f-b089-c375c0ddff4f",
    "metadata": {},
    "outputs": [],
    "source": [
-    "replica_grouped = pd.read_parquet('intermediate/replica_grouped.parquet')"
+    "replica_grouped = pd.read_parquet(f'{GCS_PATH}intermediate/replica_grouped.parquet')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 34,
+   "execution_count": 24,
    "id": "23b9ae5b-bd02-4978-b7d1-3e4b49de53b5",
    "metadata": {},
    "outputs": [],
    "source": [
-    "tracts_feeds = gpd.read_parquet('intermediate/feeds_tract_geo.parquet')"
+    "tracts_feeds = gpd.read_parquet(f'{GCS_PATH}intermediate/feeds_tract_geo.parquet')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 35,
+   "execution_count": 25,
    "id": "73088513-9710-4c9a-a205-d531c3807345",
    "metadata": {},
    "outputs": [],
@@ -373,7 +446,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 36,
+   "execution_count": 26,
    "id": "61ce4f74-45e9-4c85-ad34-d2ac9c5cfc32",
    "metadata": {},
    "outputs": [],
@@ -383,7 +456,48 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 37,
+   "execution_count": 27,
+   "id": "3f9512da-00e9-4768-a79c-8570c4a0bec2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# replica_grouped = pd.read_parquet(f'intermediate/wilshire_grouped.parquet')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "240b6b96-ea68-49f2-8509-883af8f0c36b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "11288.75"
+      ]
+     },
+     "execution_count": 28,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "18062 / 1.6"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "fa0e75af-11d9-4051-8b0f-3a8d95c166be",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# (replica_grouped >> filter(_.is_auto))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
    "id": "06f007da-ad8d-4801-8595-85c5d44fe921",
    "metadata": {},
    "outputs": [],
@@ -393,20 +507,27 @@
     "    replica_df: df from read_group_replica\n",
     "    tracts_feeds_df: gdf from stops_by_tract_agency\n",
     "    '''\n",
+    "    auto_trip_counts = (replica_df >> filter(_.is_auto)\n",
+    "                        >> group_by(_.origin_trct_fips_2020)\n",
+    "                        >> summarize(auto_trips = _.n.sum())\n",
+    "                       )\n",
     "    replica_df.is_auto = replica_df.is_auto.map(lambda x: 'yes' if x else 'no')\n",
     "    \n",
     "    df2 = replica_df >> spread('is_auto', 'p50_distance') >> select(-_.n, -_.total_miles)\n",
     "    df2 = df2.rename(columns={'no': 'p50_mi_transit', 'yes': 'p50_mi_auto'})\n",
-    "    \n",
-    "    df3 = replica_df >> spread('is_auto', 'total_miles') >> select(-_.p50_distance, -_.n)\n",
+    "    df3 = replica_df >> spread('is_auto', 'total_miles') >> select(-_.n, -_.p50_distance)\n",
     "    df3 = df3.rename(columns={'no': 'total_mi_transit', 'yes': 'total_mi_auto'})\n",
-    "    \n",
     "    df2 = df2 >> inner_join(_, df3, on = 'origin_trct_fips_2020')\n",
+    "    df2 = df2 >> inner_join(_, auto_trip_counts, on = 'origin_trct_fips_2020')\n",
     "    \n",
+    "    if 'p50_mi_transit' not in df2.columns:\n",
+    "        df2['p50_mi_transit'] = np.nan\n",
+    "        df2['total_mi_transit'] = np.nan\n",
     "    df2 = (df2 >> group_by(_.origin_trct_fips_2020)\n",
     "           >> summarize(p50_mi_transit = _.p50_mi_transit.max(), p50_mi_auto = _.p50_mi_auto.max(),\n",
     "                        total_mi_transit = _.total_mi_transit.max(),\n",
-    "                        total_mi_auto = _.total_mi_auto.max()\n",
+    "                        total_mi_auto = _.total_mi_auto.max(),\n",
+    "                        total_trips_auto = _.auto_trips.sum()\n",
     "                       )\n",
     "          )\n",
     "    \n",
@@ -431,7 +552,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 38,
+   "execution_count": 31,
    "id": "d661657b-cac7-4e9a-806d-48579c0516f9",
    "metadata": {
     "tags": []
@@ -446,12 +567,12 @@
     }
    ],
    "source": [
-    "df2 = process_grouped_data(replica_grouped, tracts_feeds)"
+    "processed_df = process_grouped_data(replica_grouped, tracts_feeds)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 39,
+   "execution_count": 32,
    "id": "50f04f86-6f13-4654-952e-58dc07a23faa",
    "metadata": {},
    "outputs": [
@@ -481,6 +602,7 @@
        "      <th>p50_mi_auto</th>\n",
        "      <th>total_mi_transit</th>\n",
        "      <th>total_mi_auto</th>\n",
+       "      <th>total_trips_auto</th>\n",
        "      <th>p50_transit_longer</th>\n",
        "      <th>no_transit_replica</th>\n",
        "      <th>no_transit</th>\n",
@@ -497,6 +619,7 @@
        "      <td>6.9</td>\n",
        "      <td>5185.3</td>\n",
        "      <td>165410.8</td>\n",
+       "      <td>53636</td>\n",
        "      <td>False</td>\n",
        "      <td>False</td>\n",
        "      <td>False</td>\n",
@@ -511,6 +634,7 @@
        "      <td>3.4</td>\n",
        "      <td>3638.1</td>\n",
        "      <td>58763.4</td>\n",
+       "      <td>28600</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
        "      <td>False</td>\n",
@@ -525,6 +649,7 @@
        "      <td>3.7</td>\n",
        "      <td>6130.7</td>\n",
        "      <td>122736.5</td>\n",
+       "      <td>57740</td>\n",
        "      <td>True</td>\n",
        "      <td>False</td>\n",
        "      <td>False</td>\n",
@@ -542,29 +667,29 @@
        "1             6001400200             4.1          3.4            3638.1   \n",
        "2             6001400300             4.7          3.7            6130.7   \n",
        "\n",
-       "   total_mi_auto  p50_transit_longer  no_transit_replica  no_transit  \\\n",
-       "0       165410.8               False               False       False   \n",
-       "1        58763.4                True               False       False   \n",
-       "2       122736.5                True               False       False   \n",
+       "   total_mi_auto  total_trips_auto  p50_transit_longer  no_transit_replica  \\\n",
+       "0       165410.8             53636               False               False   \n",
+       "1        58763.4             28600                True               False   \n",
+       "2       122736.5             57740                True               False   \n",
        "\n",
-       "   total_mi  new_transit_mi  projected_new_transit_trips  \n",
-       "0  170596.1    45956.698096                      10942.0  \n",
-       "1   62401.5    16326.454094                       3982.0  \n",
-       "2  128867.2    34100.338526                       7255.0  "
+       "   no_transit  total_mi  new_transit_mi  projected_new_transit_trips  \n",
+       "0       False  170596.1    45956.698096                      10942.0  \n",
+       "1       False   62401.5    16326.454094                       3982.0  \n",
+       "2       False  128867.2    34100.338526                       7255.0  "
       ]
      },
-     "execution_count": 39,
+     "execution_count": 32,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df2 >> head(3)"
+    "processed_df >> head(3)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 40,
+   "execution_count": 33,
    "id": "8169ca83-e540-48f4-af93-076a535f00c6",
    "metadata": {},
    "outputs": [
@@ -614,41 +739,30 @@
        "1        True   1.294277e+08"
       ]
      },
-     "execution_count": 40,
+     "execution_count": 33,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "no_transit = df2 >> group_by(_.no_transit) >> summarize(total_mi_auto = _.total_mi_auto.sum())\n",
+    "no_transit = processed_df >> group_by(_.no_transit) >> summarize(total_mi_auto = _.total_mi_auto.sum())\n",
     "no_transit"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 41,
+   "execution_count": 34,
    "id": "c7175031-a66c-4f90-8dca-193198b9d932",
    "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'11.0 percent of VMT in tracts with no transit per GTFS Warehouse stops'"
-      ]
-     },
-     "execution_count": 41,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
+   "outputs": [],
    "source": [
-    "no_transit_pct = no_transit.iloc[1, 1] / no_transit.iloc[0, 1]\n",
-    "f'{round(no_transit_pct*100, 0)} percent of VMT in tracts with no transit per GTFS Warehouse stops'"
+    "# no_transit_pct = no_transit.iloc[1, 1] / no_transit.iloc[0, 1]\n",
+    "# f'{round(no_transit_pct*100, 0)} percent of VMT in tracts with no transit per GTFS Warehouse stops'"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 42,
+   "execution_count": 35,
    "id": "6476da44-36ac-4604-89d5-76966eb9411d",
    "metadata": {},
    "outputs": [
@@ -660,42 +774,19 @@
        "Name: p50_transit_longer, dtype: int64"
       ]
      },
-     "execution_count": 42,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "df2.p50_transit_longer.value_counts()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 43,
-   "id": "682268e8-78fc-4609-afc5-294f3c650b5e",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "<module '_utils' from '/home/jovyan/data-analyses/sb125_analyses/vmt_transit_sketch/_utils.py'>"
-      ]
-     },
-     "execution_count": 43,
+     "execution_count": 35,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "import _utils\n",
-    "import importlib\n",
-    "importlib.reload(_utils)"
+    "processed_df.p50_transit_longer.value_counts()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 44,
-   "id": "e83400ec-c295-40b3-91d6-3c3bd1ffb5f4",
+   "execution_count": 36,
+   "id": "70bdaf9e-397b-46d4-bc0a-a73722cf8591",
    "metadata": {},
    "outputs": [
     {
@@ -712,357 +803,169 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 45,
-   "id": "76766b3c-efc1-4ad7-9d47-3f465c616abf",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# tract_geo = gpd.read_file('./tl_2020_06_tract.zip') >> select(_.GEOID, _.geometry)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 46,
-   "id": "876be135-baf9-41bb-880f-22cf9df2c4a9",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "tract_geo.GEOID = tract_geo.GEOID.astype('int64')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 47,
-   "id": "03ecd820-d9e5-4c08-82ae-0929638e3af4",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "gdf = tract_geo >> inner_join(_, df2, on = {'GEOID': 'origin_trct_fips_2020'}) >> select(-_.origin_trct_fips_2020)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 48,
-   "id": "3a830524-3c8f-4915-8768-3062ddbf7db0",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "ca_uzas = gpd.read_parquet('intermediate/ca_uza.parquet')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 49,
-   "id": "d00b301f-ec03-4525-b415-53f9f65a77c2",
+   "execution_count": 37,
+   "id": "c10b8a4f-b3de-4494-81b7-942c2b230c30",
    "metadata": {},
    "outputs": [],
    "source": [
-    "uza_joined = gpd.sjoin(gdf, ca_uzas, how = 'left')"
+    "ca_uzas = gpd.read_parquet(f'{GCS_PATH}intermediate/ca_uza.parquet')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 50,
-   "id": "4cb23dd6-94b8-4b36-952b-054a06c04192",
+   "execution_count": 59,
+   "id": "8d2f3dec-892d-4ffc-85fb-8e8866a51f42",
    "metadata": {},
    "outputs": [],
    "source": [
-    "#  TODO to util, other source?\n",
-    "\n",
-    "census_pop = gpd.read_file('census_ntd/DECENNIALPL2020.P1_2024-02-01T163251.zip')\n",
-    "\n",
-    "tract_pop = census_pop[['GEO_ID', 'P1_001N']].iloc[2:,:]\n",
+    "# census_pop = gpd.read_file(f'./census_ntd/DECENNIALPL2020.P1_2024-02-01T163251.zip')\n",
     "\n",
-    "tract_pop.GEO_ID = tract_pop.GEO_ID.map(lambda x: x.split('US')[1])\n",
+    "# census_cleaned = census_pop.iloc[2:,:][['GEO_ID', 'P1_001N']]\n",
     "\n",
-    "tract_pop.GEO_ID = tract_pop.GEO_ID.astype('int64')\n",
-    "\n",
-    "tract_pop = tract_pop >> select(_.total_pop == _.P1_001N, _.GEOID == _.GEO_ID)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 51,
-   "id": "31623fcf-efd7-4863-8f12-71a04ac45410",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "uza_joined = uza_joined >> inner_join(_, tract_pop, on = 'GEOID')"
+    "# census_cleaned.to_parquet(f'{GCS_PATH}census_ntd/DECENNIALPL2020.parquet')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 52,
-   "id": "fb733ab6-007a-4747-91dd-ec40688a066f",
+   "execution_count": 60,
+   "id": "d7b17b29-29b2-40f3-9038-c3b0b75f39f8",
    "metadata": {},
    "outputs": [],
    "source": [
-    "uza_joined.total_pop = uza_joined.total_pop.astype('int64')\n",
-    "uza_joined['new_trips_per_capita'] = uza_joined.projected_new_transit_trips / uza_joined.total_pop"
+    "census_cleaned = pd.read_parquet(f'{GCS_PATH}census_ntd/DECENNIALPL2020.parquet')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 53,
-   "id": "addb6ac5-5f4d-41dc-9a7a-0d6644bb699c",
+   "execution_count": 61,
+   "id": "8995132b-5eba-48e8-bfdb-585f1ff1e9d8",
    "metadata": {},
    "outputs": [],
    "source": [
-    "uza_joined.to_parquet('outputs/new_trips_with_uza.parquet')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 34,
-   "id": "666ebaa0-7d29-4f56-a3fd-5ad7391b15c8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# uza_joined = uza_joined >> filter(_.P1_001N != 0) # remove tracts where nobody lives"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "019d1eaf-c3a7-4b71-a0b9-f9b041a04280",
-   "metadata": {},
-   "source": [
-    "# \"What if VMT decreased by 25% per the CARB target, and all those trips were on (existing) transit instead?\"\n",
-    "\n",
-    "## VMT is a spatial phenomenon, our analysis should be spatial too\n",
-    "\n",
-    "* Start with \"big data\" weekday residential VMT per Census tract via Replica\n",
-    "* Per target, future VMT should be 25% less\n",
-    "* Assume tripmaking remains constant, and that transit entirely replaces that VMT\n",
-    "    * optional: find tracts with no transit service, hold their VMT constant and redistribute missed target among remaining tracts (30% reduction instead of 25% perhaps?)\n",
+    "def attach_tracts_pop(processed_df, tract_geo, ca_uzas, census_cleaned):\n    '''Inner-join processed_df to tract_geo on tract FIPS, left spatial-join to ca_uzas, inner-join census population as total_pop, and add new_trips_per_capita. Returns the joined frame.'''\n",
     "    \n",
-    "## From reduced VMT to transit trips\n",
-    "\n",
-    "* Replica gives transit trip lengths but it may not be reliable (\"good for auto, less so for transit\")\n",
-    "    * It's generally showing the median transit trip as longer than the median auto trip, which seems questionable\n",
-    "    * We have plenty of good spatial data on transit service _provision_, but not ridership (generally agency-level only)\n",
-    "    * May need to refer to research/default to a fixed \"median transit trip\" length based on population density\n",
-    "* Regardless, get a rough estimate by dividing reduced VMT in each tract by median transit trip distance\n",
-    "* Reality check using derived modeshare number?\n",
-    "\n",
-    "## Connecting our estimate to California's transit provider landscape\n",
-    "\n",
-    "* Proportionally assign new trips per census tract to transit operators\n",
-    "    * ~By number of stops in tract? OK for bus but will dramatically undercount rail~\n",
-    "    * By each operator's proportion of regional ridership (from NTD)? Will overcount in tracts on the edge of large operator service areas, but perhaps preferable\n",
-    "* Can then create operator-level estimates of increased ridership and service hour provision\n",
-    "    * This is where we have the best estimates of existing ridership..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "id": "2db21010-14d2-47d1-b8be-9fd270cca949",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import geopandas as gpd"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "345b6bfd-f569-442f-9641-4f5f5dd4c40c",
-   "metadata": {},
-   "source": [
-    "## Mapping..."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "b84ee354-9bc8-4808-b89e-39de27d24b71",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "gdf.explore(column = 'total_mi_transit', scheme = 'NaturalBreaks')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "2bde07bb-30d9-48b7-86ae-1055c15c4aac",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "gdf.explore(column = 'total_mi_auto', scheme = 'NaturalBreaks')"
+    "    tract_geo.GEOID = tract_geo.GEOID.astype('int64')  # NOTE: casts GEOID on the caller's tract_geo in place\n",
+    "    gdf = (tract_geo >> inner_join(_, processed_df, on = {'GEOID': 'origin_trct_fips_2020'})\n",
+    "                     >> select(-_.origin_trct_fips_2020))\n",
+    "    uza_joined = gpd.sjoin(gdf, ca_uzas, how = 'left')\n",
+    "    \n",
+    "    tract_pop = census_cleaned.copy()  # copy: the GEO_ID rewrites below must not alter the caller's frame, otherwise a second call fails on x.split\n",
+    "    tract_pop.GEO_ID = tract_pop.GEO_ID.map(lambda x: x.split('US')[1])  # keep only the part after 'US'\n",
+    "    tract_pop.GEO_ID = tract_pop.GEO_ID.astype('int64')\n",
+    "    tract_pop = tract_pop >> select(_.total_pop == _.P1_001N, _.GEOID == _.GEO_ID)  # select and rename in one step\n",
+    "    uza_joined = uza_joined >> inner_join(_, tract_pop, on = 'GEOID')\n",
+    "    uza_joined.total_pop = uza_joined.total_pop.astype('int64')\n",
+    "    uza_joined['new_trips_per_capita'] = uza_joined.projected_new_transit_trips / uza_joined.total_pop\n",
+    "    \n",
+    "    return uza_joined"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "0a9c55c5-b17e-4e4f-b0ec-198d2cd89b14",
+   "execution_count": 63,
+   "id": "81f381a4-bb80-4fb7-97fa-853111914712",
    "metadata": {},
    "outputs": [],
    "source": [
-    "(gdf >> filter(_.no_transit)).explore()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "76a258eb-cd26-4d2e-8e0c-8ac50c5c1449",
-   "metadata": {},
-   "source": [
-    "## New transit trips"
+    "uza_joined = attach_tracts_pop(processed_df, tract_geo, ca_uzas, census_cleaned)"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "ea9768db-f59e-4d73-a21c-c903736b4421",
-   "metadata": {},
+   "id": "e0740b97-48ce-4543-a560-9936c559e4df",
+   "metadata": {
+    "tags": []
+   },
    "outputs": [],
    "source": [
-    "# gdf.explore(column = 'new_transit_mi', scheme = 'NaturalBreaks')"
+    "# for region in ['sanpablo', 'eureka', 'fresno', 'wilshire']:\n",
+    "#     replica_grouped = pd.read_parquet(f'intermediate/{region}_grouped.parquet')\n",
+    "#     processed_df = process_grouped_data(replica_grouped, tracts_feeds)\n",
+    "#     uza_joined = attach_tracts_pop(processed_df, tract_geo, ca_uzas, census_cleaned)\n",
+    "#     display(uza_joined >> head(3))\n",
+    "#     uza_joined.to_parquet(f'outputs/{region}_trips_with_uza.parquet')"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "a6d4647f-d6a3-4935-ab1f-613738ffecc5",
+   "id": "666ebaa0-7d29-4f56-a3fd-5ad7391b15c8",
    "metadata": {},
    "outputs": [],
    "source": [
-    "gdf = gdf >> filter(_.new_trips_per_capita < _.new_trips_per_capita.quantile(.99))"
+    "# uza_joined = uza_joined >> filter(_.total_pop != 0) # remove tracts where nobody lives"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "d560c555-ced9-43e5-bfb7-923a2c6a65ca",
+   "execution_count": 68,
+   "id": "6ed72b61-2c14-4eb2-a188-00384f52082a",
    "metadata": {},
    "outputs": [],
    "source": [
-    "gdf.explore(column = 'new_trips_per_capita', scheme = 'Quantiles')"
+    "uza_joined = (uza_joined\n",
+    "              >> select(-_.index_right)\n",
+    "              )"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "id": "8fb69137-fb4e-4140-9600-1155712585fc",
+   "execution_count": 71,
+   "id": "029334e4-be41-4f93-b95d-67687bd2d70e",
    "metadata": {},
    "outputs": [],
    "source": [
-    "gdf.explore(column = 'projected_new_transit_trips', scheme = 'NaturalBreaks')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 32,
-   "id": "718a534e-e8f5-4a2f-8698-f87c298d7ba0",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "17797968.0"
-      ]
-     },
-     "execution_count": 32,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "gdf.projected_new_transit_trips.sum()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "db1e79d4-2958-4483-bee9-7ea58ecd4406",
-   "metadata": {},
-   "source": [
-    "About 18 million new daily trips across LA/Orange/San Diego/Imperial Counties. For reference, LA Metro's daily ridership is around 1 million. Current regional transit modeshare is only about 5%..."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "6b708ffd-6d83-45e3-86c1-39d30ec2f5a8",
-   "metadata": {},
-   "source": [
-    "## Next Steps\n",
-    "\n",
-    "* caveat: other strategies (land use, active modes...)\n",
-    "* caveat: induced travel\n",
-    "* stratify into \"good transit, not riding\", \"bad transit\"\n",
-    "* LODES o/d data? Replica? -> Conveyal transit o/d find that \"good transit but not riding it\"\n",
-    "   * find what doesn't show up in aggregate accessibility...\n",
-    "* https://walker-data.com/pygris/"
+    "uza_joined['vmt_quantile'] = pd.qcut(uza_joined.total_mi_auto, 4, labels = ['p25', 'p50', 'p75', 'p100'])"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 99,
-   "id": "37b679d4-f8bd-4450-bf9f-50b68e8570b4",
+   "execution_count": 72,
+   "id": "8f1b132b-8f81-4923-9efe-92f3162ee64c",
    "metadata": {},
    "outputs": [],
    "source": [
-    "from calitp_data_analysis import get_fs"
+    "!mkdir -p export"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 100,
-   "id": "b0572b17-a9c2-4128-ab67-fa650c87fda0",
+   "execution_count": 75,
+   "id": "ec2cc1b4-002b-498a-b1f0-055f5252fb70",
    "metadata": {},
    "outputs": [],
    "source": [
-    "fs = get_fs()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 102,
-   "id": "76539b31-f757-4703-9f7a-2eea60834d06",
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "'gs://calitp-analytics-data/data-analyses/sb125/vmt_transit_sketch/'"
-      ]
-     },
-     "execution_count": 102,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "_utils.GCS_PATH"
+    "uza_joined.vmt_quantile = uza_joined.vmt_quantile.astype(str)"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 111,
-   "id": "1518eea5-d2f1-4807-b392-4a4f2624439d",
+   "execution_count": 79,
+   "id": "354a6bea-fbc5-49d7-b531-2597f240510e",
    "metadata": {},
    "outputs": [],
    "source": [
-    "lpath = 'replica_raw/'"
+    "utils.geoparquet_gcs_export(uza_joined, f'{GCS_PATH}outputs/', 'new_trips_with_uza')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 112,
-   "id": "e0048f15-f124-431d-9fae-35aa7ef3dd72",
+   "execution_count": 77,
+   "id": "30983d03-5c50-4480-85db-ed6666e34bd8",
    "metadata": {},
    "outputs": [
     {
-     "data": {
-      "text/plain": [
-       "[None, None, None, None, None, None, None, None, None, None]"
-      ]
-     },
-     "execution_count": 112,
-     "metadata": {},
-     "output_type": "execute_result"
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/tmp/ipykernel_532/1657757500.py:1: UserWarning: Column names longer than 10 characters will be truncated when saved to ESRI Shapefile.\n",
+      "  uza_joined.to_file('./export/vmt_with_quantiles.shp')\n"
+     ]
     }
    ],
    "source": [
-    "fs.put(lpath, _utils.GCS_PATH + lpath, recursive=True)"
+    "uza_joined.to_file('./export/vmt_with_quantiles.shp')"
    ]
   }
  ],