diff --git a/rt_segment_speeds/21_segment_comparison.ipynb b/rt_segment_speeds/21_segment_comparison.ipynb
new file mode 100644
index 000000000..1871a6ad9
--- /dev/null
+++ b/rt_segment_speeds/21_segment_comparison.ipynb
@@ -0,0 +1,331 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "1b6a55f7-2ffd-4c4c-bbee-106403d9f27c",
+ "metadata": {},
+ "source": [
+ "# Select a couple of simpler trips to compare\n",
+ "\n",
+ "* Compare methodologies, which differ when handling more complex shapes\n",
+ "* Check whether the resulting speeds still differ even on simpler shapes\n",
+ "* Start with Big Blue Bus and LA Metro"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "bd13ebd1-69b0-4fc2-8202-cc34eacb6e9e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "\n",
+ "from shared_utils import rt_dates, rt_utils\n",
+ "from segment_speed_utils.project_vars import SEGMENT_GCS\n",
+ "\n",
+ "from prep_comparison import map_one_trip\n",
+ "\n",
+ "analysis_date = rt_dates.DATES[\"sep2023\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "369c795d-5a7a-4471-9432-c0338f430b27",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "df_eric = gpd.read_parquet(f\"{SEGMENT_GCS}speeds_eric_{analysis_date}.parquet\")\n",
+ "df_tiff = gpd.read_parquet(f\"{SEGMENT_GCS}speeds_tiff_{analysis_date}.parquet\")\n",
+ "speed_df = pd.read_parquet(\n",
+ " f\"{SEGMENT_GCS}speeds_comparison_{analysis_date}.parquet\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3c362785-d64b-489b-ab5d-071525dd488d",
+ "metadata": {},
+ "source": [
+ "## Side-by-Side Maps"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "9b08e29e-b792-4187-9c2b-6c44b0c72ee9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((151014, 23), (155314, 24))"
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df_eric.shape, df_tiff.shape "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "79c973ab-d9b9-4da5-bf6b-30daaded0e84",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df_tiff.route_id.unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "58421989-2f26-44de-8431-b11185246d69",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#one_route = \"96-13168\"\n",
+ "#df_tiff[df_tiff.route_id==one_route].explore(\n",
+ "# \"route_id\", tiles = \"CartoDB Positron\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1f3c7cc5-b186-41db-9777-6df4da95486f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#df_tiff[df_tiff.route_id==one_route].trip_id.unique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "fcaa69c2-72e3-4314-b92b-38ef939c2d3b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trips_to_try = {\n",
+ " \"metro_720\": \"10720012750651-JUNE23\", #route_id: 720-13168\n",
+ " \"metro_901\": \"10901000590843-JUNE23\", #route_id: 901-13168\n",
+ " \"metro_550\": \"10550001350610-JUNE23\", # route_id: 550-13168\n",
+ " \"metro_230\": \"10230000830600-JUNE23\", # route_id: 230-13168\n",
+ " \"metro_96\": \"10096002510743-JUNE23\", # route_id: 96-13168\n",
+ " \"bbb1\": \"908521\", # route_id: 3639\n",
+ " #\"bbb2\": \"\", #route_id\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f20cce34-b4e9-40a1-8845-259b8d5b4e41",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
Make this Notebook Trusted to load map: File -> Trust Notebook
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "map_one_trip(df_eric, trips_to_try[\"metro_720\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "2feb428a-a676-413c-8777-3306de7cca67",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Make this Notebook Trusted to load map: File -> Trust Notebook
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "map_one_trip(df_tiff, trips_to_try[\"metro_720\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "320ad850-c27c-4e8a-894e-2155aaece14b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Make this Notebook Trusted to load map: File -> Trust Notebook
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "map_one_trip(df_eric, trips_to_try[\"metro_901\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "e884a2a8-4473-4e28-9d03-76746a0014c8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "Make this Notebook Trusted to load map: File -> Trust Notebook
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 10,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "map_one_trip(df_tiff, trips_to_try[\"metro_901\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "db2c3e5d-eee4-4384-ab1d-058727175f61",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_one_trip(df_eric, trips_to_try[\"metro_550\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "300a67c3-fe11-4b2c-a3e2-8e76f3df27b5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_one_trip(df_tiff, trips_to_try[\"metro_550\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "34219b56-0b5f-4530-bfa1-3d5aad3ae8da",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_one_trip(df_eric, trips_to_try[\"metro_230\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "114a5b60-f405-4eff-965a-51d7d463fcdc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_one_trip(df_tiff, trips_to_try[\"metro_230\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1786d445-5e5e-4eb9-b116-028a3c3814b1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_one_trip(df_eric, trips_to_try[\"metro_96\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "12c49bcb-5c75-4726-bd93-608ce1d73029",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_one_trip(df_tiff, trips_to_try[\"metro_96\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "aec93764-f09f-41b2-9e59-41f881cb107f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_one_trip(df_eric, trips_to_try[\"bbb1\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "314b7816-951c-4712-9e1b-a23197618acf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "map_one_trip(df_tiff, trips_to_try[\"bbb1\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "32052ff7-5f7d-4b73-864b-2e7cebc1b925",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/rt_segment_speeds/prep_comparison.py b/rt_segment_speeds/prep_comparison.py
new file mode 100644
index 000000000..e439a791d
--- /dev/null
+++ b/rt_segment_speeds/prep_comparison.py
@@ -0,0 +1,146 @@
+"""
+Prep the data used to compare segment methodologies
+between Eric and Tiffany.
+
+Get at why speeds are coming out differently.
+
+ - segments do not exactly match
+ - points over which speeds are calculated aren't exactly the same,
+ since understanding of direction is not exactly the same
+ - peel all that back and start at the trip-level to see what's going into
+ averages, start with simpler shapes (no loop, no inlining)
+"""
+import geopandas as gpd
+import pandas as pd
+
+from shared_utils import rt_dates, rt_utils
+from segment_speed_utils import helpers
+from segment_speed_utils.project_vars import SEGMENT_GCS, GCS_FILE_PATH
+from calitp_data_analysis import utils
+
+RT_DELAY_GCS = f"{GCS_FILE_PATH}rt_delay/v2_segment_speed_views/"  # read by prep_eric_data
+analysis_date = rt_dates.DATES["sep2023"]  # date used when run as a script
+
+def prep_eric_data(analysis_date: str) -> gpd.GeoDataFrame:
+ itp_ids = [
+ 182,
+ 300,
+ ]
+
+ # Don't narrow down time-of-day yet, we might select a trip from any
+ # of these
+ time_of_day = [
+ "AM_Peak", "Midday", "PM_Peak"
+ ]
+
+ eric_dfs = [
+ gpd.read_parquet(
+ f"{RT_DELAY_GCS}{itp_id}_{analysis_date}_{time}.parquet")
+ for itp_id, time in zip(itp_ids, time_of_day)
+ ]
+
+ df_eric = pd.concat(eric_dfs, axis=0).reset_index(drop=True)
+
+ return df_eric
+
+
+def prep_tiff_data(
+ analysis_date: str,
+ subset_df: gpd.GeoDataFrame
+) -> gpd.GeoDataFrame:
+
+ shape_trips = subset_df[["shape_id", "trip_id"]].drop_duplicates()
+
+ scheduled_trips = helpers.import_scheduled_trips(
+ analysis_date,
+ columns = [
+ "gtfs_dataset_key", "name",
+ "trip_id", "trip_instance_key",
+ "shape_id", "shape_array_key",
+ "route_id", "direction_id"],
+ get_pandas = True
+ ).rename(columns = {"gtfs_dataset_key": "schedule_gtfs_dataset_key"})
+
+ # Grab the trip_instance_keys we need and use it
+ # to filter the speeds parquet down
+ subset_trips = scheduled_trips.merge(
+ shape_trips,
+ on = ["shape_id", "trip_id"],
+ how = "inner"
+ )
+
+ trip_instances = subset_trips.trip_instance_key.unique().tolist()
+ subset_shapes = subset_trips.shape_array_key.unique().tolist()
+
+ segments = gpd.read_parquet(
+ f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet",
+ filters = [[("shape_array_key", "in", subset_shapes)]]
+ ).drop(columns = ["geometry_arrowized", "district_name"])
+
+ filtered_trip_speeds = pd.read_parquet(
+ f"{SEGMENT_GCS}speeds_stop_segments_{analysis_date}.parquet",
+ filters = [[("trip_instance_key", "in", trip_instances)]]
+ ).merge(
+ subset_trips,
+ on = ["trip_instance_key", "shape_array_key"],
+ how = "inner"
+ )
+
+ df_tiff = pd.merge(
+ segments,
+ filtered_trip_speeds,
+ on = ["schedule_gtfs_dataset_key", "shape_array_key", "stop_sequence"],
+ how = "inner"
+ )
+
+ return df_tiff
+
+
+def map_one_trip(gdf: gpd.GeoDataFrame, one_trip: str):
+ gdf2 = gdf[gdf.trip_id==one_trip]
+
+ m1 = gdf2.explore(
+ "speed_mph",
+ tiles = "CartoDB Positron",
+ cmap = rt_utils.ZERO_THIRTY_COLORSCALE
+ )
+
+ return m1
+
+if __name__ == "__main__":
+ df_eric = prep_eric_data(analysis_date)
+ df_tiff = prep_tiff_data(analysis_date, df_eric)
+
+ utils.geoparquet_gcs_export(
+ df_eric,
+ SEGMENT_GCS,
+ f"speeds_eric_{analysis_date}"
+ )
+
+ utils.geoparquet_gcs_export(
+ df_tiff,
+ SEGMENT_GCS,
+ f"speeds_tiff_{analysis_date}"
+ )
+
+ # stop_sequence doesn't exactly merge, but that's fine,
+ # since Eric cuts shorter segments, so stop_sequence can have
+ # values like 1.25, 1.50, etc.
+ # Leave it in the merge for now, and allow left_only merges
+ identifier_cols = [
+ "trip_id", "shape_id", "stop_id", "stop_sequence",
+ "route_id", "direction_id",
+ ]
+
+
+ speed_df = pd.merge(
+ df_eric[identifier_cols + ["speed_mph"]].rename(
+ columns = {"speed_mph": "eric_speed_mph"}),
+ df_tiff[identifier_cols + ["speed_mph"]].rename(
+ columns = {"speed_mph": "tiff_speed_mph"}),
+ on = identifier_cols,
+ how = "left",
+ indicator = True
+ )
+
+ speed_df.to_parquet(f"{SEGMENT_GCS}speeds_comparison_{analysis_date}.parquet")
\ No newline at end of file