diff --git a/rt_scheduled_v_ran/02_spatial.ipynb b/rt_scheduled_v_ran/02_spatial.ipynb
new file mode 100644
index 000000000..4bdb9ebb3
--- /dev/null
+++ b/rt_scheduled_v_ran/02_spatial.ipynb
@@ -0,0 +1,7402 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "c410afe6-22d7-4546-817c-fea818954108",
+ "metadata": {},
+ "source": [
+ "# Migrate to VP Usable\n",
+ "* https://github.com/cal-itp/data-analyses/issues/936\n",
+ "* cd rt_segment_speeds && pip install -r requirements.txt && cd ..\n",
+ " * https://github.com/cal-itp/data-analyses/blob/main/Makefile#L49C2-L49C66\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "8de007c5-4cdb-4285-9536-0a5f7b75b2de",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import datetime\n",
+ "import dask.dataframe as dd\n",
+ "import dask_geopandas as dg\n",
+ "import dask\n",
+ "import geopandas as gpd\n",
+ "import pandas as pd\n",
+ "from scripts import vp_spatial_accuracy\n",
+ "from segment_speed_utils import helpers\n",
+ "from calitp_data_analysis.geography_utils import WGS84\n",
+ "from segment_speed_utils.project_vars import (\n",
+ " PROJECT_CRS,\n",
+ " SEGMENT_GCS,\n",
+ " analysis_date,\n",
+ " GCS_FILE_PATH,\n",
+ " COMPILED_CACHED_VIEWS,\n",
+ " RT_SCHED_GCS,\n",
+ " CONFIG_PATH\n",
+ ")\n",
+ "\n",
+ "# For speeds c2\n",
+ "from typing import Literal\n",
+ "import numpy as np\n",
+ "\n",
+ "# for speeds c3\n",
+ "# also import numpy \n",
+ "from shared_utils.rt_utils import MPH_PER_MPS\n",
+ "from calitp_data_analysis import utils\n",
+ "from shared_utils import portfolio_utils, schedule_rt_utils\n",
+ "from segment_speed_utils import helpers, sched_rt_utils, wrangle_shapes, segment_calcs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "c5ba79eb-2d4e-4daa-9de6-3ab657ac0a15",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "pd.options.display.max_columns = 100\n",
+ "pd.options.display.float_format = \"{:.2f}\".format\n",
+ "pd.set_option(\"display.max_rows\", None)\n",
+ "pd.set_option(\"display.max_colwidth\", None)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "d8c8f895-8b72-413a-8ec2-72e7e0ebf29f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# calitp-analytics-data/data-analyses/rt_segment_speeds/vp_usable_2023-10-11\n",
+ "operator = 'Bay Area 511 Muni VehiclePositions'\n",
+ "gtfs_key = '7cc0cb1871dfd558f11a2885c145d144'"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "a3863c86-7f53-4755-8444-534a2f11e59c",
+ "metadata": {},
+ "source": [
+ "## Spatial Accuracy\n",
+ "* Based on https://github.com/cal-itp/data-analyses/blob/main/rt_scheduled_v_ran/scripts/vp_spatial_accuracy.py\n",
+ "### Grab_shape_keys_in_vp\n",
+ "#### First time reading `vp_usable`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "6bd6833a-7eb4-487a-bb3a-e445a2d1941d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def grab_shape_keys_in_vp(analysis_date: str) -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Subset raw vp and find unique trip_instance_keys.\n",
+ " Create crosswalk to link trip_instance_key to shape_array_key.\n",
+ " \"\"\"\n",
+ " vp_trip_df = pd.read_parquet(f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n",
+ " filters = [[('gtfs_dataset_name', \"==\", operator),\n",
+ " ('schedule_gtfs_dataset_key', '==', gtfs_key)]],\n",
+ " columns = ['trip_instance_key'])\n",
+ " \n",
+ " vp_trip_df = vp_trip_df.drop_duplicates().reset_index(drop = True)\n",
+ " \n",
+ " # Make sure we have a shape geometry too\n",
+ " # otherwise map_partitions will throw error\n",
+ " shapes = pd.read_parquet(\n",
+ " f\"{COMPILED_CACHED_VIEWS}routelines_{analysis_date}.parquet\",\n",
+ " columns = [\"shape_array_key\"],\n",
+ " ).dropna().drop_duplicates()\n",
+ " \n",
+ " trips_with_shape = helpers.import_scheduled_trips(\n",
+ " analysis_date,\n",
+ " columns = [\"trip_instance_key\", \"shape_array_key\"],\n",
+ " get_pandas = True\n",
+ " ).merge(\n",
+ " shapes,\n",
+ " on = \"shape_array_key\",\n",
+ " how = \"inner\"\n",
+ " ).merge(\n",
+ " vp_trip_df,\n",
+ " on = \"trip_instance_key\",\n",
+ " how = \"inner\"\n",
+ " ).drop_duplicates().dropna().reset_index(drop=True)\n",
+ "\n",
+ " return trips_with_shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "8759e976-ea60-4604-945d-1ec9693b0495",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "trips_with_shape = grab_shape_keys_in_vp(analysis_date)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "bbbc85b4-a588-4f20-9822-eb4d5b4b3723",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(9240, 2)"
+ ]
+ },
+ "execution_count": 6,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trips_with_shape.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "34f69e0d-68c8-4b8e-8b82-8318672d0f4d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " shape_array_key | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 12de3d260e9fe09fa878cb4cdb2d6898 | \n",
+ " 749b225ca6691f77914e88577dc13e68 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 934aea5748bce830ffc2fa88dc01402a | \n",
+ " 749b225ca6691f77914e88577dc13e68 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3d06fa8e68e4f38d3ccc7adfabb5c3d9 | \n",
+ " f1a7410fae06937b7183f6a553707915 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 34109ad8cfeca83cd459d42c7d51d602 | \n",
+ " f1a7410fae06937b7183f6a553707915 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 34acd907ae9d5eb5456d683d2458bbc6 | \n",
+ " f1a7410fae06937b7183f6a553707915 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key shape_array_key\n",
+ "0 12de3d260e9fe09fa878cb4cdb2d6898 749b225ca6691f77914e88577dc13e68\n",
+ "1 934aea5748bce830ffc2fa88dc01402a 749b225ca6691f77914e88577dc13e68\n",
+ "2 3d06fa8e68e4f38d3ccc7adfabb5c3d9 f1a7410fae06937b7183f6a553707915\n",
+ "3 34109ad8cfeca83cd459d42c7d51d602 f1a7410fae06937b7183f6a553707915\n",
+ "4 34acd907ae9d5eb5456d683d2458bbc6 f1a7410fae06937b7183f6a553707915"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trips_with_shape.head()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "8be73ca3-7ca2-4616-9504-7defe86f3792",
+ "metadata": {},
+ "source": [
+ "### Buffer shapes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "ca9923a7-914f-43ce-a840-a15d41b43aee",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# This is trips_with_shape_geom\n",
+ "trips_with_shape_geom = vp_spatial_accuracy.buffer_shapes(analysis_date, \n",
+ " trips_with_shape,\n",
+ " 35)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "caf4dd49-4839-475d-99ff-7765ef5fb20a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(9240, 3)"
+ ]
+ },
+ "execution_count": 9,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "trips_with_shape_geom.shape"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc115ac6-2042-4565-9ac7-bd961339f8aa",
+ "metadata": {},
+ "source": [
+ "#### Second time reading in the same file, streamline"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "beaddbc2-fd11-4021-bdf0-7eea81835226",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vp = dd.read_parquet(f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n",
+ " filters = [[('gtfs_dataset_name', \"==\", operator),\n",
+ " ('schedule_gtfs_dataset_key', '==', gtfs_key)]])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "6053ea36-1f85-4a1a-b10b-019f6d870986",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "trip_instance_key 9240\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 11,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vp[['trip_instance_key']].compute().nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "384c1e89-0b57-49dd-aaa0-69ea03e25b23",
+ "metadata": {},
+ "source": [
+ "### Full function"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "79fe772c-74db-49e7-b41d-e1daa3f23ee3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def merge_vp_with_shape_and_count(\n",
+ " vp: dd.DataFrame,\n",
+ " trips_with_shape_geom: gpd.GeoDataFrame\n",
+ ") -> gpd.GeoDataFrame:\n",
+ " \"\"\"\n",
+ " Merge vp with crosswalk and buffered shapes.\n",
+ " Get vp count totals and vp within shape.\n",
+ " \"\"\"\n",
+ " vp_gdf = gpd.GeoDataFrame(\n",
+ " vp,\n",
+ " geometry = gpd.points_from_xy(vp.x, vp.y),\n",
+ " crs = WGS84\n",
+ " ).to_crs(PROJECT_CRS)\n",
+ " \n",
+ " vp_gdf = vp_gdf.rename(columns = {\n",
+ " 0:'gtfs_dataset_name',\n",
+ " 1: 'schedule_gtfs_dataset_key', \n",
+ " 2:'trip_id',\n",
+ " 3:'trip_instance_key',\n",
+ " 4: 'location_timestamp', \n",
+ " 5: 'location_timestamp_local',\n",
+ " 6:'x',\n",
+ " 7:'y', \n",
+ " 8:'vp_idx', \n",
+ " 9:'gtfs_dataset_key', \n",
+ " 10:'vp_dir_xnorm', \n",
+ " 11:'vp_dir_ynorm',\n",
+ " 12:'vp_primary_direction'})\n",
+ " \n",
+ " vp2 = pd.merge(\n",
+ " vp_gdf,\n",
+ " trips_with_shape_geom,\n",
+ " on = \"trip_instance_key\",\n",
+ " how = \"inner\"\n",
+ " ).reset_index(drop=True)\n",
+ " \n",
+ " total_vp = vp_spatial_accuracy.total_vp_counts_by_trip(vp2)\n",
+ " \n",
+ " vp2 = vp2.assign(\n",
+ " is_within = vp2.geometry_x.within(vp2.geometry_y)\n",
+ " ).query('is_within==True')\n",
+ " \n",
+ " vps_in_shape = (vp2.groupby(\"trip_instance_key\", \n",
+ " observed = True, group_keys = False)\n",
+ " .agg({\"location_timestamp_local\": \"count\"})\n",
+ " .reset_index()\n",
+ " .rename(columns = {\"location_timestamp_local\": \"vp_in_shape\"})\n",
+ " )\n",
+ " \n",
+ " count_df = pd.merge(\n",
+ " total_vp,\n",
+ " vps_in_shape,\n",
+ " on = \"trip_instance_key\",\n",
+ " how = \"left\"\n",
+ " )\n",
+ " \n",
+ " count_df = count_df.assign(\n",
+ " vp_in_shape = count_df.vp_in_shape.fillna(0).astype(\"int32\"),\n",
+ " total_vp = count_df.total_vp.fillna(0).astype(\"int32\")\n",
+ " )\n",
+ " \n",
+ " return count_df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "561287f4-8189-441c-881c-1f34fc43e7d5",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "muni = merge_vp_with_shape_and_count(vp, trips_with_shape_geom)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "bcaa5da2-21a5-4c87-825a-fa2637ef9b8f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 9240.00\n",
+ "mean 161.87\n",
+ "std 64.06\n",
+ "min 0.00\n",
+ "25% 118.00\n",
+ "50% 167.00\n",
+ "75% 207.00\n",
+ "max 481.00\n",
+ "Name: vp_in_shape, dtype: float64"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "muni.vp_in_shape.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "3504f86b-08bd-4a8d-b810-e654dd911c06",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 15,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "muni.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b03b76bf-2ce3-4400-b072-f53c9f942deb",
+ "metadata": {},
+ "source": [
+ "#### Add %?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 16,
+ "id": "4b0f141c-e583-4bb2-a3a7-f059e27a66fc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 9240.00\n",
+ "mean 0.93\n",
+ "std 0.14\n",
+ "min 0.00\n",
+ "25% 0.94\n",
+ "50% 1.00\n",
+ "75% 1.00\n",
+ "max 1.00\n",
+ "dtype: float64"
+ ]
+ },
+ "execution_count": 16,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(muni.vp_in_shape/muni.total_vp).describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0a306082-dcfc-4a4a-bc3b-5ee90ae9517d",
+ "metadata": {},
+ "source": [
+ "#### Question: We are keeping rows in which 0 vps are in the shape?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "eaa02c27-eda6-47ac-ae18-827f1616d73e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " total_vp | \n",
+ " vp_in_shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 167 | \n",
+ " 04ae6ea9655473bdcaf9b80d443558ae | \n",
+ " 111 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 351 | \n",
+ " 0a1553b7a738967a422b7b8960560ded | \n",
+ " 75 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 408 | \n",
+ " 0bc53a1063efae8a32eca4a9034d2a21 | \n",
+ " 61 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1128 | \n",
+ " 1fe28ce8669a7f946f272faf80d80ce7 | \n",
+ " 70 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 1981 | \n",
+ " 37aed2e5ad125f8aede20922b38bf6b9 | \n",
+ " 78 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3022 | \n",
+ " 54e54e2839a6945b1005322398a89ae9 | \n",
+ " 132 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3127 | \n",
+ " 580684a7a6b43ce5130f60a441681713 | \n",
+ " 46 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3213 | \n",
+ " 5a82f8c02f2036db77b1979f4ae228f5 | \n",
+ " 41 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3861 | \n",
+ " 6cbdc2d321ca63df78bce01eeb06fe07 | \n",
+ " 47 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3937 | \n",
+ " 6ee4a9165f47b59cb3e10b0125dd72e8 | \n",
+ " 34 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 4562 | \n",
+ " 7ff345e91303290951ef0665a57dc90e | \n",
+ " 55 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 5288 | \n",
+ " 9351c60516ef43464d962a956d1b3806 | \n",
+ " 96 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 5894 | \n",
+ " a40cc2271f048e2b72d94b051c6688cb | \n",
+ " 120 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 6119 | \n",
+ " aa82ee2a81331cbdf044a7d5280d436a | \n",
+ " 41 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 6758 | \n",
+ " bcb78da06a56761d2a3b8b16a3436f3d | \n",
+ " 101 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7043 | \n",
+ " c3fc63ec2ff7a35c0f821659075be708 | \n",
+ " 60 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7543 | \n",
+ " d21ff3a0d37a484dc4f979d052652565 | \n",
+ " 45 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 7734 | \n",
+ " d79da0eafb4ac5188f1cd14850a6f880 | \n",
+ " 90 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8431 | \n",
+ " ea8751445dcd6a175bd23ec5fc423d0c | \n",
+ " 44 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8718 | \n",
+ " f298ef83aaa934eb6577fd2ca19d4ebd | \n",
+ " 82 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key total_vp vp_in_shape\n",
+ "167 04ae6ea9655473bdcaf9b80d443558ae 111 0\n",
+ "351 0a1553b7a738967a422b7b8960560ded 75 0\n",
+ "408 0bc53a1063efae8a32eca4a9034d2a21 61 0\n",
+ "1128 1fe28ce8669a7f946f272faf80d80ce7 70 0\n",
+ "1981 37aed2e5ad125f8aede20922b38bf6b9 78 0\n",
+ "3022 54e54e2839a6945b1005322398a89ae9 132 0\n",
+ "3127 580684a7a6b43ce5130f60a441681713 46 0\n",
+ "3213 5a82f8c02f2036db77b1979f4ae228f5 41 0\n",
+ "3861 6cbdc2d321ca63df78bce01eeb06fe07 47 0\n",
+ "3937 6ee4a9165f47b59cb3e10b0125dd72e8 34 0\n",
+ "4562 7ff345e91303290951ef0665a57dc90e 55 0\n",
+ "5288 9351c60516ef43464d962a956d1b3806 96 0\n",
+ "5894 a40cc2271f048e2b72d94b051c6688cb 120 0\n",
+ "6119 aa82ee2a81331cbdf044a7d5280d436a 41 0\n",
+ "6758 bcb78da06a56761d2a3b8b16a3436f3d 101 0\n",
+ "7043 c3fc63ec2ff7a35c0f821659075be708 60 0\n",
+ "7543 d21ff3a0d37a484dc4f979d052652565 45 0\n",
+ "7734 d79da0eafb4ac5188f1cd14850a6f880 90 0\n",
+ "8431 ea8751445dcd6a175bd23ec5fc423d0c 44 0\n",
+ "8718 f298ef83aaa934eb6577fd2ca19d4ebd 82 0"
+ ]
+ },
+ "execution_count": 17,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "muni.loc[muni.vp_in_shape == 0]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "ad776dcd-700e-499e-b1c2-57c44d255153",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "trip_instance_key 9240\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vp[['trip_instance_key']].compute().nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "19a20f72-8a90-4823-8cc5-32f91ae1286d",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Update Completeness\n",
+ "* https://github.com/cal-itp/data-analyses/blob/main/rt_predictions/01_update_completeness.ipynb\n",
+ "\n",
+ "#### Keep only relevant `trips instance keys`?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 19,
+ "id": "b2e0fa2d-089d-49a5-ab20-a8ef65925795",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Only use the trips with shapes.\n",
+ "relevant_trips = list(muni.trip_instance_key.unique())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "0e0e963e-ce3a-43bd-b5eb-209af2524e0f",
+ "metadata": {},
+ "source": [
+ "#### Third time reading in `vp_usable`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 20,
+ "id": "845ad61c-eab0-40c9-9e50-09c35bd50a73",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "completeness_cols = ['vp_idx',\n",
+ " 'location_timestamp_local', 'trip_instance_key',\n",
+ " 'gtfs_dataset_key']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 21,
+ "id": "88b571f2-624c-47aa-a154-286902fa4a96",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vp_filtered = dd.read_parquet(f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n",
+ " columns = completeness_cols, \n",
+ " filters = [[('gtfs_dataset_name', \"==\", operator),\n",
+ " ('schedule_gtfs_dataset_key', '==', gtfs_key),\n",
+ " ('trip_instance_key', 'in', relevant_trips)]])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "id": "8b1174c1-b693-40fd-94c6-16152a2a3cd7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fresh_updates = vp_filtered.sort_values(['vp_idx']).reset_index(drop = True)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "222d4f15-1fde-4194-b2bd-a10a50976336",
+ "metadata": {},
+ "source": [
+ "#### Question: Can't use dask for this type of groupby \n",
+ "* Also grouping only by `trip_instance_key` yields the best result..unsure if that's ok"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 23,
+ "id": "add7de33-bf89-4ab1-a907-a5f6f399821e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "two_cols = ['trip_instance_key','gtfs_dataset_key']"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "id": "c9083d90-f232-4568-99f1-f92ccd98c5cc",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "fresh_updates_df = fresh_updates.compute()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "id": "c9cd0c00-bdda-45c3-bb81-cc0556475501",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# fresh_updates_df = fresh_updates_df.assign(fresh = 1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 26,
+ "id": "201eab8e-893c-4a29-be9f-a1a81cafbbde",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "\" DOES NOT WORK\\ntest_group1 = (fresh_updates_df.groupby([\\n *two_cols, \\n pd.Grouper(key = 'location_timestamp_local', freq = '1Min')\\n ])\\n .count()).reset_index() \""
+ ]
+ },
+ "execution_count": 26,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\"\"\" DOES NOT WORK\n",
+ "test_group1 = (fresh_updates_df.groupby([\n",
+ " *two_cols, \n",
+ " pd.Grouper(key = 'location_timestamp_local', freq = '1Min')\n",
+ " ])\n",
+ " .count()).reset_index() \"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "id": "be48ea1f-b6e9-40b6-b121-2b9b744b86cb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group2 = (fresh_updates_df.groupby([\n",
+ " *['trip_instance_key'], \n",
+ " pd.Grouper(key = 'location_timestamp_local', freq = '1Min')\n",
+ " ])\n",
+ " .count()\n",
+ " .reset_index()\n",
+ " .rename(columns = {'vp_idx':'number_of_pings_per_minute'})\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b9fa09c7-6406-4c72-9a61-e925fce8fa8d",
+ "metadata": {},
+ "source": [
+ "### Checks"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "id": "2b697668-d7e7-4803-a289-0fe8c2e681d1",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "key1 = \"38247cbee93b6f85d58bf1812ae553b9\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 29,
+ "id": "c693d819-3cbc-4766-bdb2-0176a7340f55",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "key2 = \"fac53ed1db7d914cc4c1857e967344f4\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "id": "c00f7968-5ada-4623-8881-4d7607b86e5a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "key3 = \"9e291393bf06763fb6c6fe950d6e8097\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 31,
+ "id": "074357ed-2e23-45c9-913e-8e5092534627",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# test_group2.loc[test_group2.trip_instance_key == key1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "id": "bc1bfbf4-d828-41ba-bc46-9f7bb687e123",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "id": "e3a6c25b-922e-4083-ba53-20193b441df3",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# test_group2.loc[test_group2.trip_instance_key == key2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "id": "f20ba29c-1554-4155-b3c8-56f806196939",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key2]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "id": "0a1ab3be-8f3d-416e-9c18-3d2e365250c5",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# test_group2.loc[test_group2.trip_instance_key == key3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "id": "ec0f9c0a-0833-48e3-bf14-b00cd75aa560",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "id": "5374f05c-3dfb-4e9d-a44f-1387b70c4434",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# (test_group2.fresh - test_group2.number_of_pings_per_minute).describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "id": "c4f0f887-0bf1-4acd-9204-ce4180517fff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test_group2.fresh.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 39,
+ "id": "61617748-c555-4e88-b027-a7362cbb40f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group2 = test_group2.assign(\n",
+ " atleast2_trip_updates = test_group2.apply(\n",
+ " lambda x: 1 if x[\"number_of_pings_per_minute\"] >= 2\n",
+ " else 0, axis=1)\n",
+ " ) \n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "id": "f127aa0e-90f3-4946-a3c0-80c9b84aa101",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1 534886\n",
+ "0 6795\n",
+ "Name: atleast2_trip_updates, dtype: int64"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group2.atleast2_trip_updates.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ccd85839-5d29-4c0b-be92-bc6c86d3999a",
+ "metadata": {},
+ "source": [
+ "### Trip minutes is wrong\n",
+ "* Check w/ 38247cbee93b6f85d58bf1812ae553b9\n",
+ "* Began at 16:01:00, end at 17:00:000\n",
+ "* Skips having data: jumps from 4:38 to 4:54"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 41,
+ "id": "079a4eb5-5708-407c-81a6-f88466e9e9df",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "' DOES NOT WORK \\ntest_group2_grouped = (test_group2.groupby(two_cols)\\n .agg({\\n \"location_timestamp_local\": \"size\",\\n \"atleast2_trip_updates\": \"sum\"})\\n .reset_index()\\n ).rename(columns = {\\n \"location_timestamp_local\": \"trip_min_elapsed\"\\n })\\n '"
+ ]
+ },
+ "execution_count": 41,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\"\"\" DOES NOT WORK \n",
+ "test_group2_grouped = (test_group2.groupby(two_cols)\n",
+ " .agg({\n",
+ " \"location_timestamp_local\": \"size\",\n",
+ " \"atleast2_trip_updates\": \"sum\"})\n",
+ " .reset_index()\n",
+ " ).rename(columns = {\n",
+ " \"location_timestamp_local\": \"trip_min_elapsed\"\n",
+ " })\n",
+ " \"\"\" "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 42,
+ "id": "45f51f48-df0b-42ed-9f25-07534f2c2514",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test_group2_grouped.loc[test_group2_grouped.trip_instance_key == key1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 43,
+ "id": "5965a48a-dec8-444f-8f69-714c64c885ab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group2['max_time'] = test_group2.location_timestamp_local"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 44,
+ "id": "d04cba0b-afd6-418a-a916-ea5fcea3f0a0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group3 = (test_group2\n",
+ " .groupby(['trip_instance_key'])\n",
+ " .agg({'location_timestamp_local':'min','max_time':'max', \n",
+ " 'atleast2_trip_updates':'sum'})\n",
+ " .reset_index()\n",
+ " .rename(columns = {'location_timestamp_local':'min_time'})\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 45,
+ "id": "cdbcc81c-1dc4-4111-808e-c3590568d163",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group3['trip_min_elapsed'] = (test_group3.max_time - test_group3.min_time) / pd.Timedelta(minutes=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 46,
+ "id": "753d1d27-8813-4cdf-bf9f-dc5193f908f7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " min_time | \n",
+ " max_time | \n",
+ " atleast2_trip_updates | \n",
+ " trip_min_elapsed | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2002 | \n",
+ " 38247cbee93b6f85d58bf1812ae553b9 | \n",
+ " 2023-10-11 16:01:00 | \n",
+ " 2023-10-11 17:00:00 | \n",
+ " 45 | \n",
+ " 59.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key min_time \\\n",
+ "2002 38247cbee93b6f85d58bf1812ae553b9 2023-10-11 16:01:00 \n",
+ "\n",
+ " max_time atleast2_trip_updates trip_min_elapsed \n",
+ "2002 2023-10-11 17:00:00 45 59.00 "
+ ]
+ },
+ "execution_count": 46,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.loc[test_group3.trip_instance_key == key1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 47,
+ "id": "d4e45b96-5809-49a0-840e-112efc568a8e",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# test_group2.loc[test_group2.trip_instance_key == key1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 48,
+ "id": "366efe58-3194-4fd9-9b86-a4aab10cf003",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "\n",
+ "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key1]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 49,
+ "id": "df84e20f-33e4-4c93-bc09-ddf2f85b5ae2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(133, 4)"
+ ]
+ },
+ "execution_count": 49,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "\n",
+ "fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key1].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 50,
+ "id": "bf6654a0-01bf-4ae3-8158-133d9e5aeb91",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group3 = test_group3.assign(\n",
+ " pct_update_complete = test_group3.atleast2_trip_updates.divide(\n",
+ " test_group3.trip_min_elapsed)\n",
+ " ) "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "12805755-8e1c-44f1-bd4b-bf0308757367",
+ "metadata": {},
+ "source": [
+ "### Some trips experiencing pct-update-complete greater than 100%\n",
+ "* They have more rows than trip_min_elapsed"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 51,
+ "id": "60cfbabc-e181-4004-9e08-9ff7b80fceae",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "3701"
+ ]
+ },
+ "execution_count": 51,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(test_group3.loc[test_group3.pct_update_complete > 1])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 52,
+ "id": "6e745fec-c6c0-4f0a-b618-44a252f36b9a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 52,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(test_group3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 53,
+ "id": "a76fce93-01f6-43ec-a12b-0929883673b7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 9240.00\n",
+ "mean 1.00\n",
+ "std 0.06\n",
+ "min 0.02\n",
+ "25% 1.00\n",
+ "50% 1.00\n",
+ "75% 1.02\n",
+ "max 1.10\n",
+ "Name: pct_update_complete, dtype: float64"
+ ]
+ },
+ "execution_count": 53,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.pct_update_complete.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 54,
+ "id": "ca5b2186-0b89-47fa-8d34-437b6f26849e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " min_time | \n",
+ " max_time | \n",
+ " atleast2_trip_updates | \n",
+ " trip_min_elapsed | \n",
+ " pct_update_complete | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1486 | \n",
+ " 2a9fb1144c567b98582f7dc025182ef0 | \n",
+ " 2023-10-11 10:20:00 | \n",
+ " 2023-10-11 10:30:00 | \n",
+ " 11 | \n",
+ " 10.00 | \n",
+ " 1.10 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key min_time \\\n",
+ "1486 2a9fb1144c567b98582f7dc025182ef0 2023-10-11 10:20:00 \n",
+ "\n",
+ " max_time atleast2_trip_updates trip_min_elapsed \\\n",
+ "1486 2023-10-11 10:30:00 11 10.00 \n",
+ "\n",
+ " pct_update_complete \n",
+ "1486 1.10 "
+ ]
+ },
+ "execution_count": 54,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.loc[test_group3.pct_update_complete == 1.1].sample()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "13103a30-dd21-4390-8b97-20c2f3b314ad",
+ "metadata": {},
+ "source": [
+ "#### How many cols have larger atleast2_trip_updates compared to trip-min_elapsed (delete later)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 55,
+ "id": "6862f705-501d-4285-85db-8dd758aa04c7",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group3['larger'] =test_group3.trip_min_elapsed - test_group3.atleast2_trip_updates "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 56,
+ "id": "d8659d98-012a-4b2d-9825-e605df194cad",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 9240.00\n",
+ "mean 0.29\n",
+ "std 3.82\n",
+ "min -1.00\n",
+ "25% -1.00\n",
+ "50% 0.00\n",
+ "75% 0.00\n",
+ "max 105.00\n",
+ "Name: larger, dtype: float64"
+ ]
+ },
+ "execution_count": 56,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.larger.describe()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "4512571b-d486-4490-bec0-6489f1b5d0f2",
+ "metadata": {},
+ "source": [
+ "#### One trip only recorded 2+ pings per minute in 36% of its duration"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 57,
+ "id": "8e633abb-5030-4f83-a70a-2214b4017048",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " min_time | \n",
+ " max_time | \n",
+ " atleast2_trip_updates | \n",
+ " trip_min_elapsed | \n",
+ " pct_update_complete | \n",
+ " larger | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3942 | \n",
+ " 6ef4805f0104b95614b86a2b1c374d23 | \n",
+ " 2023-10-11 17:48:00 | \n",
+ " 2023-10-11 20:31:00 | \n",
+ " 58 | \n",
+ " 163.00 | \n",
+ " 0.36 | \n",
+ " 105.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key min_time \\\n",
+ "3942 6ef4805f0104b95614b86a2b1c374d23 2023-10-11 17:48:00 \n",
+ "\n",
+ " max_time atleast2_trip_updates trip_min_elapsed \\\n",
+ "3942 2023-10-11 20:31:00 58 163.00 \n",
+ "\n",
+ " pct_update_complete larger \n",
+ "3942 0.36 105.00 "
+ ]
+ },
+ "execution_count": 57,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.loc[test_group3.larger == 105].sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 58,
+ "id": "5787a3b6-375f-4488-a536-0c543a61f780",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "176"
+ ]
+ },
+ "execution_count": 58,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(fresh_updates_df.loc[fresh_updates_df.trip_instance_key == \"6ef4805f0104b95614b86a2b1c374d23\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "e9360550-2b92-406e-a37e-48347fec849d",
+ "metadata": {},
+ "source": [
+ "* Minutes skipped: 6:12 to 6:18\n",
+ "* 6:19-6:26\n",
+ "* 6:28-7:33 etc etc\n",
+ "* Trip started at 5:48, ended at 8:31 "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 59,
+ "id": "7b94e534-8dcb-4ed7-b864-4244d93a2ac1",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# test_group2.loc[test_group2.trip_instance_key == \"6ef4805f0104b95614b86a2b1c374d23\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 60,
+ "id": "75d05e56-5d6d-4e02-955e-f2ab7567526a",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [],
+ "source": [
+ "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == \"6ef4805f0104b95614b86a2b1c374d23\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 61,
+ "id": "53cb5d9e-be40-4989-a3b6-af2131c30075",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def mask_overly_large_min(row):\n",
+ " if row.atleast2_trip_updates > row.trip_min_elapsed:\n",
+ " return row.trip_min_elapsed\n",
+ " else:\n",
+ " return row.atleast2_trip_updates"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 62,
+ "id": "827559b6-f997-4842-a324-4d0f169b7607",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Apply the function: if the atleast2 trip updates is larger than trip_min_elapsed, mask it\n",
+ "# Or maybe just mask the pct-update-complete? \n",
+ "test_group3[\"test_mask\"] = test_group3.apply(mask_overly_large_min, axis=1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 63,
+ "id": "8d94dc4f-88c0-4817-92ae-5b8b9957ef54",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group3 = test_group3.assign(\n",
+ " pct_update_complete2 = test_group3.test_mask.divide(\n",
+ " test_group3.trip_min_elapsed)\n",
+ " ) "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 64,
+ "id": "bc39796b-aeea-4e95-b542-23d6eb5d87b3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " min_time | \n",
+ " max_time | \n",
+ " atleast2_trip_updates | \n",
+ " trip_min_elapsed | \n",
+ " pct_update_complete | \n",
+ " larger | \n",
+ " test_mask | \n",
+ " pct_update_complete2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1510 | \n",
+ " 2b14ada5cb9e31a2c84b2a19b22c711a | \n",
+ " 2023-10-11 13:10:00 | \n",
+ " 2023-10-11 14:37:00 | \n",
+ " 88 | \n",
+ " 87.00 | \n",
+ " 1.01 | \n",
+ " -1.00 | \n",
+ " 87.00 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 3938 | \n",
+ " 6ee775a74150b96abf9531462c6c69e0 | \n",
+ " 2023-10-11 02:59:00 | \n",
+ " 2023-10-11 03:30:00 | \n",
+ " 32 | \n",
+ " 31.00 | \n",
+ " 1.03 | \n",
+ " -1.00 | \n",
+ " 31.00 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key min_time \\\n",
+ "1510 2b14ada5cb9e31a2c84b2a19b22c711a 2023-10-11 13:10:00 \n",
+ "3938 6ee775a74150b96abf9531462c6c69e0 2023-10-11 02:59:00 \n",
+ "\n",
+ " max_time atleast2_trip_updates trip_min_elapsed \\\n",
+ "1510 2023-10-11 14:37:00 88 87.00 \n",
+ "3938 2023-10-11 03:30:00 32 31.00 \n",
+ "\n",
+ " pct_update_complete larger test_mask pct_update_complete2 \n",
+ "1510 1.01 -1.00 87.00 1.00 \n",
+ "3938 1.03 -1.00 31.00 1.00 "
+ ]
+ },
+ "execution_count": 64,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.loc[test_group3.larger == -1].sample(2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "52c81121-29bb-41ca-bd3a-cfe2b60f7989",
+ "metadata": {},
+ "source": [
+ "#### Ex of a trip: ten minute sbut eleven rows 37622040815e89d063272bb6e37acc65\n",
+ "* The trip is ten minutes but there are eleven rows."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 65,
+ "id": "749f3055-f199-4905-854b-cf208c969f26",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " min_time | \n",
+ " max_time | \n",
+ " atleast2_trip_updates | \n",
+ " trip_min_elapsed | \n",
+ " pct_update_complete | \n",
+ " larger | \n",
+ " test_mask | \n",
+ " pct_update_complete2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1972 | \n",
+ " 37622040815e89d063272bb6e37acc65 | \n",
+ " 2023-10-11 05:31:00 | \n",
+ " 2023-10-11 05:41:00 | \n",
+ " 11 | \n",
+ " 10.00 | \n",
+ " 1.10 | \n",
+ " -1.00 | \n",
+ " 10.00 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key min_time \\\n",
+ "1972 37622040815e89d063272bb6e37acc65 2023-10-11 05:31:00 \n",
+ "\n",
+ " max_time atleast2_trip_updates trip_min_elapsed \\\n",
+ "1972 2023-10-11 05:41:00 11 10.00 \n",
+ "\n",
+ " pct_update_complete larger test_mask pct_update_complete2 \n",
+ "1972 1.10 -1.00 10.00 1.00 "
+ ]
+ },
+ "execution_count": 65,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.loc[test_group3.trip_instance_key == \"37622040815e89d063272bb6e37acc65\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 174,
+ "id": "b3a1f2f3-6981-4be6-bccb-8636e4430f65",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(11, 6)"
+ ]
+ },
+ "execution_count": 174,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group2.loc[test_group2.trip_instance_key == \"37622040815e89d063272bb6e37acc65\"].shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 66,
+ "id": "d109f168-9183-489d-9815-f179cc26e217",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1"
+ ]
+ },
+ "execution_count": 66,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(test_group3.loc[test_group3.trip_instance_key == \"37622040815e89d063272bb6e37acc65\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 67,
+ "id": "714e610c-018f-406c-abc9-8c69de0ff7a5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " min_time | \n",
+ " max_time | \n",
+ " atleast2_trip_updates | \n",
+ " trip_min_elapsed | \n",
+ " pct_update_complete | \n",
+ " larger | \n",
+ " test_mask | \n",
+ " pct_update_complete2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 1972 | \n",
+ " 37622040815e89d063272bb6e37acc65 | \n",
+ " 2023-10-11 05:31:00 | \n",
+ " 2023-10-11 05:41:00 | \n",
+ " 11 | \n",
+ " 10.00 | \n",
+ " 1.10 | \n",
+ " -1.00 | \n",
+ " 10.00 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key min_time \\\n",
+ "1972 37622040815e89d063272bb6e37acc65 2023-10-11 05:31:00 \n",
+ "\n",
+ " max_time atleast2_trip_updates trip_min_elapsed \\\n",
+ "1972 2023-10-11 05:41:00 11 10.00 \n",
+ "\n",
+ " pct_update_complete larger test_mask pct_update_complete2 \n",
+ "1972 1.10 -1.00 10.00 1.00 "
+ ]
+ },
+ "execution_count": 67,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.loc[test_group3.trip_instance_key == \"37622040815e89d063272bb6e37acc65\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "6b54ee94-48e6-42ed-a87f-97ba04293589",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "#### 568caf4acf76125fb5db063f8737e5a8\n",
+ "* Trip is 73 minutes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 68,
+ "id": "f505e963-6dd7-4624-a2a0-324ade8c04b8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " min_time | \n",
+ " max_time | \n",
+ " atleast2_trip_updates | \n",
+ " trip_min_elapsed | \n",
+ " pct_update_complete | \n",
+ " larger | \n",
+ " test_mask | \n",
+ " pct_update_complete2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3077 | \n",
+ " 568caf4acf76125fb5db063f8737e5a8 | \n",
+ " 2023-10-11 08:32:00 | \n",
+ " 2023-10-11 09:45:00 | \n",
+ " 74 | \n",
+ " 73.00 | \n",
+ " 1.01 | \n",
+ " -1.00 | \n",
+ " 73.00 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key min_time \\\n",
+ "3077 568caf4acf76125fb5db063f8737e5a8 2023-10-11 08:32:00 \n",
+ "\n",
+ " max_time atleast2_trip_updates trip_min_elapsed \\\n",
+ "3077 2023-10-11 09:45:00 74 73.00 \n",
+ "\n",
+ " pct_update_complete larger test_mask pct_update_complete2 \n",
+ "3077 1.01 -1.00 73.00 1.00 "
+ ]
+ },
+ "execution_count": 68,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.loc[test_group3.trip_instance_key == \"568caf4acf76125fb5db063f8737e5a8\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 69,
+ "id": "d8d800f1-e881-43f1-bd4e-699d93ba8aaf",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "74"
+ ]
+ },
+ "execution_count": 69,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(test_group2.loc[test_group2.trip_instance_key == \"568caf4acf76125fb5db063f8737e5a8\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 70,
+ "id": "b3a10c1f-afc5-4206-a79f-9c78b1df4854",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "221"
+ ]
+ },
+ "execution_count": 70,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(fresh_updates_df.loc[fresh_updates_df.trip_instance_key == \"568caf4acf76125fb5db063f8737e5a8\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 71,
+ "id": "fde7e3e4-2df8-48f3-a34a-545a5aef5f81",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 71,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(test_group3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 72,
+ "id": "95a64572-3d15-4c47-ac93-1adb9c5f8d46",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " min_time | \n",
+ " max_time | \n",
+ " atleast2_trip_updates | \n",
+ " trip_min_elapsed | \n",
+ " pct_update_complete | \n",
+ " larger | \n",
+ " test_mask | \n",
+ " pct_update_complete2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 5128 | \n",
+ " 8f471c6d3f23a285b0e957db9431a89a | \n",
+ " 2023-10-11 19:10:00 | \n",
+ " 2023-10-11 20:11:00 | \n",
+ " 61 | \n",
+ " 61.00 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 61.00 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 8609 | \n",
+ " efa2c7f74ba9cd19255dc54ec7f08ec0 | \n",
+ " 2023-10-11 13:04:00 | \n",
+ " 2023-10-11 14:05:00 | \n",
+ " 60 | \n",
+ " 61.00 | \n",
+ " 0.98 | \n",
+ " 1.00 | \n",
+ " 60.00 | \n",
+ " 0.98 | \n",
+ "
\n",
+ " \n",
+ " 5646 | \n",
+ " 9d2e48287e1f9f7f907bc1c91a811ae3 | \n",
+ " 2023-10-11 08:38:00 | \n",
+ " 2023-10-11 09:22:00 | \n",
+ " 44 | \n",
+ " 44.00 | \n",
+ " 1.00 | \n",
+ " 0.00 | \n",
+ " 44.00 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key min_time \\\n",
+ "5128 8f471c6d3f23a285b0e957db9431a89a 2023-10-11 19:10:00 \n",
+ "8609 efa2c7f74ba9cd19255dc54ec7f08ec0 2023-10-11 13:04:00 \n",
+ "5646 9d2e48287e1f9f7f907bc1c91a811ae3 2023-10-11 08:38:00 \n",
+ "\n",
+ " max_time atleast2_trip_updates trip_min_elapsed \\\n",
+ "5128 2023-10-11 20:11:00 61 61.00 \n",
+ "8609 2023-10-11 14:05:00 60 61.00 \n",
+ "5646 2023-10-11 09:22:00 44 44.00 \n",
+ "\n",
+ " pct_update_complete larger test_mask pct_update_complete2 \n",
+ "5128 1.00 0.00 61.00 1.00 \n",
+ "8609 0.98 1.00 60.00 0.98 \n",
+ "5646 1.00 0.00 44.00 1.00 "
+ ]
+ },
+ "execution_count": 72,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.sample(3)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3b61450e-528c-4dab-9582-2bda3343e379",
+ "metadata": {},
+ "source": [
+ "## How many minutes a trip took and the average speeds?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 73,
+ "id": "36afb7b1-04f9-494b-a4d8-f85ddadc785b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# relevant_trips = list(test_group3.trip_instance_key.unique())"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "96335b84-31a9-4887-b97f-8ebc35610220",
+ "metadata": {},
+ "source": [
+ "### C2 \n",
+ "* https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C2_triangulate_vp.py\n",
+ "* Break it apart and check it out later to undersatnd what's happening\n",
+ "* No need to `subset_usable_vp` because already filtered above."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 74,
+ "id": "de7fba7a-6796-4e64-884c-55d636be2c12",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def triangulate_vp(\n",
+ " ddf: dd.DataFrame, \n",
+ " group_cols: list = [\"trip_instance_key\"]\n",
+ ") -> np.ndarray:\n",
+ " \"\"\"\n",
+ " Grab a sample of vehicle positions for each trip to triangulate distance.\n",
+ " These vp already sjoined onto the shape.\n",
+ " Roughly pick vp at equally spaced intervals.\n",
+ " \n",
+ " Dask aggregation can't group and use lambda to create list of possible \n",
+ " vp_idx.\n",
+ " \"\"\" \n",
+ " grouped_ddf = ddf.groupby(group_cols, observed=True, group_keys=False)\n",
+ "\n",
+ " min_df = (grouped_ddf\n",
+ " .agg({\"vp_idx\": \"min\"})\n",
+ " .rename(columns = {\"vp_idx\": \"min_vp_idx\"})\n",
+ " )\n",
+ "\n",
+ " max_df = (grouped_ddf\n",
+ " .agg({\"vp_idx\": \"max\"})\n",
+ " .rename(columns = {\"vp_idx\": \"max_vp_idx\"})\n",
+ " )\n",
+ " \n",
+ " vp_range = dd.merge(\n",
+ " min_df,\n",
+ " max_df,\n",
+ " left_index = True,\n",
+ " right_index = True,\n",
+ " how = \"inner\"\n",
+ " )\n",
+ "\n",
+ " vp_range = vp_range.persist()\n",
+ " \n",
+ " vp_range[\"range_diff\"] = vp_range.max_vp_idx - vp_range.min_vp_idx\n",
+ " \n",
+ " vp_range = vp_range.assign(\n",
+ " p25_vp_idx = (vp_range.range_diff * 0.25 + vp_range.min_vp_idx\n",
+ " ).round(0).astype(\"int64\"),\n",
+ " p50_vp_idx = (vp_range.range_diff * 0.5 + vp_range.min_vp_idx\n",
+ " ).round(0).astype(\"int64\"),\n",
+ " p75_vp_idx = (vp_range.range_diff * 0.75 + vp_range.min_vp_idx\n",
+ " ).round(0).astype(\"int64\"),\n",
+ " )\n",
+ " \n",
+ " vp_idx_cols = [\n",
+ " \"min_vp_idx\", \n",
+ " \"p25_vp_idx\",\n",
+ " \"p50_vp_idx\", \n",
+ " \"p75_vp_idx\",\n",
+ " \"max_vp_idx\"\n",
+ " ]\n",
+ "\n",
+ " results = vp_range[vp_idx_cols].compute().to_numpy().flatten() \n",
+ " \n",
+ " results = list(results)\n",
+ " return results"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "44d836be-21d3-435e-9b35-a1d9863e6257",
+ "metadata": {},
+ "source": [
+ "#### Help: which df should I use to triangulate?\n",
+ "* Trips in which 0 of the points fall into the shapes should be excluded?\n",
+ "* Using the same df as in update completeness."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 75,
+ "id": "659a01af-3c01-4cc3-92ea-834a96c2106a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "1604702"
+ ]
+ },
+ "execution_count": 75,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(vp_filtered)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 76,
+ "id": "75d6fcc5-8c85-4c62-a063-62a7a73ecdc0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "triangulate_muni = triangulate_vp(vp_filtered, 'trip_instance_key')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 77,
+ "id": "91eab6e3-4e44-4f31-829b-1dcc9b2aebf5",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "list"
+ ]
+ },
+ "execution_count": 77,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(triangulate_muni)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 78,
+ "id": "8e709ae9-1c6d-4b55-ad5c-deb1d724276e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "trip_instance_key 9240\n",
+ "dtype: int64"
+ ]
+ },
+ "execution_count": 78,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vp_filtered[['trip_instance_key']].compute().nunique()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "3dac48a4-cabc-415d-822c-0162fa995efd",
+ "metadata": {},
+ "source": [
+ "#### 4th time loading `vp_usable`"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 79,
+ "id": "7807ccce-416c-4f03-a92a-2296edc1d19f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "vp_results = dd.read_parquet(f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n",
+ " columns = [\n",
+ " \"gtfs_dataset_key\", \"trip_instance_key\",\n",
+ " \"location_timestamp_local\",\n",
+ " \"x\", \"y\", \"vp_idx\"],\n",
+ " filters = [[('gtfs_dataset_name', \"==\", operator),\n",
+ " ('schedule_gtfs_dataset_key', '==', gtfs_key),\n",
+ " ('vp_idx', 'in', triangulate_muni)]]).compute()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 80,
+ "id": "d98556b6-4e16-4bce-b080-19bceadde4af",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "46198"
+ ]
+ },
+ "execution_count": 80,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "len(vp_results)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 81,
+ "id": "9f0282d0-c59f-4843-b23e-bb22be66404e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 81,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vp_results.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 82,
+ "id": "bbcab824-28a9-4fb3-828a-471b83966b39",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " location_timestamp_local | \n",
+ " x | \n",
+ " y | \n",
+ " vp_idx | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 11825575 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " d28f5a7bf8faf0aa6de5c4cf02c7c106 | \n",
+ " 2023-10-11 17:06:59 | \n",
+ " -122.48 | \n",
+ " 37.73 | \n",
+ " 11825575 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "11825575 c0e3039da063db95ebabd3fe4ee611a4 d28f5a7bf8faf0aa6de5c4cf02c7c106 \n",
+ "\n",
+ " location_timestamp_local x y vp_idx \n",
+ "11825575 2023-10-11 17:06:59 -122.48 37.73 11825575 "
+ ]
+ },
+ "execution_count": 82,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vp_results.sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 83,
+ "id": "63c04621-ad68-4e3d-9cec-133dbf0aa3b0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def merge_rt_scheduled_trips(\n",
+ " rt_trips: dd.DataFrame,\n",
+ " analysis_date: str,\n",
+ " group_cols: list = [\"trip_instance_key\"]) -> dd.DataFrame:\n",
+ " \"\"\"\n",
+ " Merge RT trips (vehicle positions) to scheduled trips \n",
+ " to get the shape_array_key.\n",
+ " Don't pull other scheduled trip columns now, wait until\n",
+ " after aggregation is done.\n",
+ " \"\"\"\n",
+ " trips = helpers.import_scheduled_trips(\n",
+ " analysis_date,\n",
+ " columns = group_cols + [\"shape_array_key\"],\n",
+ " get_pandas = True\n",
+ " )\n",
+ " \n",
+ " df = dd.merge(\n",
+ " rt_trips,\n",
+ " trips,\n",
+ " on = group_cols,\n",
+ " how = \"left\",\n",
+ " )\n",
+ " \n",
+ " return df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 84,
+ "id": "866b83cc-c0d9-4fc5-b794-f995e8b760de",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C2_triangulate_vp.py#L170-L180\n",
+ "vp_with_sched = (\n",
+ " merge_rt_scheduled_trips(\n",
+ " vp_results, \n",
+ " analysis_date, \n",
+ " group_cols = [\"trip_instance_key\"]\n",
+ " ).sort_values(\"vp_idx\")\n",
+ " .reset_index(drop=True)\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 85,
+ "id": "8b6f6495-aa13-4452-a2c9-bf8c1f7f49fd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.frame.DataFrame"
+ ]
+ },
+ "execution_count": 85,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(vp_with_sched)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 86,
+ "id": "a7b261ec-41b8-4f87-998d-a4b5fafc7ec2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Check triangualte worked \n",
+ "triangulate_check = vp_with_sched.groupby(['trip_instance_key']).agg({'location_timestamp_local':'nunique'}).reset_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 89,
+ "id": "bd19bfd5-48d1-4722-a67c-56069bf89e35",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "5 9239\n",
+ "3 1\n",
+ "Name: location_timestamp_local, dtype: int64"
+ ]
+ },
+ "execution_count": 89,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "triangulate_check.location_timestamp_local.value_counts()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 87,
+ "id": "24d761ba-cc50-406e-9538-e6f2f8705b75",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " location_timestamp_local | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 6824 | \n",
+ " be7a580802c3ec183904cac37e6c0afd | \n",
+ " 3 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key location_timestamp_local\n",
+ "6824 be7a580802c3ec183904cac37e6c0afd 3"
+ ]
+ },
+ "execution_count": 87,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "triangulate_check.loc[triangulate_check.location_timestamp_local == 3]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 88,
+ "id": "6e24fb38-f641-418d-99cc-c1b7aaf0e4a1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 9240.00\n",
+ "mean 5.00\n",
+ "std 0.02\n",
+ "min 3.00\n",
+ "25% 5.00\n",
+ "50% 5.00\n",
+ "75% 5.00\n",
+ "max 5.00\n",
+ "Name: location_timestamp_local, dtype: float64"
+ ]
+ },
+ "execution_count": 88,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "triangulate_check.location_timestamp_local.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 176,
+ "id": "546417f6-e033-43e5-ba70-fdc392697561",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " vp_idx | \n",
+ " location_timestamp_local | \n",
+ " trip_instance_key | \n",
+ " gtfs_dataset_key | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 12506123 | \n",
+ " 12506123 | \n",
+ " 2023-10-11 16:13:08 | \n",
+ " be7a580802c3ec183904cac37e6c0afd | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ "
\n",
+ " \n",
+ " 12506124 | \n",
+ " 12506124 | \n",
+ " 2023-10-11 17:01:19 | \n",
+ " be7a580802c3ec183904cac37e6c0afd | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ "
\n",
+ " \n",
+ " 12506125 | \n",
+ " 12506125 | \n",
+ " 2023-10-11 17:01:35 | \n",
+ " be7a580802c3ec183904cac37e6c0afd | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " vp_idx location_timestamp_local trip_instance_key \\\n",
+ "12506123 12506123 2023-10-11 16:13:08 be7a580802c3ec183904cac37e6c0afd \n",
+ "12506124 12506124 2023-10-11 17:01:19 be7a580802c3ec183904cac37e6c0afd \n",
+ "12506125 12506125 2023-10-11 17:01:35 be7a580802c3ec183904cac37e6c0afd \n",
+ "\n",
+ " gtfs_dataset_key \n",
+ "12506123 c0e3039da063db95ebabd3fe4ee611a4 \n",
+ "12506124 c0e3039da063db95ebabd3fe4ee611a4 \n",
+ "12506125 c0e3039da063db95ebabd3fe4ee611a4 "
+ ]
+ },
+ "execution_count": 176,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vp_filtered.loc[vp_filtered.trip_instance_key == \"be7a580802c3ec183904cac37e6c0afd\"].compute()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "687e9c19-daf3-4384-942a-947728bb0680",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### C3\n",
+ "* https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 91,
+ "id": "78c974a0-51f8-42a0-b256-1d8f7c22482e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L262-L265\n",
+ "vp_c3 = gpd.GeoDataFrame(\n",
+ " vp_with_sched,\n",
+ " geometry = gpd.points_from_xy(vp_with_sched.x, vp_with_sched.y, crs=WGS84)\n",
+ " ).to_crs(PROJECT_CRS).drop(columns = [\"x\", \"y\"])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 92,
+ "id": "410e5f12-24fc-443b-adb5-e05f9e12ba2a",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 92,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "vp_c3.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 93,
+ "id": "23c64d78-3c7f-4b58-9ccd-18159d9e1a4e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shapes_list = vp_c3.shape_array_key.unique().tolist()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 94,
+ "id": "b3bd3c05-7c1d-44e8-afe3-a28a7c6e6190",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shapes = helpers.import_scheduled_shapes(\n",
+ " analysis_date,\n",
+ " columns = [\"shape_array_key\",\"geometry\"],\n",
+ " filters = [[(\"shape_array_key\", \"in\", shapes_list)]],\n",
+ " get_pandas = True,\n",
+ " crs = PROJECT_CRS\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 95,
+ "id": "cbcf928d-c9ec-46e9-9685-48ca5efe8359",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(255, 2)"
+ ]
+ },
+ "execution_count": 95,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "shapes.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 96,
+ "id": "255037a4-079e-4586-beba-0c170f4c6fcb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "geopandas.geodataframe.GeoDataFrame"
+ ]
+ },
+ "execution_count": 96,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(shapes)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 97,
+ "id": "47bbe61e-e014-48c3-b85f-1dea06d5e56e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# shapes.explore('shape_array_key')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 98,
+ "id": "27c8fff8-f8a7-4906-a3f4-7faac4969bde",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L280-L287\n",
+ "c3_m1 = pd.merge(\n",
+ " vp_c3,\n",
+ " shapes,\n",
+ " on = \"shape_array_key\",\n",
+ " how = \"inner\"\n",
+ " ).rename(columns = {\"geometry_x\": \"vp_geometry\", \n",
+ " \"geometry_y\": \"shape_geometry\"}\n",
+ " ).set_geometry(\"vp_geometry\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 99,
+ "id": "7430576e-c571-45de-afb9-500c21336808",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L290-L293\n",
+ "shape_meters_geoseries = wrangle_shapes.project_point_geom_onto_linestring(\n",
+ " c3_m1,\n",
+ " \"shape_geometry\",\n",
+ " \"vp_geometry\",\n",
+ " )\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 100,
+ "id": "6631d594-78d7-494a-8bd2-86d2b0c19b1f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c3_m1[\"shape_meters\"] = shape_meters_geoseries"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "9c6f493b-53dd-4ef6-95ad-445a0562d633",
+ "metadata": {},
+ "source": [
+ "#### distance_and_seconds_elapsed\n",
+ "* Breaking apart to understand\n",
+ "* https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L290-L293"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 101,
+ "id": "50260f4c-91b2-4e89-afbb-a1dc12594d4d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dist_col = \"shape_meters\"\n",
+ "time_col = \"location_timestamp_local\"\n",
+ "group_cols = [\"gtfs_dataset_key\", \"trip_instance_key\"]\n",
+ "sort_cols = group_cols + [\"vp_idx\"]\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 102,
+ "id": "af0b69f8-5c48-433b-a732-958d12eb2e82",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "geopandas.geodataframe.GeoDataFrame"
+ ]
+ },
+ "execution_count": 102,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(c3_m1)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 103,
+ "id": "34e03a27-05bc-49fb-b285-1c9df094fc27",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c3_m1 = c3_m1.assign(\n",
+ " prior_dist = (c3_m1.sort_values(sort_cols)\n",
+ " .groupby(group_cols, \n",
+ " observed=True, group_keys=False)\n",
+ " [dist_col]\n",
+ " .apply(lambda x: x.shift(1))\n",
+ " ),\n",
+ " prior_time = (c3_m1.sort_values(sort_cols)\n",
+ " .groupby(group_cols, \n",
+ " observed=True, group_keys=False)\n",
+ " [time_col]\n",
+ " .apply(lambda x: x.shift(1))\n",
+ " ) \n",
+ " )\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 104,
+ "id": "ffbc94d1-090f-4a54-ac85-d5b155660ca8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# distance should be positive, but sometimes it's not, \n",
+ "# so use absolute value\n",
+ "c3_m1 = c3_m1.assign(\n",
+ " change_meters = abs(c3_m1[dist_col] - c3_m1.prior_dist),\n",
+ " change_sec = (c3_m1[time_col] - c3_m1.prior_time).divide(\n",
+ " np.timedelta64(1, 's'))\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 105,
+ "id": "a274e442-2a65-4c84-a5ba-dc35d068771c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "one_trip = c3_m1.loc[c3_m1.trip_instance_key == \"ec2ef3dc047b844d7abf2d035728e202\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 106,
+ "id": "2700eca7-ce6e-4c9d-92d4-7331fee5ffc3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "28840 NaT\n",
+ "28841 0 days 00:05:21\n",
+ "28842 0 days 00:05:21\n",
+ "28843 0 days 00:05:06\n",
+ "28844 0 days 00:05:21\n",
+ "dtype: timedelta64[ns]"
+ ]
+ },
+ "execution_count": 106,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(one_trip[time_col] - one_trip.prior_time).head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 107,
+ "id": "47e620ba-672a-4baf-bfbd-90ec5b7c6865",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " location_timestamp_local | \n",
+ " vp_idx | \n",
+ " shape_array_key | \n",
+ " vp_geometry | \n",
+ " shape_meters | \n",
+ " prior_dist | \n",
+ " prior_time | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 28840 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ec2ef3dc047b844d7abf2d035728e202 | \n",
+ " 2023-10-11 21:21:08 | \n",
+ " 12415722 | \n",
+ " 652d492c9725307ab5f725bb616ee4a0 | \n",
+ " POINT (-212402.440 -28559.730) | \n",
+ " 161.50 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 28841 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ec2ef3dc047b844d7abf2d035728e202 | \n",
+ " 2023-10-11 21:26:29 | \n",
+ " 12415738 | \n",
+ " 652d492c9725307ab5f725bb616ee4a0 | \n",
+ " POINT (-212088.927 -27551.761) | \n",
+ " 1454.71 | \n",
+ " 161.50 | \n",
+ " 2023-10-11 21:21:08 | \n",
+ " 1293.21 | \n",
+ " 321.00 | \n",
+ "
\n",
+ " \n",
+ " 28842 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ec2ef3dc047b844d7abf2d035728e202 | \n",
+ " 2023-10-11 21:31:50 | \n",
+ " 12415754 | \n",
+ " 652d492c9725307ab5f725bb616ee4a0 | \n",
+ " POINT (-212560.266 -26634.904) | \n",
+ " 2819.51 | \n",
+ " 1454.71 | \n",
+ " 2023-10-11 21:26:29 | \n",
+ " 1364.80 | \n",
+ " 321.00 | \n",
+ "
\n",
+ " \n",
+ " 28843 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ec2ef3dc047b844d7abf2d035728e202 | \n",
+ " 2023-10-11 21:36:56 | \n",
+ " 12415769 | \n",
+ " 652d492c9725307ab5f725bb616ee4a0 | \n",
+ " POINT (-212711.278 -26637.392) | \n",
+ " 2970.41 | \n",
+ " 2819.51 | \n",
+ " 2023-10-11 21:31:50 | \n",
+ " 150.90 | \n",
+ " 306.00 | \n",
+ "
\n",
+ " \n",
+ " 28844 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ec2ef3dc047b844d7abf2d035728e202 | \n",
+ " 2023-10-11 21:42:17 | \n",
+ " 12415785 | \n",
+ " 652d492c9725307ab5f725bb616ee4a0 | \n",
+ " POINT (-212752.276 -26640.128) | \n",
+ " 3016.19 | \n",
+ " 2970.41 | \n",
+ " 2023-10-11 21:36:56 | \n",
+ " 45.78 | \n",
+ " 321.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "28840 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n",
+ "28841 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n",
+ "28842 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n",
+ "28843 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n",
+ "28844 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n",
+ "\n",
+ " location_timestamp_local vp_idx shape_array_key \\\n",
+ "28840 2023-10-11 21:21:08 12415722 652d492c9725307ab5f725bb616ee4a0 \n",
+ "28841 2023-10-11 21:26:29 12415738 652d492c9725307ab5f725bb616ee4a0 \n",
+ "28842 2023-10-11 21:31:50 12415754 652d492c9725307ab5f725bb616ee4a0 \n",
+ "28843 2023-10-11 21:36:56 12415769 652d492c9725307ab5f725bb616ee4a0 \n",
+ "28844 2023-10-11 21:42:17 12415785 652d492c9725307ab5f725bb616ee4a0 \n",
+ "\n",
+ " vp_geometry shape_meters prior_dist \\\n",
+ "28840 POINT (-212402.440 -28559.730) 161.50 NaN \n",
+ "28841 POINT (-212088.927 -27551.761) 1454.71 161.50 \n",
+ "28842 POINT (-212560.266 -26634.904) 2819.51 1454.71 \n",
+ "28843 POINT (-212711.278 -26637.392) 2970.41 2819.51 \n",
+ "28844 POINT (-212752.276 -26640.128) 3016.19 2970.41 \n",
+ "\n",
+ " prior_time change_meters change_sec \n",
+ "28840 NaT NaN NaN \n",
+ "28841 2023-10-11 21:21:08 1293.21 321.00 \n",
+ "28842 2023-10-11 21:26:29 1364.80 321.00 \n",
+ "28843 2023-10-11 21:31:50 150.90 306.00 \n",
+ "28844 2023-10-11 21:36:56 45.78 321.00 "
+ ]
+ },
+ "execution_count": 107,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "one_trip.drop(columns = ['shape_geometry'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 108,
+ "id": "da8c879f-b7d5-40d5-95ea-540a68dfc9f3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Test with one trip\n",
+ "one_trip2 = (one_trip.groupby(group_cols, \n",
+ " observed=True, group_keys=False)\n",
+ " .agg({\"change_meters\": \"sum\", \n",
+ " \"change_sec\": \"sum\"})\n",
+ " .reset_index()\n",
+ " )\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 109,
+ "id": "91e1e3cd-3ddd-4f0d-ac4c-9d79bb136828",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ec2ef3dc047b844d7abf2d035728e202 | \n",
+ " 2854.69 | \n",
+ " 1269.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "0 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n",
+ "\n",
+ " change_meters change_sec \n",
+ "0 2854.69 1269.00 "
+ ]
+ },
+ "execution_count": 109,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "one_trip2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 110,
+ "id": "a501338c-5c10-4872-81e0-483ef8f44171",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "2.237"
+ ]
+ },
+ "execution_count": 110,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "MPH_PER_MPS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 111,
+ "id": "7e67e8b6-0d28-4222-9fd4-8cb85eec9a1f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ec2ef3dc047b844d7abf2d035728e202 | \n",
+ " 2854.69 | \n",
+ " 1269.00 | \n",
+ " 5.03 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "0 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n",
+ "\n",
+ " change_meters change_sec speed_mph \n",
+ "0 2854.69 1269.00 5.03 "
+ ]
+ },
+ "execution_count": 111,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "one_trip2.assign(\n",
+ " speed_mph = (one_trip2.change_meters.divide(one_trip2.change_sec) * \n",
+ " MPH_PER_MPS)\n",
+ " )\n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 112,
+ "id": "e687d2c6-51ee-481b-bc23-3540a5b6d09b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c3_m2 = (c3_m1.groupby(group_cols, \n",
+ " observed=True, group_keys=False)\n",
+ " .agg({\"change_meters\": \"sum\", \n",
+ " \"change_sec\": \"sum\"})\n",
+ " .reset_index()\n",
+ " )\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 113,
+ "id": "2d5dec48-a014-4c64-ac73-cd0a867b12fb",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c3_m2 = c3_m2.assign(\n",
+ " speed_mph = (c3_m2.change_meters.divide(c3_m1.change_sec) * \n",
+ " MPH_PER_MPS)\n",
+ " )\n",
+ " \n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 114,
+ "id": "00560062-5a81-4035-baad-e59c631166e4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "count 7392.00\n",
+ "mean 29.82\n",
+ "std 21.56\n",
+ "min 0.00\n",
+ "25% 17.66\n",
+ "50% 25.23\n",
+ "75% 35.86\n",
+ "max 340.24\n",
+ "Name: speed_mph, dtype: float64"
+ ]
+ },
+ "execution_count": 114,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m2.speed_mph.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 115,
+ "id": "dac3c329-0602-4391-a6cb-5eb66d5fa656",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# muni.loc[muni.vp_in_shape == 0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "54c52ae9-03a7-4986-a62f-21ee53e89630",
+ "metadata": {},
+ "source": [
+ "#### See why a trip would have speed_mph of 0"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 116,
+ "id": "93e5d405-bd05-4108-89ac-83ffbaef40d9",
+ "metadata": {
+ "scrolled": true,
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " location_timestamp_local | \n",
+ " vp_idx | \n",
+ " shape_array_key | \n",
+ " vp_geometry | \n",
+ " shape_meters | \n",
+ " prior_dist | \n",
+ " prior_time | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 35703 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 04ae6ea9655473bdcaf9b80d443558ae | \n",
+ " 2023-10-11 18:34:47 | \n",
+ " 12628774 | \n",
+ " f82328b9817126227af1aa52033d8847 | \n",
+ " POINT (-212105.135 -21893.355) | \n",
+ " 1411.65 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 35704 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 04ae6ea9655473bdcaf9b80d443558ae | \n",
+ " 2023-10-11 18:44:18 | \n",
+ " 12628802 | \n",
+ " f82328b9817126227af1aa52033d8847 | \n",
+ " POINT (-212100.059 -21901.268) | \n",
+ " 1411.65 | \n",
+ " 1411.65 | \n",
+ " 2023-10-11 18:34:47 | \n",
+ " 0.00 | \n",
+ " 571.00 | \n",
+ "
\n",
+ " \n",
+ " 35705 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 04ae6ea9655473bdcaf9b80d443558ae | \n",
+ " 2023-10-11 18:53:17 | \n",
+ " 12628829 | \n",
+ " f82328b9817126227af1aa52033d8847 | \n",
+ " POINT (-212100.087 -21902.380) | \n",
+ " 1411.65 | \n",
+ " 1411.65 | \n",
+ " 2023-10-11 18:44:18 | \n",
+ " 0.00 | \n",
+ " 539.00 | \n",
+ "
\n",
+ " \n",
+ " 35706 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 04ae6ea9655473bdcaf9b80d443558ae | \n",
+ " 2023-10-11 19:02:17 | \n",
+ " 12628856 | \n",
+ " f82328b9817126227af1aa52033d8847 | \n",
+ " POINT (-212100.228 -21907.936) | \n",
+ " 1411.65 | \n",
+ " 1411.65 | \n",
+ " 2023-10-11 18:53:17 | \n",
+ " 0.00 | \n",
+ " 540.00 | \n",
+ "
\n",
+ " \n",
+ " 35707 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 04ae6ea9655473bdcaf9b80d443558ae | \n",
+ " 2023-10-11 19:11:31 | \n",
+ " 12628884 | \n",
+ " f82328b9817126227af1aa52033d8847 | \n",
+ " POINT (-212098.611 -21913.537) | \n",
+ " 1411.65 | \n",
+ " 1411.65 | \n",
+ " 2023-10-11 19:02:17 | \n",
+ " 0.00 | \n",
+ " 554.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "35703 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n",
+ "35704 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n",
+ "35705 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n",
+ "35706 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n",
+ "35707 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n",
+ "\n",
+ " location_timestamp_local vp_idx shape_array_key \\\n",
+ "35703 2023-10-11 18:34:47 12628774 f82328b9817126227af1aa52033d8847 \n",
+ "35704 2023-10-11 18:44:18 12628802 f82328b9817126227af1aa52033d8847 \n",
+ "35705 2023-10-11 18:53:17 12628829 f82328b9817126227af1aa52033d8847 \n",
+ "35706 2023-10-11 19:02:17 12628856 f82328b9817126227af1aa52033d8847 \n",
+ "35707 2023-10-11 19:11:31 12628884 f82328b9817126227af1aa52033d8847 \n",
+ "\n",
+ " vp_geometry shape_meters prior_dist \\\n",
+ "35703 POINT (-212105.135 -21893.355) 1411.65 NaN \n",
+ "35704 POINT (-212100.059 -21901.268) 1411.65 1411.65 \n",
+ "35705 POINT (-212100.087 -21902.380) 1411.65 1411.65 \n",
+ "35706 POINT (-212100.228 -21907.936) 1411.65 1411.65 \n",
+ "35707 POINT (-212098.611 -21913.537) 1411.65 1411.65 \n",
+ "\n",
+ " prior_time change_meters change_sec \n",
+ "35703 NaT NaN NaN \n",
+ "35704 2023-10-11 18:34:47 0.00 571.00 \n",
+ "35705 2023-10-11 18:44:18 0.00 539.00 \n",
+ "35706 2023-10-11 18:53:17 0.00 540.00 \n",
+ "35707 2023-10-11 19:02:17 0.00 554.00 "
+ ]
+ },
+ "execution_count": 116,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m1.loc[c3_m1.trip_instance_key == \"04ae6ea9655473bdcaf9b80d443558ae\"].drop(columns = ['shape_geometry'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 209,
+ "id": "a303aab1-a574-4dbb-9a62-fff52cbd3070",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "RangeIndex: 9240 entries, 0 to 9239\n",
+ "Data columns (total 5 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 gtfs_dataset_key 9240 non-null category\n",
+ " 1 trip_instance_key 9240 non-null object \n",
+ " 2 change_meters 9240 non-null float64 \n",
+ " 3 change_sec 9240 non-null float64 \n",
+ " 4 speed_mph 7392 non-null float64 \n",
+ "dtypes: category(1), float64(3), object(1)\n",
+ "memory usage: 298.0+ KB\n"
+ ]
+ }
+ ],
+ "source": [
+ "c3_m2.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 117,
+ "id": "91db6999-f3f8-4001-bdda-4ad59abeeef7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 167 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 04ae6ea9655473bdcaf9b80d443558ae | \n",
+ " 0.00 | \n",
+ " 2204.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 863 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 183990ffbd08d7d7df0308b35227a058 | \n",
+ " 0.00 | \n",
+ " 1530.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 1128 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 1fe28ce8669a7f946f272faf80d80ce7 | \n",
+ " 0.00 | \n",
+ " 1379.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 2854 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 5056febd44cda31254459111e1fc79d0 | \n",
+ " 0.00 | \n",
+ " 875.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 3213 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 5a82f8c02f2036db77b1979f4ae228f5 | \n",
+ " 0.00 | \n",
+ " 803.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 3232 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 5b07e2bf68a16c54d798ef4028e02026 | \n",
+ " 0.00 | \n",
+ " 1004.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 3937 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 6ee4a9165f47b59cb3e10b0125dd72e8 | \n",
+ " 0.00 | \n",
+ " 649.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 4562 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 7ff345e91303290951ef0665a57dc90e | \n",
+ " 0.00 | \n",
+ " 1403.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 6758 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " bcb78da06a56761d2a3b8b16a3436f3d | \n",
+ " 0.00 | \n",
+ " 2000.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 7043 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " c3fc63ec2ff7a35c0f821659075be708 | \n",
+ " 0.00 | \n",
+ " 1173.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 7239 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " c992295be425ff2d17de0f0f29bec301 | \n",
+ " 0.00 | \n",
+ " 877.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 7543 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " d21ff3a0d37a484dc4f979d052652565 | \n",
+ " 0.00 | \n",
+ " 873.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ " 8431 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ea8751445dcd6a175bd23ec5fc423d0c | \n",
+ " 0.00 | \n",
+ " 866.00 | \n",
+ " 0.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "167 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n",
+ "863 c0e3039da063db95ebabd3fe4ee611a4 183990ffbd08d7d7df0308b35227a058 \n",
+ "1128 c0e3039da063db95ebabd3fe4ee611a4 1fe28ce8669a7f946f272faf80d80ce7 \n",
+ "2854 c0e3039da063db95ebabd3fe4ee611a4 5056febd44cda31254459111e1fc79d0 \n",
+ "3213 c0e3039da063db95ebabd3fe4ee611a4 5a82f8c02f2036db77b1979f4ae228f5 \n",
+ "3232 c0e3039da063db95ebabd3fe4ee611a4 5b07e2bf68a16c54d798ef4028e02026 \n",
+ "3937 c0e3039da063db95ebabd3fe4ee611a4 6ee4a9165f47b59cb3e10b0125dd72e8 \n",
+ "4562 c0e3039da063db95ebabd3fe4ee611a4 7ff345e91303290951ef0665a57dc90e \n",
+ "6758 c0e3039da063db95ebabd3fe4ee611a4 bcb78da06a56761d2a3b8b16a3436f3d \n",
+ "7043 c0e3039da063db95ebabd3fe4ee611a4 c3fc63ec2ff7a35c0f821659075be708 \n",
+ "7239 c0e3039da063db95ebabd3fe4ee611a4 c992295be425ff2d17de0f0f29bec301 \n",
+ "7543 c0e3039da063db95ebabd3fe4ee611a4 d21ff3a0d37a484dc4f979d052652565 \n",
+ "8431 c0e3039da063db95ebabd3fe4ee611a4 ea8751445dcd6a175bd23ec5fc423d0c \n",
+ "\n",
+ " change_meters change_sec speed_mph \n",
+ "167 0.00 2204.00 0.00 \n",
+ "863 0.00 1530.00 0.00 \n",
+ "1128 0.00 1379.00 0.00 \n",
+ "2854 0.00 875.00 0.00 \n",
+ "3213 0.00 803.00 0.00 \n",
+ "3232 0.00 1004.00 0.00 \n",
+ "3937 0.00 649.00 0.00 \n",
+ "4562 0.00 1403.00 0.00 \n",
+ "6758 0.00 2000.00 0.00 \n",
+ "7043 0.00 1173.00 0.00 \n",
+ "7239 0.00 877.00 0.00 \n",
+ "7543 0.00 873.00 0.00 \n",
+ "8431 0.00 866.00 0.00 "
+ ]
+ },
+ "execution_count": 117,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m2.loc[c3_m2.speed_mph == 0]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "328178ff-9e1e-48b6-9580-ae6b557f0987",
+ "metadata": {},
+ "source": [
+ "#### add_scheduled_trip_columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 118,
+ "id": "e465113c-2bc5-4792-bd10-822184882fe6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Attach scheduled trip columns, like route, direction, time_of_day\n",
+ "group_cols = [\"trip_instance_key\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 119,
+ "id": "25b7221d-725c-4f9a-9728-497e6b46464f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "keep_cols = [\n",
+ " \"gtfs_dataset_key\",\n",
+ " \"direction_id\", \n",
+ " \"route_id\", \"route_short_name\", \"route_long_name\", \"route_desc\",\n",
+ " ] + group_cols"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 120,
+ "id": "ab221f62-4c2b-4700-8612-0b491432c36f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "crosswalk = helpers.import_scheduled_trips(\n",
+ " analysis_date, \n",
+ " columns = keep_cols, \n",
+ " get_pandas = True\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 121,
+ "id": "29ddf9da-454a-40d6-bc2c-ab2b74504cbe",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " direction_id | \n",
+ " route_id | \n",
+ " route_short_name | \n",
+ " route_long_name | \n",
+ " route_desc | \n",
+ " trip_instance_key | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1770249a5a2e770ca90628434d4934b1 | \n",
+ " 1.00 | \n",
+ " 3402 | \n",
+ " Route 11 | \n",
+ " Route 11 | \n",
+ " PACIFIC VIEW MALL via TELEPHONE RD | \n",
+ " 1b3cc71bbb3c3166e8c5540ca26a97ba | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1770249a5a2e770ca90628434d4934b1 | \n",
+ " 1.00 | \n",
+ " 3402 | \n",
+ " Route 11 | \n",
+ " Route 11 | \n",
+ " PACIFIC VIEW MALL via TELEPHONE RD | \n",
+ " f780ba65965e61d394635fd80fc81232 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " schedule_gtfs_dataset_key direction_id route_id route_short_name \\\n",
+ "0 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n",
+ "1 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n",
+ "\n",
+ " route_long_name route_desc \\\n",
+ "0 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n",
+ "1 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n",
+ "\n",
+ " trip_instance_key \n",
+ "0 1b3cc71bbb3c3166e8c5540ca26a97ba \n",
+ "1 f780ba65965e61d394635fd80fc81232 "
+ ]
+ },
+ "execution_count": 121,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "crosswalk.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 122,
+ "id": "774e182c-9157-47e3-9c95-160a331353c6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 123,
+ "id": "e83a2e56-3fe7-400f-8a67-9bad9989a5dc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " route_id | \n",
+ " direction_id | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 014d0998350083249a9eb310635548c2 | \n",
+ " 10866826 | \n",
+ " 1.00 | \n",
+ " 10866826:1 | \n",
+ " 80d84c820ca200c0b3d1791185c72b56 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 014d0998350083249a9eb310635548c2 | \n",
+ " 10866849 | \n",
+ " 1.00 | \n",
+ " 10866849:1 | \n",
+ " 99faaf65ccc65b3ec0e6704765b60195 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " schedule_gtfs_dataset_key route_id direction_id common_shape_id \\\n",
+ "0 014d0998350083249a9eb310635548c2 10866826 1.00 10866826:1 \n",
+ "1 014d0998350083249a9eb310635548c2 10866849 1.00 10866849:1 \n",
+ "\n",
+ " shape_array_key \n",
+ "0 80d84c820ca200c0b3d1791185c72b56 \n",
+ "1 99faaf65ccc65b3ec0e6704765b60195 "
+ ]
+ },
+ "execution_count": 123,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "common_shape.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 124,
+ "id": "ada2f87d-85c4-4999-926e-7dbb0cc57b90",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "crosswalk2 = pd.merge(\n",
+ " crosswalk,\n",
+ " common_shape,\n",
+ " on = [\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n",
+ " how = \"inner\"\n",
+ " ).astype({\"direction_id\": \"Int64\"})\n",
+ " "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 125,
+ "id": "c473c35b-ac7c-4839-81eb-3b31ec25ca38",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " direction_id | \n",
+ " route_id | \n",
+ " route_short_name | \n",
+ " route_long_name | \n",
+ " route_desc | \n",
+ " trip_instance_key | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 84626 | \n",
+ " 3c275e5acf8974e1afd765bd3011424c | \n",
+ " 1 | \n",
+ " 59 | \n",
+ " HD | \n",
+ " Hospital: Direct | \n",
+ " None | \n",
+ " 966a69a34ccc9b82dc65ae82346a12e4 | \n",
+ " 59:2 | \n",
+ " b47002305320d71375303e2de926642a | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " schedule_gtfs_dataset_key direction_id route_id \\\n",
+ "84626 3c275e5acf8974e1afd765bd3011424c 1 59 \n",
+ "\n",
+ " route_short_name route_long_name route_desc \\\n",
+ "84626 HD Hospital: Direct None \n",
+ "\n",
+ " trip_instance_key common_shape_id \\\n",
+ "84626 966a69a34ccc9b82dc65ae82346a12e4 59:2 \n",
+ "\n",
+ " shape_array_key \n",
+ "84626 b47002305320d71375303e2de926642a "
+ ]
+ },
+ "execution_count": 125,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "crosswalk2.sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 126,
+ "id": "866defff-f203-4f25-a150-aea22333c7f2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 127,
+ "id": "d52698d8-7021-4766-92b2-dd6674928944",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((101973, 5), 101973)"
+ ]
+ },
+ "execution_count": 127,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time_of_day.shape, time_of_day.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 128,
+ "id": "bc2161ba-d506-41a6-8517-63891af5f8e9",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " service_hours | \n",
+ " trip_first_departure_datetime_pacific | \n",
+ " time_of_day | \n",
+ " service_minutes | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1b3cc71bbb3c3166e8c5540ca26a97ba | \n",
+ " 0.60 | \n",
+ " 2023-10-11 10:40:00 | \n",
+ " Midday | \n",
+ " 36.00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " f780ba65965e61d394635fd80fc81232 | \n",
+ " 0.55 | \n",
+ " 2023-10-11 17:40:00 | \n",
+ " PM Peak | \n",
+ " 33.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key service_hours \\\n",
+ "0 1b3cc71bbb3c3166e8c5540ca26a97ba 0.60 \n",
+ "1 f780ba65965e61d394635fd80fc81232 0.55 \n",
+ "\n",
+ " trip_first_departure_datetime_pacific time_of_day service_minutes \n",
+ "0 2023-10-11 10:40:00 Midday 36.00 \n",
+ "1 2023-10-11 17:40:00 PM Peak 33.00 "
+ ]
+ },
+ "execution_count": 128,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "time_of_day.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 129,
+ "id": "834ce86a-8735-4d06-9be9-ef96d2fbbec0",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "crosswalk2 = portfolio_utils.add_route_name(\n",
+ " crosswalk2\n",
+ " ).drop(columns = [\"route_short_name\", \"route_long_name\", \"route_desc\"])\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 130,
+ "id": "226915d5-2dce-45af-aeae-0d6f32f36f1e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " direction_id | \n",
+ " route_id | \n",
+ " trip_instance_key | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " route_name_used | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1770249a5a2e770ca90628434d4934b1 | \n",
+ " 1 | \n",
+ " 3402 | \n",
+ " 1b3cc71bbb3c3166e8c5540ca26a97ba | \n",
+ " 8254 | \n",
+ " a669792e07a16b260139c6950011b759 | \n",
+ " PACIFIC VIEW MALL via TELEPHONE RD | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 1770249a5a2e770ca90628434d4934b1 | \n",
+ " 1 | \n",
+ " 3402 | \n",
+ " f780ba65965e61d394635fd80fc81232 | \n",
+ " 8254 | \n",
+ " a669792e07a16b260139c6950011b759 | \n",
+ " PACIFIC VIEW MALL via TELEPHONE RD | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " schedule_gtfs_dataset_key direction_id route_id \\\n",
+ "0 1770249a5a2e770ca90628434d4934b1 1 3402 \n",
+ "1 1770249a5a2e770ca90628434d4934b1 1 3402 \n",
+ "\n",
+ " trip_instance_key common_shape_id \\\n",
+ "0 1b3cc71bbb3c3166e8c5540ca26a97ba 8254 \n",
+ "1 f780ba65965e61d394635fd80fc81232 8254 \n",
+ "\n",
+ " shape_array_key route_name_used \n",
+ "0 a669792e07a16b260139c6950011b759 PACIFIC VIEW MALL via TELEPHONE RD \n",
+ "1 a669792e07a16b260139c6950011b759 PACIFIC VIEW MALL via TELEPHONE RD "
+ ]
+ },
+ "execution_count": 130,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "crosswalk2.head(2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 131,
+ "id": "03971a5f-a151-43bf-a417-2d7be7a1d37b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.frame.DataFrame"
+ ]
+ },
+ "execution_count": 131,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(c3_m2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 132,
+ "id": "0163454e-5e0c-41d5-895d-8e0a6985ff29",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L116-L124\n",
+ "c3_m3 = dd.merge(\n",
+ " c3_m2,\n",
+ " crosswalk2,\n",
+ " on = group_cols,\n",
+ " how = \"left\",\n",
+ " ).merge(\n",
+ " time_of_day,\n",
+ " on = group_cols,\n",
+ " how = \"left\"\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 133,
+ "id": "39bd5b37-d195-4dc1-85d4-e1c54b298248",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.frame.DataFrame"
+ ]
+ },
+ "execution_count": 133,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "type(c3_m3)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 134,
+ "id": "49e475a8-530d-4b12-9c9d-1741b0fc39dd",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['gtfs_dataset_key', 'trip_instance_key', 'change_meters', 'change_sec',\n",
+ " 'speed_mph', 'schedule_gtfs_dataset_key', 'direction_id', 'route_id',\n",
+ " 'common_shape_id', 'shape_array_key', 'route_name_used',\n",
+ " 'service_hours', 'trip_first_departure_datetime_pacific', 'time_of_day',\n",
+ " 'service_minutes'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 134,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m3.columns"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "7d40ef54-9b9e-40ea-9651-1a00a36d8105",
+ "metadata": {},
+ "source": [
+ "#### avg_route_speeds_by_time_of_day"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 135,
+ "id": "3e325d57-160f-40b5-8a7c-0692173e8874",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L166\n",
+ "def drop_extremely_low_and_high_speeds(\n",
+ " df: pd.DataFrame, \n",
+ " speed_range: tuple\n",
+ ") -> pd.DataFrame:\n",
+ " \"\"\"\n",
+ " Descriptives show the 5th percentile is around 5 mph, \n",
+ " and 95th percentile is around 25 mph.\n",
+ " \n",
+ " There are some weird calculations for <3 mph, and even\n",
+ " some negative values, so let's exclude those...maybe\n",
+ " the vp is not traveling across the entirety of the shape.\n",
+ " \n",
+ " Exclude unusually high speeds, over 70 mph.\n",
+ " \"\"\"\n",
+ " low, high = speed_range\n",
+ " \n",
+ " df2 = df[(df.speed_mph >= low) & \n",
+ " (df.speed_mph <= high)\n",
+ " ].reset_index(drop=True)\n",
+ " \n",
+ " return df2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 136,
+ "id": "f5c8dc5d-61cd-4883-a089-000ca1e11d76",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "c3_m3 = drop_extremely_low_and_high_speeds(c3_m3, speed_range = (3, 70))"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 178,
+ "id": "983a896a-e40b-4c46-93a9-78643c7e1e69",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((6993, 15), (46198, 12), (9240, 5))"
+ ]
+ },
+ "execution_count": 178,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m3.shape, c3_m1.shape, c3_m2.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 138,
+ "id": "a9be0c3a-f224-40f4-b807-bb7fc387e344",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6993"
+ ]
+ },
+ "execution_count": 138,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m3.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 139,
+ "id": "33dbfa67-8bef-457c-b415-c5826e41601c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 139,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m1.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 177,
+ "id": "52ae9db4-6e23-4c8b-b421-cba1c583b31f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " direction_id | \n",
+ " route_id | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " route_name_used | \n",
+ " service_hours | \n",
+ " trip_first_departure_datetime_pacific | \n",
+ " time_of_day | \n",
+ " service_minutes | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [gtfs_dataset_key, trip_instance_key, change_meters, change_sec, speed_mph, schedule_gtfs_dataset_key, direction_id, route_id, common_shape_id, shape_array_key, route_name_used, service_hours, trip_first_departure_datetime_pacific, time_of_day, service_minutes]\n",
+ "Index: []"
+ ]
+ },
+ "execution_count": 177,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m3.loc[c3_m3.trip_instance_key == \"04ae6ea9655473bdcaf9b80d443558ae\"]"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b00d1985-6f8d-47da-9164-422fc6b515d0",
+ "metadata": {},
+ "source": [
+ "#### Question: Is this common to have 25% of trips dropped?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 140,
+ "id": "8ffc55df-73e0-412e-b428-018184b5efff",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "0.7568181818181818"
+ ]
+ },
+ "execution_count": 140,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "6993/9240"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 141,
+ "id": "626c4e6c-fc59-4bb0-91bb-007887d3e75b",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "group_cols = [\n",
+ " \"gtfs_dataset_key\", \"time_of_day\",\n",
+ " \"route_id\", \"direction_id\",\n",
+ " \"route_name_used\",\n",
+ " \"common_shape_id\", \"shape_array_key\"\n",
+ " ]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 142,
+ "id": "c2d6e858-4d63-4ffc-baea-d8a5ab32987a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# test with one route\n",
+ "one_route = c3_m3.loc[c3_m3.route_id == \"14R\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 143,
+ "id": "a6a2c843-022e-4e4e-bd1c-eace7dd25d59",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "((193, 15), 193)"
+ ]
+ },
+ "execution_count": 143,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "one_route.shape, one_route.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "id": "c8bc46f0-bcd3-4aaa-8c6c-5e38b0fe7210",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " direction_id | \n",
+ " route_id | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " route_name_used | \n",
+ " service_hours | \n",
+ " trip_first_departure_datetime_pacific | \n",
+ " time_of_day | \n",
+ " service_minutes | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 73 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 0317ba64ff87ddd8b3dd626368a341a8 | \n",
+ " 13487.57 | \n",
+ " 5379.00 | \n",
+ " 40.83 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 1 | \n",
+ " 14R | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 5am-10pm daily | \n",
+ " 0.90 | \n",
+ " 2023-10-11 18:25:00 | \n",
+ " PM Peak | \n",
+ " 54.00 | \n",
+ "
\n",
+ " \n",
+ " 144 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 059a49f356f6b8bdb7f12458dafc68e4 | \n",
+ " 20787.34 | \n",
+ " 5069.00 | \n",
+ " 59.09 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 1 | \n",
+ " 14R | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 5am-10pm daily | \n",
+ " 0.75 | \n",
+ " 2023-10-11 08:30:00 | \n",
+ " AM Peak | \n",
+ " 45.00 | \n",
+ "
\n",
+ " \n",
+ " 173 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 06dc64f00c021e66d0c7bd52df8fed9c | \n",
+ " 12924.90 | \n",
+ " 2594.00 | \n",
+ " 32.27 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 14R | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 5am-10pm daily | \n",
+ " 0.85 | \n",
+ " 2023-10-11 19:36:00 | \n",
+ " PM Peak | \n",
+ " 51.00 | \n",
+ "
\n",
+ " \n",
+ " 202 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 07ab77edf2b69295d643794e3cd497af | \n",
+ " 13682.26 | \n",
+ " 4427.00 | \n",
+ " 32.49 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 1 | \n",
+ " 14R | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 5am-10pm daily | \n",
+ " 0.77 | \n",
+ " 2023-10-11 05:58:00 | \n",
+ " Early AM | \n",
+ " 46.00 | \n",
+ "
\n",
+ " \n",
+ " 230 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 08adb7403f95343c6c9579cb137c8c15 | \n",
+ " 13624.98 | \n",
+ " 5094.00 | \n",
+ " 33.46 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 1 | \n",
+ " 14R | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 5am-10pm daily | \n",
+ " 0.90 | \n",
+ " 2023-10-11 12:49:00 | \n",
+ " Midday | \n",
+ " 54.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "73 c0e3039da063db95ebabd3fe4ee611a4 0317ba64ff87ddd8b3dd626368a341a8 \n",
+ "144 c0e3039da063db95ebabd3fe4ee611a4 059a49f356f6b8bdb7f12458dafc68e4 \n",
+ "173 c0e3039da063db95ebabd3fe4ee611a4 06dc64f00c021e66d0c7bd52df8fed9c \n",
+ "202 c0e3039da063db95ebabd3fe4ee611a4 07ab77edf2b69295d643794e3cd497af \n",
+ "230 c0e3039da063db95ebabd3fe4ee611a4 08adb7403f95343c6c9579cb137c8c15 \n",
+ "\n",
+ " change_meters change_sec speed_mph schedule_gtfs_dataset_key \\\n",
+ "73 13487.57 5379.00 40.83 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "144 20787.34 5069.00 59.09 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "173 12924.90 2594.00 32.27 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "202 13682.26 4427.00 32.49 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "230 13624.98 5094.00 33.46 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "\n",
+ " direction_id route_id common_shape_id shape_array_key \\\n",
+ "73 1 14R 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "144 1 14R 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "173 0 14R 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "202 1 14R 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "230 1 14R 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "\n",
+ " route_name_used service_hours trip_first_departure_datetime_pacific \\\n",
+ "73 5am-10pm daily 0.90 2023-10-11 18:25:00 \n",
+ "144 5am-10pm daily 0.75 2023-10-11 08:30:00 \n",
+ "173 5am-10pm daily 0.85 2023-10-11 19:36:00 \n",
+ "202 5am-10pm daily 0.77 2023-10-11 05:58:00 \n",
+ "230 5am-10pm daily 0.90 2023-10-11 12:49:00 \n",
+ "\n",
+ " time_of_day service_minutes \n",
+ "73 PM Peak 54.00 \n",
+ "144 AM Peak 45.00 \n",
+ "173 PM Peak 51.00 \n",
+ "202 Early AM 46.00 \n",
+ "230 Midday 54.00 "
+ ]
+ },
+ "execution_count": 144,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "one_route.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 145,
+ "id": "8ef2f776-d9f0-4ca9-a9f5-b384040f2f05",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['gtfs_dataset_key',\n",
+ " 'time_of_day',\n",
+ " 'route_id',\n",
+ " 'direction_id',\n",
+ " 'route_name_used',\n",
+ " 'common_shape_id',\n",
+ " 'shape_array_key']"
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "group_cols"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "aa8cf621-b772-44d4-97e5-c11c738adfb0",
+ "metadata": {},
+ "source": [
+ "#### One route test"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 146,
+ "id": "10f541bc-1e94-4878-bb1e-2ae88ece99ab",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "one_route2 = (one_route.groupby(group_cols, \n",
+ " observed = True, group_keys = False)\n",
+ " .agg({\n",
+ " \"speed_mph\": \"mean\",\n",
+ " \"service_minutes\": \"mean\",\n",
+ " \"change_sec\": \"mean\",\n",
+ " \"trip_instance_key\": \"count\"\n",
+ " }).reset_index()\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 147,
+ "id": "3eac1991-7a9c-4a05-84f2-0769529609da",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(10, 11)"
+ ]
+ },
+ "execution_count": 147,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "one_route2.shape"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 148,
+ "id": "58647959-a118-403d-97bc-c1abb35d61b5",
+ "metadata": {
+ "tags": []
+ },
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " time_of_day | \n",
+ " route_id | \n",
+ " direction_id | \n",
+ " route_name_used | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " speed_mph | \n",
+ " service_minutes | \n",
+ " change_sec | \n",
+ " trip_instance_key | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " AM Peak | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 29.38 | \n",
+ " 43.19 | \n",
+ " 3139.44 | \n",
+ " 16 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " AM Peak | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 39.80 | \n",
+ " 50.36 | \n",
+ " 4804.36 | \n",
+ " 22 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Early AM | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 37.89 | \n",
+ " 42.00 | \n",
+ " 3555.80 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Early AM | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 34.42 | \n",
+ " 44.45 | \n",
+ " 4602.45 | \n",
+ " 11 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Evening | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 33.69 | \n",
+ " 51.00 | \n",
+ " 3014.44 | \n",
+ " 9 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Evening | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 32.25 | \n",
+ " 53.00 | \n",
+ " 5149.00 | \n",
+ " 5 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Midday | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 32.46 | \n",
+ " 46.94 | \n",
+ " 3371.86 | \n",
+ " 35 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Midday | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 31.30 | \n",
+ " 50.61 | \n",
+ " 4478.18 | \n",
+ " 33 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " PM Peak | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 38.41 | \n",
+ " 49.78 | \n",
+ " 3893.81 | \n",
+ " 36 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " PM Peak | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 31.35 | \n",
+ " 53.71 | \n",
+ " 4823.19 | \n",
+ " 21 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key time_of_day route_id direction_id \\\n",
+ "0 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 14R 0 \n",
+ "1 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 14R 1 \n",
+ "2 c0e3039da063db95ebabd3fe4ee611a4 Early AM 14R 0 \n",
+ "3 c0e3039da063db95ebabd3fe4ee611a4 Early AM 14R 1 \n",
+ "4 c0e3039da063db95ebabd3fe4ee611a4 Evening 14R 0 \n",
+ "5 c0e3039da063db95ebabd3fe4ee611a4 Evening 14R 1 \n",
+ "6 c0e3039da063db95ebabd3fe4ee611a4 Midday 14R 0 \n",
+ "7 c0e3039da063db95ebabd3fe4ee611a4 Midday 14R 1 \n",
+ "8 c0e3039da063db95ebabd3fe4ee611a4 PM Peak 14R 0 \n",
+ "9 c0e3039da063db95ebabd3fe4ee611a4 PM Peak 14R 1 \n",
+ "\n",
+ " route_name_used common_shape_id shape_array_key \\\n",
+ "0 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "1 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "2 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "3 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "4 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "5 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "6 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "7 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "8 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "9 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "\n",
+ " speed_mph service_minutes change_sec trip_instance_key \n",
+ "0 29.38 43.19 3139.44 16 \n",
+ "1 39.80 50.36 4804.36 22 \n",
+ "2 37.89 42.00 3555.80 5 \n",
+ "3 34.42 44.45 4602.45 11 \n",
+ "4 33.69 51.00 3014.44 9 \n",
+ "5 32.25 53.00 5149.00 5 \n",
+ "6 32.46 46.94 3371.86 35 \n",
+ "7 31.30 50.61 4478.18 33 \n",
+ "8 38.41 49.78 3893.81 36 \n",
+ "9 31.35 53.71 4823.19 21 "
+ ]
+ },
+ "execution_count": 148,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "one_route2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 149,
+ "id": "7514d742-2076-487c-84a6-235a6ac65ef4",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " time_of_day | \n",
+ " route_id | \n",
+ " direction_id | \n",
+ " route_name | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " speed_mph | \n",
+ " avg_sched_trip_min | \n",
+ " n_trips | \n",
+ " avg_rt_trip_min | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " AM Peak | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 29.40 | \n",
+ " 43.20 | \n",
+ " 16 | \n",
+ " 52.30 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " AM Peak | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 39.80 | \n",
+ " 50.40 | \n",
+ " 22 | \n",
+ " 80.10 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Early AM | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 37.90 | \n",
+ " 42.00 | \n",
+ " 5 | \n",
+ " 59.30 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Early AM | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 34.40 | \n",
+ " 44.50 | \n",
+ " 11 | \n",
+ " 76.70 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Evening | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 33.70 | \n",
+ " 51.00 | \n",
+ " 9 | \n",
+ " 50.20 | \n",
+ "
\n",
+ " \n",
+ " 5 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Evening | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 32.30 | \n",
+ " 53.00 | \n",
+ " 5 | \n",
+ " 85.80 | \n",
+ "
\n",
+ " \n",
+ " 6 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Midday | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 32.50 | \n",
+ " 46.90 | \n",
+ " 35 | \n",
+ " 56.20 | \n",
+ "
\n",
+ " \n",
+ " 7 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " Midday | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 31.30 | \n",
+ " 50.60 | \n",
+ " 33 | \n",
+ " 74.60 | \n",
+ "
\n",
+ " \n",
+ " 8 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " PM Peak | \n",
+ " 14R | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 11403 | \n",
+ " 453493134b4598d0dbdddb92e825ae24 | \n",
+ " 38.40 | \n",
+ " 49.80 | \n",
+ " 36 | \n",
+ " 64.90 | \n",
+ "
\n",
+ " \n",
+ " 9 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " PM Peak | \n",
+ " 14R | \n",
+ " 1 | \n",
+ " 5am-10pm daily | \n",
+ " 11452 | \n",
+ " defb027e7468735f91300a9851f3e1d7 | \n",
+ " 31.40 | \n",
+ " 53.70 | \n",
+ " 21 | \n",
+ " 80.40 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key time_of_day route_id direction_id \\\n",
+ "0 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 14R 0 \n",
+ "1 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 14R 1 \n",
+ "2 c0e3039da063db95ebabd3fe4ee611a4 Early AM 14R 0 \n",
+ "3 c0e3039da063db95ebabd3fe4ee611a4 Early AM 14R 1 \n",
+ "4 c0e3039da063db95ebabd3fe4ee611a4 Evening 14R 0 \n",
+ "5 c0e3039da063db95ebabd3fe4ee611a4 Evening 14R 1 \n",
+ "6 c0e3039da063db95ebabd3fe4ee611a4 Midday 14R 0 \n",
+ "7 c0e3039da063db95ebabd3fe4ee611a4 Midday 14R 1 \n",
+ "8 c0e3039da063db95ebabd3fe4ee611a4 PM Peak 14R 0 \n",
+ "9 c0e3039da063db95ebabd3fe4ee611a4 PM Peak 14R 1 \n",
+ "\n",
+ " route_name common_shape_id shape_array_key \\\n",
+ "0 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "1 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "2 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "3 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "4 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "5 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "6 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "7 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "8 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n",
+ "9 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n",
+ "\n",
+ " speed_mph avg_sched_trip_min n_trips avg_rt_trip_min \n",
+ "0 29.40 43.20 16 52.30 \n",
+ "1 39.80 50.40 22 80.10 \n",
+ "2 37.90 42.00 5 59.30 \n",
+ "3 34.40 44.50 11 76.70 \n",
+ "4 33.70 51.00 9 50.20 \n",
+ "5 32.30 53.00 5 85.80 \n",
+ "6 32.50 46.90 35 56.20 \n",
+ "7 31.30 50.60 33 74.60 \n",
+ "8 38.40 49.80 36 64.90 \n",
+ "9 31.40 53.70 21 80.40 "
+ ]
+ },
+ "execution_count": 149,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "one_route2.assign(\n",
+ " avg_rt_trip_min = one_route2.change_sec.divide(60).round(1),\n",
+ " service_minutes = one_route2.service_minutes.round(1),\n",
+ " speed_mph = one_route2.speed_mph.round(1),\n",
+ " ).rename(columns = {\n",
+ " \"service_minutes\": \"avg_sched_trip_min\",\n",
+ " \"trip_instance_key\": \"n_trips\",\n",
+ " \"route_name_used\": \"route_name\",\n",
+ " }).drop(columns = \"change_sec\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 150,
+ "id": "9279c8f2-ea9b-4476-b8e0-717774396fd2",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L168-L177\n",
+ "c3_m4 = (c3_m3.groupby(group_cols, \n",
+ " observed = True, group_keys = False)\n",
+ " .agg({\n",
+ " \"speed_mph\": \"mean\",\n",
+ " \"service_minutes\": \"mean\",\n",
+ " \"change_sec\": \"mean\",\n",
+ " \"trip_instance_key\": \"count\"\n",
+ " }).reset_index()\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 151,
+ "id": "d4440381-13fe-45c0-86a5-e0c3423c037f",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L178-L188\n",
+ "c3_m4 = c3_m4.assign(\n",
+ " avg_rt_trip_min = c3_m4.change_sec.divide(60).round(1),\n",
+ " service_minutes = c3_m4.service_minutes.round(1),\n",
+ " speed_mph = c3_m4.speed_mph.round(1),\n",
+ " ).rename(columns = {\n",
+ " \"service_minutes\": \"avg_sched_trip_min\",\n",
+ " \"trip_instance_key\": \"n_trips\",\n",
+ " \"route_name_used\": \"route_name\",\n",
+ " }).drop(columns = \"change_sec\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2c70074a-c4d4-4b73-a43f-7516f9ac9bdf",
+ "metadata": {},
+ "source": [
+ "#### Checks"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "d9186189-d512-48c6-b1eb-8253561df44d",
+ "metadata": {},
+ "source": [
+ "##### Shape 1"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 152,
+ "id": "d1b02f96-e561-4b68-87da-cc2c6838ec1d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shape_array_1 = \"0055ea6cd09cc68606d37851a6c91366\"\n",
+ "shape1_time = \"AM Peak\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 153,
+ "id": "3d9a7c26-3dd1-4589-8412-f0b763da274c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def check_shapes(shape_array, time_of_day):\n",
+ " display(c3_m4.loc[(c3_m4.shape_array_key == shape_array) & (c3_m4.time_of_day == time_of_day)])\n",
+ " display(c3_m3.loc[(c3_m3.shape_array_key == shape_array) & (c3_m3.time_of_day == time_of_day)])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 154,
+ "id": "fdacca64-50e2-4160-92bd-cda2e279d887",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " time_of_day | \n",
+ " route_id | \n",
+ " direction_id | \n",
+ " route_name | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " speed_mph | \n",
+ " avg_sched_trip_min | \n",
+ " n_trips | \n",
+ " avg_rt_trip_min | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 69 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " AM Peak | \n",
+ " 55 | \n",
+ " 0 | \n",
+ " 5am-10pm daily | \n",
+ " 260 | \n",
+ " 0055ea6cd09cc68606d37851a6c91366 | \n",
+ " 10.10 | \n",
+ " 16.00 | \n",
+ " 4 | \n",
+ " 31.70 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key time_of_day route_id direction_id \\\n",
+ "69 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 55 0 \n",
+ "\n",
+ " route_name common_shape_id shape_array_key \\\n",
+ "69 5am-10pm daily 260 0055ea6cd09cc68606d37851a6c91366 \n",
+ "\n",
+ " speed_mph avg_sched_trip_min n_trips avg_rt_trip_min \n",
+ "69 10.10 16.00 4 31.70 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " direction_id | \n",
+ " route_id | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " route_name_used | \n",
+ " service_hours | \n",
+ " trip_first_departure_datetime_pacific | \n",
+ " time_of_day | \n",
+ " service_minutes | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 4963 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " b68e89ab04e684839d2cde80be127219 | \n",
+ " 3832.26 | \n",
+ " 2365.00 | \n",
+ " 6.52 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 55 | \n",
+ " 260 | \n",
+ " 0055ea6cd09cc68606d37851a6c91366 | \n",
+ " 5am-10pm daily | \n",
+ " 0.27 | \n",
+ " 2023-10-11 07:59:00 | \n",
+ " AM Peak | \n",
+ " 16.00 | \n",
+ "
\n",
+ " \n",
+ " 5783 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " d32ca8727a4d9c0ee126664507d42c08 | \n",
+ " 3856.82 | \n",
+ " 2255.00 | \n",
+ " 6.99 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 55 | \n",
+ " 260 | \n",
+ " 0055ea6cd09cc68606d37851a6c91366 | \n",
+ " 5am-10pm daily | \n",
+ " 0.27 | \n",
+ " 2023-10-11 09:20:00 | \n",
+ " AM Peak | \n",
+ " 16.00 | \n",
+ "
\n",
+ " \n",
+ " 6308 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " e59e81c33cab600d4009318f354fbf9a | \n",
+ " 1662.33 | \n",
+ " 881.00 | \n",
+ " 3.59 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 55 | \n",
+ " 260 | \n",
+ " 0055ea6cd09cc68606d37851a6c91366 | \n",
+ " 5am-10pm daily | \n",
+ " 0.27 | \n",
+ " 2023-10-11 08:20:00 | \n",
+ " AM Peak | \n",
+ " 16.00 | \n",
+ "
\n",
+ " \n",
+ " 6559 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " ef66bcc7ebab13dc4cd4e2b0c6085ee7 | \n",
+ " 3843.80 | \n",
+ " 2099.00 | \n",
+ " 23.18 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 55 | \n",
+ " 260 | \n",
+ " 0055ea6cd09cc68606d37851a6c91366 | \n",
+ " 5am-10pm daily | \n",
+ " 0.27 | \n",
+ " 2023-10-11 09:41:00 | \n",
+ " AM Peak | \n",
+ " 16.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "4963 c0e3039da063db95ebabd3fe4ee611a4 b68e89ab04e684839d2cde80be127219 \n",
+ "5783 c0e3039da063db95ebabd3fe4ee611a4 d32ca8727a4d9c0ee126664507d42c08 \n",
+ "6308 c0e3039da063db95ebabd3fe4ee611a4 e59e81c33cab600d4009318f354fbf9a \n",
+ "6559 c0e3039da063db95ebabd3fe4ee611a4 ef66bcc7ebab13dc4cd4e2b0c6085ee7 \n",
+ "\n",
+ " change_meters change_sec speed_mph schedule_gtfs_dataset_key \\\n",
+ "4963 3832.26 2365.00 6.52 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "5783 3856.82 2255.00 6.99 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "6308 1662.33 881.00 3.59 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "6559 3843.80 2099.00 23.18 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "\n",
+ " direction_id route_id common_shape_id shape_array_key \\\n",
+ "4963 0 55 260 0055ea6cd09cc68606d37851a6c91366 \n",
+ "5783 0 55 260 0055ea6cd09cc68606d37851a6c91366 \n",
+ "6308 0 55 260 0055ea6cd09cc68606d37851a6c91366 \n",
+ "6559 0 55 260 0055ea6cd09cc68606d37851a6c91366 \n",
+ "\n",
+ " route_name_used service_hours trip_first_departure_datetime_pacific \\\n",
+ "4963 5am-10pm daily 0.27 2023-10-11 07:59:00 \n",
+ "5783 5am-10pm daily 0.27 2023-10-11 09:20:00 \n",
+ "6308 5am-10pm daily 0.27 2023-10-11 08:20:00 \n",
+ "6559 5am-10pm daily 0.27 2023-10-11 09:41:00 \n",
+ "\n",
+ " time_of_day service_minutes \n",
+ "4963 AM Peak 16.00 \n",
+ "5783 AM Peak 16.00 \n",
+ "6308 AM Peak 16.00 \n",
+ "6559 AM Peak 16.00 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "check_shapes(shape_array_1, shape1_time)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 155,
+ "id": "327155b2-c46d-4d89-be06-58f5f2f324cc",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "31.666666666666668"
+ ]
+ },
+ "execution_count": 155,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "(2365+2255+881+2099)/4/60"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "07b4ca28-3a71-424c-b742-9b47bc4176db",
+ "metadata": {},
+ "source": [
+ "##### Shape 2"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 156,
+ "id": "e391d24c-0def-4475-b570-dcb4c0fdb82d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "shape_key2 = \"e52c291259f04c198a0c1f245ec26be9\"\n",
+ "time2 = \"AM Peak\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 157,
+ "id": "f0513f75-7ade-4eb5-a5cf-95e7cb647ab8",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " time_of_day | \n",
+ " route_id | \n",
+ " direction_id | \n",
+ " route_name | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " speed_mph | \n",
+ " avg_sched_trip_min | \n",
+ " n_trips | \n",
+ " avg_rt_trip_min | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 65 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " AM Peak | \n",
+ " 52 | \n",
+ " 0 | \n",
+ " Weekdays 6am-10pm Weekends 8am-10pm | \n",
+ " 5200 | \n",
+ " e52c291259f04c198a0c1f245ec26be9 | \n",
+ " 15.90 | \n",
+ " 26.00 | \n",
+ " 6 | \n",
+ " 47.50 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key time_of_day route_id direction_id \\\n",
+ "65 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 52 0 \n",
+ "\n",
+ " route_name common_shape_id \\\n",
+ "65 Weekdays 6am-10pm Weekends 8am-10pm 5200 \n",
+ "\n",
+ " shape_array_key speed_mph avg_sched_trip_min n_trips \\\n",
+ "65 e52c291259f04c198a0c1f245ec26be9 15.90 26.00 6 \n",
+ "\n",
+ " avg_rt_trip_min \n",
+ "65 47.50 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ " schedule_gtfs_dataset_key | \n",
+ " direction_id | \n",
+ " route_id | \n",
+ " common_shape_id | \n",
+ " shape_array_key | \n",
+ " route_name_used | \n",
+ " service_hours | \n",
+ " trip_first_departure_datetime_pacific | \n",
+ " time_of_day | \n",
+ " service_minutes | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 57 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 027ffc2ecdce130df8e8b2f2d81b17d6 | \n",
+ " 6505.34 | \n",
+ " 2703.00 | \n",
+ " 18.19 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 52 | \n",
+ " 5200 | \n",
+ " e52c291259f04c198a0c1f245ec26be9 | \n",
+ " Weekdays 6am-10pm Weekends 8am-10pm | \n",
+ " 0.40 | \n",
+ " 2023-10-11 09:34:00 | \n",
+ " AM Peak | \n",
+ " 24.00 | \n",
+ "
\n",
+ " \n",
+ " 91 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 03b5bb249b7188cc3935b355ad008a82 | \n",
+ " 6625.95 | \n",
+ " 2548.00 | \n",
+ " 18.46 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 52 | \n",
+ " 5200 | \n",
+ " e52c291259f04c198a0c1f245ec26be9 | \n",
+ " Weekdays 6am-10pm Weekends 8am-10pm | \n",
+ " 0.48 | \n",
+ " 2023-10-11 07:56:00 | \n",
+ " AM Peak | \n",
+ " 29.00 | \n",
+ "
\n",
+ " \n",
+ " 2101 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 50848f3ca9a5b8db9149cf4d4c0ce008 | \n",
+ " 6660.49 | \n",
+ " 3261.00 | \n",
+ " 12.02 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 52 | \n",
+ " 5200 | \n",
+ " e52c291259f04c198a0c1f245ec26be9 | \n",
+ " Weekdays 6am-10pm Weekends 8am-10pm | \n",
+ " 0.42 | \n",
+ " 2023-10-11 08:34:00 | \n",
+ " AM Peak | \n",
+ " 25.00 | \n",
+ "
\n",
+ " \n",
+ " 4013 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 94f8ca1587768bca12471d4dfd63cace | \n",
+ " 6786.87 | \n",
+ " 2735.00 | \n",
+ " 14.68 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 52 | \n",
+ " 5200 | \n",
+ " e52c291259f04c198a0c1f245ec26be9 | \n",
+ " Weekdays 6am-10pm Weekends 8am-10pm | \n",
+ " 0.42 | \n",
+ " 2023-10-11 08:54:00 | \n",
+ " AM Peak | \n",
+ " 25.00 | \n",
+ "
\n",
+ " \n",
+ " 4699 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " acf1b602311569dd97867a45b5c16e4c | \n",
+ " 9837.34 | \n",
+ " 3691.00 | \n",
+ " 17.80 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 52 | \n",
+ " 5200 | \n",
+ " e52c291259f04c198a0c1f245ec26be9 | \n",
+ " Weekdays 6am-10pm Weekends 8am-10pm | \n",
+ " 0.48 | \n",
+ " 2023-10-11 08:15:00 | \n",
+ " AM Peak | \n",
+ " 29.00 | \n",
+ "
\n",
+ " \n",
+ " 6700 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " f5058b9f8fe6df4e55644a6cc0896faf | \n",
+ " 6657.79 | \n",
+ " 2162.00 | \n",
+ " 14.39 | \n",
+ " 7cc0cb1871dfd558f11a2885c145d144 | \n",
+ " 0 | \n",
+ " 52 | \n",
+ " 5200 | \n",
+ " e52c291259f04c198a0c1f245ec26be9 | \n",
+ " Weekdays 6am-10pm Weekends 8am-10pm | \n",
+ " 0.40 | \n",
+ " 2023-10-11 09:55:00 | \n",
+ " AM Peak | \n",
+ " 24.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "57 c0e3039da063db95ebabd3fe4ee611a4 027ffc2ecdce130df8e8b2f2d81b17d6 \n",
+ "91 c0e3039da063db95ebabd3fe4ee611a4 03b5bb249b7188cc3935b355ad008a82 \n",
+ "2101 c0e3039da063db95ebabd3fe4ee611a4 50848f3ca9a5b8db9149cf4d4c0ce008 \n",
+ "4013 c0e3039da063db95ebabd3fe4ee611a4 94f8ca1587768bca12471d4dfd63cace \n",
+ "4699 c0e3039da063db95ebabd3fe4ee611a4 acf1b602311569dd97867a45b5c16e4c \n",
+ "6700 c0e3039da063db95ebabd3fe4ee611a4 f5058b9f8fe6df4e55644a6cc0896faf \n",
+ "\n",
+ " change_meters change_sec speed_mph schedule_gtfs_dataset_key \\\n",
+ "57 6505.34 2703.00 18.19 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "91 6625.95 2548.00 18.46 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "2101 6660.49 3261.00 12.02 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "4013 6786.87 2735.00 14.68 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "4699 9837.34 3691.00 17.80 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "6700 6657.79 2162.00 14.39 7cc0cb1871dfd558f11a2885c145d144 \n",
+ "\n",
+ " direction_id route_id common_shape_id shape_array_key \\\n",
+ "57 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n",
+ "91 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n",
+ "2101 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n",
+ "4013 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n",
+ "4699 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n",
+ "6700 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n",
+ "\n",
+ " route_name_used service_hours \\\n",
+ "57 Weekdays 6am-10pm Weekends 8am-10pm 0.40 \n",
+ "91 Weekdays 6am-10pm Weekends 8am-10pm 0.48 \n",
+ "2101 Weekdays 6am-10pm Weekends 8am-10pm 0.42 \n",
+ "4013 Weekdays 6am-10pm Weekends 8am-10pm 0.42 \n",
+ "4699 Weekdays 6am-10pm Weekends 8am-10pm 0.48 \n",
+ "6700 Weekdays 6am-10pm Weekends 8am-10pm 0.40 \n",
+ "\n",
+ " trip_first_departure_datetime_pacific time_of_day service_minutes \n",
+ "57 2023-10-11 09:34:00 AM Peak 24.00 \n",
+ "91 2023-10-11 07:56:00 AM Peak 29.00 \n",
+ "2101 2023-10-11 08:34:00 AM Peak 25.00 \n",
+ "4013 2023-10-11 08:54:00 AM Peak 25.00 \n",
+ "4699 2023-10-11 08:15:00 AM Peak 29.00 \n",
+ "6700 2023-10-11 09:55:00 AM Peak 24.00 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "check_shapes(shape_key2, time2)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "69d4639b-a9e9-44d0-8398-3aca1a0a11ff",
+ "metadata": {},
+ "source": [
+ "## Outer Join\n",
+ "#### Question: how to connect back to trip instance key?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 183,
+ "id": "41d4e8fd-33fa-4dfc-aa12-98c8abe16240",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "pandas.core.frame.DataFrame"
+ ]
+ },
+ "execution_count": 183,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Spatial Accuracy\n",
+ "type(muni)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 184,
+ "id": "dfb2e9c8-67d3-4a4d-b64a-eebac0f35bac",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['trip_instance_key', 'total_vp', 'vp_in_shape'], dtype='object')"
+ ]
+ },
+ "execution_count": 184,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "muni.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 185,
+ "id": "6b2780f0-a359-4fb1-9082-2c7d40ba42df",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 185,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "muni.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 186,
+ "id": "05adc5b0-2a0b-4860-af3d-bda2349bff99",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " total_vp | \n",
+ " vp_in_shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 00068c2e2316950af50ffaa9584c7a46 | \n",
+ " 126 | \n",
+ " 126 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 000c62b70d9438b7951457a74a4c89b2 | \n",
+ " 152 | \n",
+ " 140 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0010126e10a24f22fb36018ed0f79572 | \n",
+ " 413 | \n",
+ " 315 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0013822d90020aa52bae3dab73d15fd0 | \n",
+ " 205 | \n",
+ " 205 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 001a66b1633388f4fe9eea5acf374481 | \n",
+ " 227 | \n",
+ " 227 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key total_vp vp_in_shape\n",
+ "0 00068c2e2316950af50ffaa9584c7a46 126 126\n",
+ "1 000c62b70d9438b7951457a74a4c89b2 152 140\n",
+ "2 0010126e10a24f22fb36018ed0f79572 413 315\n",
+ "3 0013822d90020aa52bae3dab73d15fd0 205 205\n",
+ "4 001a66b1633388f4fe9eea5acf374481 227 227"
+ ]
+ },
+ "execution_count": 186,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "muni.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 187,
+ "id": "fd016ce7-c607-41c4-8fe3-fd5835a8abc2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['trip_instance_key', 'min_time', 'max_time', 'atleast2_trip_updates',\n",
+ " 'trip_min_elapsed', 'pct_update_complete', 'larger', 'test_mask',\n",
+ " 'pct_update_complete2'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 187,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Update Completeness\n",
+ "test_group3.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 188,
+ "id": "09b39eab-5919-4b96-a127-a9e4ce4f86aa",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 188,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 189,
+ "id": "96940334-7067-4f0f-aa6b-564ad1a6848d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "test_group3 = test_group3.drop(columns = ['min_time','max_time','trip_min_elapsed', 'pct_update_complete','larger','test_mask'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 190,
+ "id": "130b01f7-02a6-49b3-8f87-d76ebbf53a8f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " atleast2_trip_updates | \n",
+ " pct_update_complete2 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 00068c2e2316950af50ffaa9584c7a46 | \n",
+ " 42 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 000c62b70d9438b7951457a74a4c89b2 | \n",
+ " 50 | \n",
+ " 0.98 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 0010126e10a24f22fb36018ed0f79572 | \n",
+ " 138 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 0013822d90020aa52bae3dab73d15fd0 | \n",
+ " 68 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 001a66b1633388f4fe9eea5acf374481 | \n",
+ " 76 | \n",
+ " 1.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key atleast2_trip_updates \\\n",
+ "0 00068c2e2316950af50ffaa9584c7a46 42 \n",
+ "1 000c62b70d9438b7951457a74a4c89b2 50 \n",
+ "2 0010126e10a24f22fb36018ed0f79572 138 \n",
+ "3 0013822d90020aa52bae3dab73d15fd0 68 \n",
+ "4 001a66b1633388f4fe9eea5acf374481 76 \n",
+ "\n",
+ " pct_update_complete2 \n",
+ "0 1.00 \n",
+ "1 0.98 \n",
+ "2 1.00 \n",
+ "3 1.00 \n",
+ "4 1.00 "
+ ]
+ },
+ "execution_count": 190,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "test_group3.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 191,
+ "id": "a24dbf3a-e060-49b1-982b-2a583c98286a",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "outer_merge1 = pd.merge(test_group3, muni, \n",
+ " on = \"trip_instance_key\",\n",
+ " how = \"outer\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 192,
+ "id": "320cb842-c935-4c40-b7c9-6396c820da68",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " trip_instance_key | \n",
+ " atleast2_trip_updates | \n",
+ " pct_update_complete2 | \n",
+ " total_vp | \n",
+ " vp_in_shape | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 3202 | \n",
+ " 5a26483ef38d216c42696681ee376614 | \n",
+ " 71 | \n",
+ " 0.95 | \n",
+ " 217 | \n",
+ " 183 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " trip_instance_key atleast2_trip_updates \\\n",
+ "3202 5a26483ef38d216c42696681ee376614 71 \n",
+ "\n",
+ " pct_update_complete2 total_vp vp_in_shape \n",
+ "3202 0.95 217 183 "
+ ]
+ },
+ "execution_count": 192,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "outer_merge1.sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 202,
+ "id": "44c12eae-0c78-4d50-9a48-42cbd7c2521f",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 202,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "outer_merge1.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 193,
+ "id": "f75301c2-96bd-487e-9c10-176777fc4f8c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['gtfs_dataset_key', 'trip_instance_key', 'change_meters', 'change_sec',\n",
+ " 'speed_mph', 'schedule_gtfs_dataset_key', 'direction_id', 'route_id',\n",
+ " 'common_shape_id', 'shape_array_key', 'route_name_used',\n",
+ " 'service_hours', 'trip_first_departure_datetime_pacific', 'time_of_day',\n",
+ " 'service_minutes'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 193,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m3.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 194,
+ "id": "2ee51b82-169c-449b-a507-103c9a839dbf",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Averages\n",
+ "crosswalk_back_to_trips = c3_m3[['shape_array_key','gtfs_dataset_key','time_of_day','trip_instance_key']].drop_duplicates().reset_index(drop = True)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 195,
+ "id": "7d5ddf3f-4cc8-43af-9575-c90bece499fb",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "6993"
+ ]
+ },
+ "execution_count": 195,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "crosswalk_back_to_trips.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 196,
+ "id": "5d3a24fd-362f-4a62-9fcf-c7f03b2770f3",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "outer_merge2 = pd.merge(outer_merge1, crosswalk_back_to_trips, \n",
+ " on = \"trip_instance_key\",\n",
+ " how = \"outer\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 197,
+ "id": "2d6b71df-80e9-4fbd-8ac2-e1f55ba04929",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['trip_instance_key', 'atleast2_trip_updates', 'pct_update_complete2',\n",
+ " 'total_vp', 'vp_in_shape', 'shape_array_key', 'gtfs_dataset_key',\n",
+ " 'time_of_day'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 197,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "outer_merge2.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 198,
+ "id": "6030cb7f-5655-4132-b214-4259e5d5090e",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "Index(['gtfs_dataset_key', 'time_of_day', 'route_id', 'direction_id',\n",
+ " 'route_name', 'common_shape_id', 'shape_array_key', 'speed_mph',\n",
+ " 'avg_sched_trip_min', 'n_trips', 'avg_rt_trip_min'],\n",
+ " dtype='object')"
+ ]
+ },
+ "execution_count": 198,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m4.columns"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 199,
+ "id": "c8bea8f0-ea00-4e1d-9783-398f277e768c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "outer_merge3 = pd.merge(outer_merge2, c3_m4, \n",
+ " on = [\"gtfs_dataset_key\", \"shape_array_key\", \"time_of_day\"],\n",
+ " how = \"outer\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fe34ed54-badd-45ed-8ee2-f18a51207b3c",
+ "metadata": {},
+ "source": [
+ "#### Question: even if we drop the speeds, we should still have the shape_array_key and whatnot?\n",
+ "* How come some of the rows have no speeds?? What went wrong?\n",
+ "* 519cc26f9599677993f8d8cd269eb3cc should have a speed?"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 206,
+ "id": "e3883628-9629-45ef-9a29-d5f4e60215c2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2900 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 519cc26f9599677993f8d8cd269eb3cc | \n",
+ " 11329.47 | \n",
+ " 2960.00 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "2900 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n",
+ "\n",
+ " change_meters change_sec speed_mph \n",
+ "2900 11329.47 2960.00 NaN "
+ ]
+ },
+ "execution_count": 206,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m2.loc[c3_m2.trip_instance_key == \"519cc26f9599677993f8d8cd269eb3cc\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 208,
+ "id": "363d9f9e-a299-4e70-8e80-ce608b74bd18",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " location_timestamp_local | \n",
+ " vp_idx | \n",
+ " shape_array_key | \n",
+ " vp_geometry | \n",
+ " shape_meters | \n",
+ " prior_dist | \n",
+ " prior_time | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 15100 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 519cc26f9599677993f8d8cd269eb3cc | \n",
+ " 2023-10-11 20:02:59 | \n",
+ " 11958556 | \n",
+ " 15955f0486291f4d897d69cc2e2db08d | \n",
+ " POINT (-208310.474 -19578.210) | \n",
+ " 146.29 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 15101 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 519cc26f9599677993f8d8cd269eb3cc | \n",
+ " 2023-10-11 20:15:16 | \n",
+ " 11958593 | \n",
+ " 15955f0486291f4d897d69cc2e2db08d | \n",
+ " POINT (-208720.144 -18849.817) | \n",
+ " 4400.58 | \n",
+ " 146.29 | \n",
+ " 2023-10-11 20:02:59 | \n",
+ " 4254.30 | \n",
+ " 737.00 | \n",
+ "
\n",
+ " \n",
+ " 15102 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 519cc26f9599677993f8d8cd269eb3cc | \n",
+ " 2023-10-11 20:27:31 | \n",
+ " 11958630 | \n",
+ " 15955f0486291f4d897d69cc2e2db08d | \n",
+ " POINT (-210732.513 -22505.869) | \n",
+ " 11426.63 | \n",
+ " 4400.58 | \n",
+ " 2023-10-11 20:15:16 | \n",
+ " 7026.05 | \n",
+ " 735.00 | \n",
+ "
\n",
+ " \n",
+ " 15103 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 519cc26f9599677993f8d8cd269eb3cc | \n",
+ " 2023-10-11 20:39:48 | \n",
+ " 11958667 | \n",
+ " 15955f0486291f4d897d69cc2e2db08d | \n",
+ " POINT (-210720.177 -22491.169) | \n",
+ " 11445.89 | \n",
+ " 11426.63 | \n",
+ " 2023-10-11 20:27:31 | \n",
+ " 19.26 | \n",
+ " 737.00 | \n",
+ "
\n",
+ " \n",
+ " 15104 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 519cc26f9599677993f8d8cd269eb3cc | \n",
+ " 2023-10-11 20:52:19 | \n",
+ " 11958704 | \n",
+ " 15955f0486291f4d897d69cc2e2db08d | \n",
+ " POINT (-210647.181 -22436.301) | \n",
+ " 11475.76 | \n",
+ " 11445.89 | \n",
+ " 2023-10-11 20:39:48 | \n",
+ " 29.86 | \n",
+ " 751.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "15100 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n",
+ "15101 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n",
+ "15102 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n",
+ "15103 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n",
+ "15104 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n",
+ "\n",
+ " location_timestamp_local vp_idx shape_array_key \\\n",
+ "15100 2023-10-11 20:02:59 11958556 15955f0486291f4d897d69cc2e2db08d \n",
+ "15101 2023-10-11 20:15:16 11958593 15955f0486291f4d897d69cc2e2db08d \n",
+ "15102 2023-10-11 20:27:31 11958630 15955f0486291f4d897d69cc2e2db08d \n",
+ "15103 2023-10-11 20:39:48 11958667 15955f0486291f4d897d69cc2e2db08d \n",
+ "15104 2023-10-11 20:52:19 11958704 15955f0486291f4d897d69cc2e2db08d \n",
+ "\n",
+ " vp_geometry shape_meters prior_dist \\\n",
+ "15100 POINT (-208310.474 -19578.210) 146.29 NaN \n",
+ "15101 POINT (-208720.144 -18849.817) 4400.58 146.29 \n",
+ "15102 POINT (-210732.513 -22505.869) 11426.63 4400.58 \n",
+ "15103 POINT (-210720.177 -22491.169) 11445.89 11426.63 \n",
+ "15104 POINT (-210647.181 -22436.301) 11475.76 11445.89 \n",
+ "\n",
+ " prior_time change_meters change_sec \n",
+ "15100 NaT NaN NaN \n",
+ "15101 2023-10-11 20:02:59 4254.30 737.00 \n",
+ "15102 2023-10-11 20:15:16 7026.05 735.00 \n",
+ "15103 2023-10-11 20:27:31 19.26 737.00 \n",
+ "15104 2023-10-11 20:39:48 29.86 751.00 "
+ ]
+ },
+ "execution_count": 208,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m1.loc[c3_m1.trip_instance_key == \"519cc26f9599677993f8d8cd269eb3cc\"].drop(columns = ['shape_geometry'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "596112c3-1cc8-434e-97b5-12f1dc2864c7",
+ "metadata": {},
+ "source": [
+ "#### check w/ another trip"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 215,
+ "id": "b254128f-6388-4315-b99d-0616c973f3a1",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 5290 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 9359a381c89e52b7bc78bb4942e4b077 | \n",
+ " 4872.66 | \n",
+ " 5109.00 | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "5290 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n",
+ "\n",
+ " change_meters change_sec speed_mph \n",
+ "5290 4872.66 5109.00 NaN "
+ ]
+ },
+ "execution_count": 215,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m2.loc[c3_m2.trip_instance_key == \"9359a381c89e52b7bc78bb4942e4b077\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 216,
+ "id": "e38be645-43bf-4f7a-9a08-108fdfe76f7c",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " location_timestamp_local | \n",
+ " vp_idx | \n",
+ " shape_array_key | \n",
+ " vp_geometry | \n",
+ " shape_meters | \n",
+ " prior_dist | \n",
+ " prior_time | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 2105 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 9359a381c89e52b7bc78bb4942e4b077 | \n",
+ " 2023-10-11 07:22:58 | \n",
+ " 11475833 | \n",
+ " 6b99e6706ef5d2fb8e8518256a3c00c4 | \n",
+ " POINT (-215158.815 -23030.763) | \n",
+ " 357.86 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 2106 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 9359a381c89e52b7bc78bb4942e4b077 | \n",
+ " 2023-10-11 07:44:17 | \n",
+ " 11475897 | \n",
+ " 6b99e6706ef5d2fb8e8518256a3c00c4 | \n",
+ " POINT (-215158.815 -23030.763) | \n",
+ " 357.86 | \n",
+ " 357.86 | \n",
+ " 2023-10-11 07:22:58 | \n",
+ " 0.00 | \n",
+ " 1279.00 | \n",
+ "
\n",
+ " \n",
+ " 2107 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 9359a381c89e52b7bc78bb4942e4b077 | \n",
+ " 2023-10-11 08:05:37 | \n",
+ " 11475961 | \n",
+ " 6b99e6706ef5d2fb8e8518256a3c00c4 | \n",
+ " POINT (-213850.819 -22835.343) | \n",
+ " 1645.28 | \n",
+ " 357.86 | \n",
+ " 2023-10-11 07:44:17 | \n",
+ " 1287.42 | \n",
+ " 1280.00 | \n",
+ "
\n",
+ " \n",
+ " 2108 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 9359a381c89e52b7bc78bb4942e4b077 | \n",
+ " 2023-10-11 08:27:00 | \n",
+ " 11476025 | \n",
+ " 6b99e6706ef5d2fb8e8518256a3c00c4 | \n",
+ " POINT (-210713.785 -22130.479) | \n",
+ " 5150.33 | \n",
+ " 1645.28 | \n",
+ " 2023-10-11 08:05:37 | \n",
+ " 3505.05 | \n",
+ " 1283.00 | \n",
+ "
\n",
+ " \n",
+ " 2109 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " 9359a381c89e52b7bc78bb4942e4b077 | \n",
+ " 2023-10-11 08:48:07 | \n",
+ " 11476089 | \n",
+ " 6b99e6706ef5d2fb8e8518256a3c00c4 | \n",
+ " POINT (-210637.615 -22057.788) | \n",
+ " 5230.53 | \n",
+ " 5150.33 | \n",
+ " 2023-10-11 08:27:00 | \n",
+ " 80.19 | \n",
+ " 1267.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "2105 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n",
+ "2106 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n",
+ "2107 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n",
+ "2108 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n",
+ "2109 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n",
+ "\n",
+ " location_timestamp_local vp_idx shape_array_key \\\n",
+ "2105 2023-10-11 07:22:58 11475833 6b99e6706ef5d2fb8e8518256a3c00c4 \n",
+ "2106 2023-10-11 07:44:17 11475897 6b99e6706ef5d2fb8e8518256a3c00c4 \n",
+ "2107 2023-10-11 08:05:37 11475961 6b99e6706ef5d2fb8e8518256a3c00c4 \n",
+ "2108 2023-10-11 08:27:00 11476025 6b99e6706ef5d2fb8e8518256a3c00c4 \n",
+ "2109 2023-10-11 08:48:07 11476089 6b99e6706ef5d2fb8e8518256a3c00c4 \n",
+ "\n",
+ " vp_geometry shape_meters prior_dist \\\n",
+ "2105 POINT (-215158.815 -23030.763) 357.86 NaN \n",
+ "2106 POINT (-215158.815 -23030.763) 357.86 357.86 \n",
+ "2107 POINT (-213850.819 -22835.343) 1645.28 357.86 \n",
+ "2108 POINT (-210713.785 -22130.479) 5150.33 1645.28 \n",
+ "2109 POINT (-210637.615 -22057.788) 5230.53 5150.33 \n",
+ "\n",
+ " prior_time change_meters change_sec \n",
+ "2105 NaT NaN NaN \n",
+ "2106 2023-10-11 07:22:58 0.00 1279.00 \n",
+ "2107 2023-10-11 07:44:17 1287.42 1280.00 \n",
+ "2108 2023-10-11 08:05:37 3505.05 1283.00 \n",
+ "2109 2023-10-11 08:27:00 80.19 1267.00 "
+ ]
+ },
+ "execution_count": 216,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m1.loc[c3_m1.trip_instance_key == \"9359a381c89e52b7bc78bb4942e4b077\"].drop(columns = ['shape_geometry'])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fa73bd7b-9cd5-42d1-a5b2-38cf252ab5f6",
+ "metadata": {},
+ "source": [
+ "#### Check a trip with speed mph"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 217,
+ "id": "326763e6-75d3-4891-af7d-690b5ce1781d",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 6889 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " bfe892c83789ea8fb7cf2bfbf8c3704a | \n",
+ " 6745.28 | \n",
+ " 2668.00 | \n",
+ " 12.51 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "6889 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n",
+ "\n",
+ " change_meters change_sec speed_mph \n",
+ "6889 6745.28 2668.00 12.51 "
+ ]
+ },
+ "execution_count": 217,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m2.loc[~c3_m2.speed_mph.isna()].sample()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 218,
+ "id": "f711e45d-98f8-40ed-a599-658669fd21d6",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ " speed_mph | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 6889 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " bfe892c83789ea8fb7cf2bfbf8c3704a | \n",
+ " 6745.28 | \n",
+ " 2668.00 | \n",
+ " 12.51 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "6889 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n",
+ "\n",
+ " change_meters change_sec speed_mph \n",
+ "6889 6745.28 2668.00 12.51 "
+ ]
+ },
+ "execution_count": 218,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m2.loc[c3_m2.trip_instance_key == \"bfe892c83789ea8fb7cf2bfbf8c3704a\"]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 219,
+ "id": "36ad980e-d8fa-4a61-aac7-ecc688701625",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " gtfs_dataset_key | \n",
+ " trip_instance_key | \n",
+ " location_timestamp_local | \n",
+ " vp_idx | \n",
+ " shape_array_key | \n",
+ " vp_geometry | \n",
+ " shape_meters | \n",
+ " prior_dist | \n",
+ " prior_time | \n",
+ " change_meters | \n",
+ " change_sec | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 23620 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " bfe892c83789ea8fb7cf2bfbf8c3704a | \n",
+ " 2023-10-11 07:27:20 | \n",
+ " 12268908 | \n",
+ " 3493166801a9913231925d9a167b2182 | \n",
+ " POINT (-210495.407 -23811.592) | \n",
+ " 331.68 | \n",
+ " NaN | \n",
+ " NaT | \n",
+ " NaN | \n",
+ " NaN | \n",
+ "
\n",
+ " \n",
+ " 23621 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " bfe892c83789ea8fb7cf2bfbf8c3704a | \n",
+ " 2023-10-11 07:38:37 | \n",
+ " 12268942 | \n",
+ " 3493166801a9913231925d9a167b2182 | \n",
+ " POINT (-210542.304 -23633.930) | \n",
+ " 570.97 | \n",
+ " 331.68 | \n",
+ " 2023-10-11 07:27:20 | \n",
+ " 239.29 | \n",
+ " 677.00 | \n",
+ "
\n",
+ " \n",
+ " 23622 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " bfe892c83789ea8fb7cf2bfbf8c3704a | \n",
+ " 2023-10-11 07:49:25 | \n",
+ " 12268975 | \n",
+ " 3493166801a9913231925d9a167b2182 | \n",
+ " POINT (-211724.216 -22030.923) | \n",
+ " 2816.80 | \n",
+ " 570.97 | \n",
+ " 2023-10-11 07:38:37 | \n",
+ " 2245.82 | \n",
+ " 648.00 | \n",
+ "
\n",
+ " \n",
+ " 23623 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " bfe892c83789ea8fb7cf2bfbf8c3704a | \n",
+ " 2023-10-11 08:00:28 | \n",
+ " 12269008 | \n",
+ " 3493166801a9913231925d9a167b2182 | \n",
+ " POINT (-213012.221 -21465.442) | \n",
+ " 4744.72 | \n",
+ " 2816.80 | \n",
+ " 2023-10-11 07:49:25 | \n",
+ " 1927.92 | \n",
+ " 663.00 | \n",
+ "
\n",
+ " \n",
+ " 23624 | \n",
+ " c0e3039da063db95ebabd3fe4ee611a4 | \n",
+ " bfe892c83789ea8fb7cf2bfbf8c3704a | \n",
+ " 2023-10-11 08:11:48 | \n",
+ " 12269042 | \n",
+ " 3493166801a9913231925d9a167b2182 | \n",
+ " POINT (-215142.435 -21544.168) | \n",
+ " 7076.96 | \n",
+ " 4744.72 | \n",
+ " 2023-10-11 08:00:28 | \n",
+ " 2332.24 | \n",
+ " 680.00 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " gtfs_dataset_key trip_instance_key \\\n",
+ "23620 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n",
+ "23621 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n",
+ "23622 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n",
+ "23623 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n",
+ "23624 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n",
+ "\n",
+ " location_timestamp_local vp_idx shape_array_key \\\n",
+ "23620 2023-10-11 07:27:20 12268908 3493166801a9913231925d9a167b2182 \n",
+ "23621 2023-10-11 07:38:37 12268942 3493166801a9913231925d9a167b2182 \n",
+ "23622 2023-10-11 07:49:25 12268975 3493166801a9913231925d9a167b2182 \n",
+ "23623 2023-10-11 08:00:28 12269008 3493166801a9913231925d9a167b2182 \n",
+ "23624 2023-10-11 08:11:48 12269042 3493166801a9913231925d9a167b2182 \n",
+ "\n",
+ " vp_geometry shape_meters prior_dist \\\n",
+ "23620 POINT (-210495.407 -23811.592) 331.68 NaN \n",
+ "23621 POINT (-210542.304 -23633.930) 570.97 331.68 \n",
+ "23622 POINT (-211724.216 -22030.923) 2816.80 570.97 \n",
+ "23623 POINT (-213012.221 -21465.442) 4744.72 2816.80 \n",
+ "23624 POINT (-215142.435 -21544.168) 7076.96 4744.72 \n",
+ "\n",
+ " prior_time change_meters change_sec \n",
+ "23620 NaT NaN NaN \n",
+ "23621 2023-10-11 07:27:20 239.29 677.00 \n",
+ "23622 2023-10-11 07:38:37 2245.82 648.00 \n",
+ "23623 2023-10-11 07:49:25 1927.92 663.00 \n",
+ "23624 2023-10-11 08:00:28 2332.24 680.00 "
+ ]
+ },
+ "execution_count": 219,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "c3_m1.loc[c3_m1.trip_instance_key == \"bfe892c83789ea8fb7cf2bfbf8c3704a\"].drop(columns = ['shape_geometry'])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 203,
+ "id": "cecd7b7d-cbb9-4ae9-859b-56c82b47f1b3",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "9240"
+ ]
+ },
+ "execution_count": 203,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "outer_merge3.trip_instance_key.nunique()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 204,
+ "id": "81efe638-1c2c-41d0-a681-38cd310b0f51",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Int64Index: 9240 entries, 0 to 9239\n",
+ "Data columns (total 16 columns):\n",
+ " # Column Non-Null Count Dtype \n",
+ "--- ------ -------------- ----- \n",
+ " 0 trip_instance_key 9240 non-null object \n",
+ " 1 atleast2_trip_updates 9240 non-null int64 \n",
+ " 2 pct_update_complete2 9240 non-null float64 \n",
+ " 3 total_vp 9240 non-null int32 \n",
+ " 4 vp_in_shape 9240 non-null int32 \n",
+ " 5 shape_array_key 6993 non-null object \n",
+ " 6 gtfs_dataset_key 6993 non-null category\n",
+ " 7 time_of_day 6993 non-null object \n",
+ " 8 route_id 6993 non-null object \n",
+ " 9 direction_id 6993 non-null Int64 \n",
+ " 10 route_name 6993 non-null object \n",
+ " 11 common_shape_id 6993 non-null object \n",
+ " 12 speed_mph 6993 non-null float64 \n",
+ " 13 avg_sched_trip_min 6993 non-null float64 \n",
+ " 14 n_trips 6993 non-null float64 \n",
+ " 15 avg_rt_trip_min 6993 non-null float64 \n",
+ "dtypes: Int64(1), category(1), float64(5), int32(2), int64(1), object(6)\n",
+ "memory usage: 1.1+ MB\n"
+ ]
+ }
+ ],
+ "source": [
+ "outer_merge3.info()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 201,
+ "id": "ca82385c-2138-4ffa-9263-da69926f727c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# check_shapes(\"dfd50ed85c60540a1f9b9c2d1afa93ff\", \"Evening\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "381e9cf3-9dbf-4432-869d-f39d28927a7d",
+ "metadata": {},
+ "source": [
+ "## Final cleaning\n",
+ "* How come you export it twice? \n",
+ "* https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L178-L188"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.9.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}