diff --git a/rt_scheduled_v_ran/02_spatial.ipynb b/rt_scheduled_v_ran/02_spatial.ipynb new file mode 100644 index 000000000..4bdb9ebb3 --- /dev/null +++ b/rt_scheduled_v_ran/02_spatial.ipynb @@ -0,0 +1,7402 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "c410afe6-22d7-4546-817c-fea818954108", + "metadata": {}, + "source": [ + "# Migrate to VP Usable\n", + "* https://github.com/cal-itp/data-analyses/issues/936\n", + "* cd rt_segment_speeds && pip install -r requirements.txt && cd ..\n", + " * https://github.com/cal-itp/data-analyses/blob/main/Makefile#L49C2-L49C66\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "8de007c5-4cdb-4285-9536-0a5f7b75b2de", + "metadata": {}, + "outputs": [], + "source": [ + "import datetime\n", + "import dask.dataframe as dd\n", + "import dask_geopandas as dg\n", + "import dask\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "from scripts import vp_spatial_accuracy\n", + "from segment_speed_utils import helpers\n", + "from calitp_data_analysis.geography_utils import WGS84\n", + "from segment_speed_utils.project_vars import (\n", + " PROJECT_CRS,\n", + " SEGMENT_GCS,\n", + " analysis_date,\n", + " GCS_FILE_PATH,\n", + " COMPILED_CACHED_VIEWS,\n", + " RT_SCHED_GCS,\n", + " CONFIG_PATH\n", + ")\n", + "\n", + "# For speeds c2\n", + "from typing import Literal\n", + "import numpy as np\n", + "\n", + "# for speeds c3\n", + "# also import numpy \n", + "from shared_utils.rt_utils import MPH_PER_MPS\n", + "from calitp_data_analysis import utils\n", + "from shared_utils import portfolio_utils, schedule_rt_utils\n", + "from segment_speed_utils import helpers, sched_rt_utils, wrangle_shapes, segment_calcs" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "c5ba79eb-2d4e-4daa-9de6-3ab657ac0a15", + "metadata": {}, + "outputs": [], + "source": [ + "pd.options.display.max_columns = 100\n", + "pd.options.display.float_format = \"{:.2f}\".format\n", + 
"pd.set_option(\"display.max_rows\", None)\n", + "pd.set_option(\"display.max_colwidth\", None)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d8c8f895-8b72-413a-8ec2-72e7e0ebf29f", + "metadata": {}, + "outputs": [], + "source": [ + "# calitp-analytics-data/data-analyses/rt_segment_speeds/vp_usable_2023-10-11\n", + "operator = 'Bay Area 511 Muni VehiclePositions'\n", + "gtfs_key = '7cc0cb1871dfd558f11a2885c145d144'" + ] + }, + { + "cell_type": "markdown", + "id": "a3863c86-7f53-4755-8444-534a2f11e59c", + "metadata": {}, + "source": [ + "## Spatial Accuracy\n", + "* Based on https://github.com/cal-itp/data-analyses/blob/main/rt_scheduled_v_ran/scripts/vp_spatial_accuracy.py\n", + "### Grab_shape_keys_in_vp\n", + "#### First time reading `vp_usable`" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "6bd6833a-7eb4-487a-bb3a-e445a2d1941d", + "metadata": {}, + "outputs": [], + "source": [ + "def grab_shape_keys_in_vp(analysis_date: str) -> pd.DataFrame:\n", + " \"\"\"\n", + " Subset raw vp and find unique trip_instance_keys.\n", + " Create crosswalk to link trip_instance_key to shape_array_key.\n", + " \"\"\"\n", + " vp_trip_df = pd.read_parquet(f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n", + " filters = [[('gtfs_dataset_name', \"==\", operator),\n", + " ('schedule_gtfs_dataset_key', '==', gtfs_key)]],\n", + " columns = ['trip_instance_key'])\n", + " \n", + " vp_trip_df = vp_trip_df.drop_duplicates().reset_index(drop = True)\n", + " \n", + " # Make sure we have a shape geometry too\n", + " # otherwise map_partitions will throw error\n", + " shapes = pd.read_parquet(\n", + " f\"{COMPILED_CACHED_VIEWS}routelines_{analysis_date}.parquet\",\n", + " columns = [\"shape_array_key\"],\n", + " ).dropna().drop_duplicates()\n", + " \n", + " trips_with_shape = helpers.import_scheduled_trips(\n", + " analysis_date,\n", + " columns = [\"trip_instance_key\", \"shape_array_key\"],\n", + " get_pandas = True\n", + " ).merge(\n", + " 
shapes,\n", + " on = \"shape_array_key\",\n", + " how = \"inner\"\n", + " ).merge(\n", + " vp_trip_df,\n", + " on = \"trip_instance_key\",\n", + " how = \"inner\"\n", + " ).drop_duplicates().dropna().reset_index(drop=True)\n", + "\n", + " return trips_with_shape" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "8759e976-ea60-4604-945d-1ec9693b0495", + "metadata": {}, + "outputs": [], + "source": [ + "trips_with_shape = grab_shape_keys_in_vp(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "bbbc85b4-a588-4f20-9822-eb4d5b4b3723", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(9240, 2)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trips_with_shape.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "34f69e0d-68c8-4b8e-8b82-8318672d0f4d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyshape_array_key
012de3d260e9fe09fa878cb4cdb2d6898749b225ca6691f77914e88577dc13e68
1934aea5748bce830ffc2fa88dc01402a749b225ca6691f77914e88577dc13e68
23d06fa8e68e4f38d3ccc7adfabb5c3d9f1a7410fae06937b7183f6a553707915
334109ad8cfeca83cd459d42c7d51d602f1a7410fae06937b7183f6a553707915
434acd907ae9d5eb5456d683d2458bbc6f1a7410fae06937b7183f6a553707915
\n", + "
" + ], + "text/plain": [ + " trip_instance_key shape_array_key\n", + "0 12de3d260e9fe09fa878cb4cdb2d6898 749b225ca6691f77914e88577dc13e68\n", + "1 934aea5748bce830ffc2fa88dc01402a 749b225ca6691f77914e88577dc13e68\n", + "2 3d06fa8e68e4f38d3ccc7adfabb5c3d9 f1a7410fae06937b7183f6a553707915\n", + "3 34109ad8cfeca83cd459d42c7d51d602 f1a7410fae06937b7183f6a553707915\n", + "4 34acd907ae9d5eb5456d683d2458bbc6 f1a7410fae06937b7183f6a553707915" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trips_with_shape.head()" + ] + }, + { + "cell_type": "markdown", + "id": "8be73ca3-7ca2-4616-9504-7defe86f3792", + "metadata": {}, + "source": [ + "### Buffer shapes" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "ca9923a7-914f-43ce-a840-a15d41b43aee", + "metadata": {}, + "outputs": [], + "source": [ + "# This is trips_with_shape_geom\n", + "trips_with_shape_geom = vp_spatial_accuracy.buffer_shapes(analysis_date, \n", + " trips_with_shape,\n", + " 35)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "caf4dd49-4839-475d-99ff-7765ef5fb20a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(9240, 3)" + ] + }, + "execution_count": 9, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trips_with_shape_geom.shape" + ] + }, + { + "cell_type": "markdown", + "id": "fc115ac6-2042-4565-9ac7-bd961339f8aa", + "metadata": {}, + "source": [ + "#### Second time reading in the same file, streamline" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "beaddbc2-fd11-4021-bdf0-7eea81835226", + "metadata": {}, + "outputs": [], + "source": [ + "vp = dd.read_parquet(f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n", + " filters = [[('gtfs_dataset_name', \"==\", operator),\n", + " ('schedule_gtfs_dataset_key', '==', gtfs_key)]])" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "6053ea36-1f85-4a1a-b10b-019f6d870986", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trip_instance_key 9240\n", + "dtype: int64" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vp[['trip_instance_key']].compute().nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "384c1e89-0b57-49dd-aaa0-69ea03e25b23", + "metadata": {}, + "source": [ + "### Full function" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "79fe772c-74db-49e7-b41d-e1daa3f23ee3", + "metadata": {}, + "outputs": [], + "source": [ + "def merge_vp_with_shape_and_count(\n", + " vp: dd.DataFrame,\n", + " trips_with_shape_geom: gpd.GeoDataFrame\n", + ") -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Merge vp with crosswalk and buffered shapes.\n", + " Get vp count totals and vp within shape.\n", + " \"\"\"\n", + " vp_gdf = gpd.GeoDataFrame(\n", + " vp,\n", + " geometry = gpd.points_from_xy(vp.x, vp.y),\n", + " crs = WGS84\n", + " ).to_crs(PROJECT_CRS)\n", + " \n", + " vp_gdf = vp_gdf.rename(columns = {\n", + " 0:'gtfs_dataset_name',\n", + " 1: 'schedule_gtfs_dataset_key', \n", + " 2:'trip_id',\n", + " 3:'trip_instance_key',\n", + " 4: 'location_timestamp', \n", + " 5: 'location_timestamp_local',\n", + " 6:'x',\n", + " 7:'y', \n", + " 8:'vp_idx', \n", + " 9:'gtfs_dataset_key', \n", + " 10:'vp_dir_xnorm', \n", + " 11:'vp_dir_ynorm',\n", + " 12:'vp_primary_direction'})\n", + " \n", + " vp2 = pd.merge(\n", + " vp_gdf,\n", + " trips_with_shape_geom,\n", + " on = \"trip_instance_key\",\n", + " how = \"inner\"\n", + " ).reset_index(drop=True)\n", + " \n", + " total_vp = vp_spatial_accuracy.total_vp_counts_by_trip(vp2)\n", + " \n", + " vp2 = vp2.assign(\n", + " is_within = vp2.geometry_x.within(vp2.geometry_y)\n", + " ).query('is_within==True')\n", + " \n", + " vps_in_shape = (vp2.groupby(\"trip_instance_key\", \n", + " observed = True, group_keys = False)\n", + " .agg({\"location_timestamp_local\": \"count\"})\n", + " .reset_index()\n", + " 
.rename(columns = {\"location_timestamp_local\": \"vp_in_shape\"})\n", + " )\n", + " \n", + " count_df = pd.merge(\n", + " total_vp,\n", + " vps_in_shape,\n", + " on = \"trip_instance_key\",\n", + " how = \"left\"\n", + " )\n", + " \n", + " count_df = count_df.assign(\n", + " vp_in_shape = count_df.vp_in_shape.fillna(0).astype(\"int32\"),\n", + " total_vp = count_df.total_vp.fillna(0).astype(\"int32\")\n", + " )\n", + " \n", + " return count_df" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "561287f4-8189-441c-881c-1f34fc43e7d5", + "metadata": {}, + "outputs": [], + "source": [ + "muni = merge_vp_with_shape_and_count(vp, trips_with_shape_geom)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "bcaa5da2-21a5-4c87-825a-fa2637ef9b8f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 9240.00\n", + "mean 161.87\n", + "std 64.06\n", + "min 0.00\n", + "25% 118.00\n", + "50% 167.00\n", + "75% 207.00\n", + "max 481.00\n", + "Name: vp_in_shape, dtype: float64" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "muni.vp_in_shape.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "3504f86b-08bd-4a8d-b810-e654dd911c06", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "muni.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "b03b76bf-2ce3-4400-b072-f53c9f942deb", + "metadata": {}, + "source": [ + "#### Add %?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "4b0f141c-e583-4bb2-a3a7-f059e27a66fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 9240.00\n", + "mean 0.93\n", + "std 0.14\n", + "min 0.00\n", + "25% 0.94\n", + "50% 1.00\n", + "75% 1.00\n", + "max 1.00\n", + "dtype: float64" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(muni.vp_in_shape/muni.total_vp).describe()" + ] + }, + { + "cell_type": "markdown", + "id": "0a306082-dcfc-4a4a-bc3b-5ee90ae9517d", + "metadata": {}, + "source": [ + "#### Question: We are keeping rows in which 0 vps are in the shape?" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "eaa02c27-eda6-47ac-ae18-827f1616d73e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytotal_vpvp_in_shape
16704ae6ea9655473bdcaf9b80d443558ae1110
3510a1553b7a738967a422b7b8960560ded750
4080bc53a1063efae8a32eca4a9034d2a21610
11281fe28ce8669a7f946f272faf80d80ce7700
198137aed2e5ad125f8aede20922b38bf6b9780
302254e54e2839a6945b1005322398a89ae91320
3127580684a7a6b43ce5130f60a441681713460
32135a82f8c02f2036db77b1979f4ae228f5410
38616cbdc2d321ca63df78bce01eeb06fe07470
39376ee4a9165f47b59cb3e10b0125dd72e8340
45627ff345e91303290951ef0665a57dc90e550
52889351c60516ef43464d962a956d1b3806960
5894a40cc2271f048e2b72d94b051c6688cb1200
6119aa82ee2a81331cbdf044a7d5280d436a410
6758bcb78da06a56761d2a3b8b16a3436f3d1010
7043c3fc63ec2ff7a35c0f821659075be708600
7543d21ff3a0d37a484dc4f979d052652565450
7734d79da0eafb4ac5188f1cd14850a6f880900
8431ea8751445dcd6a175bd23ec5fc423d0c440
8718f298ef83aaa934eb6577fd2ca19d4ebd820
\n", + "
" + ], + "text/plain": [ + " trip_instance_key total_vp vp_in_shape\n", + "167 04ae6ea9655473bdcaf9b80d443558ae 111 0\n", + "351 0a1553b7a738967a422b7b8960560ded 75 0\n", + "408 0bc53a1063efae8a32eca4a9034d2a21 61 0\n", + "1128 1fe28ce8669a7f946f272faf80d80ce7 70 0\n", + "1981 37aed2e5ad125f8aede20922b38bf6b9 78 0\n", + "3022 54e54e2839a6945b1005322398a89ae9 132 0\n", + "3127 580684a7a6b43ce5130f60a441681713 46 0\n", + "3213 5a82f8c02f2036db77b1979f4ae228f5 41 0\n", + "3861 6cbdc2d321ca63df78bce01eeb06fe07 47 0\n", + "3937 6ee4a9165f47b59cb3e10b0125dd72e8 34 0\n", + "4562 7ff345e91303290951ef0665a57dc90e 55 0\n", + "5288 9351c60516ef43464d962a956d1b3806 96 0\n", + "5894 a40cc2271f048e2b72d94b051c6688cb 120 0\n", + "6119 aa82ee2a81331cbdf044a7d5280d436a 41 0\n", + "6758 bcb78da06a56761d2a3b8b16a3436f3d 101 0\n", + "7043 c3fc63ec2ff7a35c0f821659075be708 60 0\n", + "7543 d21ff3a0d37a484dc4f979d052652565 45 0\n", + "7734 d79da0eafb4ac5188f1cd14850a6f880 90 0\n", + "8431 ea8751445dcd6a175bd23ec5fc423d0c 44 0\n", + "8718 f298ef83aaa934eb6577fd2ca19d4ebd 82 0" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "muni.loc[muni.vp_in_shape == 0]" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "ad776dcd-700e-499e-b1c2-57c44d255153", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trip_instance_key 9240\n", + "dtype: int64" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vp[['trip_instance_key']].compute().nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "19a20f72-8a90-4823-8cc5-32f91ae1286d", + "metadata": { + "tags": [] + }, + "source": [ + "## Update Completeness\n", + "* https://github.com/cal-itp/data-analyses/blob/main/rt_predictions/01_update_completeness.ipynb\n", + "\n", + "#### Keep only relevant `trips instance keys`?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "b2e0fa2d-089d-49a5-ab20-a8ef65925795", + "metadata": {}, + "outputs": [], + "source": [ + "# Only use the trips with shapes.\n", + "relevant_trips = list(muni.trip_instance_key.unique())" + ] + }, + { + "cell_type": "markdown", + "id": "0e0e963e-ce3a-43bd-b5eb-209af2524e0f", + "metadata": {}, + "source": [ + "#### Third time reading in `vp_usable`" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "845ad61c-eab0-40c9-9e50-09c35bd50a73", + "metadata": {}, + "outputs": [], + "source": [ + "completeness_cols = ['vp_idx',\n", + " 'location_timestamp_local', 'trip_instance_key',\n", + " 'gtfs_dataset_key']" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "88b571f2-624c-47aa-a154-286902fa4a96", + "metadata": {}, + "outputs": [], + "source": [ + "vp_filtered = dd.read_parquet(f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n", + " columns = completeness_cols, \n", + " filters = [[('gtfs_dataset_name', \"==\", operator),\n", + " ('schedule_gtfs_dataset_key', '==', gtfs_key),\n", + " ('trip_instance_key', 'in', relevant_trips)]])" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "8b1174c1-b693-40fd-94c6-16152a2a3cd7", + "metadata": {}, + "outputs": [], + "source": [ + "fresh_updates = vp_filtered.sort_values(['vp_idx']).reset_index(drop = True)" + ] + }, + { + "cell_type": "markdown", + "id": "222d4f15-1fde-4194-b2bd-a10a50976336", + "metadata": {}, + "source": [ + "#### Question: Can't use dask for this type of groupby \n", + "* Also grouping only by `trip_instance_key` yields the best result..unsure if that's ok" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "add7de33-bf89-4ab1-a907-a5f6f399821e", + "metadata": {}, + "outputs": [], + "source": [ + "two_cols = ['trip_instance_key','gtfs_dataset_key']" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "c9083d90-f232-4568-99f1-f92ccd98c5cc", + "metadata": {}, 
+ "outputs": [], + "source": [ + "fresh_updates_df = fresh_updates.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "c9cd0c00-bdda-45c3-bb81-cc0556475501", + "metadata": {}, + "outputs": [], + "source": [ + "# fresh_updates_df = fresh_updates_df.assign(fresh = 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "201eab8e-893c-4a29-be9f-a1a81cafbbde", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "\" DOES NOT WORK\\ntest_group1 = (fresh_updates_df.groupby([\\n *two_cols, \\n pd.Grouper(key = 'location_timestamp_local', freq = '1Min')\\n ])\\n .count()).reset_index() \"" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\" DOES NOT WORK\n", + "test_group1 = (fresh_updates_df.groupby([\n", + " *two_cols, \n", + " pd.Grouper(key = 'location_timestamp_local', freq = '1Min')\n", + " ])\n", + " .count()).reset_index() \"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "be48ea1f-b6e9-40b6-b121-2b9b744b86cb", + "metadata": {}, + "outputs": [], + "source": [ + "test_group2 = (fresh_updates_df.groupby([\n", + " *['trip_instance_key'], \n", + " pd.Grouper(key = 'location_timestamp_local', freq = '1Min')\n", + " ])\n", + " .count()\n", + " .reset_index()\n", + " .rename(columns = {'vp_idx':'number_of_pings_per_minute'})\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "b9fa09c7-6406-4c72-9a61-e925fce8fa8d", + "metadata": {}, + "source": [ + "### Checks" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "2b697668-d7e7-4803-a289-0fe8c2e681d1", + "metadata": {}, + "outputs": [], + "source": [ + "key1 = \"38247cbee93b6f85d58bf1812ae553b9\"" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "c693d819-3cbc-4766-bdb2-0176a7340f55", + "metadata": {}, + "outputs": [], + "source": [ + "key2 = \"fac53ed1db7d914cc4c1857e967344f4\"" + ] + }, + { + "cell_type": 
"code", + "execution_count": 30, + "id": "c00f7968-5ada-4623-8881-4d7607b86e5a", + "metadata": {}, + "outputs": [], + "source": [ + "key3 = \"9e291393bf06763fb6c6fe950d6e8097\"" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "074357ed-2e23-45c9-913e-8e5092534627", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# test_group2.loc[test_group2.trip_instance_key == key1]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "bc1bfbf4-d828-41ba-bc46-9f7bb687e123", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key1]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "e3a6c25b-922e-4083-ba53-20193b441df3", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# test_group2.loc[test_group2.trip_instance_key == key2]" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "f20ba29c-1554-4155-b3c8-56f806196939", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key2]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "0a1ab3be-8f3d-416e-9c18-3d2e365250c5", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# test_group2.loc[test_group2.trip_instance_key == key3]" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "ec0f9c0a-0833-48e3-bf14-b00cd75aa560", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key3]" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "5374f05c-3dfb-4e9d-a44f-1387b70c4434", + "metadata": {}, + "outputs": [], + "source": [ + "# (test_group2.fresh - test_group2.number_of_pings_per_minute).describe()" + ] + }, + { + 
"cell_type": "code", + "execution_count": 38, + "id": "c4f0f887-0bf1-4acd-9204-ce4180517fff", + "metadata": {}, + "outputs": [], + "source": [ + "# test_group2.fresh.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "61617748-c555-4e88-b027-a7362cbb40f2", + "metadata": {}, + "outputs": [], + "source": [ + "test_group2 = test_group2.assign(\n", + " atleast2_trip_updates = test_group2.apply(\n", + " lambda x: 1 if x[\"number_of_pings_per_minute\"] >= 2\n", + " else 0, axis=1)\n", + " ) \n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "f127aa0e-90f3-4946-a3c0-80c9b84aa101", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1 534886\n", + "0 6795\n", + "Name: atleast2_trip_updates, dtype: int64" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group2.atleast2_trip_updates.value_counts()" + ] + }, + { + "cell_type": "markdown", + "id": "ccd85839-5d29-4c0b-be92-bc6c86d3999a", + "metadata": {}, + "source": [ + "### Trip minutes are wrong\n", + "* Check w/ 38247cbee93b6f85d58bf1812ae553b9\n", + "* Began at 16:01:00, ended at 17:00:00\n", + "* Data is missing for some minutes: it jumps from 4:38 to 4:54 (PM)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "079a4eb5-5708-407c-81a6-f88466e9e9df", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "' DOES NOT WORK \\ntest_group2_grouped = (test_group2.groupby(two_cols)\\n .agg({\\n \"location_timestamp_local\": \"size\",\\n \"atleast2_trip_updates\": \"sum\"})\\n .reset_index()\\n ).rename(columns = {\\n \"location_timestamp_local\": \"trip_min_elapsed\"\\n })\\n '" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\" DOES NOT WORK \n", + "test_group2_grouped = (test_group2.groupby(two_cols)\n", + " .agg({\n", + " \"location_timestamp_local\": \"size\",\n", + " \"atleast2_trip_updates\":
\"sum\"})\n", + " .reset_index()\n", + " ).rename(columns = {\n", + " \"location_timestamp_local\": \"trip_min_elapsed\"\n", + " })\n", + " \"\"\" " + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "45f51f48-df0b-42ed-9f25-07534f2c2514", + "metadata": {}, + "outputs": [], + "source": [ + "# test_group2_grouped.loc[test_group2_grouped.trip_instance_key == key1]" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "5965a48a-dec8-444f-8f69-714c64c885ab", + "metadata": {}, + "outputs": [], + "source": [ + "test_group2['max_time'] = test_group2.location_timestamp_local" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "d04cba0b-afd6-418a-a916-ea5fcea3f0a0", + "metadata": {}, + "outputs": [], + "source": [ + "test_group3 = (test_group2\n", + " .groupby(['trip_instance_key'])\n", + " .agg({'location_timestamp_local':'min','max_time':'max', \n", + " 'atleast2_trip_updates':'sum'})\n", + " .reset_index()\n", + " .rename(columns = {'location_timestamp_local':'min_time'})\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "cdbcc81c-1dc4-4111-808e-c3590568d163", + "metadata": {}, + "outputs": [], + "source": [ + "test_group3['trip_min_elapsed'] = (test_group3.max_time - test_group3.min_time) / pd.Timedelta(minutes=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "753d1d27-8813-4cdf-bf9f-dc5193f908f7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keymin_timemax_timeatleast2_trip_updatestrip_min_elapsed
200238247cbee93b6f85d58bf1812ae553b92023-10-11 16:01:002023-10-11 17:00:004559.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key min_time \\\n", + "2002 38247cbee93b6f85d58bf1812ae553b9 2023-10-11 16:01:00 \n", + "\n", + " max_time atleast2_trip_updates trip_min_elapsed \n", + "2002 2023-10-11 17:00:00 45 59.00 " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.loc[test_group3.trip_instance_key == key1]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "d4e45b96-5809-49a0-840e-112efc568a8e", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# test_group2.loc[test_group2.trip_instance_key == key1]" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "366efe58-3194-4fd9-9b86-a4aab10cf003", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "\n", + "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key1]" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "df84e20f-33e4-4c93-bc09-ddf2f85b5ae2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(133, 4)" + ] + }, + "execution_count": 49, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "fresh_updates_df.loc[fresh_updates_df.trip_instance_key == key1].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "bf6654a0-01bf-4ae3-8158-133d9e5aeb91", + "metadata": {}, + "outputs": [], + "source": [ + "test_group3 = test_group3.assign(\n", + " pct_update_complete = test_group3.atleast2_trip_updates.divide(\n", + " test_group3.trip_min_elapsed)\n", + " ) " + ] + }, + { + "cell_type": "markdown", + "id": "12805755-8e1c-44f1-bd4b-bf0308757367", + "metadata": {}, + "source": [ + "### Some trips have pct_update_complete greater than 100%\n", + "* They have more rows than trip_min_elapsed: max_time - min_time counts one fewer minute than the number of 1-minute bins (e.g. 10:20 to 10:30 is 10 minutes elapsed but 11 bins)" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "60cfbabc-e181-4004-9e08-9ff7b80fceae", + "metadata": {}, +
"outputs": [ + { + "data": { + "text/plain": [ + "3701" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(test_group3.loc[test_group3.pct_update_complete > 1])" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "6e745fec-c6c0-4f0a-b618-44a252f36b9a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(test_group3)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "a76fce93-01f6-43ec-a12b-0929883673b7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 9240.00\n", + "mean 1.00\n", + "std 0.06\n", + "min 0.02\n", + "25% 1.00\n", + "50% 1.00\n", + "75% 1.02\n", + "max 1.10\n", + "Name: pct_update_complete, dtype: float64" + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.pct_update_complete.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "ca5b2186-0b89-47fa-8d34-437b6f26849e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keymin_timemax_timeatleast2_trip_updatestrip_min_elapsedpct_update_complete
14862a9fb1144c567b98582f7dc025182ef02023-10-11 10:20:002023-10-11 10:30:001110.001.10
\n", + "
" + ], + "text/plain": [ + " trip_instance_key min_time \\\n", + "1486 2a9fb1144c567b98582f7dc025182ef0 2023-10-11 10:20:00 \n", + "\n", + " max_time atleast2_trip_updates trip_min_elapsed \\\n", + "1486 2023-10-11 10:30:00 11 10.00 \n", + "\n", + " pct_update_complete \n", + "1486 1.10 " + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.loc[test_group3.pct_update_complete == 1.1].sample()" + ] + }, + { + "cell_type": "markdown", + "id": "13103a30-dd21-4390-8b97-20c2f3b314ad", + "metadata": {}, + "source": [ + "#### How many rows (trips) have atleast2_trip_updates larger than trip_min_elapsed (delete later)" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "6862f705-501d-4285-85db-8dd758aa04c7", + "metadata": {}, + "outputs": [], + "source": [ + "test_group3['larger'] =test_group3.trip_min_elapsed - test_group3.atleast2_trip_updates " + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "d8659d98-012a-4b2d-9825-e605df194cad", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 9240.00\n", + "mean 0.29\n", + "std 3.82\n", + "min -1.00\n", + "25% -1.00\n", + "50% 0.00\n", + "75% 0.00\n", + "max 105.00\n", + "Name: larger, dtype: float64" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.larger.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "4512571b-d486-4490-bec0-6489f1b5d0f2", + "metadata": {}, + "source": [ + "#### One trip only recorded 2+ pings per minute in 36% of its duration" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "8e633abb-5030-4f83-a70a-2214b4017048", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keymin_timemax_timeatleast2_trip_updatestrip_min_elapsedpct_update_completelarger
39426ef4805f0104b95614b86a2b1c374d232023-10-11 17:48:002023-10-11 20:31:0058163.000.36105.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key min_time \\\n", + "3942 6ef4805f0104b95614b86a2b1c374d23 2023-10-11 17:48:00 \n", + "\n", + " max_time atleast2_trip_updates trip_min_elapsed \\\n", + "3942 2023-10-11 20:31:00 58 163.00 \n", + "\n", + " pct_update_complete larger \n", + "3942 0.36 105.00 " + ] + }, + "execution_count": 57, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.loc[test_group3.larger == 105].sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "5787a3b6-375f-4488-a536-0c543a61f780", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "176" + ] + }, + "execution_count": 58, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(fresh_updates_df.loc[fresh_updates_df.trip_instance_key == \"6ef4805f0104b95614b86a2b1c374d23\"])" + ] + }, + { + "cell_type": "markdown", + "id": "e9360550-2b92-406e-a37e-48347fec849d", + "metadata": {}, + "source": [ + "* Minutes skipped: 6:12 to 6:18\n", + "* 6:19-6:26\n", + "* 6:28-7:33 etc etc\n", + "* Trip started at 5:48, ended at 8:31 " + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "7b94e534-8dcb-4ed7-b864-4244d93a2ac1", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# test_group2.loc[test_group2.trip_instance_key == \"6ef4805f0104b95614b86a2b1c374d23\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "75d05e56-5d6d-4e02-955e-f2ab7567526a", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [], + "source": [ + "# fresh_updates_df.loc[fresh_updates_df.trip_instance_key == \"6ef4805f0104b95614b86a2b1c374d23\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "53cb5d9e-be40-4989-a3b6-af2131c30075", + "metadata": {}, + "outputs": [], + "source": [ + "def mask_overly_large_min(row):\n", + " if row.atleast2_trip_updates > row.trip_min_elapsed:\n", + " return row.trip_min_elapsed\n", + " 
else:\n", + " return row.atleast2_trip_updates" + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "827559b6-f997-4842-a324-4d0f169b7607", + "metadata": {}, + "outputs": [], + "source": [ + "# Apply the function: if the atleast2 trip updates is larger than trip_min_elapsed, mask it\n", + "# Or maybe just mask the pct-update-complete? \n", + "test_group3[\"test_mask\"] = test_group3.apply(mask_overly_large_min, axis=1)" + ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "8d94dc4f-88c0-4817-92ae-5b8b9957ef54", + "metadata": {}, + "outputs": [], + "source": [ + "test_group3 = test_group3.assign(\n", + " pct_update_complete2 = test_group3.test_mask.divide(\n", + " test_group3.trip_min_elapsed)\n", + " ) " + ] + }, + { + "cell_type": "code", + "execution_count": 64, + "id": "bc39796b-aeea-4e95-b542-23d6eb5d87b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keymin_timemax_timeatleast2_trip_updatestrip_min_elapsedpct_update_completelargertest_maskpct_update_complete2
15102b14ada5cb9e31a2c84b2a19b22c711a2023-10-11 13:10:002023-10-11 14:37:008887.001.01-1.0087.001.00
39386ee775a74150b96abf9531462c6c69e02023-10-11 02:59:002023-10-11 03:30:003231.001.03-1.0031.001.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key min_time \\\n", + "1510 2b14ada5cb9e31a2c84b2a19b22c711a 2023-10-11 13:10:00 \n", + "3938 6ee775a74150b96abf9531462c6c69e0 2023-10-11 02:59:00 \n", + "\n", + " max_time atleast2_trip_updates trip_min_elapsed \\\n", + "1510 2023-10-11 14:37:00 88 87.00 \n", + "3938 2023-10-11 03:30:00 32 31.00 \n", + "\n", + " pct_update_complete larger test_mask pct_update_complete2 \n", + "1510 1.01 -1.00 87.00 1.00 \n", + "3938 1.03 -1.00 31.00 1.00 " + ] + }, + "execution_count": 64, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.loc[test_group3.larger == -1].sample(2)" + ] + }, + { + "cell_type": "markdown", + "id": "52c81121-29bb-41ca-bd3a-cfe2b60f7989", + "metadata": {}, + "source": [ + "#### Ex of a trip: ten minutes but eleven rows 37622040815e89d063272bb6e37acc65\n", + "* The trip is ten minutes but there are eleven rows." + ] + }, + { + "cell_type": "code", + "execution_count": 65, + "id": "749f3055-f199-4905-854b-cf208c969f26", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keymin_timemax_timeatleast2_trip_updatestrip_min_elapsedpct_update_completelargertest_maskpct_update_complete2
197237622040815e89d063272bb6e37acc652023-10-11 05:31:002023-10-11 05:41:001110.001.10-1.0010.001.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key min_time \\\n", + "1972 37622040815e89d063272bb6e37acc65 2023-10-11 05:31:00 \n", + "\n", + " max_time atleast2_trip_updates trip_min_elapsed \\\n", + "1972 2023-10-11 05:41:00 11 10.00 \n", + "\n", + " pct_update_complete larger test_mask pct_update_complete2 \n", + "1972 1.10 -1.00 10.00 1.00 " + ] + }, + "execution_count": 65, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.loc[test_group3.trip_instance_key == \"37622040815e89d063272bb6e37acc65\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "id": "b3a1f2f3-6981-4be6-bccb-8636e4430f65", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(11, 6)" + ] + }, + "execution_count": 174, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group2.loc[test_group2.trip_instance_key == \"37622040815e89d063272bb6e37acc65\"].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 66, + "id": "d109f168-9183-489d-9815-f179cc26e217", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1" + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(test_group3.loc[test_group3.trip_instance_key == \"37622040815e89d063272bb6e37acc65\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "714e610c-018f-406c-abc9-8c69de0ff7a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keymin_timemax_timeatleast2_trip_updatestrip_min_elapsedpct_update_completelargertest_maskpct_update_complete2
197237622040815e89d063272bb6e37acc652023-10-11 05:31:002023-10-11 05:41:001110.001.10-1.0010.001.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key min_time \\\n", + "1972 37622040815e89d063272bb6e37acc65 2023-10-11 05:31:00 \n", + "\n", + " max_time atleast2_trip_updates trip_min_elapsed \\\n", + "1972 2023-10-11 05:41:00 11 10.00 \n", + "\n", + " pct_update_complete larger test_mask pct_update_complete2 \n", + "1972 1.10 -1.00 10.00 1.00 " + ] + }, + "execution_count": 67, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.loc[test_group3.trip_instance_key == \"37622040815e89d063272bb6e37acc65\"]" + ] + }, + { + "cell_type": "markdown", + "id": "6b54ee94-48e6-42ed-a87f-97ba04293589", + "metadata": { + "tags": [] + }, + "source": [ + "#### 568caf4acf76125fb5db063f8737e5a8\n", + "* Trip is 73 minutes" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "f505e963-6dd7-4624-a2a0-324ade8c04b8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keymin_timemax_timeatleast2_trip_updatestrip_min_elapsedpct_update_completelargertest_maskpct_update_complete2
3077568caf4acf76125fb5db063f8737e5a82023-10-11 08:32:002023-10-11 09:45:007473.001.01-1.0073.001.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key min_time \\\n", + "3077 568caf4acf76125fb5db063f8737e5a8 2023-10-11 08:32:00 \n", + "\n", + " max_time atleast2_trip_updates trip_min_elapsed \\\n", + "3077 2023-10-11 09:45:00 74 73.00 \n", + "\n", + " pct_update_complete larger test_mask pct_update_complete2 \n", + "3077 1.01 -1.00 73.00 1.00 " + ] + }, + "execution_count": 68, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.loc[test_group3.trip_instance_key == \"568caf4acf76125fb5db063f8737e5a8\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "d8d800f1-e881-43f1-bd4e-699d93ba8aaf", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "74" + ] + }, + "execution_count": 69, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(test_group2.loc[test_group2.trip_instance_key == \"568caf4acf76125fb5db063f8737e5a8\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "b3a10c1f-afc5-4206-a79f-9c78b1df4854", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "221" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(fresh_updates_df.loc[fresh_updates_df.trip_instance_key == \"568caf4acf76125fb5db063f8737e5a8\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "fde7e3e4-2df8-48f3-a34a-545a5aef5f81", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 71, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(test_group3)" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "95a64572-3d15-4c47-ac93-1adb9c5f8d46", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keymin_timemax_timeatleast2_trip_updatestrip_min_elapsedpct_update_completelargertest_maskpct_update_complete2
51288f471c6d3f23a285b0e957db9431a89a2023-10-11 19:10:002023-10-11 20:11:006161.001.000.0061.001.00
8609efa2c7f74ba9cd19255dc54ec7f08ec02023-10-11 13:04:002023-10-11 14:05:006061.000.981.0060.000.98
56469d2e48287e1f9f7f907bc1c91a811ae32023-10-11 08:38:002023-10-11 09:22:004444.001.000.0044.001.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key min_time \\\n", + "5128 8f471c6d3f23a285b0e957db9431a89a 2023-10-11 19:10:00 \n", + "8609 efa2c7f74ba9cd19255dc54ec7f08ec0 2023-10-11 13:04:00 \n", + "5646 9d2e48287e1f9f7f907bc1c91a811ae3 2023-10-11 08:38:00 \n", + "\n", + " max_time atleast2_trip_updates trip_min_elapsed \\\n", + "5128 2023-10-11 20:11:00 61 61.00 \n", + "8609 2023-10-11 14:05:00 60 61.00 \n", + "5646 2023-10-11 09:22:00 44 44.00 \n", + "\n", + " pct_update_complete larger test_mask pct_update_complete2 \n", + "5128 1.00 0.00 61.00 1.00 \n", + "8609 0.98 1.00 60.00 0.98 \n", + "5646 1.00 0.00 44.00 1.00 " + ] + }, + "execution_count": 72, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.sample(3)" + ] + }, + { + "cell_type": "markdown", + "id": "3b61450e-528c-4dab-9582-2bda3343e379", + "metadata": {}, + "source": [ + "## How many minutes a trip took and the average speeds?" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "36afb7b1-04f9-494b-a4d8-f85ddadc785b", + "metadata": {}, + "outputs": [], + "source": [ + "# relevant_trips = list(test_group3.trip_instance_key.unique())" + ] + }, + { + "cell_type": "markdown", + "id": "96335b84-31a9-4887-b97f-8ebc35610220", + "metadata": {}, + "source": [ + "### C2 \n", + "* https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C2_triangulate_vp.py\n", + "* Break it apart and check it out later to understand what's happening\n", + "* No need to `subset_usable_vp` because already filtered above."
+ ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "de7fba7a-6796-4e64-884c-55d636be2c12", + "metadata": {}, + "outputs": [], + "source": [ + "def triangulate_vp(\n", + " ddf: dd.DataFrame, \n", + " group_cols: list = [\"trip_instance_key\"]\n", + ") -> np.ndarray:\n", + " \"\"\"\n", + " Grab a sample of vehicle positions for each trip to triangulate distance.\n", + " These vp already sjoined onto the shape.\n", + " Roughly pick vp at equally spaced intervals.\n", + " \n", + " Dask aggregation can't group and use lambda to create list of possible \n", + " vp_idx.\n", + " \"\"\" \n", + " grouped_ddf = ddf.groupby(group_cols, observed=True, group_keys=False)\n", + "\n", + " min_df = (grouped_ddf\n", + " .agg({\"vp_idx\": \"min\"})\n", + " .rename(columns = {\"vp_idx\": \"min_vp_idx\"})\n", + " )\n", + "\n", + " max_df = (grouped_ddf\n", + " .agg({\"vp_idx\": \"max\"})\n", + " .rename(columns = {\"vp_idx\": \"max_vp_idx\"})\n", + " )\n", + " \n", + " vp_range = dd.merge(\n", + " min_df,\n", + " max_df,\n", + " left_index = True,\n", + " right_index = True,\n", + " how = \"inner\"\n", + " )\n", + "\n", + " vp_range = vp_range.persist()\n", + " \n", + " vp_range[\"range_diff\"] = vp_range.max_vp_idx - vp_range.min_vp_idx\n", + " \n", + " vp_range = vp_range.assign(\n", + " p25_vp_idx = (vp_range.range_diff * 0.25 + vp_range.min_vp_idx\n", + " ).round(0).astype(\"int64\"),\n", + " p50_vp_idx = (vp_range.range_diff * 0.5 + vp_range.min_vp_idx\n", + " ).round(0).astype(\"int64\"),\n", + " p75_vp_idx = (vp_range.range_diff * 0.75 + vp_range.min_vp_idx\n", + " ).round(0).astype(\"int64\"),\n", + " )\n", + " \n", + " vp_idx_cols = [\n", + " \"min_vp_idx\", \n", + " \"p25_vp_idx\",\n", + " \"p50_vp_idx\", \n", + " \"p75_vp_idx\",\n", + " \"max_vp_idx\"\n", + " ]\n", + "\n", + " results = vp_range[vp_idx_cols].compute().to_numpy().flatten() \n", + " \n", + " results = list(results)\n", + " return results" + ] + }, + { + "cell_type": "markdown", + "id": 
"44d836be-21d3-435e-9b35-a1d9863e6257", + "metadata": {}, + "source": [ + "#### Help: which df should I use to triangulate?\n", + "* Trips in which 0 of the points fall into the shapes should be excluded?\n", + "* Using the same df as in update completeness." + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "659a01af-3c01-4cc3-92ea-834a96c2106a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "1604702" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(vp_filtered)" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "75d6fcc5-8c85-4c62-a063-62a7a73ecdc0", + "metadata": {}, + "outputs": [], + "source": [ + "triangulate_muni = triangulate_vp(vp_filtered, 'trip_instance_key')" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "91eab6e3-4e44-4f31-829b-1dcc9b2aebf5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "list" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(triangulate_muni)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "8e709ae9-1c6d-4b55-ad5c-deb1d724276e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trip_instance_key 9240\n", + "dtype: int64" + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vp_filtered[['trip_instance_key']].compute().nunique()" + ] + }, + { + "cell_type": "markdown", + "id": "3dac48a4-cabc-415d-822c-0162fa995efd", + "metadata": {}, + "source": [ + "#### 4th time loading `vp_usable`" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "7807ccce-416c-4f03-a92a-2296edc1d19f", + "metadata": {}, + "outputs": [], + "source": [ + "vp_results = dd.read_parquet(f\"{SEGMENT_GCS}vp_usable_{analysis_date}\",\n", + " columns = [\n", + " \"gtfs_dataset_key\", \"trip_instance_key\",\n", + " 
\"location_timestamp_local\",\n", + " \"x\", \"y\", \"vp_idx\"],\n", + " filters = [[('gtfs_dataset_name', \"==\", operator),\n", + " ('schedule_gtfs_dataset_key', '==', gtfs_key),\n", + " ('vp_idx', 'in', triangulate_muni)]]).compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "d98556b6-4e16-4bce-b080-19bceadde4af", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "46198" + ] + }, + "execution_count": 80, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(vp_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "9f0282d0-c59f-4843-b23e-bb22be66404e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vp_results.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "bbcab824-28a9-4fb3-828a-471b83966b39", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keylocation_timestamp_localxyvp_idx
11825575c0e3039da063db95ebabd3fe4ee611a4d28f5a7bf8faf0aa6de5c4cf02c7c1062023-10-11 17:06:59-122.4837.7311825575
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "11825575 c0e3039da063db95ebabd3fe4ee611a4 d28f5a7bf8faf0aa6de5c4cf02c7c106 \n", + "\n", + " location_timestamp_local x y vp_idx \n", + "11825575 2023-10-11 17:06:59 -122.48 37.73 11825575 " + ] + }, + "execution_count": 82, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vp_results.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "63c04621-ad68-4e3d-9cec-133dbf0aa3b0", + "metadata": {}, + "outputs": [], + "source": [ + "def merge_rt_scheduled_trips(\n", + " rt_trips: dd.DataFrame,\n", + " analysis_date: str,\n", + " group_cols: list = [\"trip_instance_key\"]) -> dd.DataFrame:\n", + " \"\"\"\n", + " Merge RT trips (vehicle positions) to scheduled trips \n", + " to get the shape_array_key.\n", + " Don't pull other scheduled trip columns now, wait until\n", + " after aggregation is done.\n", + " \"\"\"\n", + " trips = helpers.import_scheduled_trips(\n", + " analysis_date,\n", + " columns = group_cols + [\"shape_array_key\"],\n", + " get_pandas = True\n", + " )\n", + " \n", + " df = dd.merge(\n", + " rt_trips,\n", + " trips,\n", + " on = group_cols,\n", + " how = \"left\",\n", + " )\n", + " \n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "866b83cc-c0d9-4fc5-b794-f995e8b760de", + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C2_triangulate_vp.py#L170-L180\n", + "vp_with_sched = (\n", + " merge_rt_scheduled_trips(\n", + " vp_results, \n", + " analysis_date, \n", + " group_cols = [\"trip_instance_key\"]\n", + " ).sort_values(\"vp_idx\")\n", + " .reset_index(drop=True)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "8b6f6495-aa13-4452-a2c9-bf8c1f7f49fd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 
85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(vp_with_sched)" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "a7b261ec-41b8-4f87-998d-a4b5fafc7ec2", + "metadata": {}, + "outputs": [], + "source": [ + "# Check triangulate worked \n", + "triangulate_check = vp_with_sched.groupby(['trip_instance_key']).agg({'location_timestamp_local':'nunique'}).reset_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "bd19bfd5-48d1-4722-a67c-56069bf89e35", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5 9239\n", + "3 1\n", + "Name: location_timestamp_local, dtype: int64" + ] + }, + "execution_count": 89, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "triangulate_check.location_timestamp_local.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "24d761ba-cc50-406e-9538-e6f2f8705b75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keylocation_timestamp_local
6824be7a580802c3ec183904cac37e6c0afd3
\n", + "
" + ], + "text/plain": [ + " trip_instance_key location_timestamp_local\n", + "6824 be7a580802c3ec183904cac37e6c0afd 3" + ] + }, + "execution_count": 87, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "triangulate_check.loc[triangulate_check.location_timestamp_local == 3]" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "6e24fb38-f641-418d-99cc-c1b7aaf0e4a1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 9240.00\n", + "mean 5.00\n", + "std 0.02\n", + "min 3.00\n", + "25% 5.00\n", + "50% 5.00\n", + "75% 5.00\n", + "max 5.00\n", + "Name: location_timestamp_local, dtype: float64" + ] + }, + "execution_count": 88, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "triangulate_check.location_timestamp_local.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "id": "546417f6-e033-43e5-ba70-fdc392697561", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
vp_idxlocation_timestamp_localtrip_instance_keygtfs_dataset_key
12506123125061232023-10-11 16:13:08be7a580802c3ec183904cac37e6c0afdc0e3039da063db95ebabd3fe4ee611a4
12506124125061242023-10-11 17:01:19be7a580802c3ec183904cac37e6c0afdc0e3039da063db95ebabd3fe4ee611a4
12506125125061252023-10-11 17:01:35be7a580802c3ec183904cac37e6c0afdc0e3039da063db95ebabd3fe4ee611a4
\n", + "
" + ], + "text/plain": [ + " vp_idx location_timestamp_local trip_instance_key \\\n", + "12506123 12506123 2023-10-11 16:13:08 be7a580802c3ec183904cac37e6c0afd \n", + "12506124 12506124 2023-10-11 17:01:19 be7a580802c3ec183904cac37e6c0afd \n", + "12506125 12506125 2023-10-11 17:01:35 be7a580802c3ec183904cac37e6c0afd \n", + "\n", + " gtfs_dataset_key \n", + "12506123 c0e3039da063db95ebabd3fe4ee611a4 \n", + "12506124 c0e3039da063db95ebabd3fe4ee611a4 \n", + "12506125 c0e3039da063db95ebabd3fe4ee611a4 " + ] + }, + "execution_count": 176, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vp_filtered.loc[vp_filtered.trip_instance_key == \"be7a580802c3ec183904cac37e6c0afd\"].compute()" + ] + }, + { + "cell_type": "markdown", + "id": "687e9c19-daf3-4384-942a-947728bb0680", + "metadata": { + "tags": [] + }, + "source": [ + "### C3\n", + "* https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py" + ] + }, + { + "cell_type": "code", + "execution_count": 91, + "id": "78c974a0-51f8-42a0-b256-1d8f7c22482e", + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L262-L265\n", + "vp_c3 = gpd.GeoDataFrame(\n", + " vp_with_sched,\n", + " geometry = gpd.points_from_xy(vp_with_sched.x, vp_with_sched.y, crs=WGS84)\n", + " ).to_crs(PROJECT_CRS).drop(columns = [\"x\", \"y\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 92, + "id": "410e5f12-24fc-443b-adb5-e05f9e12ba2a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 92, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "vp_c3.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "id": "23c64d78-3c7f-4b58-9ccd-18159d9e1a4e", + "metadata": {}, + "outputs": [], + "source": [ + "shapes_list = 
vp_c3.shape_array_key.unique().tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "id": "b3bd3c05-7c1d-44e8-afe3-a28a7c6e6190", + "metadata": {}, + "outputs": [], + "source": [ + "shapes = helpers.import_scheduled_shapes(\n", + " analysis_date,\n", + " columns = [\"shape_array_key\",\"geometry\"],\n", + " filters = [[(\"shape_array_key\", \"in\", shapes_list)]],\n", + " get_pandas = True,\n", + " crs = PROJECT_CRS\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "id": "cbcf928d-c9ec-46e9-9685-48ca5efe8359", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(255, 2)" + ] + }, + "execution_count": 95, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "shapes.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "id": "255037a4-079e-4586-beba-0c170f4c6fcb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "geopandas.geodataframe.GeoDataFrame" + ] + }, + "execution_count": 96, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(shapes)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "id": "47bbe61e-e014-48c3-b85f-1dea06d5e56e", + "metadata": {}, + "outputs": [], + "source": [ + "# shapes.explore('shape_array_key')" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "id": "27c8fff8-f8a7-4906-a3f4-7faac4969bde", + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L280-L287\n", + "c3_m1 = pd.merge(\n", + " vp_c3,\n", + " shapes,\n", + " on = \"shape_array_key\",\n", + " how = \"inner\"\n", + " ).rename(columns = {\"geometry_x\": \"vp_geometry\", \n", + " \"geometry_y\": \"shape_geometry\"}\n", + " ).set_geometry(\"vp_geometry\")" + ] + }, + { + "cell_type": "code", + "execution_count": 99, + "id": "7430576e-c571-45de-afb9-500c21336808", + "metadata": {}, + "outputs": [], + 
"source": [ + "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L290-L293\n", + "shape_meters_geoseries = wrangle_shapes.project_point_geom_onto_linestring(\n", + " c3_m1,\n", + " \"shape_geometry\",\n", + " \"vp_geometry\",\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": 100, + "id": "6631d594-78d7-494a-8bd2-86d2b0c19b1f", + "metadata": {}, + "outputs": [], + "source": [ + "c3_m1[\"shape_meters\"] = shape_meters_geoseries" + ] + }, + { + "cell_type": "markdown", + "id": "9c6f493b-53dd-4ef6-95ad-445a0562d633", + "metadata": {}, + "source": [ + "#### distance_and_seconds_elapsed\n", + "* Breaking apart to understand\n", + "* https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L290-L293" + ] + }, + { + "cell_type": "code", + "execution_count": 101, + "id": "50260f4c-91b2-4e89-afbb-a1dc12594d4d", + "metadata": {}, + "outputs": [], + "source": [ + "dist_col = \"shape_meters\"\n", + "time_col = \"location_timestamp_local\"\n", + "group_cols = [\"gtfs_dataset_key\", \"trip_instance_key\"]\n", + "sort_cols = group_cols + [\"vp_idx\"]\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 102, + "id": "af0b69f8-5c48-433b-a732-958d12eb2e82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "geopandas.geodataframe.GeoDataFrame" + ] + }, + "execution_count": 102, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(c3_m1)" + ] + }, + { + "cell_type": "code", + "execution_count": 103, + "id": "34e03a27-05bc-49fb-b285-1c9df094fc27", + "metadata": {}, + "outputs": [], + "source": [ + "c3_m1 = c3_m1.assign(\n", + " prior_dist = (c3_m1.sort_values(sort_cols)\n", + " .groupby(group_cols, \n", + " observed=True, group_keys=False)\n", + " [dist_col]\n", + " .apply(lambda x: x.shift(1))\n", + " ),\n", + " prior_time = (c3_m1.sort_values(sort_cols)\n", + " .groupby(group_cols, \n", + " 
observed=True, group_keys=False)\n", + " [time_col]\n", + " .apply(lambda x: x.shift(1))\n", + " ) \n", + " )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 104, + "id": "ffbc94d1-090f-4a54-ac85-d5b155660ca8", + "metadata": {}, + "outputs": [], + "source": [ + "# distance should be positive, but sometimes it's not, \n", + "# so use absolute value\n", + "c3_m1 = c3_m1.assign(\n", + " change_meters = abs(c3_m1[dist_col] - c3_m1.prior_dist),\n", + " change_sec = (c3_m1[time_col] - c3_m1.prior_time).divide(\n", + " np.timedelta64(1, 's'))\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 105, + "id": "a274e442-2a65-4c84-a5ba-dc35d068771c", + "metadata": {}, + "outputs": [], + "source": [ + "one_trip = c3_m1.loc[c3_m1.trip_instance_key == \"ec2ef3dc047b844d7abf2d035728e202\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 106, + "id": "2700eca7-ce6e-4c9d-92d4-7331fee5ffc3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "28840 NaT\n", + "28841 0 days 00:05:21\n", + "28842 0 days 00:05:21\n", + "28843 0 days 00:05:06\n", + "28844 0 days 00:05:21\n", + "dtype: timedelta64[ns]" + ] + }, + "execution_count": 106, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(one_trip[time_col] - one_trip.prior_time).head()" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "47e620ba-672a-4baf-bfbd-90ec5b7c6865", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keylocation_timestamp_localvp_idxshape_array_keyvp_geometryshape_metersprior_distprior_timechange_meterschange_sec
28840c0e3039da063db95ebabd3fe4ee611a4ec2ef3dc047b844d7abf2d035728e2022023-10-11 21:21:0812415722652d492c9725307ab5f725bb616ee4a0POINT (-212402.440 -28559.730)161.50NaNNaTNaNNaN
28841c0e3039da063db95ebabd3fe4ee611a4ec2ef3dc047b844d7abf2d035728e2022023-10-11 21:26:2912415738652d492c9725307ab5f725bb616ee4a0POINT (-212088.927 -27551.761)1454.71161.502023-10-11 21:21:081293.21321.00
28842c0e3039da063db95ebabd3fe4ee611a4ec2ef3dc047b844d7abf2d035728e2022023-10-11 21:31:5012415754652d492c9725307ab5f725bb616ee4a0POINT (-212560.266 -26634.904)2819.511454.712023-10-11 21:26:291364.80321.00
28843c0e3039da063db95ebabd3fe4ee611a4ec2ef3dc047b844d7abf2d035728e2022023-10-11 21:36:5612415769652d492c9725307ab5f725bb616ee4a0POINT (-212711.278 -26637.392)2970.412819.512023-10-11 21:31:50150.90306.00
28844c0e3039da063db95ebabd3fe4ee611a4ec2ef3dc047b844d7abf2d035728e2022023-10-11 21:42:1712415785652d492c9725307ab5f725bb616ee4a0POINT (-212752.276 -26640.128)3016.192970.412023-10-11 21:36:5645.78321.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "28840 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n", + "28841 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n", + "28842 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n", + "28843 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n", + "28844 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n", + "\n", + " location_timestamp_local vp_idx shape_array_key \\\n", + "28840 2023-10-11 21:21:08 12415722 652d492c9725307ab5f725bb616ee4a0 \n", + "28841 2023-10-11 21:26:29 12415738 652d492c9725307ab5f725bb616ee4a0 \n", + "28842 2023-10-11 21:31:50 12415754 652d492c9725307ab5f725bb616ee4a0 \n", + "28843 2023-10-11 21:36:56 12415769 652d492c9725307ab5f725bb616ee4a0 \n", + "28844 2023-10-11 21:42:17 12415785 652d492c9725307ab5f725bb616ee4a0 \n", + "\n", + " vp_geometry shape_meters prior_dist \\\n", + "28840 POINT (-212402.440 -28559.730) 161.50 NaN \n", + "28841 POINT (-212088.927 -27551.761) 1454.71 161.50 \n", + "28842 POINT (-212560.266 -26634.904) 2819.51 1454.71 \n", + "28843 POINT (-212711.278 -26637.392) 2970.41 2819.51 \n", + "28844 POINT (-212752.276 -26640.128) 3016.19 2970.41 \n", + "\n", + " prior_time change_meters change_sec \n", + "28840 NaT NaN NaN \n", + "28841 2023-10-11 21:21:08 1293.21 321.00 \n", + "28842 2023-10-11 21:26:29 1364.80 321.00 \n", + "28843 2023-10-11 21:31:50 150.90 306.00 \n", + "28844 2023-10-11 21:36:56 45.78 321.00 " + ] + }, + "execution_count": 107, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_trip.drop(columns = ['shape_geometry'])" + ] + }, + { + "cell_type": "code", + "execution_count": 108, + "id": "da8c879f-b7d5-40d5-95ea-540a68dfc9f3", + "metadata": {}, + "outputs": [], + "source": [ + "# Test with one trip\n", + "one_trip2 = (one_trip.groupby(group_cols, \n", + " observed=True, group_keys=False)\n", + " 
.agg({\"change_meters\": \"sum\", \n", + " \"change_sec\": \"sum\"})\n", + " .reset_index()\n", + " )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 109, + "id": "91e1e3cd-3ddd-4f0d-ac4c-9d79bb136828", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_sec
0c0e3039da063db95ebabd3fe4ee611a4ec2ef3dc047b844d7abf2d035728e2022854.691269.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "0 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n", + "\n", + " change_meters change_sec \n", + "0 2854.69 1269.00 " + ] + }, + "execution_count": 109, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_trip2" + ] + }, + { + "cell_type": "code", + "execution_count": 110, + "id": "a501338c-5c10-4872-81e0-483ef8f44171", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "2.237" + ] + }, + "execution_count": 110, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "MPH_PER_MPS" + ] + }, + { + "cell_type": "code", + "execution_count": 111, + "id": "7e67e8b6-0d28-4222-9fd4-8cb85eec9a1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mph
0c0e3039da063db95ebabd3fe4ee611a4ec2ef3dc047b844d7abf2d035728e2022854.691269.005.03
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "0 c0e3039da063db95ebabd3fe4ee611a4 ec2ef3dc047b844d7abf2d035728e202 \n", + "\n", + " change_meters change_sec speed_mph \n", + "0 2854.69 1269.00 5.03 " + ] + }, + "execution_count": 111, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_trip2.assign(\n", + " speed_mph = (one_trip2.change_meters.divide(one_trip2.change_sec) * \n", + " MPH_PER_MPS)\n", + " )\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 112, + "id": "e687d2c6-51ee-481b-bc23-3540a5b6d09b", + "metadata": {}, + "outputs": [], + "source": [ + "c3_m2 = (c3_m1.groupby(group_cols, \n", + " observed=True, group_keys=False)\n", + " .agg({\"change_meters\": \"sum\", \n", + " \"change_sec\": \"sum\"})\n", + " .reset_index()\n", + " )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 113, + "id": "2d5dec48-a014-4c64-ac73-cd0a867b12fb", + "metadata": {}, + "outputs": [], + "source": [ + "# Trip-level speed: divide each trip's summed meters by its own summed seconds.\n", + "# Both columns must come from c3_m2; c3_m1 is the per-vp frame and pandas would\n", + "# align it on the row index, pairing each trip with an unrelated ping's change_sec.\n", + "c3_m2 = c3_m2.assign(\n", + " speed_mph = (c3_m2.change_meters.divide(c3_m2.change_sec) * \n", + " MPH_PER_MPS)\n", + " )\n", + " \n" + ] + }, + { + "cell_type": "code", + "execution_count": 114, + "id": "00560062-5a81-4035-baad-e59c631166e4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 7392.00\n", + "mean 29.82\n", + "std 21.56\n", + "min 0.00\n", + "25% 17.66\n", + "50% 25.23\n", + "75% 35.86\n", + "max 340.24\n", + "Name: speed_mph, dtype: float64" + ] + }, + "execution_count": 114, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m2.speed_mph.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "id": "dac3c329-0602-4391-a6cb-5eb66d5fa656", + "metadata": {}, + "outputs": [], + "source": [ + "# muni.loc[muni.vp_in_shape == 0]" + ] + }, + { + "cell_type": "markdown", + "id": "54c52ae9-03a7-4986-a62f-21ee53e89630", + "metadata": {}, + "source": [ + "#### See why a trip would have speed_mph of 0" + ]
+ }, + { + "cell_type": "code", + "execution_count": 116, + "id": "93e5d405-bd05-4108-89ac-83ffbaef40d9", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keylocation_timestamp_localvp_idxshape_array_keyvp_geometryshape_metersprior_distprior_timechange_meterschange_sec
35703c0e3039da063db95ebabd3fe4ee611a404ae6ea9655473bdcaf9b80d443558ae2023-10-11 18:34:4712628774f82328b9817126227af1aa52033d8847POINT (-212105.135 -21893.355)1411.65NaNNaTNaNNaN
35704c0e3039da063db95ebabd3fe4ee611a404ae6ea9655473bdcaf9b80d443558ae2023-10-11 18:44:1812628802f82328b9817126227af1aa52033d8847POINT (-212100.059 -21901.268)1411.651411.652023-10-11 18:34:470.00571.00
35705c0e3039da063db95ebabd3fe4ee611a404ae6ea9655473bdcaf9b80d443558ae2023-10-11 18:53:1712628829f82328b9817126227af1aa52033d8847POINT (-212100.087 -21902.380)1411.651411.652023-10-11 18:44:180.00539.00
35706c0e3039da063db95ebabd3fe4ee611a404ae6ea9655473bdcaf9b80d443558ae2023-10-11 19:02:1712628856f82328b9817126227af1aa52033d8847POINT (-212100.228 -21907.936)1411.651411.652023-10-11 18:53:170.00540.00
35707c0e3039da063db95ebabd3fe4ee611a404ae6ea9655473bdcaf9b80d443558ae2023-10-11 19:11:3112628884f82328b9817126227af1aa52033d8847POINT (-212098.611 -21913.537)1411.651411.652023-10-11 19:02:170.00554.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "35703 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n", + "35704 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n", + "35705 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n", + "35706 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n", + "35707 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n", + "\n", + " location_timestamp_local vp_idx shape_array_key \\\n", + "35703 2023-10-11 18:34:47 12628774 f82328b9817126227af1aa52033d8847 \n", + "35704 2023-10-11 18:44:18 12628802 f82328b9817126227af1aa52033d8847 \n", + "35705 2023-10-11 18:53:17 12628829 f82328b9817126227af1aa52033d8847 \n", + "35706 2023-10-11 19:02:17 12628856 f82328b9817126227af1aa52033d8847 \n", + "35707 2023-10-11 19:11:31 12628884 f82328b9817126227af1aa52033d8847 \n", + "\n", + " vp_geometry shape_meters prior_dist \\\n", + "35703 POINT (-212105.135 -21893.355) 1411.65 NaN \n", + "35704 POINT (-212100.059 -21901.268) 1411.65 1411.65 \n", + "35705 POINT (-212100.087 -21902.380) 1411.65 1411.65 \n", + "35706 POINT (-212100.228 -21907.936) 1411.65 1411.65 \n", + "35707 POINT (-212098.611 -21913.537) 1411.65 1411.65 \n", + "\n", + " prior_time change_meters change_sec \n", + "35703 NaT NaN NaN \n", + "35704 2023-10-11 18:34:47 0.00 571.00 \n", + "35705 2023-10-11 18:44:18 0.00 539.00 \n", + "35706 2023-10-11 18:53:17 0.00 540.00 \n", + "35707 2023-10-11 19:02:17 0.00 554.00 " + ] + }, + "execution_count": 116, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m1.loc[c3_m1.trip_instance_key == \"04ae6ea9655473bdcaf9b80d443558ae\"].drop(columns = ['shape_geometry'])" + ] + }, + { + "cell_type": "code", + "execution_count": 209, + "id": "a303aab1-a574-4dbb-9a62-fff52cbd3070", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 9240 
entries, 0 to 9239\n", + "Data columns (total 5 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 gtfs_dataset_key 9240 non-null category\n", + " 1 trip_instance_key 9240 non-null object \n", + " 2 change_meters 9240 non-null float64 \n", + " 3 change_sec 9240 non-null float64 \n", + " 4 speed_mph 7392 non-null float64 \n", + "dtypes: category(1), float64(3), object(1)\n", + "memory usage: 298.0+ KB\n" + ] + } + ], + "source": [ + "c3_m2.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 117, + "id": "91db6999-f3f8-4001-bdda-4ad59abeeef7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mph
167c0e3039da063db95ebabd3fe4ee611a404ae6ea9655473bdcaf9b80d443558ae0.002204.000.00
863c0e3039da063db95ebabd3fe4ee611a4183990ffbd08d7d7df0308b35227a0580.001530.000.00
1128c0e3039da063db95ebabd3fe4ee611a41fe28ce8669a7f946f272faf80d80ce70.001379.000.00
2854c0e3039da063db95ebabd3fe4ee611a45056febd44cda31254459111e1fc79d00.00875.000.00
3213c0e3039da063db95ebabd3fe4ee611a45a82f8c02f2036db77b1979f4ae228f50.00803.000.00
3232c0e3039da063db95ebabd3fe4ee611a45b07e2bf68a16c54d798ef4028e020260.001004.000.00
3937c0e3039da063db95ebabd3fe4ee611a46ee4a9165f47b59cb3e10b0125dd72e80.00649.000.00
4562c0e3039da063db95ebabd3fe4ee611a47ff345e91303290951ef0665a57dc90e0.001403.000.00
6758c0e3039da063db95ebabd3fe4ee611a4bcb78da06a56761d2a3b8b16a3436f3d0.002000.000.00
7043c0e3039da063db95ebabd3fe4ee611a4c3fc63ec2ff7a35c0f821659075be7080.001173.000.00
7239c0e3039da063db95ebabd3fe4ee611a4c992295be425ff2d17de0f0f29bec3010.00877.000.00
7543c0e3039da063db95ebabd3fe4ee611a4d21ff3a0d37a484dc4f979d0526525650.00873.000.00
8431c0e3039da063db95ebabd3fe4ee611a4ea8751445dcd6a175bd23ec5fc423d0c0.00866.000.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "167 c0e3039da063db95ebabd3fe4ee611a4 04ae6ea9655473bdcaf9b80d443558ae \n", + "863 c0e3039da063db95ebabd3fe4ee611a4 183990ffbd08d7d7df0308b35227a058 \n", + "1128 c0e3039da063db95ebabd3fe4ee611a4 1fe28ce8669a7f946f272faf80d80ce7 \n", + "2854 c0e3039da063db95ebabd3fe4ee611a4 5056febd44cda31254459111e1fc79d0 \n", + "3213 c0e3039da063db95ebabd3fe4ee611a4 5a82f8c02f2036db77b1979f4ae228f5 \n", + "3232 c0e3039da063db95ebabd3fe4ee611a4 5b07e2bf68a16c54d798ef4028e02026 \n", + "3937 c0e3039da063db95ebabd3fe4ee611a4 6ee4a9165f47b59cb3e10b0125dd72e8 \n", + "4562 c0e3039da063db95ebabd3fe4ee611a4 7ff345e91303290951ef0665a57dc90e \n", + "6758 c0e3039da063db95ebabd3fe4ee611a4 bcb78da06a56761d2a3b8b16a3436f3d \n", + "7043 c0e3039da063db95ebabd3fe4ee611a4 c3fc63ec2ff7a35c0f821659075be708 \n", + "7239 c0e3039da063db95ebabd3fe4ee611a4 c992295be425ff2d17de0f0f29bec301 \n", + "7543 c0e3039da063db95ebabd3fe4ee611a4 d21ff3a0d37a484dc4f979d052652565 \n", + "8431 c0e3039da063db95ebabd3fe4ee611a4 ea8751445dcd6a175bd23ec5fc423d0c \n", + "\n", + " change_meters change_sec speed_mph \n", + "167 0.00 2204.00 0.00 \n", + "863 0.00 1530.00 0.00 \n", + "1128 0.00 1379.00 0.00 \n", + "2854 0.00 875.00 0.00 \n", + "3213 0.00 803.00 0.00 \n", + "3232 0.00 1004.00 0.00 \n", + "3937 0.00 649.00 0.00 \n", + "4562 0.00 1403.00 0.00 \n", + "6758 0.00 2000.00 0.00 \n", + "7043 0.00 1173.00 0.00 \n", + "7239 0.00 877.00 0.00 \n", + "7543 0.00 873.00 0.00 \n", + "8431 0.00 866.00 0.00 " + ] + }, + "execution_count": 117, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m2.loc[c3_m2.speed_mph == 0]" + ] + }, + { + "cell_type": "markdown", + "id": "328178ff-9e1e-48b6-9580-ae6b557f0987", + "metadata": {}, + "source": [ + "#### add_scheduled_trip_columns" + ] + }, + { + "cell_type": "code", + "execution_count": 118, + "id": "e465113c-2bc5-4792-bd10-822184882fe6", + "metadata": {}, + "outputs": [], + "source": 
[ + "# Attach scheduled trip columns, like route, direction, time_of_day\n", + "group_cols = [\"trip_instance_key\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 119, + "id": "25b7221d-725c-4f9a-9728-497e6b46464f", + "metadata": {}, + "outputs": [], + "source": [ + "keep_cols = [\n", + " \"gtfs_dataset_key\",\n", + " \"direction_id\", \n", + " \"route_id\", \"route_short_name\", \"route_long_name\", \"route_desc\",\n", + " ] + group_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "id": "ab221f62-4c2b-4700-8612-0b491432c36f", + "metadata": {}, + "outputs": [], + "source": [ + "crosswalk = helpers.import_scheduled_trips(\n", + " analysis_date, \n", + " columns = keep_cols, \n", + " get_pandas = True\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 121, + "id": "29ddf9da-454a-40d6-bc2c-ab2b74504cbe", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keydirection_idroute_idroute_short_nameroute_long_nameroute_desctrip_instance_key
01770249a5a2e770ca90628434d4934b11.003402Route 11Route 11PACIFIC VIEW MALL via TELEPHONE RD1b3cc71bbb3c3166e8c5540ca26a97ba
11770249a5a2e770ca90628434d4934b11.003402Route 11Route 11PACIFIC VIEW MALL via TELEPHONE RDf780ba65965e61d394635fd80fc81232
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key direction_id route_id route_short_name \\\n", + "0 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", + "1 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", + "\n", + " route_long_name route_desc \\\n", + "0 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", + "1 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", + "\n", + " trip_instance_key \n", + "0 1b3cc71bbb3c3166e8c5540ca26a97ba \n", + "1 f780ba65965e61d394635fd80fc81232 " + ] + }, + "execution_count": 121, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crosswalk.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 122, + "id": "774e182c-9157-47e3-9c95-160a331353c6", + "metadata": {}, + "outputs": [], + "source": [ + "common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 123, + "id": "e83a2e56-3fe7-400f-8a67-9bad9989a5dc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keyroute_iddirection_idcommon_shape_idshape_array_key
0014d0998350083249a9eb310635548c2108668261.0010866826:180d84c820ca200c0b3d1791185c72b56
1014d0998350083249a9eb310635548c2108668491.0010866849:199faaf65ccc65b3ec0e6704765b60195
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key route_id direction_id common_shape_id \\\n", + "0 014d0998350083249a9eb310635548c2 10866826 1.00 10866826:1 \n", + "1 014d0998350083249a9eb310635548c2 10866849 1.00 10866849:1 \n", + "\n", + " shape_array_key \n", + "0 80d84c820ca200c0b3d1791185c72b56 \n", + "1 99faaf65ccc65b3ec0e6704765b60195 " + ] + }, + "execution_count": 123, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "common_shape.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "ada2f87d-85c4-4999-926e-7dbb0cc57b90", + "metadata": {}, + "outputs": [], + "source": [ + "crosswalk2 = pd.merge(\n", + " crosswalk,\n", + " common_shape,\n", + " on = [\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n", + " how = \"inner\"\n", + " ).astype({\"direction_id\": \"Int64\"})\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "id": "c473c35b-ac7c-4839-81eb-3b31ec25ca38", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keydirection_idroute_idroute_short_nameroute_long_nameroute_desctrip_instance_keycommon_shape_idshape_array_key
846263c275e5acf8974e1afd765bd3011424c159HDHospital: DirectNone966a69a34ccc9b82dc65ae82346a12e459:2b47002305320d71375303e2de926642a
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key direction_id route_id \\\n", + "84626 3c275e5acf8974e1afd765bd3011424c 1 59 \n", + "\n", + " route_short_name route_long_name route_desc \\\n", + "84626 HD Hospital: Direct None \n", + "\n", + " trip_instance_key common_shape_id \\\n", + "84626 966a69a34ccc9b82dc65ae82346a12e4 59:2 \n", + "\n", + " shape_array_key \n", + "84626 b47002305320d71375303e2de926642a " + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crosswalk2.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 126, + "id": "866defff-f203-4f25-a150-aea22333c7f2", + "metadata": {}, + "outputs": [], + "source": [ + "time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 127, + "id": "d52698d8-7021-4766-92b2-dd6674928944", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((101973, 5), 101973)" + ] + }, + "execution_count": 127, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time_of_day.shape, time_of_day.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "id": "bc2161ba-d506-41a6-8517-63891af5f8e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyservice_hourstrip_first_departure_datetime_pacifictime_of_dayservice_minutes
01b3cc71bbb3c3166e8c5540ca26a97ba0.602023-10-11 10:40:00Midday36.00
1f780ba65965e61d394635fd80fc812320.552023-10-11 17:40:00PM Peak33.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key service_hours \\\n", + "0 1b3cc71bbb3c3166e8c5540ca26a97ba 0.60 \n", + "1 f780ba65965e61d394635fd80fc81232 0.55 \n", + "\n", + " trip_first_departure_datetime_pacific time_of_day service_minutes \n", + "0 2023-10-11 10:40:00 Midday 36.00 \n", + "1 2023-10-11 17:40:00 PM Peak 33.00 " + ] + }, + "execution_count": 128, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time_of_day.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 129, + "id": "834ce86a-8735-4d06-9be9-ef96d2fbbec0", + "metadata": {}, + "outputs": [], + "source": [ + "crosswalk2 = portfolio_utils.add_route_name(\n", + " crosswalk2\n", + " ).drop(columns = [\"route_short_name\", \"route_long_name\", \"route_desc\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 130, + "id": "226915d5-2dce-45af-aeae-0d6f32f36f1e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keydirection_idroute_idtrip_instance_keycommon_shape_idshape_array_keyroute_name_used
01770249a5a2e770ca90628434d4934b1134021b3cc71bbb3c3166e8c5540ca26a97ba8254a669792e07a16b260139c6950011b759PACIFIC VIEW MALL via TELEPHONE RD
11770249a5a2e770ca90628434d4934b113402f780ba65965e61d394635fd80fc812328254a669792e07a16b260139c6950011b759PACIFIC VIEW MALL via TELEPHONE RD
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key direction_id route_id \\\n", + "0 1770249a5a2e770ca90628434d4934b1 1 3402 \n", + "1 1770249a5a2e770ca90628434d4934b1 1 3402 \n", + "\n", + " trip_instance_key common_shape_id \\\n", + "0 1b3cc71bbb3c3166e8c5540ca26a97ba 8254 \n", + "1 f780ba65965e61d394635fd80fc81232 8254 \n", + "\n", + " shape_array_key route_name_used \n", + "0 a669792e07a16b260139c6950011b759 PACIFIC VIEW MALL via TELEPHONE RD \n", + "1 a669792e07a16b260139c6950011b759 PACIFIC VIEW MALL via TELEPHONE RD " + ] + }, + "execution_count": 130, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crosswalk2.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "id": "03971a5f-a151-43bf-a417-2d7be7a1d37b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 131, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(c3_m2)" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "id": "0163454e-5e0c-41d5-895d-8e0a6985ff29", + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L116-L124\n", + "c3_m3 = dd.merge(\n", + " c3_m2,\n", + " crosswalk2,\n", + " on = group_cols,\n", + " how = \"left\",\n", + " ).merge(\n", + " time_of_day,\n", + " on = group_cols,\n", + " how = \"left\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 133, + "id": "39bd5b37-d195-4dc1-85d4-e1c54b298248", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 133, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(c3_m3)" + ] + }, + { + "cell_type": "code", + "execution_count": 134, + "id": "49e475a8-530d-4b12-9c9d-1741b0fc39dd", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": 
[ + "Index(['gtfs_dataset_key', 'trip_instance_key', 'change_meters', 'change_sec',\n", + " 'speed_mph', 'schedule_gtfs_dataset_key', 'direction_id', 'route_id',\n", + " 'common_shape_id', 'shape_array_key', 'route_name_used',\n", + " 'service_hours', 'trip_first_departure_datetime_pacific', 'time_of_day',\n", + " 'service_minutes'],\n", + " dtype='object')" + ] + }, + "execution_count": 134, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m3.columns" + ] + }, + { + "cell_type": "markdown", + "id": "7d40ef54-9b9e-40ea-9651-1a00a36d8105", + "metadata": {}, + "source": [ + "#### avg_route_speeds_by_time_of_day" + ] + }, + { + "cell_type": "code", + "execution_count": 135, + "id": "3e325d57-160f-40b5-8a7c-0692173e8874", + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L166\n", + "def drop_extremely_low_and_high_speeds(\n", + " df: pd.DataFrame, \n", + " speed_range: tuple\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " Descriptives show the 5th percentile is around 5 mph, \n", + " and 95th percentile is around 25 mph.\n", + " \n", + " There are some weird calculations for <3 mph, and even\n", + " some negative values, so let's exclude those...maybe\n", + " the vp is not traveling across the entirety of the shape.\n", + " \n", + " Exclude unusually high speeds, over 70 mph.\n", + " \"\"\"\n", + " low, high = speed_range\n", + " \n", + " df2 = df[(df.speed_mph >= low) & \n", + " (df.speed_mph <= high)\n", + " ].reset_index(drop=True)\n", + " \n", + " return df2" + ] + }, + { + "cell_type": "code", + "execution_count": 136, + "id": "f5c8dc5d-61cd-4883-a089-000ca1e11d76", + "metadata": {}, + "outputs": [], + "source": [ + "c3_m3 = drop_extremely_low_and_high_speeds(c3_m3, speed_range = (3, 70))" + ] + }, + { + "cell_type": "code", + "execution_count": 178, + "id": "983a896a-e40b-4c46-93a9-78643c7e1e69", + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "((6993, 15), (46198, 12), (9240, 5))" + ] + }, + "execution_count": 178, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m3.shape, c3_m1.shape, c3_m2.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 138, + "id": "a9be0c3a-f224-40f4-b807-bb7fc387e344", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6993" + ] + }, + "execution_count": 138, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m3.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 139, + "id": "33dbfa67-8bef-457c-b415-c5826e41601c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 139, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m1.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 177, + "id": "52ae9db4-6e23-4c8b-b421-cba1c583b31f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mphschedule_gtfs_dataset_keydirection_idroute_idcommon_shape_idshape_array_keyroute_name_usedservice_hourstrip_first_departure_datetime_pacifictime_of_dayservice_minutes
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [gtfs_dataset_key, trip_instance_key, change_meters, change_sec, speed_mph, schedule_gtfs_dataset_key, direction_id, route_id, common_shape_id, shape_array_key, route_name_used, service_hours, trip_first_departure_datetime_pacific, time_of_day, service_minutes]\n", + "Index: []" + ] + }, + "execution_count": 177, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m3.loc[c3_m3.trip_instance_key == \"04ae6ea9655473bdcaf9b80d443558ae\"]" + ] + }, + { + "cell_type": "markdown", + "id": "b00d1985-6f8d-47da-9164-422fc6b515d0", + "metadata": {}, + "source": [ + "#### Question: Is this common to have 25% of trips dropped?" + ] + }, + { + "cell_type": "code", + "execution_count": 140, + "id": "8ffc55df-73e0-412e-b428-018184b5efff", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7568181818181818" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "6993/9240" + ] + }, + { + "cell_type": "code", + "execution_count": 141, + "id": "626c4e6c-fc59-4bb0-91bb-007887d3e75b", + "metadata": {}, + "outputs": [], + "source": [ + "group_cols = [\n", + " \"gtfs_dataset_key\", \"time_of_day\",\n", + " \"route_id\", \"direction_id\",\n", + " \"route_name_used\",\n", + " \"common_shape_id\", \"shape_array_key\"\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 142, + "id": "c2d6e858-4d63-4ffc-baea-d8a5ab32987a", + "metadata": {}, + "outputs": [], + "source": [ + "# test with one route\n", + "one_route = c3_m3.loc[c3_m3.route_id == \"14R\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 143, + "id": "a6a2c843-022e-4e4e-bd1c-eace7dd25d59", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((193, 15), 193)" + ] + }, + "execution_count": 143, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_route.shape, one_route.trip_instance_key.nunique()" + 
] + }, + { + "cell_type": "code", + "execution_count": 144, + "id": "c8bc46f0-bcd3-4aaa-8c6c-5e38b0fe7210", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mphschedule_gtfs_dataset_keydirection_idroute_idcommon_shape_idshape_array_keyroute_name_usedservice_hourstrip_first_departure_datetime_pacifictime_of_dayservice_minutes
73c0e3039da063db95ebabd3fe4ee611a40317ba64ff87ddd8b3dd626368a341a813487.575379.0040.837cc0cb1871dfd558f11a2885c145d144114R11452defb027e7468735f91300a9851f3e1d75am-10pm daily0.902023-10-11 18:25:00PM Peak54.00
144c0e3039da063db95ebabd3fe4ee611a4059a49f356f6b8bdb7f12458dafc68e420787.345069.0059.097cc0cb1871dfd558f11a2885c145d144114R11452defb027e7468735f91300a9851f3e1d75am-10pm daily0.752023-10-11 08:30:00AM Peak45.00
173c0e3039da063db95ebabd3fe4ee611a406dc64f00c021e66d0c7bd52df8fed9c12924.902594.0032.277cc0cb1871dfd558f11a2885c145d144014R11403453493134b4598d0dbdddb92e825ae245am-10pm daily0.852023-10-11 19:36:00PM Peak51.00
202c0e3039da063db95ebabd3fe4ee611a407ab77edf2b69295d643794e3cd497af13682.264427.0032.497cc0cb1871dfd558f11a2885c145d144114R11452defb027e7468735f91300a9851f3e1d75am-10pm daily0.772023-10-11 05:58:00Early AM46.00
230c0e3039da063db95ebabd3fe4ee611a408adb7403f95343c6c9579cb137c8c1513624.985094.0033.467cc0cb1871dfd558f11a2885c145d144114R11452defb027e7468735f91300a9851f3e1d75am-10pm daily0.902023-10-11 12:49:00Midday54.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "73 c0e3039da063db95ebabd3fe4ee611a4 0317ba64ff87ddd8b3dd626368a341a8 \n", + "144 c0e3039da063db95ebabd3fe4ee611a4 059a49f356f6b8bdb7f12458dafc68e4 \n", + "173 c0e3039da063db95ebabd3fe4ee611a4 06dc64f00c021e66d0c7bd52df8fed9c \n", + "202 c0e3039da063db95ebabd3fe4ee611a4 07ab77edf2b69295d643794e3cd497af \n", + "230 c0e3039da063db95ebabd3fe4ee611a4 08adb7403f95343c6c9579cb137c8c15 \n", + "\n", + " change_meters change_sec speed_mph schedule_gtfs_dataset_key \\\n", + "73 13487.57 5379.00 40.83 7cc0cb1871dfd558f11a2885c145d144 \n", + "144 20787.34 5069.00 59.09 7cc0cb1871dfd558f11a2885c145d144 \n", + "173 12924.90 2594.00 32.27 7cc0cb1871dfd558f11a2885c145d144 \n", + "202 13682.26 4427.00 32.49 7cc0cb1871dfd558f11a2885c145d144 \n", + "230 13624.98 5094.00 33.46 7cc0cb1871dfd558f11a2885c145d144 \n", + "\n", + " direction_id route_id common_shape_id shape_array_key \\\n", + "73 1 14R 11452 defb027e7468735f91300a9851f3e1d7 \n", + "144 1 14R 11452 defb027e7468735f91300a9851f3e1d7 \n", + "173 0 14R 11403 453493134b4598d0dbdddb92e825ae24 \n", + "202 1 14R 11452 defb027e7468735f91300a9851f3e1d7 \n", + "230 1 14R 11452 defb027e7468735f91300a9851f3e1d7 \n", + "\n", + " route_name_used service_hours trip_first_departure_datetime_pacific \\\n", + "73 5am-10pm daily 0.90 2023-10-11 18:25:00 \n", + "144 5am-10pm daily 0.75 2023-10-11 08:30:00 \n", + "173 5am-10pm daily 0.85 2023-10-11 19:36:00 \n", + "202 5am-10pm daily 0.77 2023-10-11 05:58:00 \n", + "230 5am-10pm daily 0.90 2023-10-11 12:49:00 \n", + "\n", + " time_of_day service_minutes \n", + "73 PM Peak 54.00 \n", + "144 AM Peak 45.00 \n", + "173 PM Peak 51.00 \n", + "202 Early AM 46.00 \n", + "230 Midday 54.00 " + ] + }, + "execution_count": 144, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_route.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "id": "8ef2f776-d9f0-4ca9-a9f5-b384040f2f05", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['gtfs_dataset_key',\n", + " 'time_of_day',\n", + " 'route_id',\n", + " 'direction_id',\n", + " 'route_name_used',\n", + " 'common_shape_id',\n", + " 'shape_array_key']" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "group_cols" + ] + }, + { + "cell_type": "markdown", + "id": "aa8cf621-b772-44d4-97e5-c11c738adfb0", + "metadata": {}, + "source": [ + "#### One route test" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "id": "10f541bc-1e94-4878-bb1e-2ae88ece99ab", + "metadata": {}, + "outputs": [], + "source": [ + "one_route2 = (one_route.groupby(group_cols, \n", + " observed = True, group_keys = False)\n", + " .agg({\n", + " \"speed_mph\": \"mean\",\n", + " \"service_minutes\": \"mean\",\n", + " \"change_sec\": \"mean\",\n", + " \"trip_instance_key\": \"count\"\n", + " }).reset_index()\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "id": "3eac1991-7a9c-4a05-84f2-0769529609da", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(10, 11)" + ] + }, + "execution_count": 147, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_route2.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "id": "58647959-a118-403d-97bc-c1abb35d61b5", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_name_usedcommon_shape_idshape_array_keyspeed_mphservice_minuteschange_sectrip_instance_key
0c0e3039da063db95ebabd3fe4ee611a4AM Peak14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2429.3843.193139.4416
1c0e3039da063db95ebabd3fe4ee611a4AM Peak14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d739.8050.364804.3622
2c0e3039da063db95ebabd3fe4ee611a4Early AM14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2437.8942.003555.805
3c0e3039da063db95ebabd3fe4ee611a4Early AM14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d734.4244.454602.4511
4c0e3039da063db95ebabd3fe4ee611a4Evening14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2433.6951.003014.449
5c0e3039da063db95ebabd3fe4ee611a4Evening14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d732.2553.005149.005
6c0e3039da063db95ebabd3fe4ee611a4Midday14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2432.4646.943371.8635
7c0e3039da063db95ebabd3fe4ee611a4Midday14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d731.3050.614478.1833
8c0e3039da063db95ebabd3fe4ee611a4PM Peak14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2438.4149.783893.8136
9c0e3039da063db95ebabd3fe4ee611a4PM Peak14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d731.3553.714823.1921
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "0 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 14R 0 \n", + "1 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 14R 1 \n", + "2 c0e3039da063db95ebabd3fe4ee611a4 Early AM 14R 0 \n", + "3 c0e3039da063db95ebabd3fe4ee611a4 Early AM 14R 1 \n", + "4 c0e3039da063db95ebabd3fe4ee611a4 Evening 14R 0 \n", + "5 c0e3039da063db95ebabd3fe4ee611a4 Evening 14R 1 \n", + "6 c0e3039da063db95ebabd3fe4ee611a4 Midday 14R 0 \n", + "7 c0e3039da063db95ebabd3fe4ee611a4 Midday 14R 1 \n", + "8 c0e3039da063db95ebabd3fe4ee611a4 PM Peak 14R 0 \n", + "9 c0e3039da063db95ebabd3fe4ee611a4 PM Peak 14R 1 \n", + "\n", + " route_name_used common_shape_id shape_array_key \\\n", + "0 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "1 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "2 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "3 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "4 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "5 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "6 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "7 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "8 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "9 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "\n", + " speed_mph service_minutes change_sec trip_instance_key \n", + "0 29.38 43.19 3139.44 16 \n", + "1 39.80 50.36 4804.36 22 \n", + "2 37.89 42.00 3555.80 5 \n", + "3 34.42 44.45 4602.45 11 \n", + "4 33.69 51.00 3014.44 9 \n", + "5 32.25 53.00 5149.00 5 \n", + "6 32.46 46.94 3371.86 35 \n", + "7 31.30 50.61 4478.18 33 \n", + "8 38.41 49.78 3893.81 36 \n", + "9 31.35 53.71 4823.19 21 " + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_route2" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "id": "7514d742-2076-487c-84a6-235a6ac65ef4", + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_namecommon_shape_idshape_array_keyspeed_mphavg_sched_trip_minn_tripsavg_rt_trip_min
0c0e3039da063db95ebabd3fe4ee611a4AM Peak14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2429.4043.201652.30
1c0e3039da063db95ebabd3fe4ee611a4AM Peak14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d739.8050.402280.10
2c0e3039da063db95ebabd3fe4ee611a4Early AM14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2437.9042.00559.30
3c0e3039da063db95ebabd3fe4ee611a4Early AM14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d734.4044.501176.70
4c0e3039da063db95ebabd3fe4ee611a4Evening14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2433.7051.00950.20
5c0e3039da063db95ebabd3fe4ee611a4Evening14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d732.3053.00585.80
6c0e3039da063db95ebabd3fe4ee611a4Midday14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2432.5046.903556.20
7c0e3039da063db95ebabd3fe4ee611a4Midday14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d731.3050.603374.60
8c0e3039da063db95ebabd3fe4ee611a4PM Peak14R05am-10pm daily11403453493134b4598d0dbdddb92e825ae2438.4049.803664.90
9c0e3039da063db95ebabd3fe4ee611a4PM Peak14R15am-10pm daily11452defb027e7468735f91300a9851f3e1d731.4053.702180.40
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "0 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 14R 0 \n", + "1 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 14R 1 \n", + "2 c0e3039da063db95ebabd3fe4ee611a4 Early AM 14R 0 \n", + "3 c0e3039da063db95ebabd3fe4ee611a4 Early AM 14R 1 \n", + "4 c0e3039da063db95ebabd3fe4ee611a4 Evening 14R 0 \n", + "5 c0e3039da063db95ebabd3fe4ee611a4 Evening 14R 1 \n", + "6 c0e3039da063db95ebabd3fe4ee611a4 Midday 14R 0 \n", + "7 c0e3039da063db95ebabd3fe4ee611a4 Midday 14R 1 \n", + "8 c0e3039da063db95ebabd3fe4ee611a4 PM Peak 14R 0 \n", + "9 c0e3039da063db95ebabd3fe4ee611a4 PM Peak 14R 1 \n", + "\n", + " route_name common_shape_id shape_array_key \\\n", + "0 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "1 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "2 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "3 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "4 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "5 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "6 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "7 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "8 5am-10pm daily 11403 453493134b4598d0dbdddb92e825ae24 \n", + "9 5am-10pm daily 11452 defb027e7468735f91300a9851f3e1d7 \n", + "\n", + " speed_mph avg_sched_trip_min n_trips avg_rt_trip_min \n", + "0 29.40 43.20 16 52.30 \n", + "1 39.80 50.40 22 80.10 \n", + "2 37.90 42.00 5 59.30 \n", + "3 34.40 44.50 11 76.70 \n", + "4 33.70 51.00 9 50.20 \n", + "5 32.30 53.00 5 85.80 \n", + "6 32.50 46.90 35 56.20 \n", + "7 31.30 50.60 33 74.60 \n", + "8 38.40 49.80 36 64.90 \n", + "9 31.40 53.70 21 80.40 " + ] + }, + "execution_count": 149, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_route2.assign(\n", + " avg_rt_trip_min = one_route2.change_sec.divide(60).round(1),\n", + " service_minutes = 
one_route2.service_minutes.round(1),\n", + " speed_mph = one_route2.speed_mph.round(1),\n", + " ).rename(columns = {\n", + " \"service_minutes\": \"avg_sched_trip_min\",\n", + " \"trip_instance_key\": \"n_trips\",\n", + " \"route_name_used\": \"route_name\",\n", + " }).drop(columns = \"change_sec\")" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "id": "9279c8f2-ea9b-4476-b8e0-717774396fd2", + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L168-L177\n", + "c3_m4 = (c3_m3.groupby(group_cols, \n", + " observed = True, group_keys = False)\n", + " .agg({\n", + " \"speed_mph\": \"mean\",\n", + " \"service_minutes\": \"mean\",\n", + " \"change_sec\": \"mean\",\n", + " \"trip_instance_key\": \"count\"\n", + " }).reset_index()\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "id": "d4440381-13fe-45c0-86a5-e0c3423c037f", + "metadata": {}, + "outputs": [], + "source": [ + "# https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L178-L188\n", + "c3_m4 = c3_m4.assign(\n", + " avg_rt_trip_min = c3_m4.change_sec.divide(60).round(1),\n", + " service_minutes = c3_m4.service_minutes.round(1),\n", + " speed_mph = c3_m4.speed_mph.round(1),\n", + " ).rename(columns = {\n", + " \"service_minutes\": \"avg_sched_trip_min\",\n", + " \"trip_instance_key\": \"n_trips\",\n", + " \"route_name_used\": \"route_name\",\n", + " }).drop(columns = \"change_sec\")" + ] + }, + { + "cell_type": "markdown", + "id": "2c70074a-c4d4-4b73-a43f-7516f9ac9bdf", + "metadata": {}, + "source": [ + "#### Checks" + ] + }, + { + "cell_type": "markdown", + "id": "d9186189-d512-48c6-b1eb-8253561df44d", + "metadata": {}, + "source": [ + "##### Shape 1" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "id": "d1b02f96-e561-4b68-87da-cc2c6838ec1d", + "metadata": {}, + "outputs": [], + "source": [ + 
"shape_array_1 = \"0055ea6cd09cc68606d37851a6c91366\"\n", + "shape1_time = \"AM Peak\"" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "id": "3d9a7c26-3dd1-4589-8412-f0b763da274c", + "metadata": {}, + "outputs": [], + "source": [ + "def check_shapes(shape_array, time_of_day):\n", + " display(c3_m4.loc[(c3_m4.shape_array_key == shape_array) & (c3_m4.time_of_day == time_of_day)])\n", + " display(c3_m3.loc[(c3_m3.shape_array_key == shape_array) & (c3_m3.time_of_day == time_of_day)])" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "id": "fdacca64-50e2-4160-92bd-cda2e279d887", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_namecommon_shape_idshape_array_keyspeed_mphavg_sched_trip_minn_tripsavg_rt_trip_min
69c0e3039da063db95ebabd3fe4ee611a4AM Peak5505am-10pm daily2600055ea6cd09cc68606d37851a6c9136610.1016.00431.70
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "69 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 55 0 \n", + "\n", + " route_name common_shape_id shape_array_key \\\n", + "69 5am-10pm daily 260 0055ea6cd09cc68606d37851a6c91366 \n", + "\n", + " speed_mph avg_sched_trip_min n_trips avg_rt_trip_min \n", + "69 10.10 16.00 4 31.70 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mphschedule_gtfs_dataset_keydirection_idroute_idcommon_shape_idshape_array_keyroute_name_usedservice_hourstrip_first_departure_datetime_pacifictime_of_dayservice_minutes
4963c0e3039da063db95ebabd3fe4ee611a4b68e89ab04e684839d2cde80be1272193832.262365.006.527cc0cb1871dfd558f11a2885c145d1440552600055ea6cd09cc68606d37851a6c913665am-10pm daily0.272023-10-11 07:59:00AM Peak16.00
5783c0e3039da063db95ebabd3fe4ee611a4d32ca8727a4d9c0ee126664507d42c083856.822255.006.997cc0cb1871dfd558f11a2885c145d1440552600055ea6cd09cc68606d37851a6c913665am-10pm daily0.272023-10-11 09:20:00AM Peak16.00
6308c0e3039da063db95ebabd3fe4ee611a4e59e81c33cab600d4009318f354fbf9a1662.33881.003.597cc0cb1871dfd558f11a2885c145d1440552600055ea6cd09cc68606d37851a6c913665am-10pm daily0.272023-10-11 08:20:00AM Peak16.00
6559c0e3039da063db95ebabd3fe4ee611a4ef66bcc7ebab13dc4cd4e2b0c6085ee73843.802099.0023.187cc0cb1871dfd558f11a2885c145d1440552600055ea6cd09cc68606d37851a6c913665am-10pm daily0.272023-10-11 09:41:00AM Peak16.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "4963 c0e3039da063db95ebabd3fe4ee611a4 b68e89ab04e684839d2cde80be127219 \n", + "5783 c0e3039da063db95ebabd3fe4ee611a4 d32ca8727a4d9c0ee126664507d42c08 \n", + "6308 c0e3039da063db95ebabd3fe4ee611a4 e59e81c33cab600d4009318f354fbf9a \n", + "6559 c0e3039da063db95ebabd3fe4ee611a4 ef66bcc7ebab13dc4cd4e2b0c6085ee7 \n", + "\n", + " change_meters change_sec speed_mph schedule_gtfs_dataset_key \\\n", + "4963 3832.26 2365.00 6.52 7cc0cb1871dfd558f11a2885c145d144 \n", + "5783 3856.82 2255.00 6.99 7cc0cb1871dfd558f11a2885c145d144 \n", + "6308 1662.33 881.00 3.59 7cc0cb1871dfd558f11a2885c145d144 \n", + "6559 3843.80 2099.00 23.18 7cc0cb1871dfd558f11a2885c145d144 \n", + "\n", + " direction_id route_id common_shape_id shape_array_key \\\n", + "4963 0 55 260 0055ea6cd09cc68606d37851a6c91366 \n", + "5783 0 55 260 0055ea6cd09cc68606d37851a6c91366 \n", + "6308 0 55 260 0055ea6cd09cc68606d37851a6c91366 \n", + "6559 0 55 260 0055ea6cd09cc68606d37851a6c91366 \n", + "\n", + " route_name_used service_hours trip_first_departure_datetime_pacific \\\n", + "4963 5am-10pm daily 0.27 2023-10-11 07:59:00 \n", + "5783 5am-10pm daily 0.27 2023-10-11 09:20:00 \n", + "6308 5am-10pm daily 0.27 2023-10-11 08:20:00 \n", + "6559 5am-10pm daily 0.27 2023-10-11 09:41:00 \n", + "\n", + " time_of_day service_minutes \n", + "4963 AM Peak 16.00 \n", + "5783 AM Peak 16.00 \n", + "6308 AM Peak 16.00 \n", + "6559 AM Peak 16.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_shapes(shape_array_1, shape1_time)" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "id": "327155b2-c46d-4d89-be06-58f5f2f324cc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "31.666666666666668" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(2365+2255+881+2099)/4/60" + ] + }, + { + "cell_type": "markdown", + "id": 
"07b4ca28-3a71-424c-b742-9b47bc4176db", + "metadata": {}, + "source": [ + "##### Shape 2" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": "e391d24c-0def-4475-b570-dcb4c0fdb82d", + "metadata": {}, + "outputs": [], + "source": [ + "shape_key2 = \"e52c291259f04c198a0c1f245ec26be9\"\n", + "time2 = \"AM Peak\"" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "id": "f0513f75-7ade-4eb5-a5cf-95e7cb647ab8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_namecommon_shape_idshape_array_keyspeed_mphavg_sched_trip_minn_tripsavg_rt_trip_min
65c0e3039da063db95ebabd3fe4ee611a4AM Peak520Weekdays 6am-10pm Weekends 8am-10pm5200e52c291259f04c198a0c1f245ec26be915.9026.00647.50
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "65 c0e3039da063db95ebabd3fe4ee611a4 AM Peak 52 0 \n", + "\n", + " route_name common_shape_id \\\n", + "65 Weekdays 6am-10pm Weekends 8am-10pm 5200 \n", + "\n", + " shape_array_key speed_mph avg_sched_trip_min n_trips \\\n", + "65 e52c291259f04c198a0c1f245ec26be9 15.90 26.00 6 \n", + "\n", + " avg_rt_trip_min \n", + "65 47.50 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mphschedule_gtfs_dataset_keydirection_idroute_idcommon_shape_idshape_array_keyroute_name_usedservice_hourstrip_first_departure_datetime_pacifictime_of_dayservice_minutes
57c0e3039da063db95ebabd3fe4ee611a4027ffc2ecdce130df8e8b2f2d81b17d66505.342703.0018.197cc0cb1871dfd558f11a2885c145d1440525200e52c291259f04c198a0c1f245ec26be9Weekdays 6am-10pm Weekends 8am-10pm0.402023-10-11 09:34:00AM Peak24.00
91c0e3039da063db95ebabd3fe4ee611a403b5bb249b7188cc3935b355ad008a826625.952548.0018.467cc0cb1871dfd558f11a2885c145d1440525200e52c291259f04c198a0c1f245ec26be9Weekdays 6am-10pm Weekends 8am-10pm0.482023-10-11 07:56:00AM Peak29.00
2101c0e3039da063db95ebabd3fe4ee611a450848f3ca9a5b8db9149cf4d4c0ce0086660.493261.0012.027cc0cb1871dfd558f11a2885c145d1440525200e52c291259f04c198a0c1f245ec26be9Weekdays 6am-10pm Weekends 8am-10pm0.422023-10-11 08:34:00AM Peak25.00
4013c0e3039da063db95ebabd3fe4ee611a494f8ca1587768bca12471d4dfd63cace6786.872735.0014.687cc0cb1871dfd558f11a2885c145d1440525200e52c291259f04c198a0c1f245ec26be9Weekdays 6am-10pm Weekends 8am-10pm0.422023-10-11 08:54:00AM Peak25.00
4699c0e3039da063db95ebabd3fe4ee611a4acf1b602311569dd97867a45b5c16e4c9837.343691.0017.807cc0cb1871dfd558f11a2885c145d1440525200e52c291259f04c198a0c1f245ec26be9Weekdays 6am-10pm Weekends 8am-10pm0.482023-10-11 08:15:00AM Peak29.00
6700c0e3039da063db95ebabd3fe4ee611a4f5058b9f8fe6df4e55644a6cc0896faf6657.792162.0014.397cc0cb1871dfd558f11a2885c145d1440525200e52c291259f04c198a0c1f245ec26be9Weekdays 6am-10pm Weekends 8am-10pm0.402023-10-11 09:55:00AM Peak24.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "57 c0e3039da063db95ebabd3fe4ee611a4 027ffc2ecdce130df8e8b2f2d81b17d6 \n", + "91 c0e3039da063db95ebabd3fe4ee611a4 03b5bb249b7188cc3935b355ad008a82 \n", + "2101 c0e3039da063db95ebabd3fe4ee611a4 50848f3ca9a5b8db9149cf4d4c0ce008 \n", + "4013 c0e3039da063db95ebabd3fe4ee611a4 94f8ca1587768bca12471d4dfd63cace \n", + "4699 c0e3039da063db95ebabd3fe4ee611a4 acf1b602311569dd97867a45b5c16e4c \n", + "6700 c0e3039da063db95ebabd3fe4ee611a4 f5058b9f8fe6df4e55644a6cc0896faf \n", + "\n", + " change_meters change_sec speed_mph schedule_gtfs_dataset_key \\\n", + "57 6505.34 2703.00 18.19 7cc0cb1871dfd558f11a2885c145d144 \n", + "91 6625.95 2548.00 18.46 7cc0cb1871dfd558f11a2885c145d144 \n", + "2101 6660.49 3261.00 12.02 7cc0cb1871dfd558f11a2885c145d144 \n", + "4013 6786.87 2735.00 14.68 7cc0cb1871dfd558f11a2885c145d144 \n", + "4699 9837.34 3691.00 17.80 7cc0cb1871dfd558f11a2885c145d144 \n", + "6700 6657.79 2162.00 14.39 7cc0cb1871dfd558f11a2885c145d144 \n", + "\n", + " direction_id route_id common_shape_id shape_array_key \\\n", + "57 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n", + "91 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n", + "2101 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n", + "4013 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n", + "4699 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n", + "6700 0 52 5200 e52c291259f04c198a0c1f245ec26be9 \n", + "\n", + " route_name_used service_hours \\\n", + "57 Weekdays 6am-10pm Weekends 8am-10pm 0.40 \n", + "91 Weekdays 6am-10pm Weekends 8am-10pm 0.48 \n", + "2101 Weekdays 6am-10pm Weekends 8am-10pm 0.42 \n", + "4013 Weekdays 6am-10pm Weekends 8am-10pm 0.42 \n", + "4699 Weekdays 6am-10pm Weekends 8am-10pm 0.48 \n", + "6700 Weekdays 6am-10pm Weekends 8am-10pm 0.40 \n", + "\n", + " trip_first_departure_datetime_pacific time_of_day service_minutes \n", + "57 2023-10-11 09:34:00 AM Peak 24.00 \n", + "91 2023-10-11 07:56:00 AM Peak 29.00 \n", + "2101 2023-10-11 08:34:00 
AM Peak 25.00 \n", + "4013 2023-10-11 08:54:00 AM Peak 25.00 \n", + "4699 2023-10-11 08:15:00 AM Peak 29.00 \n", + "6700 2023-10-11 09:55:00 AM Peak 24.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_shapes(shape_key2, time2)" + ] + }, + { + "cell_type": "markdown", + "id": "69d4639b-a9e9-44d0-8398-3aca1a0a11ff", + "metadata": {}, + "source": [ + "## Outer Join\n", + "#### Question: how to connect back to trip instance key?" + ] + }, + { + "cell_type": "code", + "execution_count": 183, + "id": "41d4e8fd-33fa-4dfc-aa12-98c8abe16240", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "pandas.core.frame.DataFrame" + ] + }, + "execution_count": 183, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Spatial Accuracy\n", + "type(muni)" + ] + }, + { + "cell_type": "code", + "execution_count": 184, + "id": "dfb2e9c8-67d3-4a4d-b64a-eebac0f35bac", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'total_vp', 'vp_in_shape'], dtype='object')" + ] + }, + "execution_count": 184, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "muni.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 185, + "id": "6b2780f0-a359-4fb1-9082-2c7d40ba42df", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 185, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "muni.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 186, + "id": "05adc5b0-2a0b-4860-af3d-bda2349bff99", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytotal_vpvp_in_shape
000068c2e2316950af50ffaa9584c7a46126126
1000c62b70d9438b7951457a74a4c89b2152140
20010126e10a24f22fb36018ed0f79572413315
30013822d90020aa52bae3dab73d15fd0205205
4001a66b1633388f4fe9eea5acf374481227227
\n", + "
" + ], + "text/plain": [ + " trip_instance_key total_vp vp_in_shape\n", + "0 00068c2e2316950af50ffaa9584c7a46 126 126\n", + "1 000c62b70d9438b7951457a74a4c89b2 152 140\n", + "2 0010126e10a24f22fb36018ed0f79572 413 315\n", + "3 0013822d90020aa52bae3dab73d15fd0 205 205\n", + "4 001a66b1633388f4fe9eea5acf374481 227 227" + ] + }, + "execution_count": 186, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "muni.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 187, + "id": "fd016ce7-c607-41c4-8fe3-fd5835a8abc2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'min_time', 'max_time', 'atleast2_trip_updates',\n", + " 'trip_min_elapsed', 'pct_update_complete', 'larger', 'test_mask',\n", + " 'pct_update_complete2'],\n", + " dtype='object')" + ] + }, + "execution_count": 187, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Update Completeness\n", + "test_group3.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 188, + "id": "09b39eab-5919-4b96-a127-a9e4ce4f86aa", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 188, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 189, + "id": "96940334-7067-4f0f-aa6b-564ad1a6848d", + "metadata": {}, + "outputs": [], + "source": [ + "test_group3 = test_group3.drop(columns = ['min_time','max_time','trip_min_elapsed', 'pct_update_complete','larger','test_mask'])" + ] + }, + { + "cell_type": "code", + "execution_count": 190, + "id": "130b01f7-02a6-49b3-8f87-d76ebbf53a8f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyatleast2_trip_updatespct_update_complete2
000068c2e2316950af50ffaa9584c7a46421.00
1000c62b70d9438b7951457a74a4c89b2500.98
20010126e10a24f22fb36018ed0f795721381.00
30013822d90020aa52bae3dab73d15fd0681.00
4001a66b1633388f4fe9eea5acf374481761.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key atleast2_trip_updates \\\n", + "0 00068c2e2316950af50ffaa9584c7a46 42 \n", + "1 000c62b70d9438b7951457a74a4c89b2 50 \n", + "2 0010126e10a24f22fb36018ed0f79572 138 \n", + "3 0013822d90020aa52bae3dab73d15fd0 68 \n", + "4 001a66b1633388f4fe9eea5acf374481 76 \n", + "\n", + " pct_update_complete2 \n", + "0 1.00 \n", + "1 0.98 \n", + "2 1.00 \n", + "3 1.00 \n", + "4 1.00 " + ] + }, + "execution_count": 190, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "test_group3.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 191, + "id": "a24dbf3a-e060-49b1-982b-2a583c98286a", + "metadata": {}, + "outputs": [], + "source": [ + "outer_merge1 = pd.merge(test_group3, muni, \n", + " on = \"trip_instance_key\",\n", + " how = \"outer\")" + ] + }, + { + "cell_type": "code", + "execution_count": 192, + "id": "320cb842-c935-4c40-b7c9-6396c820da68", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyatleast2_trip_updatespct_update_complete2total_vpvp_in_shape
32025a26483ef38d216c42696681ee376614710.95217183
\n", + "
" + ], + "text/plain": [ + " trip_instance_key atleast2_trip_updates \\\n", + "3202 5a26483ef38d216c42696681ee376614 71 \n", + "\n", + " pct_update_complete2 total_vp vp_in_shape \n", + "3202 0.95 217 183 " + ] + }, + "execution_count": 192, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "outer_merge1.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 202, + "id": "44c12eae-0c78-4d50-9a48-42cbd7c2521f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 202, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "outer_merge1.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 193, + "id": "f75301c2-96bd-487e-9c10-176777fc4f8c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['gtfs_dataset_key', 'trip_instance_key', 'change_meters', 'change_sec',\n", + " 'speed_mph', 'schedule_gtfs_dataset_key', 'direction_id', 'route_id',\n", + " 'common_shape_id', 'shape_array_key', 'route_name_used',\n", + " 'service_hours', 'trip_first_departure_datetime_pacific', 'time_of_day',\n", + " 'service_minutes'],\n", + " dtype='object')" + ] + }, + "execution_count": 193, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m3.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 194, + "id": "2ee51b82-169c-449b-a507-103c9a839dbf", + "metadata": {}, + "outputs": [], + "source": [ + "# Averages\n", + "crosswalk_back_to_trips = c3_m3[['shape_array_key','gtfs_dataset_key','time_of_day','trip_instance_key']].drop_duplicates().reset_index(drop = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 195, + "id": "7d5ddf3f-4cc8-43af-9575-c90bece499fb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "6993" + ] + }, + "execution_count": 195, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + 
"crosswalk_back_to_trips.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 196, + "id": "5d3a24fd-362f-4a62-9fcf-c7f03b2770f3", + "metadata": {}, + "outputs": [], + "source": [ + "outer_merge2 = pd.merge(outer_merge1, crosswalk_back_to_trips, \n", + " on = \"trip_instance_key\",\n", + " how = \"outer\")" + ] + }, + { + "cell_type": "code", + "execution_count": 197, + "id": "2d6b71df-80e9-4fbd-8ac2-e1f55ba04929", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'atleast2_trip_updates', 'pct_update_complete2',\n", + " 'total_vp', 'vp_in_shape', 'shape_array_key', 'gtfs_dataset_key',\n", + " 'time_of_day'],\n", + " dtype='object')" + ] + }, + "execution_count": 197, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "outer_merge2.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 198, + "id": "6030cb7f-5655-4132-b214-4259e5d5090e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['gtfs_dataset_key', 'time_of_day', 'route_id', 'direction_id',\n", + " 'route_name', 'common_shape_id', 'shape_array_key', 'speed_mph',\n", + " 'avg_sched_trip_min', 'n_trips', 'avg_rt_trip_min'],\n", + " dtype='object')" + ] + }, + "execution_count": 198, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m4.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 199, + "id": "c8bea8f0-ea00-4e1d-9783-398f277e768c", + "metadata": {}, + "outputs": [], + "source": [ + "outer_merge3 = pd.merge(outer_merge2, c3_m4, \n", + " on = [\"gtfs_dataset_key\", \"shape_array_key\", \"time_of_day\"],\n", + " how = \"outer\")" + ] + }, + { + "cell_type": "markdown", + "id": "fe34ed54-badd-45ed-8ee2-f18a51207b3c", + "metadata": {}, + "source": [ + "#### Question: even if we drop the speeds, we should still have the shape_array_key and whatnot?\n", + "* How come some of the rows have no speeds?? 
What went wrong?\n", + "* 519cc26f9599677993f8d8cd269eb3cc should have a speed?" + ] + }, + { + "cell_type": "code", + "execution_count": 206, + "id": "e3883628-9629-45ef-9a29-d5f4e60215c2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mph
2900c0e3039da063db95ebabd3fe4ee611a4519cc26f9599677993f8d8cd269eb3cc11329.472960.00NaN
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "2900 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n", + "\n", + " change_meters change_sec speed_mph \n", + "2900 11329.47 2960.00 NaN " + ] + }, + "execution_count": 206, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m2.loc[c3_m2.trip_instance_key == \"519cc26f9599677993f8d8cd269eb3cc\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 208, + "id": "363d9f9e-a299-4e70-8e80-ce608b74bd18", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keylocation_timestamp_localvp_idxshape_array_keyvp_geometryshape_metersprior_distprior_timechange_meterschange_sec
15100c0e3039da063db95ebabd3fe4ee611a4519cc26f9599677993f8d8cd269eb3cc2023-10-11 20:02:591195855615955f0486291f4d897d69cc2e2db08dPOINT (-208310.474 -19578.210)146.29NaNNaTNaNNaN
15101c0e3039da063db95ebabd3fe4ee611a4519cc26f9599677993f8d8cd269eb3cc2023-10-11 20:15:161195859315955f0486291f4d897d69cc2e2db08dPOINT (-208720.144 -18849.817)4400.58146.292023-10-11 20:02:594254.30737.00
15102c0e3039da063db95ebabd3fe4ee611a4519cc26f9599677993f8d8cd269eb3cc2023-10-11 20:27:311195863015955f0486291f4d897d69cc2e2db08dPOINT (-210732.513 -22505.869)11426.634400.582023-10-11 20:15:167026.05735.00
15103c0e3039da063db95ebabd3fe4ee611a4519cc26f9599677993f8d8cd269eb3cc2023-10-11 20:39:481195866715955f0486291f4d897d69cc2e2db08dPOINT (-210720.177 -22491.169)11445.8911426.632023-10-11 20:27:3119.26737.00
15104c0e3039da063db95ebabd3fe4ee611a4519cc26f9599677993f8d8cd269eb3cc2023-10-11 20:52:191195870415955f0486291f4d897d69cc2e2db08dPOINT (-210647.181 -22436.301)11475.7611445.892023-10-11 20:39:4829.86751.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "15100 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n", + "15101 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n", + "15102 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n", + "15103 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n", + "15104 c0e3039da063db95ebabd3fe4ee611a4 519cc26f9599677993f8d8cd269eb3cc \n", + "\n", + " location_timestamp_local vp_idx shape_array_key \\\n", + "15100 2023-10-11 20:02:59 11958556 15955f0486291f4d897d69cc2e2db08d \n", + "15101 2023-10-11 20:15:16 11958593 15955f0486291f4d897d69cc2e2db08d \n", + "15102 2023-10-11 20:27:31 11958630 15955f0486291f4d897d69cc2e2db08d \n", + "15103 2023-10-11 20:39:48 11958667 15955f0486291f4d897d69cc2e2db08d \n", + "15104 2023-10-11 20:52:19 11958704 15955f0486291f4d897d69cc2e2db08d \n", + "\n", + " vp_geometry shape_meters prior_dist \\\n", + "15100 POINT (-208310.474 -19578.210) 146.29 NaN \n", + "15101 POINT (-208720.144 -18849.817) 4400.58 146.29 \n", + "15102 POINT (-210732.513 -22505.869) 11426.63 4400.58 \n", + "15103 POINT (-210720.177 -22491.169) 11445.89 11426.63 \n", + "15104 POINT (-210647.181 -22436.301) 11475.76 11445.89 \n", + "\n", + " prior_time change_meters change_sec \n", + "15100 NaT NaN NaN \n", + "15101 2023-10-11 20:02:59 4254.30 737.00 \n", + "15102 2023-10-11 20:15:16 7026.05 735.00 \n", + "15103 2023-10-11 20:27:31 19.26 737.00 \n", + "15104 2023-10-11 20:39:48 29.86 751.00 " + ] + }, + "execution_count": 208, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m1.loc[c3_m1.trip_instance_key == \"519cc26f9599677993f8d8cd269eb3cc\"].drop(columns = ['shape_geometry'])" + ] + }, + { + "cell_type": "markdown", + "id": "596112c3-1cc8-434e-97b5-12f1dc2864c7", + "metadata": {}, + "source": [ + "#### check w/ another trip" + ] + }, + { + "cell_type": "code", + "execution_count": 215, + "id": 
"b254128f-6388-4315-b99d-0616c973f3a1", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mph
5290c0e3039da063db95ebabd3fe4ee611a49359a381c89e52b7bc78bb4942e4b0774872.665109.00NaN
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "5290 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n", + "\n", + " change_meters change_sec speed_mph \n", + "5290 4872.66 5109.00 NaN " + ] + }, + "execution_count": 215, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m2.loc[c3_m2.trip_instance_key == \"9359a381c89e52b7bc78bb4942e4b077\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 216, + "id": "e38be645-43bf-4f7a-9a08-108fdfe76f7c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keylocation_timestamp_localvp_idxshape_array_keyvp_geometryshape_metersprior_distprior_timechange_meterschange_sec
2105c0e3039da063db95ebabd3fe4ee611a49359a381c89e52b7bc78bb4942e4b0772023-10-11 07:22:58114758336b99e6706ef5d2fb8e8518256a3c00c4POINT (-215158.815 -23030.763)357.86NaNNaTNaNNaN
2106c0e3039da063db95ebabd3fe4ee611a49359a381c89e52b7bc78bb4942e4b0772023-10-11 07:44:17114758976b99e6706ef5d2fb8e8518256a3c00c4POINT (-215158.815 -23030.763)357.86357.862023-10-11 07:22:580.001279.00
2107c0e3039da063db95ebabd3fe4ee611a49359a381c89e52b7bc78bb4942e4b0772023-10-11 08:05:37114759616b99e6706ef5d2fb8e8518256a3c00c4POINT (-213850.819 -22835.343)1645.28357.862023-10-11 07:44:171287.421280.00
2108c0e3039da063db95ebabd3fe4ee611a49359a381c89e52b7bc78bb4942e4b0772023-10-11 08:27:00114760256b99e6706ef5d2fb8e8518256a3c00c4POINT (-210713.785 -22130.479)5150.331645.282023-10-11 08:05:373505.051283.00
2109c0e3039da063db95ebabd3fe4ee611a49359a381c89e52b7bc78bb4942e4b0772023-10-11 08:48:07114760896b99e6706ef5d2fb8e8518256a3c00c4POINT (-210637.615 -22057.788)5230.535150.332023-10-11 08:27:0080.191267.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "2105 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n", + "2106 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n", + "2107 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n", + "2108 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n", + "2109 c0e3039da063db95ebabd3fe4ee611a4 9359a381c89e52b7bc78bb4942e4b077 \n", + "\n", + " location_timestamp_local vp_idx shape_array_key \\\n", + "2105 2023-10-11 07:22:58 11475833 6b99e6706ef5d2fb8e8518256a3c00c4 \n", + "2106 2023-10-11 07:44:17 11475897 6b99e6706ef5d2fb8e8518256a3c00c4 \n", + "2107 2023-10-11 08:05:37 11475961 6b99e6706ef5d2fb8e8518256a3c00c4 \n", + "2108 2023-10-11 08:27:00 11476025 6b99e6706ef5d2fb8e8518256a3c00c4 \n", + "2109 2023-10-11 08:48:07 11476089 6b99e6706ef5d2fb8e8518256a3c00c4 \n", + "\n", + " vp_geometry shape_meters prior_dist \\\n", + "2105 POINT (-215158.815 -23030.763) 357.86 NaN \n", + "2106 POINT (-215158.815 -23030.763) 357.86 357.86 \n", + "2107 POINT (-213850.819 -22835.343) 1645.28 357.86 \n", + "2108 POINT (-210713.785 -22130.479) 5150.33 1645.28 \n", + "2109 POINT (-210637.615 -22057.788) 5230.53 5150.33 \n", + "\n", + " prior_time change_meters change_sec \n", + "2105 NaT NaN NaN \n", + "2106 2023-10-11 07:22:58 0.00 1279.00 \n", + "2107 2023-10-11 07:44:17 1287.42 1280.00 \n", + "2108 2023-10-11 08:05:37 3505.05 1283.00 \n", + "2109 2023-10-11 08:27:00 80.19 1267.00 " + ] + }, + "execution_count": 216, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m1.loc[c3_m1.trip_instance_key == \"9359a381c89e52b7bc78bb4942e4b077\"].drop(columns = ['shape_geometry'])" + ] + }, + { + "cell_type": "markdown", + "id": "fa73bd7b-9cd5-42d1-a5b2-38cf252ab5f6", + "metadata": {}, + "source": [ + "#### Check a trip with speed mph" + ] + }, + { + "cell_type": "code", + "execution_count": 217, + "id": 
"326763e6-75d3-4891-af7d-690b5ce1781d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mph
6889c0e3039da063db95ebabd3fe4ee611a4bfe892c83789ea8fb7cf2bfbf8c3704a6745.282668.0012.51
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "6889 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n", + "\n", + " change_meters change_sec speed_mph \n", + "6889 6745.28 2668.00 12.51 " + ] + }, + "execution_count": 217, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m2.loc[~c3_m2.speed_mph.isna()].sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 218, + "id": "f711e45d-98f8-40ed-a599-658669fd21d6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keychange_meterschange_secspeed_mph
6889c0e3039da063db95ebabd3fe4ee611a4bfe892c83789ea8fb7cf2bfbf8c3704a6745.282668.0012.51
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "6889 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n", + "\n", + " change_meters change_sec speed_mph \n", + "6889 6745.28 2668.00 12.51 " + ] + }, + "execution_count": 218, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m2.loc[c3_m2.trip_instance_key == \"bfe892c83789ea8fb7cf2bfbf8c3704a\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 219, + "id": "36ad980e-d8fa-4a61-aac7-ecc688701625", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytrip_instance_keylocation_timestamp_localvp_idxshape_array_keyvp_geometryshape_metersprior_distprior_timechange_meterschange_sec
23620c0e3039da063db95ebabd3fe4ee611a4bfe892c83789ea8fb7cf2bfbf8c3704a2023-10-11 07:27:20122689083493166801a9913231925d9a167b2182POINT (-210495.407 -23811.592)331.68NaNNaTNaNNaN
23621c0e3039da063db95ebabd3fe4ee611a4bfe892c83789ea8fb7cf2bfbf8c3704a2023-10-11 07:38:37122689423493166801a9913231925d9a167b2182POINT (-210542.304 -23633.930)570.97331.682023-10-11 07:27:20239.29677.00
23622c0e3039da063db95ebabd3fe4ee611a4bfe892c83789ea8fb7cf2bfbf8c3704a2023-10-11 07:49:25122689753493166801a9913231925d9a167b2182POINT (-211724.216 -22030.923)2816.80570.972023-10-11 07:38:372245.82648.00
23623c0e3039da063db95ebabd3fe4ee611a4bfe892c83789ea8fb7cf2bfbf8c3704a2023-10-11 08:00:28122690083493166801a9913231925d9a167b2182POINT (-213012.221 -21465.442)4744.722816.802023-10-11 07:49:251927.92663.00
23624c0e3039da063db95ebabd3fe4ee611a4bfe892c83789ea8fb7cf2bfbf8c3704a2023-10-11 08:11:48122690423493166801a9913231925d9a167b2182POINT (-215142.435 -21544.168)7076.964744.722023-10-11 08:00:282332.24680.00
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key trip_instance_key \\\n", + "23620 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n", + "23621 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n", + "23622 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n", + "23623 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n", + "23624 c0e3039da063db95ebabd3fe4ee611a4 bfe892c83789ea8fb7cf2bfbf8c3704a \n", + "\n", + " location_timestamp_local vp_idx shape_array_key \\\n", + "23620 2023-10-11 07:27:20 12268908 3493166801a9913231925d9a167b2182 \n", + "23621 2023-10-11 07:38:37 12268942 3493166801a9913231925d9a167b2182 \n", + "23622 2023-10-11 07:49:25 12268975 3493166801a9913231925d9a167b2182 \n", + "23623 2023-10-11 08:00:28 12269008 3493166801a9913231925d9a167b2182 \n", + "23624 2023-10-11 08:11:48 12269042 3493166801a9913231925d9a167b2182 \n", + "\n", + " vp_geometry shape_meters prior_dist \\\n", + "23620 POINT (-210495.407 -23811.592) 331.68 NaN \n", + "23621 POINT (-210542.304 -23633.930) 570.97 331.68 \n", + "23622 POINT (-211724.216 -22030.923) 2816.80 570.97 \n", + "23623 POINT (-213012.221 -21465.442) 4744.72 2816.80 \n", + "23624 POINT (-215142.435 -21544.168) 7076.96 4744.72 \n", + "\n", + " prior_time change_meters change_sec \n", + "23620 NaT NaN NaN \n", + "23621 2023-10-11 07:27:20 239.29 677.00 \n", + "23622 2023-10-11 07:38:37 2245.82 648.00 \n", + "23623 2023-10-11 07:49:25 1927.92 663.00 \n", + "23624 2023-10-11 08:00:28 2332.24 680.00 " + ] + }, + "execution_count": 219, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "c3_m1.loc[c3_m1.trip_instance_key == \"bfe892c83789ea8fb7cf2bfbf8c3704a\"].drop(columns = ['shape_geometry'])" + ] + }, + { + "cell_type": "code", + "execution_count": 203, + "id": "cecd7b7d-cbb9-4ae9-859b-56c82b47f1b3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "9240" + ] + }, + "execution_count": 203, + "metadata": 
{}, + "output_type": "execute_result" + } + ], + "source": [ + "outer_merge3.trip_instance_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 204, + "id": "81efe638-1c2c-41d0-a681-38cd310b0f51", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 9240 entries, 0 to 9239\n", + "Data columns (total 16 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trip_instance_key 9240 non-null object \n", + " 1 atleast2_trip_updates 9240 non-null int64 \n", + " 2 pct_update_complete2 9240 non-null float64 \n", + " 3 total_vp 9240 non-null int32 \n", + " 4 vp_in_shape 9240 non-null int32 \n", + " 5 shape_array_key 6993 non-null object \n", + " 6 gtfs_dataset_key 6993 non-null category\n", + " 7 time_of_day 6993 non-null object \n", + " 8 route_id 6993 non-null object \n", + " 9 direction_id 6993 non-null Int64 \n", + " 10 route_name 6993 non-null object \n", + " 11 common_shape_id 6993 non-null object \n", + " 12 speed_mph 6993 non-null float64 \n", + " 13 avg_sched_trip_min 6993 non-null float64 \n", + " 14 n_trips 6993 non-null float64 \n", + " 15 avg_rt_trip_min 6993 non-null float64 \n", + "dtypes: Int64(1), category(1), float64(5), int32(2), int64(1), object(6)\n", + "memory usage: 1.1+ MB\n" + ] + } + ], + "source": [ + "outer_merge3.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 201, + "id": "ca82385c-2138-4ffa-9263-da69926f727c", + "metadata": {}, + "outputs": [], + "source": [ + "# check_shapes(\"dfd50ed85c60540a1f9b9c2d1afa93ff\", \"Evening\")" + ] + }, + { + "cell_type": "markdown", + "id": "381e9cf3-9dbf-4432-869d-f39d28927a7d", + "metadata": {}, + "source": [ + "## Final cleaning\n", + "* How come you export it twice? 
\n", + "* https://github.com/cal-itp/data-analyses/blob/metrics_rt/rt_segment_speeds/scripts/C3_trip_route_speed.py#L178-L188" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}