From b013651eb5b5f8091965b549f61bf92abe64ccdf Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Wed, 20 Dec 2023 23:20:23 +0000 Subject: [PATCH 1/6] rm second update_vars and other unecessary files --- rt_scheduled_v_ran/05_fresh_nb.ipynb | 226 -------------- .../scripts/rt_v_scheduled_trip.py | 2 +- rt_scheduled_v_ran/scripts/update_vars.py | 7 +- rt_scheduled_v_ran/scripts/update_vars2.py | 8 - .../scripts/vp_usable_metrics.py | 281 ------------------ 5 files changed, 5 insertions(+), 519 deletions(-) delete mode 100644 rt_scheduled_v_ran/05_fresh_nb.ipynb delete mode 100644 rt_scheduled_v_ran/scripts/update_vars2.py delete mode 100644 rt_scheduled_v_ran/scripts/vp_usable_metrics.py diff --git a/rt_scheduled_v_ran/05_fresh_nb.ipynb b/rt_scheduled_v_ran/05_fresh_nb.ipynb deleted file mode 100644 index 13ef9965a..000000000 --- a/rt_scheduled_v_ran/05_fresh_nb.ipynb +++ /dev/null @@ -1,226 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "5aa8180f-9df6-4dad-a13e-7dfeb60f4fc7", - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "e64d4bd2-73ed-4636-9067-a459d1ead87d", - "metadata": {}, - "outputs": [], - "source": [ - "pd.options.display.max_columns = 100\n", - "pd.options.display.float_format = \"{:.2f}\".format\n", - "pd.set_option(\"display.max_rows\", None)\n", - "pd.set_option(\"display.max_colwidth\", None)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "aa21fe0c-de96-4747-b501-cd37ce145fd7", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_parquet('./scripts/vp_usable_metrics.parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "a569d042-c39c-4ee8-8343-1f8f30e7d337", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(86486, 11)" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "d5e4c8f2-5746-4f14-8bd4-31b7fdbf11bf", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyminutes_w_atleast2_trip_updatesmedian_pings_per_mintotal_minute_w_gtfstotal_trip_timetotal_vpvp_in_shapespeed_mphroute_idtime_of_dayservice_minutes
001d684e5d18b56f09f2eb816241b77b1113.001111.00NaNNaNNaNNoneNoneNaN
10828e4c01c7313d43981af8c2e198491163.001616.00NaNNaNNaNNoneNoneNaN
20975f532d046ada21eb55491d265ccde463.004953.00NaNNaNNaNNoneNoneNaN
30e0744f698ab90d835ab1c20f0e73818243.002525.00NaNNaNNaNNoneNoneNaN
40ecff5e49c02581abdd223ec8bcbfd23303.003333.00NaNNaNNaNNoneNoneNaN
\n", - "
" - ], - "text/plain": [ - " trip_instance_key minutes_w_atleast2_trip_updates \\\n", - "0 01d684e5d18b56f09f2eb816241b77b1 11 \n", - "1 0828e4c01c7313d43981af8c2e198491 16 \n", - "2 0975f532d046ada21eb55491d265ccde 46 \n", - "3 0e0744f698ab90d835ab1c20f0e73818 24 \n", - "4 0ecff5e49c02581abdd223ec8bcbfd23 30 \n", - "\n", - " median_pings_per_min total_minute_w_gtfs total_trip_time total_vp \\\n", - "0 3.00 11 11.00 NaN \n", - "1 3.00 16 16.00 NaN \n", - "2 3.00 49 53.00 NaN \n", - "3 3.00 25 25.00 NaN \n", - "4 3.00 33 33.00 NaN \n", - "\n", - " vp_in_shape speed_mph route_id time_of_day service_minutes \n", - "0 NaN NaN None None NaN \n", - "1 NaN NaN None None NaN \n", - "2 NaN NaN None None NaN \n", - "3 NaN NaN None None NaN \n", - "4 NaN NaN None None NaN " - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py b/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py index 0e22c18b9..285235893 100644 --- a/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py +++ b/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py @@ -357,6 +357,6 @@ def vp_usable_metrics(analysis_date:str) -> pd.DataFrame: logger.add(sys.stderr, format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", level="INFO") - for date in update_vars2.analysis_date_list: + for date in update_vars.analysis_date_list: vp_usable_metrics(date) print('Done') \ No newline at end of file diff --git a/rt_scheduled_v_ran/scripts/update_vars.py b/rt_scheduled_v_ran/scripts/update_vars.py index d49e06b76..0c2b7d52b 100644 --- a/rt_scheduled_v_ran/scripts/update_vars.py +++ b/rt_scheduled_v_ran/scripts/update_vars.py @@ -1,7 +1,8 @@ from shared_utils import rt_dates -analysis_date = rt_dates.DATES["oct2023"] +months = ["nov", "oct", "sep", "aug", + "jul", "jun", "may", "apr", "mar"] analysis_date_list = [ - rt_dates.DATES["oct2023"] -] + rt_dates.DATES[f"{m}2023"] for m in months +] \ No newline at end of file diff --git a/rt_scheduled_v_ran/scripts/update_vars2.py b/rt_scheduled_v_ran/scripts/update_vars2.py deleted file mode 100644 index 359025573..000000000 --- a/rt_scheduled_v_ran/scripts/update_vars2.py +++ /dev/null @@ -1,8 +0,0 @@ -from shared_utils import rt_dates - -months = ["nov", "oct", "sep", "aug", - "jul", "jun", "may", "apr", "mar"] - -analysis_date_list = [ - rt_dates.DATES[f"{m}2023"] for m in months -] diff --git a/rt_scheduled_v_ran/scripts/vp_usable_metrics.py b/rt_scheduled_v_ran/scripts/vp_usable_metrics.py deleted file mode 100644 index 0d0616439..000000000 --- a/rt_scheduled_v_ran/scripts/vp_usable_metrics.py +++ /dev/null @@ -1,281 +0,0 @@ -import datetime - -import dask -import dask.dataframe as dd -import geopandas as gpd -import numpy as np -import pandas as pd -from calitp_data_analysis import utils -from calitp_data_analysis.geography_utils import WGS84 -import vp_spatial_accuracy -from segment_speed_utils import helpers -from segment_speed_utils.project_vars import ( - GCS_FILE_PATH, - PROJECT_CRS, - SEGMENT_GCS, - analysis_date, -) - -# Times -import sys -from loguru import logger - -# cd rt_segment_speeds && pip install -r requirements.txt && cd - -# UPDATE COMPLETENESS -def pings_trip_time(vp_usable_df: pd.DataFrame): - - # Find number of pings each minute - df = ( - vp_usable_df.groupby( - [ - "trip_instance_key", - pd.Grouper(key="location_timestamp_local", freq="1Min"), - ] - ) - .vp_idx.count() - .reset_index() - .rename(columns={"vp_idx": "number_of_pings_per_minute"}) - ) - - # Determine which rows have 2+ pings per minute - df = df.assign( - minutes_w_atleast2_trip_updates=df.apply( - lambda x: 1 if x.number_of_pings_per_minute >= 2 else 0, axis=1 - ) - ) - - # Need a copy of loc-timestamp-local to get max time - df["max_time"] = df.location_timestamp_local - - # Need a copy of numer of pings per minute to count - # for total minutes w gtfs - df["total_minute_w_gtfs"] = df.number_of_pings_per_minute - - # Find the min time for each trip and sum up total min with at least 2 pings per min - df = ( - df.groupby(["trip_instance_key"]) - .agg( - { - "location_timestamp_local": "min", - "max_time": "max", - "minutes_w_atleast2_trip_updates": "sum", - "number_of_pings_per_minute": "median", - "total_minute_w_gtfs": "count", - } - ) - .reset_index() - .rename( - columns={ - "location_timestamp_local": "min_time", - "number_of_pings_per_minute": "median_pings_per_min", - } - ) - ) - - # Find total trip time and add an extra minute - df["total_trip_time"] = (df.max_time - df.min_time) / pd.Timedelta(minutes=1) + 1 - - df = df.drop(columns=["min_time", "max_time"]) - return df - -def grab_shape_keys_in_vp(vp_usable: dd.DataFrame, analysis_date: str): - """ - Subset raw vp and find unique trip_instance_keys. - Create crosswalk to link trip_instance_key to shape_array_key. - """ - vp_usable = ( - vp_usable[["trip_instance_key"]].drop_duplicates().reset_index(drop=True) - ) - - trips_with_shape = helpers.import_scheduled_trips( - analysis_date, - columns=["trip_instance_key", "shape_array_key"], - get_pandas=True, - ) - - # Only one row per trip/shape - # trip_instance_key and shape_array_key are the only 2 cols left - m1 = dd.merge(vp_usable, trips_with_shape, on="trip_instance_key", how="inner") - - return m1 - -def buffer_shapes( - trips_with_shape: pd.DataFrame, - analysis_date: str, - buffer_meters: int = 35, -): - """ - Filter scheduled shapes down to the shapes that appear in vp. - Buffer these. - - Attach the shape geometry for a subset of shapes or trips. - """ - subset = trips_with_shape.shape_array_key.unique().compute().tolist() - - shapes = helpers.import_scheduled_shapes( - analysis_date, - columns=["shape_array_key", "geometry"], - filters=[[("shape_array_key", "in", subset)]], - crs=PROJECT_CRS, - get_pandas=False, - ).pipe(helpers.remove_shapes_outside_ca) - - # to_crs takes awhile, so do a filtering on only shapes we need - shapes = shapes.assign(geometry=shapes.geometry.buffer(buffer_meters)) - - trips_with_shape_geom = dd.merge( - shapes, trips_with_shape, on="shape_array_key", how="inner" - ) - - trips_with_shape_geom = trips_with_shape_geom.compute() - return trips_with_shape_geom - -# SPATIAL ACCURACY -def vp_in_shape( - vp_usable: dd.DataFrame, trips_with_buffered_shape: gpd.GeoDataFrame -) -> gpd.GeoDataFrame: - - keep = ["trip_instance_key", "x", "y", "location_timestamp_local"] - vp_usable = vp_usable[keep] - - vp_gdf = gpd.GeoDataFrame( - vp_usable, geometry=gpd.points_from_xy(vp_usable.x, vp_usable.y), crs=WGS84 - ).to_crs(PROJECT_CRS) - - gdf = pd.merge( - vp_gdf, trips_with_buffered_shape, on="trip_instance_key", how="inner" - ) - - gdf = gdf.assign(is_within=gdf.geometry_x.within(gdf.geometry_y)) - gdf = gdf[["trip_instance_key", "location_timestamp_local", "is_within"]] - - return gdf - -def total_counts(result: dd.DataFrame): - - total_vp = vp_spatial_accuracy.total_vp_counts_by_trip(result) - - result2 = result.loc[result.is_within == True].reset_index(drop = True) - result2 = result2[["trip_instance_key", "location_timestamp_local"]] - vps_in_shape = ( - result2.groupby("trip_instance_key", observed=True, group_keys=False) - .agg({"location_timestamp_local": "count"}) - .reset_index() - .rename(columns={"location_timestamp_local": "vp_in_shape"}) - ) - - # Count total vps for the trip - # total vp by trip can be done on vp_usable / break apart from vp_in_shape - - count_df = pd.merge(total_vp, vps_in_shape, on="trip_instance_key", how="left") - - count_df = count_df.assign( - vp_in_shape=count_df.vp_in_shape.fillna(0).astype("int32"), - total_vp=count_df.total_vp.fillna(0).astype("int32"), - ) - - return count_df - -# SPEEDS -def load_trip_speeds(analysis_date): - df = pd.read_parquet( - f"{SEGMENT_GCS}trip_summary/trip_speeds_{analysis_date}.parquet", - columns=[ - "trip_instance_key", - "speed_mph", - "route_id", - "time_of_day", - "service_minutes", - ]) - - return df - -# Complete -def vp_usable_metrics(analysis_date:str) -> pd.DataFrame: - - """ - Keep for testing - operator = "Bay Area 511 Muni VehiclePositions" - gtfs_key = "7cc0cb1871dfd558f11a2885c145d144" - - vp_usable= dd.read_parquet( - f"{SEGMENT_GCS}vp_usable_{analysis_date}", - filters=[ - [ - ("gtfs_dataset_name", "==", operator), - ("schedule_gtfs_dataset_key", "==", gtfs_key), - ] - ], - ) - """ - vp_usable = dd.read_parquet(f"{SEGMENT_GCS}vp_usable_{analysis_date}") - - ## Update Completeness ## - - # Find total min with gtfs, total trip time, - # median pings per minute - pings_trip_time_df = vp_usable.map_partitions( - pings_trip_time, - meta={ - "trip_instance_key": "object", - "minutes_w_atleast2_trip_updates": "int64", - "median_pings_per_min": "int64", - "total_minute_w_gtfs": "int64", - "total_trip_time": "float64", - }, - align_dataframes=False).persist() - - ## Spatial accuracy ## - - # Determine which trips have shapes associated with them - trips_with_shapes_df = grab_shape_keys_in_vp(vp_usable, analysis_date) - - # Buffer the shapes - buffered_shapes_df = buffer_shapes(trips_with_shapes_df, analysis_date, 35) - - # Find the vps that fall into buffered shapes - in_shape_df = vp_usable.map_partitions( - vp_in_shape, - buffered_shapes_df, - meta={ - "trip_instance_key": "object", - "location_timestamp_local": "datetime64[ns]", - "is_within":"bool", - }, - align_dataframes=False).persist() - - # Compare total vps for a trip versus total vps that - # fell in the recorded shape - spatial_accuracy_df = in_shape_df.map_partitions( - total_counts, - meta={"trip_instance_key": "object", "total_vp": "int32", "vp_in_shape": "int32"}, - align_dataframes=False,).persist() - - # Load trip speeds - trip_speeds_df = load_trip_speeds(analysis_date) - - # Merges - pings_trip_time_df = pings_trip_time_df.compute() - spatial_accuracy_df = spatial_accuracy_df.compute() - - m1 = ( - pings_trip_time_df.merge(spatial_accuracy_df, on=["trip_instance_key"], how="outer") - .merge(trip_speeds_df, on=["trip_instance_key"], how="outer") - ) - - m1.to_parquet('./vp_usable_metrics.parquet') - return m1 - -if __name__ == "__main__": - start = datetime.datetime.now() - LOG_FILE = "../logs/vp_usable_test.log" - logger.add(LOG_FILE, retention="3 months") - logger.add(sys.stderr, - format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", - level="INFO") - end = datetime.datetime.now() - logger.info(f"Analysis date: {analysis_date}, started at {start}") - vp_usable_metrics(analysis_date) - logger.info(f"Ended at {end}, took {end - start} min to run") - print('done') \ No newline at end of file From 0fc1a1f3bd7e8397c68d53f4ee3c2bca4f8cbcb6 Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Tue, 26 Dec 2023 23:25:22 +0000 Subject: [PATCH 2/6] agg up to route first attempt --- .../06_vp_usable_exploration.ipynb | 2722 +++++++++++++++++ .../scripts/rt_v_scheduled_trip.py | 6 +- rt_scheduled_v_ran/scripts/update_vars.py | 2 +- 3 files changed, 2726 insertions(+), 4 deletions(-) create mode 100644 rt_scheduled_v_ran/06_vp_usable_exploration.ipynb diff --git a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb new file mode 100644 index 000000000..fcf20b268 --- /dev/null +++ b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb @@ -0,0 +1,2722 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "63b72e4f-ad51-4c17-90b5-e3667618a725", + "metadata": {}, + "source": [ + "## Check out Results" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "420e44a4-aa22-4475-840f-6caf92fa7844", + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "67351837-e385-464b-96b3-48fc13703af7", + "metadata": {}, + "outputs": [], + "source": [ + "from segment_speed_utils.project_vars import (\n", + " GCS_FILE_PATH,\n", + " PROJECT_CRS,\n", + " SEGMENT_GCS,\n", + " analysis_date,\n", + ")\n", + "from segment_speed_utils import helpers, wrangle_shapes,sched_rt_utils\n", + "from shared_utils import schedule_rt_utils, portfolio_utils, geography_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "c12b17b1-8c43-40c8-8a8d-579618452b06", + "metadata": {}, + "outputs": [], + "source": [ + "pd.options.display.max_columns = 100\n", + "pd.options.display.float_format = \"{:.2f}\".format\n", + "pd.set_option(\"display.max_rows\", None)\n", + "pd.set_option(\"display.max_colwidth\", None)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "2ebc71d7-342d-4b8f-b261-709b0cbfe013", + "metadata": {}, + "outputs": [], + "source": [ + "GCS_PATH = 'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/'" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "26185ced-9f3b-4266-a080-0eeed0c0a825", + "metadata": {}, + "outputs": [], + "source": [ + "def check_out(df:pd.DataFrame):\n", + " display(df.spatial_accuracy_pct.describe())\n", + " display(df.pings_per_min.describe())\n", + " display(df.rt_triptime_w_gtfs_pct.describe())\n", + " display(df.rt_v_scheduled_trip_time_pct.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "aba941a7-bc28-4501-9505-0fd064c24a0c", + "metadata": {}, + "outputs": [], + "source": [ + "mar_df = pd.read_parquet(f'{GCS_PATH}2023-03-15_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "89ef7ad6-d728-45d1-a4ae-75203df93cc9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", + " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", + " 'speed_mph', 'route_id', 'time_of_day', 'service_minutes',\n", + " 'pings_per_min', 'spatial_accuracy_pct', 'rt_triptime_w_gtfs_pct',\n", + " 'rt_v_scheduled_trip_time_pct'],\n", + " dtype='object')" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mar_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "af7c69e1-e363-42f8-bf1c-f9ef2ba141e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 69494.00\n", + "mean 94.10\n", + "std 12.34\n", + "min 0.00\n", + "25% 95.24\n", + "50% 100.00\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 83620.00\n", + "mean 2.46\n", + "std 0.68\n", + "min 0.00\n", + "25% 1.91\n", + "50% 2.86\n", + "75% 2.96\n", + "max 4.75\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 83620.00\n", + "mean 95.20\n", + "std 14.55\n", + "min 0.28\n", + "25% 97.99\n", + "50% 99.58\n", + "75% 100.25\n", + "max 108.43\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 71797.00\n", + "mean 60.68\n", + "std 333.76\n", + "min -87.37\n", + "25% 10.76\n", + "50% 25.19\n", + "75% 44.44\n", + "max 17909.79\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_out(mar_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "26701c9c-669d-4e09-8c38-5ece391f5889", + "metadata": {}, + "outputs": [], + "source": [ + "apr_df = pd.read_parquet(f'{GCS_PATH}2023-04-12_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "509ec31f-c521-4149-be35-b4548a9e1666", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 71094.00\n", + "mean 94.06\n", + "std 12.64\n", + "min 0.00\n", + "25% 95.45\n", + "50% 100.00\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 84516.00\n", + "mean 2.45\n", + "std 0.67\n", + "min 0.01\n", + "25% 1.91\n", + "50% 2.83\n", + "75% 2.95\n", + "max 5.18\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 84516.00\n", + "mean 95.23\n", + "std 14.48\n", + "min 0.56\n", + "25% 97.88\n", + "50% 99.53\n", + "75% 100.20\n", + "max 108.11\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 73471.00\n", + "mean 61.42\n", + "std 349.18\n", + "min -86.02\n", + "25% 10.46\n", + "50% 25.10\n", + "75% 44.38\n", + "max 15903.70\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_out(apr_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "dcf7252b-eb12-4d76-bacf-ceb34d1e0c0a", + "metadata": {}, + "outputs": [], + "source": [ + "may_df = pd.read_parquet(f'{GCS_PATH}2023-05-17_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "151c9d6f-ce92-4ab2-ad1c-525ca627dbfc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 65385.00\n", + "mean 94.07\n", + "std 12.45\n", + "min 0.00\n", + "25% 95.00\n", + "50% 100.00\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 83606.00\n", + "mean 2.47\n", + "std 0.68\n", + "min 0.00\n", + "25% 1.95\n", + "50% 2.86\n", + "75% 2.95\n", + "max 5.14\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 83606.00\n", + "mean 95.47\n", + "std 13.78\n", + "min 0.27\n", + "25% 97.76\n", + "50% 99.51\n", + "75% 100.21\n", + "max 108.43\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 67864.00\n", + "mean 60.61\n", + "std 329.38\n", + "min -90.09\n", + "25% 10.96\n", + "50% 25.64\n", + "75% 44.61\n", + "max 17907.92\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_out(may_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "976f786d-c3e5-44ae-991c-db7d95d5169a", + "metadata": {}, + "outputs": [], + "source": [ + "jun_df = pd.read_parquet(f'{GCS_PATH}2023-06-14_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "c87d9f5e-ae39-4c4a-b313-037fc118dce4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 64270.00\n", + "mean 93.92\n", + "std 12.67\n", + "min 0.00\n", + "25% 94.77\n", + "50% 100.00\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 80331.00\n", + "mean 2.51\n", + "std 0.67\n", + "min 0.01\n", + "25% 1.98\n", + "50% 2.90\n", + "75% 2.96\n", + "max 4.93\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 80331.00\n", + "mean 95.92\n", + "std 13.48\n", + "min 0.28\n", + "25% 98.43\n", + "50% 99.67\n", + "75% 100.28\n", + "max 107.78\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 66748.00\n", + "mean 63.20\n", + "std 345.10\n", + "min -90.25\n", + "25% 11.69\n", + "50% 25.91\n", + "75% 45.15\n", + "max 12997.42\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_out(jun_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "e55cb702-f2fa-4350-bcd9-cb057a7edbdc", + "metadata": {}, + "outputs": [], + "source": [ + "jul_df = pd.read_parquet(f'{GCS_PATH}2023-07-12_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "a287bc73-76ff-4235-8a72-671045bac935", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 67091.00\n", + "mean 93.83\n", + "std 13.15\n", + "min 0.00\n", + "25% 94.96\n", + "50% 100.00\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 82044.00\n", + "mean 2.49\n", + "std 0.67\n", + "min 0.00\n", + "25% 1.95\n", + "50% 2.90\n", + "75% 2.97\n", + "max 5.76\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 82044.00\n", + "mean 96.42\n", + "std 11.07\n", + "min 0.43\n", + "25% 98.28\n", + "50% 99.65\n", + "75% 100.25\n", + "max 108.60\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 69488.00\n", + "mean 56.85\n", + "std 291.52\n", + "min -91.05\n", + "25% 11.82\n", + "50% 26.10\n", + "75% 45.69\n", + "max 17880.62\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_out(jul_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "21b4d1b8-598d-410a-9c73-69fb92d88c1b", + "metadata": {}, + "outputs": [], + "source": [ + "aug_df = pd.read_parquet(f'{GCS_PATH}2023-08-15_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "1cda1e4d-25f7-4836-b8ab-7b7997edb446", + "metadata": {}, + "outputs": [], + "source": [ + "sept_df = pd.read_parquet(f'{GCS_PATH}2023-09-13_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "497e04c0-1c33-4fdf-9cfc-94e2a925ebd3", + "metadata": {}, + "outputs": [], + "source": [ + "oct_df = pd.read_parquet(f'{GCS_PATH}2023-10-11_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "07e73d1c-5b24-43a8-852e-35cff4f416e6", + "metadata": {}, + "outputs": [], + "source": [ + "nov_df = pd.read_parquet(f'{GCS_PATH}2023-11-15_metrics.parquet')" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "38f00976-c428-4c3c-a153-eb9035a709ef", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 74891.00\n", + "mean 93.53\n", + "std 13.12\n", + "min 0.00\n", + "25% 93.94\n", + "50% 99.68\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 86832.00\n", + "mean 2.51\n", + "std 0.63\n", + "min 0.00\n", + "25% 2.09\n", + "50% 2.86\n", + "75% 2.96\n", + "max 5.29\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 86832.00\n", + "mean 95.93\n", + "std 12.15\n", + "min 0.45\n", + "25% 98.40\n", + "50% 99.67\n", + "75% 100.26\n", + "max 108.93\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 77194.00\n", + "mean 42.53\n", + "std 196.80\n", + "min -88.16\n", + "25% 11.44\n", + "50% 26.10\n", + "75% 46.17\n", + "max 11797.08\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "check_out(nov_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "8762a9c1-9a3b-490a-a447-4481b59d930c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "30287" + ] + }, + "execution_count": 22, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(nov_df[nov_df.rt_triptime_w_gtfs_pct > 100])" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "e3d9b4a9-70e1-40aa-b2eb-f2e23b3a4d1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "86832" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(nov_df)" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "c2963bc8-5535-4dce-af95-767317c2201c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 86832 entries, 0 to 86831\n", + "Data columns (total 15 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trip_instance_key 86832 non-null object \n", + " 1 rt_service_min 86832 non-null float64\n", + " 2 min_w_atleast2_trip_updates 86832 non-null int64 \n", + " 3 total_pings_for_trip 86832 non-null int64 \n", + " 4 total_min_w_gtfs 86832 non-null int64 \n", + " 5 total_vp 74891 non-null float64\n", + " 6 vp_in_shape 74891 non-null float64\n", + " 7 speed_mph 77194 non-null float64\n", + " 8 route_id 75619 non-null object \n", + " 9 time_of_day 77194 non-null object \n", + " 10 service_minutes 77194 non-null float64\n", + " 11 pings_per_min 86832 non-null float64\n", + " 12 spatial_accuracy_pct 74891 non-null float64\n", + " 13 rt_triptime_w_gtfs_pct 86832 non-null float64\n", + " 14 rt_v_scheduled_trip_time_pct 77194 non-null float64\n", + "dtypes: float64(9), int64(3), object(3)\n", + "memory usage: 10.6+ MB\n" + ] + } + ], + "source": [ + "nov_df.info()" + ] + }, + { + "cell_type": "markdown", + "id": "a227a3dd-7761-46d0-bcc2-2a1e0f18db09", + "metadata": {}, + "source": [ + "### Test aggregating with November" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "cfb7de65-5c85-4b1f-821d-2cabea0a8184", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date = \"2023-11-15\"" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "330395c9-176b-4ec0-a036-b1598f5d2b55", + "metadata": {}, + "outputs": [], + "source": [ + "nov_df.rt_triptime_w_gtfs_pct = nov_df.rt_triptime_w_gtfs_pct.mask(nov_df.rt_triptime_w_gtfs_pct > 100).fillna(100)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "fc6630d9-bdfe-42f1-86e9-2979143aac97", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", + " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", + " 'speed_mph', 'route_id', 'time_of_day', 'service_minutes',\n", + " 'pings_per_min', 'spatial_accuracy_pct', 'rt_triptime_w_gtfs_pct',\n", + " 'rt_v_scheduled_trip_time_pct'],\n", + " dtype='object')" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nov_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "be0fb1be-060d-4311-856e-5699e15e0c8c", + "metadata": {}, + "outputs": [], + "source": [ + "nov_df2= nov_df.drop(columns = ['route_id','service_minutes', 'time_of_day'])" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "c88f5faa-f3a2-4373-ad93-96fe61c7bbf3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/rt_segment_speeds/trip_summary/trip_speeds_2023-11-15.parquet'" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "f\"{SEGMENT_GCS}trip_summary/trip_speeds_2023-11-15.parquet\"" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "dbffcd12-64f3-490e-8854-18f518e6d88e", + "metadata": {}, + "outputs": [], + "source": [ + "speeds = pd.read_parquet(\n", + " f\"{SEGMENT_GCS}trip_summary/trip_speeds_2023-11-15.parquet\")" + ] + }, + { + "cell_type": "markdown", + "id": "90958a55-27ca-447f-8304-37773081c973", + "metadata": {}, + "source": [ + "#### https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/avg_speeds_by_segment.py#L135" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "84cc7f28-1289-4df1-8073-cd6e71ed77a6", + "metadata": {}, + "outputs": [], + "source": [ + "keep_cols = [\n", + " \"gtfs_dataset_key\",\n", + " \"direction_id\", \n", + " \"route_id\", \"route_short_name\", \"route_long_name\", \"route_desc\", \"trip_instance_key\"\n", + " ]\n", + " \n", + "crosswalk = helpers.import_scheduled_trips(\n", + " analysis_date, \n", + " columns = keep_cols, \n", + " get_pandas = True\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "0b4b0f0d-cc15-4f3e-b8c7-df8579831539", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphroute_idtime_of_dayservice_minutespings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pct
0a3647253d4cc8f847e972ed8c83d1b9b22.62226523NaNNaNNaNNoneNoneNaN2.87NaN100.00NaN
17029f592047be84e5bb1d28d299be35d16.93164817NaNNaNNaNNoneNoneNaN2.83NaN100.00NaN
21040196034fd380818a2cbcf1eafd9b840.954011841NaNNaNNaNNoneNoneNaN2.88NaN100.00NaN
35c6d43026fe5f02e5b31c18fcb8c0bf562.956117663NaNNaNNaNNoneNoneNaN2.80NaN100.00NaN
4ee2f1fd83d87e85119f66014da5d74d514.07133715NaNNaNNaNNoneNoneNaN2.63NaN100.00NaN
\n", + "
" + ], + "text/plain": [ + " trip_instance_key rt_service_min \\\n", + "0 a3647253d4cc8f847e972ed8c83d1b9b 22.62 \n", + "1 7029f592047be84e5bb1d28d299be35d 16.93 \n", + "2 1040196034fd380818a2cbcf1eafd9b8 40.95 \n", + "3 5c6d43026fe5f02e5b31c18fcb8c0bf5 62.95 \n", + "4 ee2f1fd83d87e85119f66014da5d74d5 14.07 \n", + "\n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "0 22 65 23 \n", + "1 16 48 17 \n", + "2 40 118 41 \n", + "3 61 176 63 \n", + "4 13 37 15 \n", + "\n", + " total_vp vp_in_shape speed_mph route_id time_of_day service_minutes \\\n", + "0 NaN NaN NaN None None NaN \n", + "1 NaN NaN NaN None None NaN \n", + "2 NaN NaN NaN None None NaN \n", + "3 NaN NaN NaN None None NaN \n", + "4 NaN NaN NaN None None NaN \n", + "\n", + " pings_per_min spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", + "0 2.87 NaN 100.00 \n", + "1 2.83 NaN 100.00 \n", + "2 2.88 NaN 100.00 \n", + "3 2.80 NaN 100.00 \n", + "4 2.63 NaN 100.00 \n", + "\n", + " rt_v_scheduled_trip_time_pct \n", + "0 NaN \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 NaN " + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "nov_df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "1e89939b-1bca-4670-955f-013c16949ec9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keydirection_idroute_idroute_short_nameroute_long_nameroute_desctrip_instance_key
01770249a5a2e770ca90628434d4934b11.003402Route 11Route 11PACIFIC VIEW MALL via TELEPHONE RDed6aa732d8c47e4df3a2f7ba9d24415c
11770249a5a2e770ca90628434d4934b11.003402Route 11Route 11PACIFIC VIEW MALL via TELEPHONE RD72b2c8bdebbf14039af7c57133cc23e5
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key direction_id route_id route_short_name \\\n", + "0 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", + "1 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", + "\n", + " route_long_name route_desc \\\n", + "0 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", + "1 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", + "\n", + " trip_instance_key \n", + "0 ed6aa732d8c47e4df3a2f7ba9d24415c \n", + "1 72b2c8bdebbf14039af7c57133cc23e5 " + ] + }, + "execution_count": 33, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "crosswalk.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "e862de9a-47dc-42e0-aec3-81645a09afea", + "metadata": {}, + "outputs": [], + "source": [ + "common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "1dec5d15-718e-467f-b7a6-cfc5ad594d6e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2023-11-15'" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "analysis_date" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "d6578b42-82b3-4edf-b97b-b4092c6f17ba", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keyroute_iddirection_idcommon_shape_idshape_array_key
0014d0998350083249a9eb310635548c2108668261.0010866826:1a7f294e50a9a8ff179d4c82cd9136625
1014d0998350083249a9eb310635548c2108668491.0010866849:1af9aea31d387f59024bf25fb1d9334a3
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key route_id direction_id common_shape_id \\\n", + "0 014d0998350083249a9eb310635548c2 10866826 1.00 10866826:1 \n", + "1 014d0998350083249a9eb310635548c2 10866849 1.00 10866849:1 \n", + "\n", + " shape_array_key \n", + "0 a7f294e50a9a8ff179d4c82cd9136625 \n", + "1 af9aea31d387f59024bf25fb1d9334a3 " + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "common_shape.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "aba49709-75e1-43e2-84a0-953fc3219206", + "metadata": {}, + "outputs": [], + "source": [ + "crosswalk2 = pd.merge(\n", + " crosswalk,\n", + " common_shape,\n", + " on = [\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n", + " how = \"inner\"\n", + " ).astype({\"direction_id\": \"Int64\"})" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "f23a0e8a-e7f3-411e-a6c0-784c87e559ed", + "metadata": {}, + "outputs": [], + "source": [ + "time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "99381d4c-b338-4e04-a351-b077c6b94224", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyservice_hourstrip_first_departure_datetime_pacifictime_of_dayservice_minutes
0ed6aa732d8c47e4df3a2f7ba9d24415c0.602023-11-15 15:10:00PM Peak36.00
172b2c8bdebbf14039af7c57133cc23e50.552023-11-15 16:40:00PM Peak33.00
2e829a24393004fd97a0c6da2efd2b38b0.532023-11-15 06:40:00Early AM32.00
305c66c28220d0a7982980194c8d479f60.552023-11-15 19:05:00PM Peak33.00
4169af6136355af8248800d0c1c0916b70.552023-11-15 17:10:00PM Peak33.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key service_hours \\\n", + "0 ed6aa732d8c47e4df3a2f7ba9d24415c 0.60 \n", + "1 72b2c8bdebbf14039af7c57133cc23e5 0.55 \n", + "2 e829a24393004fd97a0c6da2efd2b38b 0.53 \n", + "3 05c66c28220d0a7982980194c8d479f6 0.55 \n", + "4 169af6136355af8248800d0c1c0916b7 0.55 \n", + "\n", + " trip_first_departure_datetime_pacific time_of_day service_minutes \n", + "0 2023-11-15 15:10:00 PM Peak 36.00 \n", + "1 2023-11-15 16:40:00 PM Peak 33.00 \n", + "2 2023-11-15 06:40:00 Early AM 32.00 \n", + "3 2023-11-15 19:05:00 PM Peak 33.00 \n", + "4 2023-11-15 17:10:00 PM Peak 33.00 " + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "time_of_day.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "3defcf73-8930-48c7-ae05-7e3ec263887a", + "metadata": {}, + "outputs": [], + "source": [ + "crosswalk2 = portfolio_utils.add_route_name(\n", + " crosswalk2\n", + " ).drop(columns = [\"route_short_name\", \"route_long_name\", \"route_desc\"])\n" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "67e1a70f-d21c-4a9d-b4a3-2ebde459b4e1", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.merge(\n", + " nov_df2,\n", + " crosswalk2,\n", + " on = \"trip_instance_key\",\n", + " how = \"left\",\n", + " ).merge(\n", + " time_of_day,\n", + " on = \"trip_instance_key\",\n", + " how = \"left\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "4576ab1e-2bde-4420-8254-7c8e2a2700d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphpings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pctschedule_gtfs_dataset_keydirection_idroute_idcommon_shape_idshape_array_keyroute_name_usedservice_hourstrip_first_departure_datetime_pacifictime_of_dayservice_minutes
0a3647253d4cc8f847e972ed8c83d1b9b22.62226523NaNNaNNaN2.87NaN100.00NaNNaN<NA>NaNNaNNaNNaNNaNNaTNaNNaN
17029f592047be84e5bb1d28d299be35d16.93164817NaNNaNNaN2.83NaN100.00NaNNaN<NA>NaNNaNNaNNaNNaNNaTNaNNaN
21040196034fd380818a2cbcf1eafd9b840.954011841NaNNaNNaN2.88NaN100.00NaNNaN<NA>NaNNaNNaNNaNNaNNaTNaNNaN
35c6d43026fe5f02e5b31c18fcb8c0bf562.956117663NaNNaNNaN2.80NaN100.00NaNNaN<NA>NaNNaNNaNNaNNaNNaTNaNNaN
4ee2f1fd83d87e85119f66014da5d74d514.07133715NaNNaNNaN2.63NaN100.00NaNNaN<NA>NaNNaNNaNNaNNaNNaTNaNNaN
\n", + "
" + ], + "text/plain": [ + " trip_instance_key rt_service_min \\\n", + "0 a3647253d4cc8f847e972ed8c83d1b9b 22.62 \n", + "1 7029f592047be84e5bb1d28d299be35d 16.93 \n", + "2 1040196034fd380818a2cbcf1eafd9b8 40.95 \n", + "3 5c6d43026fe5f02e5b31c18fcb8c0bf5 62.95 \n", + "4 ee2f1fd83d87e85119f66014da5d74d5 14.07 \n", + "\n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "0 22 65 23 \n", + "1 16 48 17 \n", + "2 40 118 41 \n", + "3 61 176 63 \n", + "4 13 37 15 \n", + "\n", + " total_vp vp_in_shape speed_mph pings_per_min spatial_accuracy_pct \\\n", + "0 NaN NaN NaN 2.87 NaN \n", + "1 NaN NaN NaN 2.83 NaN \n", + "2 NaN NaN NaN 2.88 NaN \n", + "3 NaN NaN NaN 2.80 NaN \n", + "4 NaN NaN NaN 2.63 NaN \n", + "\n", + " rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \\\n", + "0 100.00 NaN \n", + "1 100.00 NaN \n", + "2 100.00 NaN \n", + "3 100.00 NaN \n", + "4 100.00 NaN \n", + "\n", + " schedule_gtfs_dataset_key direction_id route_id common_shape_id \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " shape_array_key route_name_used service_hours \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "\n", + " trip_first_departure_datetime_pacific time_of_day service_minutes \n", + "0 NaT NaN NaN \n", + "1 NaT NaN NaN \n", + "2 NaT NaN NaN \n", + "3 NaT NaN NaN \n", + "4 NaT NaN NaN " + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "markdown", + "id": "d2ff8a3d-d3f5-42b4-b096-24b33b9842ca", + "metadata": {}, + "source": [ + "#### https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/export.py#L150\n", + "* Should I still drop overly quick speeds?" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "6f9d6077-0551-42e0-830c-a262364770f0", + "metadata": {}, + "outputs": [], + "source": [ + "df2 = df.loc[df.speed_mph <= 70].reset_index(drop = True)" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "e7ce443a-b466-4d07-a613-cfc05a1b764d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(77170, 86832)" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df2), len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "b3695f80-f817-4e3f-b882-5f37e0c277ae", + "metadata": {}, + "outputs": [], + "source": [ + "route_cols = [\n", + " \"schedule_gtfs_dataset_key\", \"time_of_day\",\n", + " \"route_id\", \"direction_id\",\n", + " \"route_name_used\",\n", + " \"common_shape_id\", \"shape_array_key\"\n", + " ]" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "7f232164-7199-4ee2-9231-acf633509d89", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphpings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pctschedule_gtfs_dataset_keydirection_idroute_idcommon_shape_idshape_array_keyroute_name_usedservice_hourstrip_first_departure_datetime_pacifictime_of_dayservice_minutes
62094e77edc665d02c0c34d16290e6d3caad638.323811239112.00111.0012.842.9299.11100.0053.277cc0cb1871dfd558f11a2885c145d1441373751347b53a6f1c73d92b970c56cee4aa2d1Weekdays 6am-9pm Weekends 9am-9pm0.422023-11-15 22:00:00Evening25.00
25552ecb5631c1349a15c70b673e25fc5482747.934714148141.00140.0010.132.9499.29100.0022.913f3f36b4c41cc6b5df3eb7f5d8ea6e3c0690-131686900006_JUNE231a819c320e5f644df0a9bbabc869b049SYLMAR OLIVE VIEW HOSP. - SUNLAND VIA FOOTHILL BL0.652023-11-15 16:32:00PM Peak39.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key rt_service_min \\\n", + "62094 e77edc665d02c0c34d16290e6d3caad6 38.32 \n", + "25552 ecb5631c1349a15c70b673e25fc54827 47.93 \n", + "\n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "62094 38 112 39 \n", + "25552 47 141 48 \n", + "\n", + " total_vp vp_in_shape speed_mph pings_per_min spatial_accuracy_pct \\\n", + "62094 112.00 111.00 12.84 2.92 99.11 \n", + "25552 141.00 140.00 10.13 2.94 99.29 \n", + "\n", + " rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \\\n", + "62094 100.00 53.27 \n", + "25552 100.00 22.91 \n", + "\n", + " schedule_gtfs_dataset_key direction_id route_id \\\n", + "62094 7cc0cb1871dfd558f11a2885c145d144 1 37 \n", + "25552 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c 0 690-13168 \n", + "\n", + " common_shape_id shape_array_key \\\n", + "62094 3751 347b53a6f1c73d92b970c56cee4aa2d1 \n", + "25552 6900006_JUNE23 1a819c320e5f644df0a9bbabc869b049 \n", + "\n", + " route_name_used service_hours \\\n", + "62094 Weekdays 6am-9pm Weekends 9am-9pm 0.42 \n", + "25552 SYLMAR OLIVE VIEW HOSP. - SUNLAND VIA FOOTHILL BL 0.65 \n", + "\n", + " trip_first_departure_datetime_pacific time_of_day service_minutes \n", + "62094 2023-11-15 22:00:00 Evening 25.00 \n", + "25552 2023-11-15 16:32:00 PM Peak 39.00 " + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.sample(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "dcd9a500-aba4-42bd-bf15-b8d1f302fb00", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RangeIndex: 77170 entries, 0 to 77169\n", + "Data columns (total 22 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trip_instance_key 77170 non-null object \n", + " 1 rt_service_min 77170 non-null float64 \n", + " 2 min_w_atleast2_trip_updates 77170 non-null int64 \n", + " 3 total_pings_for_trip 77170 non-null int64 \n", + " 4 total_min_w_gtfs 77170 non-null int64 \n", + " 5 total_vp 74245 non-null float64 \n", + " 6 vp_in_shape 74245 non-null float64 \n", + " 7 speed_mph 77170 non-null float64 \n", + " 8 pings_per_min 77170 non-null float64 \n", + " 9 spatial_accuracy_pct 74245 non-null float64 \n", + " 10 rt_triptime_w_gtfs_pct 77170 non-null float64 \n", + " 11 rt_v_scheduled_trip_time_pct 77170 non-null float64 \n", + " 12 schedule_gtfs_dataset_key 75595 non-null object \n", + " 13 direction_id 75595 non-null Int64 \n", + " 14 route_id 75595 non-null object \n", + " 15 common_shape_id 75595 non-null object \n", + " 16 shape_array_key 75595 non-null object \n", + " 17 route_name_used 75595 non-null object \n", + " 18 service_hours 77170 non-null float64 \n", + " 19 trip_first_departure_datetime_pacific 77170 non-null datetime64[ns]\n", + " 20 time_of_day 77170 non-null object \n", + " 21 service_minutes 77170 non-null float64 \n", + "dtypes: Int64(1), datetime64[ns](1), float64(10), int64(3), object(7)\n", + "memory usage: 13.0+ MB\n" + ] + } + ], + "source": [ + "df2.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "e964a1f0-b66c-4d7f-92d0-c72e7c7bf39c", + "metadata": {}, + "outputs": [], + "source": [ + "df3 = (df2.groupby(route_cols)\n", + " .agg({\n", + " \"service_minutes\": \"mean\",\n", + " \"rt_service_min\": \"mean\",\n", + " \"speed_mph\": \"mean\",\n", + " \"pings_per_min\":\"mean\",\n", + " \"total_vp\":\"mean\",\n", + " \"vp_in_shape\":\"mean\",\n", + " \"trip_instance_key\": \"count\"\n", + " }).reset_index()\n", + ") " + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "365803c0-a864-49ab-bb34-dcf2e936e867", + "metadata": {}, + "outputs": [], + "source": [ + "df4 = df3.assign(\n", + " rt_service_min = df2.rt_service_min.round(1),\n", + " service_minutes = df2.service_minutes.round(1),\n", + " speed_mph = df2.speed_mph.round(1)\n", + " ).rename(columns = {\n", + " \"service_minutes\": \"avg_sched_trip_min\",\n", + " \"rt_service_min\": \"avg_rt_trip_min\",\n", + " \"trip_instance_key\": \"n_trips\",\n", + " \"route_name_used\": \"route_name\",\n", + " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"\n", + " })" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "0660f4e2-0c92-4656-a967-95ac7a26b440", + "metadata": {}, + "outputs": [], + "source": [ + "org_crosswalk = (\n", + " schedule_rt_utils.sample_gtfs_dataset_key_to_organization_crosswalk(\n", + " df4,\n", + " analysis_date,\n", + " quartet_data = \"schedule\",\n", + " dim_gtfs_dataset_cols = [\"key\", \"base64_url\"],\n", + " dim_organization_cols = [\"source_record_id\", \n", + " \"name\", \"caltrans_district\"])\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 60, + "id": "e43fa48f-cca4-4bfa-a8cd-971a7dc3f969", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((82, 5), 82)" + ] + }, + "execution_count": 60, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "org_crosswalk.shape, org_crosswalk.schedule_gtfs_dataset_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "b0e1f34e-94b6-417e-ba35-0aebeccd3cee", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
schedule_gtfs_dataset_keybase64_urlorganization_source_record_idorganization_namecaltrans_district
0015d67d5b75b5cf2b710bbadadfb75f5aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
107d3b79f14cec8099119e1eb649f065baHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3Rmcy90YWhvZS1jYS11cy90YWhvZS1jYS11cy56aXA=rec3u4aMplqObcoTRTahoe Transportation District03 - Marysville
20881af3822466784992a49f1cc57d38faHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TQQ==recUmm4gcNXaqrwpnSonoma-Marin Area Rail Transit District04 - Oakland
309a703757d1ed14ca9580b1385e39315aHR0cHM6Ly9yaWRlbGF3bmRhbGViZWF0LmNvbS9ndGZzrecj8LXdeSurpSRNUCity of Lawndale07 - Los Angeles
409e16227fc42c4fe90204a9d11581034aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TTw==recRM3c9Zfaft4V2BCloverdale Transit04 - Oakland
\n", + "
" + ], + "text/plain": [ + " schedule_gtfs_dataset_key \\\n", + "0 015d67d5b75b5cf2b710bbadadfb75f5 \n", + "1 07d3b79f14cec8099119e1eb649f065b \n", + "2 0881af3822466784992a49f1cc57d38f \n", + "3 09a703757d1ed14ca9580b1385e39315 \n", + "4 09e16227fc42c4fe90204a9d11581034 \n", + "\n", + " base64_url \\\n", + "0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", + "1 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3Rmcy90YWhvZS1jYS11cy90YWhvZS1jYS11cy56aXA= \n", + "2 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TQQ== \n", + "3 aHR0cHM6Ly9yaWRlbGF3bmRhbGViZWF0LmNvbS9ndGZz \n", + "4 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TTw== \n", + "\n", + " organization_source_record_id organization_name \\\n", + "0 recNOb7pqBRlQVG5e Marin County Transit District \n", + "1 rec3u4aMplqObcoTR Tahoe Transportation District \n", + "2 recUmm4gcNXaqrwpn Sonoma-Marin Area Rail Transit District \n", + "3 recj8LXdeSurpSRNU City of Lawndale \n", + "4 recRM3c9Zfaft4V2B Cloverdale Transit \n", + "\n", + " caltrans_district \n", + "0 04 - Oakland \n", + "1 03 - Marysville \n", + "2 04 - Oakland \n", + "3 07 - Los Angeles \n", + "4 04 - Oakland " + ] + }, + "execution_count": 59, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "org_crosswalk.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "71c0d174-736c-4093-a57f-1b39608fb2c2", + "metadata": {}, + "outputs": [], + "source": [ + "df_with_org = pd.merge(\n", + " df4,\n", + " org_crosswalk.rename(columns = {\n", + " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"}),\n", + " on = \"gtfs_dataset_key\",\n", + " how = \"inner\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "275988a7-0b45-4799-b8d5-7826913745f3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(11229, 18)" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_with_org.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "3caa9bbc-51d2-4bc4-9d74-040f38ce67d5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_namecommon_shape_idshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphpings_per_mintotal_vpvp_in_shapen_tripsbase64_urlorganization_source_record_idorganization_namecaltrans_district
0015d67d5b75b5cf2b710bbadadfb75f5AM Peak170Downtown San Rafael - Sausalito1044362491ca6006e88a076e2eaaf69376258.0062.4022.202.75205.00162.805aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
1015d67d5b75b5cf2b710bbadadfb75f5AM Peak171Downtown San Rafael - Sausalito109fe2f063891238d0132d4543c537cf57458.0067.7021.602.73181.60166.605aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
2015d67d5b75b5cf2b710bbadadfb75f5AM Peak2190Tiburon - Strawberry56168629b6e90c8b94b78dd60c8eeaf0ca58.00127.408.102.91119.00113.254aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
3015d67d5b75b5cf2b710bbadadfb75f5AM Peak2191Tiburon - Strawberry586457a064626d1b0abdce22dd159fbb2d58.00152.009.402.9386.7585.754aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
4015d67d5b75b5cf2b710bbadadfb75f5AM Peak220Downtown San Rafael - Marin City500e10aded9dede712f3c623c5deae87a455.0076.3028.602.9590.3353.336aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "0 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 17 0 \n", + "1 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 17 1 \n", + "2 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 219 0 \n", + "3 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 219 1 \n", + "4 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 22 0 \n", + "\n", + " route_name common_shape_id \\\n", + "0 Downtown San Rafael - Sausalito 104 \n", + "1 Downtown San Rafael - Sausalito 109 \n", + "2 Tiburon - Strawberry 56 \n", + "3 Tiburon - Strawberry 58 \n", + "4 Downtown San Rafael - Marin City 50 \n", + "\n", + " shape_array_key avg_sched_trip_min avg_rt_trip_min \\\n", + "0 4362491ca6006e88a076e2eaaf693762 58.00 62.40 \n", + "1 fe2f063891238d0132d4543c537cf574 58.00 67.70 \n", + "2 168629b6e90c8b94b78dd60c8eeaf0ca 58.00 127.40 \n", + "3 6457a064626d1b0abdce22dd159fbb2d 58.00 152.00 \n", + "4 0e10aded9dede712f3c623c5deae87a4 55.00 76.30 \n", + "\n", + " speed_mph pings_per_min total_vp vp_in_shape n_trips \\\n", + "0 22.20 2.75 205.00 162.80 5 \n", + "1 21.60 2.73 181.60 166.60 5 \n", + "2 8.10 2.91 119.00 113.25 4 \n", + "3 9.40 2.93 86.75 85.75 4 \n", + "4 28.60 2.95 90.33 53.33 6 \n", + "\n", + " base64_url \\\n", + "0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", + "1 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", + "2 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", + "3 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", + "4 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", + "\n", + " organization_source_record_id organization_name \\\n", + "0 recNOb7pqBRlQVG5e Marin County Transit District \n", + "1 recNOb7pqBRlQVG5e Marin County Transit District \n", + "2 recNOb7pqBRlQVG5e Marin County Transit District \n", + "3 recNOb7pqBRlQVG5e Marin County Transit District \n", + "4 recNOb7pqBRlQVG5e Marin County Transit District \n", + "\n", + " caltrans_district \n", + "0 04 - Oakland \n", + "1 04 - Oakland \n", + "2 04 - Oakland \n", + "3 04 - Oakland \n", + "4 04 - Oakland " + ] + }, + "execution_count": 53, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_with_org.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "de3449eb-88c2-4d46-ae3a-94f881e8cade", + "metadata": {}, + "outputs": [], + "source": [ + "shapes = helpers.import_scheduled_shapes(\n", + " analysis_date,\n", + " columns = [\"shape_array_key\", \"geometry\"],\n", + " get_pandas = True,\n", + " crs = geography_utils.WGS84\n", + " )\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "091ed4c9-6742-4a06-abdf-7b59abe7a948", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7303, 2)" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "shapes.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "dc0f152d-19ac-4687-83e8-8d5fd225005b", + "metadata": {}, + "outputs": [], + "source": [ + "df_with_shape = pd.merge(\n", + " shapes,\n", + " df_with_org,\n", + " on = \"shape_array_key\", # once merged, can drop shape_array_key\n", + " how = \"inner\"\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "id": "1d8bd521-7b7e-438e-9cdd-b23606acd644", + "metadata": {}, + "outputs": [], + "source": [ + "final_df['avg_pct_vp_shape'] = final_df.vp_in_shape / final_df.total_vp * 100" + ] + }, + { + "cell_type": "code", + "execution_count": 69, + "id": "158662df-1735-4a1a-a4c5-7b3d00469311", + "metadata": {}, + "outputs": [], + "source": [ + "final_df['avg_pct_rt_v_sched'] = (final_df.avg_rt_trip_min / final_df.avg_sched_trip_min - 1) * 100" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "7020e160-7e18-4099-8a97-9114eef06652", + "metadata": {}, + "outputs": [], + "source": [ + "final_df = final_df.drop(columns = ['total_vp','vp_in_shape'])" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "2206947a-debc-4dcc-a459-913ec0b26443", + "metadata": {}, + "outputs": [], + "source": [ + "final_df = final_df.rename(columns = {'pings_per_min':'avg_pings_per_min'})" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "9400eb5b-0ff4-4b6a-ade3-8e0fedc29d5b", + "metadata": {}, + "outputs": [], + "source": [ + "final_df = final_df.rename(\n", + " columns = {\"organization_source_record_id\": \"org_id\",\n", + " \"organization_name\": \"agency\", \n", + " \"caltrans_district\": \"district_name\"\n", + " })" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "11b580bd-f482-4a11-be00-7ef88068bac9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(11229, 19)" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "58165517-e414-4843-8ece-b7631d4d7f27", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 11229.00\n", + "mean 2.46\n", + "std 0.55\n", + "min 0.04\n", + "25% 1.97\n", + "50% 2.75\n", + "75% 2.93\n", + "max 3.10\n", + "Name: avg_pings_per_min, dtype: float64" + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.avg_pings_per_min.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "49867873-4a76-49c4-8c95-0918d7468f82", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
shape_array_keygtfs_dataset_keytime_of_dayroute_iddirection_idroute_namecommon_shape_idavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_minn_tripsorg_idagencydistrict_nameavg_pct_vp_shapeavg_pct_rt_v_sched
0f73c9e5905f12d7392b4fff9a31c83cef1b35a50955aeb498533c1c6fdafbe44AM Peak510LONG BEACH BLVD51007857.0079.204.901.6213rec00qSzZL8KqiXAoLong Beach Transit07 - Los Angeles92.6138.95
1f73c9e5905f12d7392b4fff9a31c83cef1b35a50955aeb498533c1c6fdafbe44Early AM510LONG BEACH BLVD51007847.0052.406.001.838rec00qSzZL8KqiXAoLong Beach Transit07 - Los Angeles87.6111.49
2f73c9e5905f12d7392b4fff9a31c83cef1b35a50955aeb498533c1c6fdafbe44Evening510LONG BEACH BLVD51007844.0068.604.801.874rec00qSzZL8KqiXAoLong Beach Transit07 - Los Angeles96.5355.91
3f73c9e5905f12d7392b4fff9a31c83cef1b35a50955aeb498533c1c6fdafbe44Midday510LONG BEACH BLVD51007851.0045.5016.801.6920rec00qSzZL8KqiXAoLong Beach Transit07 - Los Angeles96.03-10.78
4f73c9e5905f12d7392b4fff9a31c83cef1b35a50955aeb498533c1c6fdafbe44PM Peak510LONG BEACH BLVD51007875.0072.4020.301.6516rec00qSzZL8KqiXAoLong Beach Transit07 - Los Angeles95.01-3.47
\n", + "
" + ], + "text/plain": [ + " shape_array_key gtfs_dataset_key \\\n", + "0 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", + "1 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", + "2 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", + "3 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", + "4 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", + "\n", + " time_of_day route_id direction_id route_name common_shape_id \\\n", + "0 AM Peak 51 0 LONG BEACH BLVD 510078 \n", + "1 Early AM 51 0 LONG BEACH BLVD 510078 \n", + "2 Evening 51 0 LONG BEACH BLVD 510078 \n", + "3 Midday 51 0 LONG BEACH BLVD 510078 \n", + "4 PM Peak 51 0 LONG BEACH BLVD 510078 \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min n_trips \\\n", + "0 57.00 79.20 4.90 1.62 13 \n", + "1 47.00 52.40 6.00 1.83 8 \n", + "2 44.00 68.60 4.80 1.87 4 \n", + "3 51.00 45.50 16.80 1.69 20 \n", + "4 75.00 72.40 20.30 1.65 16 \n", + "\n", + " org_id agency district_name avg_pct_vp_shape \\\n", + "0 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 92.61 \n", + "1 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 87.61 \n", + "2 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 96.53 \n", + "3 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 96.03 \n", + "4 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 95.01 \n", + "\n", + " avg_pct_rt_v_sched \n", + "0 38.95 \n", + "1 11.49 \n", + "2 55.91 \n", + "3 -10.78 \n", + "4 -3.47 " + ] + }, + "execution_count": 76, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.drop(columns = ['geometry', 'base64_url']).head()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py b/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py index 285235893..1d8e40ce0 100644 --- a/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py +++ b/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py @@ -28,9 +28,6 @@ def load_trip_speeds(analysis_date): columns=[ "trip_instance_key", "speed_mph", - "route_id", - "time_of_day", - "service_minutes", ]) return df @@ -345,6 +342,9 @@ def vp_usable_metrics(analysis_date:str) -> pd.DataFrame: m1['rt_triptime_w_gtfs_pct'] = (m1.total_min_w_gtfs / m1.rt_service_min) * 100 m1['rt_v_scheduled_trip_time_pct'] = (m1.rt_service_min / m1.service_minutes - 1) * 100 + # Mask rt_triptime_w_gtfs_pct for any values above 100% + m1.rt_triptime_w_gtfs_pct = m1.rt_triptime_w_gtfs_pct.mask(m1.rt_triptime_w_gtfs_pct > 100).fillna(100) + # Save m1.to_parquet(f"{GCS_FILE_PATH}rt_vs_schedule/trip_level_metrics/{analysis_date}_metrics.parquet") diff --git a/rt_scheduled_v_ran/scripts/update_vars.py b/rt_scheduled_v_ran/scripts/update_vars.py index 0c2b7d52b..b898faed8 100644 --- a/rt_scheduled_v_ran/scripts/update_vars.py +++ b/rt_scheduled_v_ran/scripts/update_vars.py @@ -1,6 +1,6 @@ from shared_utils import rt_dates -months = ["nov", "oct", "sep", "aug", +months = ["dec","nov", "oct", "sep", "aug", "jul", "jun", "may", "apr", "mar"] analysis_date_list = [ From 591a7457fd55979c49c95b6d5e01dbfb73c57e70 Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Tue, 2 Jan 2024 22:54:52 +0000 Subject: [PATCH 3/6] ran dec and troubleshoot why routes results are so different --- .../06_vp_usable_exploration.ipynb | 5486 +++++++++++++---- .../logs/rt_v_scheduled_trip_metrics.log | 13 + .../scripts/rt_v_scheduled_trip.py | 3 +- rt_scheduled_v_ran/scripts/update_vars.py | 6 +- 4 files changed, 4457 insertions(+), 1051 deletions(-) diff --git a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb index fcf20b268..4422618bc 100644 --- a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb +++ b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb @@ -26,14 +26,14 @@ "metadata": {}, "outputs": [], "source": [ + "from segment_speed_utils import helpers, sched_rt_utils, wrangle_shapes\n", "from segment_speed_utils.project_vars import (\n", " GCS_FILE_PATH,\n", " PROJECT_CRS,\n", " SEGMENT_GCS,\n", " analysis_date,\n", ")\n", - "from segment_speed_utils import helpers, wrangle_shapes,sched_rt_utils\n", - "from shared_utils import schedule_rt_utils, portfolio_utils, geography_utils" + "from shared_utils import geography_utils, portfolio_utils, rt_dates, schedule_rt_utils" ] }, { @@ -56,62 +56,78 @@ "metadata": {}, "outputs": [], "source": [ - "GCS_PATH = 'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/'" + "GCS_PATH = \"gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/\"" ] }, { "cell_type": "code", "execution_count": 5, - "id": "26185ced-9f3b-4266-a080-0eeed0c0a825", + "id": "be14001d-8599-4496-b267-d028174ebc78", "metadata": {}, "outputs": [], "source": [ - "def check_out(df:pd.DataFrame):\n", - " display(df.spatial_accuracy_pct.describe())\n", - " display(df.pings_per_min.describe())\n", - " display(df.rt_triptime_w_gtfs_pct.describe())\n", - " display(df.rt_v_scheduled_trip_time_pct.describe())" + "months = [\"dec\", \"nov\", \"oct\", \"sep\", \"aug\", \"jul\", \"jun\", \"may\", \"apr\", \"mar\"]\n", + "\n", + "analysis_date_list = [rt_dates.DATES[f\"{m}2023\"] for m in months]" ] }, { "cell_type": "code", "execution_count": 6, - "id": "aba941a7-bc28-4501-9505-0fd064c24a0c", - "metadata": {}, - "outputs": [], - "source": [ - "mar_df = pd.read_parquet(f'{GCS_PATH}2023-03-15_metrics.parquet')" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "89ef7ad6-d728-45d1-a4ae-75203df93cc9", + "id": "78669969-8d99-4049-99cb-2f933016b2d9", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", - " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", - " 'speed_mph', 'route_id', 'time_of_day', 'service_minutes',\n", - " 'pings_per_min', 'spatial_accuracy_pct', 'rt_triptime_w_gtfs_pct',\n", - " 'rt_v_scheduled_trip_time_pct'],\n", - " dtype='object')" + "['2023-12-13',\n", + " '2023-11-15',\n", + " '2023-10-11',\n", + " '2023-09-13',\n", + " '2023-08-15',\n", + " '2023-07-12',\n", + " '2023-06-14',\n", + " '2023-05-17',\n", + " '2023-04-12',\n", + " '2023-03-15']" ] }, - "execution_count": 7, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "mar_df.columns" + "analysis_date_list" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "26185ced-9f3b-4266-a080-0eeed0c0a825", + "metadata": {}, + "outputs": [], + "source": [ + "def check_out(df: pd.DataFrame):\n", + " display(df.spatial_accuracy_pct.describe())\n", + " display(df.pings_per_min.describe())\n", + " display(df.rt_triptime_w_gtfs_pct.describe())\n", + " display(df.rt_v_scheduled_trip_time_pct.describe())" ] }, { "cell_type": "code", "execution_count": 8, + "id": "aba941a7-bc28-4501-9505-0fd064c24a0c", + "metadata": {}, + "outputs": [], + "source": [ + "mar_df = pd.read_parquet(f\"{GCS_PATH}2023-03-15_metrics.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 9, "id": "af7c69e1-e363-42f8-bf1c-f9ef2ba141e2", "metadata": {}, "outputs": [ @@ -190,17 +206,17 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "id": "26701c9c-669d-4e09-8c38-5ece391f5889", "metadata": {}, "outputs": [], "source": [ - "apr_df = pd.read_parquet(f'{GCS_PATH}2023-04-12_metrics.parquet')" + "apr_df = pd.read_parquet(f\"{GCS_PATH}2023-04-12_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "id": "509ec31f-c521-4149-be35-b4548a9e1666", "metadata": {}, "outputs": [ @@ -279,17 +295,17 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 12, "id": "dcf7252b-eb12-4d76-bacf-ceb34d1e0c0a", "metadata": {}, "outputs": [], "source": [ - "may_df = pd.read_parquet(f'{GCS_PATH}2023-05-17_metrics.parquet')" + "may_df = pd.read_parquet(f\"{GCS_PATH}2023-05-17_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 13, "id": "151c9d6f-ce92-4ab2-ad1c-525ca627dbfc", "metadata": {}, "outputs": [ @@ -368,17 +384,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 14, "id": "976f786d-c3e5-44ae-991c-db7d95d5169a", "metadata": {}, "outputs": [], "source": [ - "jun_df = pd.read_parquet(f'{GCS_PATH}2023-06-14_metrics.parquet')" + "jun_df = pd.read_parquet(f\"{GCS_PATH}2023-06-14_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 15, "id": "c87d9f5e-ae39-4c4a-b313-037fc118dce4", "metadata": {}, "outputs": [ @@ -457,17 +473,17 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 16, "id": "e55cb702-f2fa-4350-bcd9-cb057a7edbdc", "metadata": {}, "outputs": [], "source": [ - "jul_df = pd.read_parquet(f'{GCS_PATH}2023-07-12_metrics.parquet')" + "jul_df = pd.read_parquet(f\"{GCS_PATH}2023-07-12_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 17, "id": "a287bc73-76ff-4235-8a72-671045bac935", "metadata": {}, "outputs": [ @@ -546,47 +562,47 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 18, "id": "21b4d1b8-598d-410a-9c73-69fb92d88c1b", "metadata": {}, "outputs": [], "source": [ - "aug_df = pd.read_parquet(f'{GCS_PATH}2023-08-15_metrics.parquet')" + "aug_df = pd.read_parquet(f\"{GCS_PATH}2023-08-15_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 19, "id": "1cda1e4d-25f7-4836-b8ab-7b7997edb446", "metadata": {}, "outputs": [], "source": [ - "sept_df = pd.read_parquet(f'{GCS_PATH}2023-09-13_metrics.parquet')" + "sept_df = pd.read_parquet(f\"{GCS_PATH}2023-09-13_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 20, "id": "497e04c0-1c33-4fdf-9cfc-94e2a925ebd3", "metadata": {}, "outputs": [], "source": [ - "oct_df = pd.read_parquet(f'{GCS_PATH}2023-10-11_metrics.parquet')" + "oct_df = pd.read_parquet(f\"{GCS_PATH}2023-10-11_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 21, "id": "07e73d1c-5b24-43a8-852e-35cff4f416e6", "metadata": {}, "outputs": [], "source": [ - "nov_df = pd.read_parquet(f'{GCS_PATH}2023-11-15_metrics.parquet')" + "nov_df = pd.read_parquet(f\"{GCS_PATH}2023-11-15_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 22, "id": "38f00976-c428-4c3c-a153-eb9035a709ef", "metadata": {}, "outputs": [ @@ -665,7 +681,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 23, "id": "8762a9c1-9a3b-490a-a447-4481b59d930c", "metadata": {}, "outputs": [ @@ -675,7 +691,7 @@ "30287" ] }, - "execution_count": 22, + "execution_count": 23, "metadata": {}, "output_type": "execute_result" } @@ -686,7 +702,7 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": 24, "id": "e3d9b4a9-70e1-40aa-b2eb-f2e23b3a4d1f", "metadata": {}, "outputs": [ @@ -696,7 +712,7 @@ "86832" ] }, - "execution_count": 23, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } @@ -707,7 +723,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": 25, "id": "c2963bc8-5535-4dce-af95-767317c2201c", "metadata": {}, "outputs": [ @@ -744,80 +760,130 @@ "nov_df.info()" ] }, - { - "cell_type": "markdown", - "id": "a227a3dd-7761-46d0-bcc2-2a1e0f18db09", - "metadata": {}, - "source": [ - "### Test aggregating with November" - ] - }, { "cell_type": "code", - "execution_count": 25, - "id": "cfb7de65-5c85-4b1f-821d-2cabea0a8184", + "execution_count": 26, + "id": "b38de56d-57a5-4f5f-a214-5bcd9dde42f7", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'2023-12-13'" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "analysis_date = \"2023-11-15\"" + "analysis_date" ] }, { "cell_type": "code", - "execution_count": 26, - "id": "330395c9-176b-4ec0-a036-b1598f5d2b55", + "execution_count": 27, + "id": "873ac459-c77f-4887-a003-01b572facbda", "metadata": {}, "outputs": [], "source": [ - "nov_df.rt_triptime_w_gtfs_pct = nov_df.rt_triptime_w_gtfs_pct.mask(nov_df.rt_triptime_w_gtfs_pct > 100).fillna(100)" + "dec_df = pd.read_parquet(f\"{GCS_PATH}2023-12-13_metrics.parquet\")" ] }, { "cell_type": "code", - "execution_count": 27, - "id": "fc6630d9-bdfe-42f1-86e9-2979143aac97", + "execution_count": 28, + "id": "bdc1e636-c102-43d6-a2b6-4150d0c8c4db", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", - " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", - " 'speed_mph', 'route_id', 'time_of_day', 'service_minutes',\n", - " 'pings_per_min', 'spatial_accuracy_pct', 'rt_triptime_w_gtfs_pct',\n", - " 'rt_v_scheduled_trip_time_pct'],\n", - " dtype='object')" + "count 74609.00\n", + "mean 93.55\n", + "std 13.20\n", + "min 0.00\n", + "25% 94.23\n", + "50% 99.64\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" ] }, - "execution_count": 27, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 86128.00\n", + "mean 2.44\n", + "std 0.60\n", + "min 0.00\n", + "25% 2.05\n", + "50% 2.67\n", + "75% 2.94\n", + "max 5.15\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 86128.00\n", + "mean 95.94\n", + "std 11.70\n", + "min 0.10\n", + "25% 98.36\n", + "50% 99.65\n", + "75% 100.00\n", + "max 100.00\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 76878.00\n", + "mean 44.40\n", + "std 272.99\n", + "min -86.89\n", + "25% 11.36\n", + "50% 25.56\n", + "75% 45.00\n", + "max 18873.69\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "nov_df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "be0fb1be-060d-4311-856e-5699e15e0c8c", - "metadata": {}, - "outputs": [], - "source": [ - "nov_df2= nov_df.drop(columns = ['route_id','service_minutes', 'time_of_day'])" + "check_out(dec_df)" ] }, { "cell_type": "code", "execution_count": 29, - "id": "c88f5faa-f3a2-4373-ad93-96fe61c7bbf3", + "id": "26e59c78-1e6b-428e-acd2-746b34be8311", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'gs://calitp-analytics-data/data-analyses/rt_segment_speeds/trip_summary/trip_speeds_2023-11-15.parquet'" + "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", + " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", + " 'speed_mph', 'service_minutes', 'pings_per_min', 'spatial_accuracy_pct',\n", + " 'rt_triptime_w_gtfs_pct', 'rt_v_scheduled_trip_time_pct'],\n", + " dtype='object')" ] }, "execution_count": 29, @@ -826,52 +892,60 @@ } ], "source": [ - "f\"{SEGMENT_GCS}trip_summary/trip_speeds_2023-11-15.parquet\"" + "dec_df.columns" ] }, { "cell_type": "code", "execution_count": 30, - "id": "dbffcd12-64f3-490e-8854-18f518e6d88e", + "id": "cb1def78-76b0-4d8f-9a74-258987c62ce2", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "count 76878.00\n", + "mean 44.40\n", + "std 272.99\n", + "min -86.89\n", + "25% 11.36\n", + "50% 25.56\n", + "75% 45.00\n", + "max 18873.69\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "speeds = pd.read_parquet(\n", - " f\"{SEGMENT_GCS}trip_summary/trip_speeds_2023-11-15.parquet\")" + "dec_df.rt_v_scheduled_trip_time_pct.describe()" ] }, { "cell_type": "markdown", - "id": "90958a55-27ca-447f-8304-37773081c973", + "id": "857348c6-fcfd-4569-a80d-fd1e400c84f9", "metadata": {}, "source": [ - "#### https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/avg_speeds_by_segment.py#L135" + "### See why some trips have such crazy rt_v_scheduled_trip_time_pct" ] }, { "cell_type": "code", "execution_count": 31, - "id": "84cc7f28-1289-4df1-8073-cd6e71ed77a6", + "id": "6bffdcf2-29cf-4c55-8465-75ee7e87fa1a", "metadata": {}, "outputs": [], "source": [ - "keep_cols = [\n", - " \"gtfs_dataset_key\",\n", - " \"direction_id\", \n", - " \"route_id\", \"route_short_name\", \"route_long_name\", \"route_desc\", \"trip_instance_key\"\n", - " ]\n", - " \n", - "crosswalk = helpers.import_scheduled_trips(\n", - " analysis_date, \n", - " columns = keep_cols, \n", - " get_pandas = True\n", - " )" + "preview_cols = [\"rt_service_min\", \"service_minutes\", \"rt_v_scheduled_trip_time_pct\"]" ] }, { "cell_type": "code", "execution_count": 32, - "id": "0b4b0f0d-cc15-4f3e-b8c7-df8579831539", + "id": "a92ed528-9953-4cdb-8801-4b89e39b8feb", "metadata": {}, "outputs": [ { @@ -895,153 +969,25 @@ " \n", " \n", " \n", - " trip_instance_key\n", " rt_service_min\n", - " min_w_atleast2_trip_updates\n", - " total_pings_for_trip\n", - " total_min_w_gtfs\n", - " total_vp\n", - " vp_in_shape\n", - " speed_mph\n", - " route_id\n", - " time_of_day\n", " service_minutes\n", - " pings_per_min\n", - " spatial_accuracy_pct\n", - " rt_triptime_w_gtfs_pct\n", " rt_v_scheduled_trip_time_pct\n", " \n", " \n", " \n", " \n", - " 0\n", - " a3647253d4cc8f847e972ed8c83d1b9b\n", - " 22.62\n", - " 22\n", - " 65\n", - " 23\n", - " NaN\n", - " NaN\n", - " NaN\n", - " None\n", - " None\n", - " NaN\n", - " 2.87\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " \n", - " \n", - " 1\n", - " 7029f592047be84e5bb1d28d299be35d\n", - " 16.93\n", - " 16\n", - " 48\n", - " 17\n", - " NaN\n", - " NaN\n", - " NaN\n", - " None\n", - " None\n", - " NaN\n", - " 2.83\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " \n", - " \n", - " 2\n", - " 1040196034fd380818a2cbcf1eafd9b8\n", - " 40.95\n", - " 40\n", - " 118\n", - " 41\n", - " NaN\n", - " NaN\n", - " NaN\n", - " None\n", - " None\n", - " NaN\n", - " 2.88\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " \n", - " \n", - " 3\n", - " 5c6d43026fe5f02e5b31c18fcb8c0bf5\n", - " 62.95\n", - " 61\n", - " 176\n", - " 63\n", - " NaN\n", - " NaN\n", - " NaN\n", - " None\n", - " None\n", - " NaN\n", - " 2.80\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " \n", - " \n", - " 4\n", - " ee2f1fd83d87e85119f66014da5d74d5\n", - " 14.07\n", - " 13\n", - " 37\n", - " 15\n", - " NaN\n", - " NaN\n", - " NaN\n", - " None\n", - " None\n", - " NaN\n", - " 2.63\n", - " NaN\n", - " 100.00\n", - " NaN\n", + " 39462\n", + " 1341.02\n", + " 12.00\n", + " 11075.14\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "0 a3647253d4cc8f847e972ed8c83d1b9b 22.62 \n", - "1 7029f592047be84e5bb1d28d299be35d 16.93 \n", - "2 1040196034fd380818a2cbcf1eafd9b8 40.95 \n", - "3 5c6d43026fe5f02e5b31c18fcb8c0bf5 62.95 \n", - "4 ee2f1fd83d87e85119f66014da5d74d5 14.07 \n", - "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "0 22 65 23 \n", - "1 16 48 17 \n", - "2 40 118 41 \n", - "3 61 176 63 \n", - "4 13 37 15 \n", - "\n", - " total_vp vp_in_shape speed_mph route_id time_of_day service_minutes \\\n", - "0 NaN NaN NaN None None NaN \n", - "1 NaN NaN NaN None None NaN \n", - "2 NaN NaN NaN None None NaN \n", - "3 NaN NaN NaN None None NaN \n", - "4 NaN NaN NaN None None NaN \n", - "\n", - " pings_per_min spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", - "0 2.87 NaN 100.00 \n", - "1 2.83 NaN 100.00 \n", - "2 2.88 NaN 100.00 \n", - "3 2.80 NaN 100.00 \n", - "4 2.63 NaN 100.00 \n", - "\n", - " rt_v_scheduled_trip_time_pct \n", - "0 NaN \n", - "1 NaN \n", - "2 NaN \n", - "3 NaN \n", - "4 NaN " + " rt_service_min service_minutes rt_v_scheduled_trip_time_pct\n", + "39462 1341.02 12.00 11075.14" ] }, "execution_count": 32, @@ -1050,13 +996,13 @@ } ], "source": [ - "nov_df.head()" + "dec_df.loc[dec_df.rt_v_scheduled_trip_time_pct > 10000][preview_cols].sample()" ] }, { "cell_type": "code", "execution_count": 33, - "id": "1e89939b-1bca-4670-955f-013c16949ec9", + "id": "f242545c-0270-46ba-8f9e-1414d215d146", "metadata": {}, "outputs": [ { @@ -1080,52 +1026,235 @@ " \n", " \n", " \n", - " schedule_gtfs_dataset_key\n", - " direction_id\n", - " route_id\n", - " route_short_name\n", - " route_long_name\n", - " route_desc\n", - " trip_instance_key\n", + " rt_service_min\n", + " service_minutes\n", + " rt_v_scheduled_trip_time_pct\n", " \n", " \n", " \n", " \n", - " 0\n", - " 1770249a5a2e770ca90628434d4934b1\n", - " 1.00\n", - " 3402\n", - " Route 11\n", - " Route 11\n", - " PACIFIC VIEW MALL via TELEPHONE RD\n", - " ed6aa732d8c47e4df3a2f7ba9d24415c\n", + " 39307\n", + " 1297.03\n", + " 12.00\n", + " 10708.61\n", " \n", " \n", - " 1\n", - " 1770249a5a2e770ca90628434d4934b1\n", - " 1.00\n", - " 3402\n", - " Route 11\n", - " Route 11\n", - " PACIFIC VIEW MALL via TELEPHONE RD\n", - " 72b2c8bdebbf14039af7c57133cc23e5\n", + " 39308\n", + " 1309.60\n", + " 12.00\n", + " 10813.33\n", + " \n", + " \n", + " 39309\n", + " 1300.33\n", + " 12.00\n", + " 10736.11\n", + " \n", + " \n", + " 39359\n", + " 1390.38\n", + " 12.00\n", + " 11486.53\n", + " \n", + " \n", + " 39360\n", + " 1408.93\n", + " 12.00\n", + " 11641.11\n", + " \n", + " \n", + " 39361\n", + " 1402.70\n", + " 12.00\n", + " 11589.17\n", + " \n", + " \n", + " 39462\n", + " 1341.02\n", + " 12.00\n", + " 11075.14\n", + " \n", + " \n", + " 39463\n", + " 1363.98\n", + " 12.00\n", + " 11266.53\n", + " \n", + " \n", + " 39464\n", + " 1366.92\n", + " 12.00\n", + " 11290.97\n", + " \n", + " \n", + " 39602\n", + " 1364.30\n", + " 12.00\n", + " 11269.17\n", + " \n", + " \n", + " 39603\n", + " 1361.70\n", + " 12.00\n", + " 11247.50\n", + " \n", + " \n", + " 39613\n", + " 1326.03\n", + " 12.00\n", + " 10950.28\n", + " \n", + " \n", + " 39636\n", + " 1235.33\n", + " 12.00\n", + " 10194.44\n", + " \n", + " \n", + " 39646\n", + " 1215.97\n", + " 12.00\n", + " 10033.06\n", + " \n", + " \n", + " 39647\n", + " 1225.32\n", + " 12.00\n", + " 10110.97\n", + " \n", + " \n", + " 39753\n", + " 1328.27\n", + " 12.00\n", + " 10968.89\n", + " \n", + " \n", + " 39754\n", + " 1343.33\n", + " 12.00\n", + " 11094.44\n", + " \n", + " \n", + " 39765\n", + " 1333.98\n", + " 12.00\n", + " 11016.53\n", + " \n", + " \n", + " 39783\n", + " 1440.67\n", + " 12.00\n", + " 11905.56\n", + " \n", + " \n", + " 39837\n", + " 1440.67\n", + " 12.00\n", + " 11905.56\n", + " \n", + " \n", + " 39861\n", + " 1435.28\n", + " 12.00\n", + " 11860.69\n", + " \n", + " \n", + " 39897\n", + " 1440.67\n", + " 12.00\n", + " 11905.56\n", + " \n", + " \n", + " 39954\n", + " 1343.98\n", + " 12.00\n", + " 11099.86\n", + " \n", + " \n", + " 39955\n", + " 1374.97\n", + " 12.00\n", + " 11358.06\n", + " \n", + " \n", + " 39956\n", + " 1387.90\n", + " 12.00\n", + " 11465.83\n", + " \n", + " \n", + " 40037\n", + " 1423.93\n", + " 12.00\n", + " 11766.11\n", + " \n", + " \n", + " 40089\n", + " 1385.35\n", + " 12.00\n", + " 11444.58\n", + " \n", + " \n", + " 40090\n", + " 1388.03\n", + " 12.00\n", + " 11466.94\n", + " \n", + " \n", + " 40091\n", + " 1379.05\n", + " 12.00\n", + " 11392.08\n", + " \n", + " \n", + " 40196\n", + " 1440.67\n", + " 12.00\n", + " 11905.56\n", + " \n", + " \n", + " 75715\n", + " 7020.27\n", + " 37.00\n", + " 18873.69\n", " \n", " \n", "\n", "" ], "text/plain": [ - " schedule_gtfs_dataset_key direction_id route_id route_short_name \\\n", - "0 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", - "1 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", - "\n", - " route_long_name route_desc \\\n", - "0 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", - "1 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", - "\n", - " trip_instance_key \n", - "0 ed6aa732d8c47e4df3a2f7ba9d24415c \n", - "1 72b2c8bdebbf14039af7c57133cc23e5 " + " rt_service_min service_minutes rt_v_scheduled_trip_time_pct\n", + "39307 1297.03 12.00 10708.61\n", + "39308 1309.60 12.00 10813.33\n", + "39309 1300.33 12.00 10736.11\n", + "39359 1390.38 12.00 11486.53\n", + "39360 1408.93 12.00 11641.11\n", + "39361 1402.70 12.00 11589.17\n", + "39462 1341.02 12.00 11075.14\n", + "39463 1363.98 12.00 11266.53\n", + "39464 1366.92 12.00 11290.97\n", + "39602 1364.30 12.00 11269.17\n", + "39603 1361.70 12.00 11247.50\n", + "39613 1326.03 12.00 10950.28\n", + "39636 1235.33 12.00 10194.44\n", + "39646 1215.97 12.00 10033.06\n", + "39647 1225.32 12.00 10110.97\n", + "39753 1328.27 12.00 10968.89\n", + "39754 1343.33 12.00 11094.44\n", + "39765 1333.98 12.00 11016.53\n", + "39783 1440.67 12.00 11905.56\n", + "39837 1440.67 12.00 11905.56\n", + "39861 1435.28 12.00 11860.69\n", + "39897 1440.67 12.00 11905.56\n", + "39954 1343.98 12.00 11099.86\n", + "39955 1374.97 12.00 11358.06\n", + "39956 1387.90 12.00 11465.83\n", + "40037 1423.93 12.00 11766.11\n", + "40089 1385.35 12.00 11444.58\n", + "40090 1388.03 12.00 11466.94\n", + "40091 1379.05 12.00 11392.08\n", + "40196 1440.67 12.00 11905.56\n", + "75715 7020.27 37.00 18873.69" ] }, "execution_count": 33, @@ -1134,29 +1263,144 @@ } ], "source": [ - "crosswalk.head(2)" + "dec_df.loc[dec_df.rt_v_scheduled_trip_time_pct > 10000][preview_cols]" + ] + }, + { + "cell_type": "markdown", + "id": "a227a3dd-7761-46d0-bcc2-2a1e0f18db09", + "metadata": {}, + "source": [ + "### Test aggregating with Dec" + ] + }, + { + "cell_type": "markdown", + "id": "90958a55-27ca-447f-8304-37773081c973", + "metadata": {}, + "source": [ + "#### Step 1: add missing cols\n", + "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/avg_speeds_by_segment.py#L135" ] }, { "cell_type": "code", "execution_count": 34, - "id": "e862de9a-47dc-42e0-aec3-81645a09afea", + "id": "84cc7f28-1289-4df1-8073-cd6e71ed77a6", "metadata": {}, "outputs": [], "source": [ - "common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)" + "group_cols = [\"trip_instance_key\"]\n", + "\n", + "keep_cols = [\n", + " \"gtfs_dataset_key\",\n", + " \"direction_id\",\n", + " \"route_id\",\n", + " \"route_short_name\",\n", + " \"route_long_name\",\n", + " \"route_desc\",\n", + "] + group_cols\n", + "\n", + "crosswalk = helpers.import_scheduled_trips(\n", + " analysis_date, columns=keep_cols, get_pandas=True\n", + ")" ] }, { "cell_type": "code", "execution_count": 35, - "id": "1dec5d15-718e-467f-b7a6-cfc5ad594d6e", + "id": "0b4b0f0d-cc15-4f3e-b8c7-df8579831539", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphservice_minutespings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pct
05d25a4366c173007d9c29fdead0299d774.037321674216.00148.0021.0158.002.9268.5299.9527.64
14b72b80fc9cfe5e613bab95585cbe7e423.4521592359.0019.0054.9558.002.5232.2098.08-59.57
\n", + "
" + ], "text/plain": [ - "'2023-11-15'" + " trip_instance_key rt_service_min \\\n", + "0 5d25a4366c173007d9c29fdead0299d7 74.03 \n", + "1 4b72b80fc9cfe5e613bab95585cbe7e4 23.45 \n", + "\n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "0 73 216 74 \n", + "1 21 59 23 \n", + "\n", + " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", + "0 216.00 148.00 21.01 58.00 2.92 \n", + "1 59.00 19.00 54.95 58.00 2.52 \n", + "\n", + " spatial_accuracy_pct rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \n", + "0 68.52 99.95 27.64 \n", + "1 32.20 98.08 -59.57 " ] }, "execution_count": 35, @@ -1165,13 +1409,13 @@ } ], "source": [ - "analysis_date" + "dec_df.head(2)" ] }, { "cell_type": "code", "execution_count": 36, - "id": "d6578b42-82b3-4edf-b97b-b4092c6f17ba", + "id": "1e89939b-1bca-4670-955f-013c16949ec9", "metadata": {}, "outputs": [ { @@ -1196,41 +1440,51 @@ " \n", " \n", " schedule_gtfs_dataset_key\n", - " route_id\n", " direction_id\n", - " common_shape_id\n", - " shape_array_key\n", + " route_id\n", + " route_short_name\n", + " route_long_name\n", + " route_desc\n", + " trip_instance_key\n", " \n", " \n", " \n", " \n", " 0\n", - " 014d0998350083249a9eb310635548c2\n", - " 10866826\n", + " 1770249a5a2e770ca90628434d4934b1\n", " 1.00\n", - " 10866826:1\n", - " a7f294e50a9a8ff179d4c82cd9136625\n", + " 3402\n", + " Route 11\n", + " Route 11\n", + " PACIFIC VIEW MALL via TELEPHONE RD\n", + " 595914b0c046d093f4fd5f9e88ab5635\n", " \n", " \n", " 1\n", - " 014d0998350083249a9eb310635548c2\n", - " 10866849\n", + " 1770249a5a2e770ca90628434d4934b1\n", " 1.00\n", - " 10866849:1\n", - " af9aea31d387f59024bf25fb1d9334a3\n", + " 3402\n", + " Route 11\n", + " Route 11\n", + " PACIFIC VIEW MALL via TELEPHONE RD\n", + " 5ad8f3475c016f517dcb2611ccd69764\n", " \n", " \n", "\n", "" ], "text/plain": [ - " schedule_gtfs_dataset_key route_id direction_id common_shape_id \\\n", - "0 014d0998350083249a9eb310635548c2 10866826 1.00 10866826:1 \n", - "1 014d0998350083249a9eb310635548c2 10866849 1.00 10866849:1 \n", + " schedule_gtfs_dataset_key direction_id route_id route_short_name \\\n", + "0 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", + "1 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", "\n", - " shape_array_key \n", - "0 a7f294e50a9a8ff179d4c82cd9136625 \n", - "1 af9aea31d387f59024bf25fb1d9334a3 " + " route_long_name route_desc \\\n", + "0 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", + "1 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", + "\n", + " trip_instance_key \n", + "0 595914b0c046d093f4fd5f9e88ab5635 \n", + "1 5ad8f3475c016f517dcb2611ccd69764 " ] }, "execution_count": 36, @@ -1239,38 +1493,62 @@ } ], "source": [ - "common_shape.head(2)" + "crosswalk.head(2)" ] }, { "cell_type": "code", "execution_count": 37, - "id": "aba49709-75e1-43e2-84a0-953fc3219206", + "id": "7fabab72-c1a4-468f-83d6-b1e066014129", "metadata": {}, "outputs": [], "source": [ - "crosswalk2 = pd.merge(\n", - " crosswalk,\n", - " common_shape,\n", - " on = [\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n", - " how = \"inner\"\n", - " ).astype({\"direction_id\": \"Int64\"})" + "# dec_df = dec_df.drop(columns=[\"service_minutes\"])" ] }, { "cell_type": "code", "execution_count": 38, - "id": "f23a0e8a-e7f3-411e-a6c0-784c87e559ed", + "id": "e12360a1-11fc-4e01-b2c9-2dbe0f49ee3d", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'2023-12-13'" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)" + "analysis_date" + ] + }, + { + "cell_type": "markdown", + "id": "ec25f2ca-bc4b-44aa-bf9c-4689c47eff29", + "metadata": {}, + "source": [ + "#### Don't need `common_shape_id`" ] }, { "cell_type": "code", "execution_count": 39, - "id": "99381d4c-b338-4e04-a351-b077c6b94224", + "id": "e862de9a-47dc-42e0-aec3-81645a09afea", + "metadata": {}, + "outputs": [], + "source": [ + "common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "d6578b42-82b3-4edf-b97b-b4092c6f17ba", "metadata": {}, "outputs": [ { @@ -1294,118 +1572,93 @@ " \n", " \n", " \n", - " trip_instance_key\n", - " service_hours\n", - " trip_first_departure_datetime_pacific\n", - " time_of_day\n", - " service_minutes\n", + " schedule_gtfs_dataset_key\n", + " route_id\n", + " direction_id\n", + " common_shape_id\n", + " shape_array_key\n", " \n", " \n", " \n", " \n", " 0\n", - " ed6aa732d8c47e4df3a2f7ba9d24415c\n", - " 0.60\n", - " 2023-11-15 15:10:00\n", - " PM Peak\n", - " 36.00\n", + " 014d0998350083249a9eb310635548c2\n", + " 10866826\n", + " 1.00\n", + " 10866826:1\n", + " a7f294e50a9a8ff179d4c82cd9136625\n", " \n", " \n", " 1\n", - " 72b2c8bdebbf14039af7c57133cc23e5\n", - " 0.55\n", - " 2023-11-15 16:40:00\n", - " PM Peak\n", - " 33.00\n", - " \n", - " \n", - " 2\n", - " e829a24393004fd97a0c6da2efd2b38b\n", - " 0.53\n", - " 2023-11-15 06:40:00\n", - " Early AM\n", - " 32.00\n", - " \n", - " \n", - " 3\n", - " 05c66c28220d0a7982980194c8d479f6\n", - " 0.55\n", - " 2023-11-15 19:05:00\n", - " PM Peak\n", - " 33.00\n", - " \n", - " \n", - " 4\n", - " 169af6136355af8248800d0c1c0916b7\n", - " 0.55\n", - " 2023-11-15 17:10:00\n", - " PM Peak\n", - " 33.00\n", + " 014d0998350083249a9eb310635548c2\n", + " 10866849\n", + " 1.00\n", + " 10866849:1\n", + " af9aea31d387f59024bf25fb1d9334a3\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trip_instance_key service_hours \\\n", - "0 ed6aa732d8c47e4df3a2f7ba9d24415c 0.60 \n", - "1 72b2c8bdebbf14039af7c57133cc23e5 0.55 \n", - "2 e829a24393004fd97a0c6da2efd2b38b 0.53 \n", - "3 05c66c28220d0a7982980194c8d479f6 0.55 \n", - "4 169af6136355af8248800d0c1c0916b7 0.55 \n", + " schedule_gtfs_dataset_key route_id direction_id common_shape_id \\\n", + "0 014d0998350083249a9eb310635548c2 10866826 1.00 10866826:1 \n", + "1 014d0998350083249a9eb310635548c2 10866849 1.00 10866849:1 \n", "\n", - " trip_first_departure_datetime_pacific time_of_day service_minutes \n", - "0 2023-11-15 15:10:00 PM Peak 36.00 \n", - "1 2023-11-15 16:40:00 PM Peak 33.00 \n", - "2 2023-11-15 06:40:00 Early AM 32.00 \n", - "3 2023-11-15 19:05:00 PM Peak 33.00 \n", - "4 2023-11-15 17:10:00 PM Peak 33.00 " + " shape_array_key \n", + "0 a7f294e50a9a8ff179d4c82cd9136625 \n", + "1 af9aea31d387f59024bf25fb1d9334a3 " ] }, - "execution_count": 39, + "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "time_of_day.head()" + "common_shape.head(2)" ] }, { "cell_type": "code", - "execution_count": 40, - "id": "3defcf73-8930-48c7-ae05-7e3ec263887a", + "execution_count": 41, + "id": "aba49709-75e1-43e2-84a0-953fc3219206", "metadata": {}, "outputs": [], "source": [ - "crosswalk2 = portfolio_utils.add_route_name(\n", - " crosswalk2\n", - " ).drop(columns = [\"route_short_name\", \"route_long_name\", \"route_desc\"])\n" + "crosswalk2 = pd.merge(\n", + " crosswalk,\n", + " common_shape,\n", + " on=[\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n", + " how=\"inner\",\n", + ").astype({\"direction_id\": \"Int64\"})" ] }, { "cell_type": "code", - "execution_count": 41, - "id": "67e1a70f-d21c-4a9d-b4a3-2ebde459b4e1", + "execution_count": 42, + "id": "f23a0e8a-e7f3-411e-a6c0-784c87e559ed", "metadata": {}, "outputs": [], "source": [ - "df = pd.merge(\n", - " nov_df2,\n", - " crosswalk2,\n", - " on = \"trip_instance_key\",\n", - " how = \"left\",\n", - " ).merge(\n", - " time_of_day,\n", - " on = \"trip_instance_key\",\n", - " how = \"left\"\n", - " )" + "time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)" ] }, { "cell_type": "code", - "execution_count": 42, - "id": "4576ab1e-2bde-4420-8254-7c8e2a2700d5", + "execution_count": 43, + "id": "51be9b61-db22-4c1f-8f96-6b339a16e991", + "metadata": {}, + "outputs": [], + "source": [ + "# Drop service mins since we already ahve this and it matches\n", + "time_of_day = time_of_day.drop(columns=[\"service_minutes\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "99381d4c-b338-4e04-a351-b077c6b94224", "metadata": {}, "outputs": [ { @@ -1430,278 +1683,102 @@ " \n", " \n", " trip_instance_key\n", - " rt_service_min\n", - " min_w_atleast2_trip_updates\n", - " total_pings_for_trip\n", - " total_min_w_gtfs\n", - " total_vp\n", - " vp_in_shape\n", - " speed_mph\n", - " pings_per_min\n", - " spatial_accuracy_pct\n", - " rt_triptime_w_gtfs_pct\n", - " rt_v_scheduled_trip_time_pct\n", - " schedule_gtfs_dataset_key\n", - " direction_id\n", - " route_id\n", - " common_shape_id\n", - " shape_array_key\n", - " route_name_used\n", " service_hours\n", " trip_first_departure_datetime_pacific\n", " time_of_day\n", - " service_minutes\n", " \n", " \n", " \n", " \n", " 0\n", - " a3647253d4cc8f847e972ed8c83d1b9b\n", - " 22.62\n", - " 22\n", - " 65\n", - " 23\n", - " NaN\n", - " NaN\n", - " NaN\n", - " 2.87\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " NaN\n", - " <NA>\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaT\n", - " NaN\n", - " NaN\n", + " 595914b0c046d093f4fd5f9e88ab5635\n", + " 0.55\n", + " 2023-12-13 18:35:00\n", + " PM Peak\n", " \n", " \n", " 1\n", - " 7029f592047be84e5bb1d28d299be35d\n", - " 16.93\n", - " 16\n", - " 48\n", - " 17\n", - " NaN\n", - " NaN\n", - " NaN\n", - " 2.83\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " NaN\n", - " <NA>\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaT\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 2\n", - " 1040196034fd380818a2cbcf1eafd9b8\n", - " 40.95\n", - " 40\n", - " 118\n", - " 41\n", - " NaN\n", - " NaN\n", - " NaN\n", - " 2.88\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " NaN\n", - " <NA>\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaT\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 3\n", - " 5c6d43026fe5f02e5b31c18fcb8c0bf5\n", - " 62.95\n", - " 61\n", - " 176\n", - " 63\n", - " NaN\n", - " NaN\n", - " NaN\n", - " 2.80\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " NaN\n", - " <NA>\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaT\n", - " NaN\n", - " NaN\n", - " \n", - " \n", - " 4\n", - " ee2f1fd83d87e85119f66014da5d74d5\n", - " 14.07\n", - " 13\n", - " 37\n", - " 15\n", - " NaN\n", - " NaN\n", - " NaN\n", - " 2.63\n", - " NaN\n", - " 100.00\n", - " NaN\n", - " NaN\n", - " <NA>\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaN\n", - " NaT\n", - " NaN\n", - " NaN\n", + " 5ad8f3475c016f517dcb2611ccd69764\n", + " 0.55\n", + " 2023-12-13 19:05:00\n", + " PM Peak\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "0 a3647253d4cc8f847e972ed8c83d1b9b 22.62 \n", - "1 7029f592047be84e5bb1d28d299be35d 16.93 \n", - "2 1040196034fd380818a2cbcf1eafd9b8 40.95 \n", - "3 5c6d43026fe5f02e5b31c18fcb8c0bf5 62.95 \n", - "4 ee2f1fd83d87e85119f66014da5d74d5 14.07 \n", + " trip_instance_key service_hours \\\n", + "0 595914b0c046d093f4fd5f9e88ab5635 0.55 \n", + "1 5ad8f3475c016f517dcb2611ccd69764 0.55 \n", "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "0 22 65 23 \n", - "1 16 48 17 \n", - "2 40 118 41 \n", - "3 61 176 63 \n", - "4 13 37 15 \n", - "\n", - " total_vp vp_in_shape speed_mph pings_per_min spatial_accuracy_pct \\\n", - "0 NaN NaN NaN 2.87 NaN \n", - "1 NaN NaN NaN 2.83 NaN \n", - "2 NaN NaN NaN 2.88 NaN \n", - "3 NaN NaN NaN 2.80 NaN \n", - "4 NaN NaN NaN 2.63 NaN \n", - "\n", - " rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \\\n", - "0 100.00 NaN \n", - "1 100.00 NaN \n", - "2 100.00 NaN \n", - "3 100.00 NaN \n", - "4 100.00 NaN \n", - "\n", - " schedule_gtfs_dataset_key direction_id route_id common_shape_id \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " shape_array_key route_name_used service_hours \\\n", - "0 NaN NaN NaN \n", - "1 NaN NaN NaN \n", - "2 NaN NaN NaN \n", - "3 NaN NaN NaN \n", - "4 NaN NaN NaN \n", - "\n", - " trip_first_departure_datetime_pacific time_of_day service_minutes \n", - "0 NaT NaN NaN \n", - "1 NaT NaN NaN \n", - "2 NaT NaN NaN \n", - "3 NaT NaN NaN \n", - "4 NaT NaN NaN " - ] - }, - "execution_count": 42, + " trip_first_departure_datetime_pacific time_of_day \n", + "0 2023-12-13 18:35:00 PM Peak \n", + "1 2023-12-13 19:05:00 PM Peak " + ] + }, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.head()" + "time_of_day.head(2)" ] }, { - "cell_type": "markdown", - "id": "d2ff8a3d-d3f5-42b4-b096-24b33b9842ca", + "cell_type": "code", + "execution_count": 45, + "id": "3defcf73-8930-48c7-ae05-7e3ec263887a", "metadata": {}, + "outputs": [], "source": [ - "#### https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/export.py#L150\n", - "* Should I still drop overly quick speeds?" + "crosswalk2 = portfolio_utils.add_route_name(crosswalk2).drop(\n", + " columns=[\"route_short_name\", \"route_long_name\", \"route_desc\"]\n", + ")" ] }, { "cell_type": "code", - "execution_count": 43, - "id": "6f9d6077-0551-42e0-830c-a262364770f0", + "execution_count": 46, + "id": "67e1a70f-d21c-4a9d-b4a3-2ebde459b4e1", "metadata": {}, "outputs": [], "source": [ - "df2 = df.loc[df.speed_mph <= 70].reset_index(drop = True)" + "df = pd.merge(\n", + " dec_df,\n", + " crosswalk2,\n", + " on=\"trip_instance_key\",\n", + " how=\"left\",\n", + ").merge(time_of_day, on=\"trip_instance_key\", how=\"left\")" ] }, { "cell_type": "code", - "execution_count": 44, - "id": "e7ce443a-b466-4d07-a613-cfc05a1b764d", + "execution_count": 47, + "id": "deb99704-52a5-4119-8d09-ddb4bc3ca13d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(77170, 86832)" - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "len(df2), len(df)" + "# Test if servicve minutes match\n", + "# df['matching_service_mins'] = df.service_minutes_x - df.service_minutes_y" ] }, { "cell_type": "code", - "execution_count": 45, - "id": "b3695f80-f817-4e3f-b882-5f37e0c277ae", + "execution_count": 48, + "id": "777c0915-acc0-4fac-83e0-7ddc7a396e86", "metadata": {}, "outputs": [], "source": [ - "route_cols = [\n", - " \"schedule_gtfs_dataset_key\", \"time_of_day\",\n", - " \"route_id\", \"direction_id\",\n", - " \"route_name_used\",\n", - " \"common_shape_id\", \"shape_array_key\"\n", - " ]" + "# Yes it matchesp perfectly\n", + "# df['matching_service_mins'].describe()" ] }, { "cell_type": "code", - "execution_count": 46, - "id": "7f232164-7199-4ee2-9231-acf633509d89", + "execution_count": 49, + "id": "4576ab1e-2bde-4420-8254-7c8e2a2700d5", "metadata": {}, "outputs": [ { @@ -1733,6 +1810,7 @@ " total_vp\n", " vp_in_shape\n", " speed_mph\n", + " service_minutes\n", " pings_per_min\n", " spatial_accuracy_pct\n", " rt_triptime_w_gtfs_pct\n", @@ -1746,230 +1824,476 @@ " service_hours\n", " trip_first_departure_datetime_pacific\n", " time_of_day\n", - " service_minutes\n", " \n", " \n", " \n", " \n", - " 62094\n", - " e77edc665d02c0c34d16290e6d3caad6\n", - " 38.32\n", - " 38\n", - " 112\n", - " 39\n", - " 112.00\n", - " 111.00\n", - " 12.84\n", + " 0\n", + " 5d25a4366c173007d9c29fdead0299d7\n", + " 74.03\n", + " 73\n", + " 216\n", + " 74\n", + " 216.00\n", + " 148.00\n", + " 21.01\n", + " 58.00\n", " 2.92\n", - " 99.11\n", - " 100.00\n", - " 53.27\n", - " 7cc0cb1871dfd558f11a2885c145d144\n", + " 68.52\n", + " 99.95\n", + " 27.64\n", + " 63029a23cb0e73f2a5d98a345c5e2e40\n", " 1\n", - " 37\n", - " 3751\n", - " 347b53a6f1c73d92b970c56cee4aa2d1\n", - " Weekdays 6am-9pm Weekends 9am-9pm\n", - " 0.42\n", - " 2023-11-15 22:00:00\n", - " Evening\n", - " 25.00\n", - " \n", - " \n", - " 25552\n", - " ecb5631c1349a15c70b673e25fc54827\n", - " 47.93\n", - " 47\n", - " 141\n", - " 48\n", - " 141.00\n", - " 140.00\n", - " 10.13\n", - " 2.94\n", - " 99.29\n", + " 3428\n", + " 8371\n", + " 0d0ca5bc40fb6266a03f400c3aa7e6cb\n", + " \n", + " 0.97\n", + " 2023-12-13 05:34:00\n", + " Early AM\n", + " \n", + " \n", + " 1\n", + " 4b72b80fc9cfe5e613bab95585cbe7e4\n", + " 23.45\n", + " 21\n", + " 59\n", + " 23\n", + " 59.00\n", + " 19.00\n", + " 54.95\n", + " 58.00\n", + " 2.52\n", + " 32.20\n", + " 98.08\n", + " -59.57\n", + " 63029a23cb0e73f2a5d98a345c5e2e40\n", + " 1\n", + " 3428\n", + " 8371\n", + " 0d0ca5bc40fb6266a03f400c3aa7e6cb\n", + " \n", + " 0.97\n", + " 2023-12-13 06:34:00\n", + " Early AM\n", + " \n", + " \n", + " 2\n", + " 2f061fce31ec5f20f55a81177996db89\n", + " 104.37\n", + " 102\n", + " 309\n", + " 104\n", + " 309.00\n", + " 199.00\n", + " 8.94\n", + " 58.00\n", + " 2.96\n", + " 64.40\n", + " 99.65\n", + " 79.94\n", + " 63029a23cb0e73f2a5d98a345c5e2e40\n", + " 0\n", + " 3428\n", + " 13407\n", + " f05fbd11b7b08a3a09b24593b7a83497\n", + " \n", + " 0.97\n", + " 2023-12-13 15:37:00\n", + " PM Peak\n", + " \n", + " \n", + " 3\n", + " 3732269c8694ba9a0bd4c44aed97abe0\n", + " 141.95\n", + " 140\n", + " 422\n", + " 142\n", + " 422.00\n", + " 188.00\n", + " 9.38\n", + " 58.00\n", + " 2.97\n", + " 44.55\n", " 100.00\n", - " 22.91\n", - " 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c\n", + " 144.74\n", + " 63029a23cb0e73f2a5d98a345c5e2e40\n", " 0\n", - " 690-13168\n", - " 6900006_JUNE23\n", - " 1a819c320e5f644df0a9bbabc869b049\n", - " SYLMAR OLIVE VIEW HOSP. - SUNLAND VIA FOOTHILL BL\n", - " 0.65\n", - " 2023-11-15 16:32:00\n", + " 3428\n", + " 13407\n", + " f05fbd11b7b08a3a09b24593b7a83497\n", + " \n", + " 0.97\n", + " 2023-12-13 16:37:00\n", " PM Peak\n", - " 39.00\n", + " \n", + " \n", + " 4\n", + " 101556fdfe31b5849787220373f21ed8\n", + " 72.78\n", + " 72\n", + " 210\n", + " 73\n", + " 210.00\n", + " 109.00\n", + " 31.65\n", + " 55.00\n", + " 2.89\n", + " 51.90\n", + " 100.00\n", + " 32.33\n", + " 63029a23cb0e73f2a5d98a345c5e2e40\n", + " 1\n", + " 3429\n", + " 8373\n", + " 0f9dd50e2356a5299046fb0ed1a00a89\n", + " \n", + " 0.92\n", + " 2023-12-13 05:55:00\n", + " Early AM\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "62094 e77edc665d02c0c34d16290e6d3caad6 38.32 \n", - "25552 ecb5631c1349a15c70b673e25fc54827 47.93 \n", - "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "62094 38 112 39 \n", - "25552 47 141 48 \n", + " trip_instance_key rt_service_min \\\n", + "0 5d25a4366c173007d9c29fdead0299d7 74.03 \n", + "1 4b72b80fc9cfe5e613bab95585cbe7e4 23.45 \n", + "2 2f061fce31ec5f20f55a81177996db89 104.37 \n", + "3 3732269c8694ba9a0bd4c44aed97abe0 141.95 \n", + "4 101556fdfe31b5849787220373f21ed8 72.78 \n", "\n", - " total_vp vp_in_shape speed_mph pings_per_min spatial_accuracy_pct \\\n", - "62094 112.00 111.00 12.84 2.92 99.11 \n", - "25552 141.00 140.00 10.13 2.94 99.29 \n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "0 73 216 74 \n", + "1 21 59 23 \n", + "2 102 309 104 \n", + "3 140 422 142 \n", + "4 72 210 73 \n", "\n", - " rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \\\n", - "62094 100.00 53.27 \n", - "25552 100.00 22.91 \n", + " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", + "0 216.00 148.00 21.01 58.00 2.92 \n", + "1 59.00 19.00 54.95 58.00 2.52 \n", + "2 309.00 199.00 8.94 58.00 2.96 \n", + "3 422.00 188.00 9.38 58.00 2.97 \n", + "4 210.00 109.00 31.65 55.00 2.89 \n", "\n", - " schedule_gtfs_dataset_key direction_id route_id \\\n", - "62094 7cc0cb1871dfd558f11a2885c145d144 1 37 \n", - "25552 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c 0 690-13168 \n", + " spatial_accuracy_pct rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \\\n", + "0 68.52 99.95 27.64 \n", + "1 32.20 98.08 -59.57 \n", + "2 64.40 99.65 79.94 \n", + "3 44.55 100.00 144.74 \n", + "4 51.90 100.00 32.33 \n", "\n", - " common_shape_id shape_array_key \\\n", - "62094 3751 347b53a6f1c73d92b970c56cee4aa2d1 \n", - "25552 6900006_JUNE23 1a819c320e5f644df0a9bbabc869b049 \n", + " schedule_gtfs_dataset_key direction_id route_id common_shape_id \\\n", + "0 63029a23cb0e73f2a5d98a345c5e2e40 1 3428 8371 \n", + "1 63029a23cb0e73f2a5d98a345c5e2e40 1 3428 8371 \n", + "2 63029a23cb0e73f2a5d98a345c5e2e40 0 3428 13407 \n", + "3 63029a23cb0e73f2a5d98a345c5e2e40 0 3428 13407 \n", + "4 63029a23cb0e73f2a5d98a345c5e2e40 1 3429 8373 \n", "\n", - " route_name_used service_hours \\\n", - "62094 Weekdays 6am-9pm Weekends 9am-9pm 0.42 \n", - "25552 SYLMAR OLIVE VIEW HOSP. - SUNLAND VIA FOOTHILL BL 0.65 \n", + " shape_array_key route_name_used service_hours \\\n", + "0 0d0ca5bc40fb6266a03f400c3aa7e6cb 0.97 \n", + "1 0d0ca5bc40fb6266a03f400c3aa7e6cb 0.97 \n", + "2 f05fbd11b7b08a3a09b24593b7a83497 0.97 \n", + "3 f05fbd11b7b08a3a09b24593b7a83497 0.97 \n", + "4 0f9dd50e2356a5299046fb0ed1a00a89 0.92 \n", "\n", - " trip_first_departure_datetime_pacific time_of_day service_minutes \n", - "62094 2023-11-15 22:00:00 Evening 25.00 \n", - "25552 2023-11-15 16:32:00 PM Peak 39.00 " + " trip_first_departure_datetime_pacific time_of_day \n", + "0 2023-12-13 05:34:00 Early AM \n", + "1 2023-12-13 06:34:00 Early AM \n", + "2 2023-12-13 15:37:00 PM Peak \n", + "3 2023-12-13 16:37:00 PM Peak \n", + "4 2023-12-13 05:55:00 Early AM " ] }, - "execution_count": 46, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df2.sample(2)" + "df.head()" ] }, { "cell_type": "code", - "execution_count": 47, - "id": "dcd9a500-aba4-42bd-bf15-b8d1f302fb00", + "execution_count": 50, + "id": "94f4812c-8166-4f8c-a3c9-aa4d6f34df9c", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "RangeIndex: 77170 entries, 0 to 77169\n", - "Data columns (total 22 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 trip_instance_key 77170 non-null object \n", - " 1 rt_service_min 77170 non-null float64 \n", - " 2 min_w_atleast2_trip_updates 77170 non-null int64 \n", - " 3 total_pings_for_trip 77170 non-null int64 \n", - " 4 total_min_w_gtfs 77170 non-null int64 \n", - " 5 total_vp 74245 non-null float64 \n", - " 6 vp_in_shape 74245 non-null float64 \n", - " 7 speed_mph 77170 non-null float64 \n", - " 8 pings_per_min 77170 non-null float64 \n", - " 9 spatial_accuracy_pct 74245 non-null float64 \n", - " 10 rt_triptime_w_gtfs_pct 77170 non-null float64 \n", - " 11 rt_v_scheduled_trip_time_pct 77170 non-null float64 \n", - " 12 schedule_gtfs_dataset_key 75595 non-null object \n", - " 13 direction_id 75595 non-null Int64 \n", - " 14 route_id 75595 non-null object \n", - " 15 common_shape_id 75595 non-null object \n", - " 16 shape_array_key 75595 non-null object \n", - " 17 route_name_used 75595 non-null object \n", - " 18 service_hours 77170 non-null float64 \n", - " 19 trip_first_departure_datetime_pacific 77170 non-null datetime64[ns]\n", - " 20 time_of_day 77170 non-null object \n", - " 21 service_minutes 77170 non-null float64 \n", - "dtypes: Int64(1), datetime64[ns](1), float64(10), int64(3), object(7)\n", - "memory usage: 13.0+ MB\n" - ] + "data": { + "text/plain": [ + "((86128, 22),)" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" } ], "source": [ - "df2.info()" + "df.shape," ] }, { - "cell_type": "code", - "execution_count": 48, - "id": "e964a1f0-b66c-4d7f-92d0-c72e7c7bf39c", + "cell_type": "markdown", + "id": "d2ff8a3d-d3f5-42b4-b096-24b33b9842ca", "metadata": {}, - "outputs": [], "source": [ - "df3 = (df2.groupby(route_cols)\n", - " .agg({\n", - " \"service_minutes\": \"mean\",\n", - " \"rt_service_min\": \"mean\",\n", - " \"speed_mph\": \"mean\",\n", - " \"pings_per_min\":\"mean\",\n", - " \"total_vp\":\"mean\",\n", - " \"vp_in_shape\":\"mean\",\n", - " \"trip_instance_key\": \"count\"\n", - " }).reset_index()\n", - ") " + "#### Aggregate avg speed by route\n", + "* DO I need to use the other functions in the script\n", + "* Do we still drop rows that are above 70 mph?\n", + "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/export.py#L150\n" ] }, { "cell_type": "code", - "execution_count": 49, - "id": "365803c0-a864-49ab-bb34-dcf2e936e867", + "execution_count": 51, + "id": "6f9d6077-0551-42e0-830c-a262364770f0", "metadata": {}, "outputs": [], "source": [ - "df4 = df3.assign(\n", - " rt_service_min = df2.rt_service_min.round(1),\n", - " service_minutes = df2.service_minutes.round(1),\n", - " speed_mph = df2.speed_mph.round(1)\n", - " ).rename(columns = {\n", - " \"service_minutes\": \"avg_sched_trip_min\",\n", - " \"rt_service_min\": \"avg_rt_trip_min\",\n", - " \"trip_instance_key\": \"n_trips\",\n", - " \"route_name_used\": \"route_name\",\n", - " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"\n", - " })" + "df2 = df.loc[df.speed_mph <= 70].reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": 50, - "id": "0660f4e2-0c92-4656-a967-95ac7a26b440", + "execution_count": 52, + "id": "e7ce443a-b466-4d07-a613-cfc05a1b764d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(76838, 86128)" + ] + }, + "execution_count": 52, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(df2), len(df)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "b3695f80-f817-4e3f-b882-5f37e0c277ae", "metadata": {}, "outputs": [], "source": [ - "org_crosswalk = (\n", - " schedule_rt_utils.sample_gtfs_dataset_key_to_organization_crosswalk(\n", - " df4,\n", - " analysis_date,\n", - " quartet_data = \"schedule\",\n", - " dim_gtfs_dataset_cols = [\"key\", \"base64_url\"],\n", - " dim_organization_cols = [\"source_record_id\", \n", - " \"name\", \"caltrans_district\"])\n", - " )" + "route_cols = [\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"time_of_day\",\n", + " \"route_id\",\n", + " \"direction_id\",\n", + " \"route_name_used\",\n", + " \"shape_array_key\",\n", + "]" ] }, { "cell_type": "code", - "execution_count": 60, - "id": "e43fa48f-cca4-4bfa-a8cd-971a7dc3f969", + "execution_count": 54, + "id": "e964a1f0-b66c-4d7f-92d0-c72e7c7bf39c", + "metadata": {}, + "outputs": [], + "source": [ + "df3 = (\n", + " df2.groupby(route_cols, observed=False, group_keys=True)\n", + " .agg(\n", + " {\n", + " \"service_minutes\": \"mean\",\n", + " \"rt_service_min\": \"mean\",\n", + " \"speed_mph\": \"mean\",\n", + " \"pings_per_min\": \"mean\",\n", + " \"total_vp\": \"mean\",\n", + " \"vp_in_shape\": \"mean\",\n", + " \"trip_instance_key\": \"count\",\n", + " }\n", + " )\n", + " .reset_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "f086985e-076e-4b22-825e-bf230cecf2f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'df3 = df3.assign(\\n rt_service_min=df2.rt_service_min.round(1),\\n service_minutes=df2.service_minutes.round(1),\\n speed_mph=df2.speed_mph.round(1),\\n pings_per_min=df2.pings_per_min.round(1),\\n).rename(\\n columns={\\n \"service_minutes\": \"avg_sched_trip_min\",\\n \"rt_service_min\": \"avg_rt_trip_min\",\\n \"trip_instance_key\": \"n_trips\",\\n \"route_name_used\": \"route_name\",\\n \"pings_per_min\": \"avg_pings_per_min\",\\n \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\\n }\\n)'" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "\"\"\"df3 = df3.assign(\n", + " rt_service_min=df2.rt_service_min.round(1),\n", + " service_minutes=df2.service_minutes.round(1),\n", + " speed_mph=df2.speed_mph.round(1),\n", + " pings_per_min=df2.pings_per_min.round(1),\n", + ").rename(\n", + " columns={\n", + " \"service_minutes\": \"avg_sched_trip_min\",\n", + " \"rt_service_min\": \"avg_rt_trip_min\",\n", + " \"trip_instance_key\": \"n_trips\",\n", + " \"route_name_used\": \"route_name\",\n", + " \"pings_per_min\": \"avg_pings_per_min\",\n", + " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", + " }\n", + ")\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "8236bf29-abaa-466c-978c-10199ffa840c", + "metadata": {}, + "outputs": [], + "source": [ + "df3 = df3.rename(\n", + " columns={\n", + " \"service_minutes\": \"avg_sched_trip_min\",\n", + " \"rt_service_min\": \"avg_rt_trip_min\",\n", + " \"trip_instance_key\": \"n_trips\",\n", + " \"route_name_used\": \"route_name\",\n", + " \"pings_per_min\": \"avg_pings_per_min\",\n", + " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", + " }\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "2a6ff425-643d-426b-803b-b96399f986d3", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
0015d67d5b75b5cf2b710bbadadfb75f5AM Peak170Downtown San Rafael - Sausalito15dd7643b1198055544091c267b6739755.6057.4615.172.76151.00142.805
1015d67d5b75b5cf2b710bbadadfb75f5AM Peak171Downtown San Rafael - Sausalitode1df9489fe7de15f492c9308289102b56.2055.019.852.54147.60140.405
\n", + "
" + ], "text/plain": [ - "((82, 5), 82)" + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "0 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 17 0 \n", + "1 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 17 1 \n", + "\n", + " route_name shape_array_key \\\n", + "0 Downtown San Rafael - Sausalito 15dd7643b1198055544091c267b67397 \n", + "1 Downtown San Rafael - Sausalito de1df9489fe7de15f492c9308289102b \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", + "0 55.60 57.46 15.17 2.76 \n", + "1 56.20 55.01 9.85 2.54 \n", + "\n", + " total_vp vp_in_shape n_trips \n", + "0 151.00 142.80 5 \n", + "1 147.60 140.40 5 " ] }, - "execution_count": 60, + "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "org_crosswalk.shape, org_crosswalk.schedule_gtfs_dataset_key.nunique()" + "df3.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "0660f4e2-0c92-4656-a967-95ac7a26b440", + "metadata": {}, + "outputs": [], + "source": [ + "org_crosswalk = schedule_rt_utils.sample_gtfs_dataset_key_to_organization_crosswalk(\n", + " df3,\n", + " analysis_date,\n", + " quartet_data=\"schedule\",\n", + " dim_gtfs_dataset_cols=[\"key\", \"base64_url\"],\n", + " dim_organization_cols=[\"source_record_id\", \"name\", \"caltrans_district\"],\n", + ")" ] }, { @@ -2017,14 +2341,6 @@ " \n", " \n", " 1\n", - " 07d3b79f14cec8099119e1eb649f065b\n", - " aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3Rmcy90YWhvZS1jYS11cy90YWhvZS1jYS11cy56aXA=\n", - " rec3u4aMplqObcoTR\n", - " Tahoe Transportation District\n", - " 03 - Marysville\n", - " \n", - " \n", - " 2\n", " 0881af3822466784992a49f1cc57d38f\n", " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TQQ==\n", " recUmm4gcNXaqrwpn\n", @@ -2032,7 +2348,7 @@ " 04 - Oakland\n", " \n", " \n", - " 3\n", + " 2\n", " 09a703757d1ed14ca9580b1385e39315\n", " aHR0cHM6Ly9yaWRlbGF3bmRhbGViZWF0LmNvbS9ndGZz\n", " recj8LXdeSurpSRNU\n", @@ -2040,13 +2356,21 @@ " 07 - Los Angeles\n", " \n", " \n", - " 4\n", + " 3\n", " 09e16227fc42c4fe90204a9d11581034\n", " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TTw==\n", - " recRM3c9Zfaft4V2B\n", - " Cloverdale Transit\n", + " recDupUxInMUgxeiz\n", + " Sonoma County\n", " 04 - Oakland\n", " \n", + " \n", + " 4\n", + " 0a3c0b21c85fb09f8db91599e14dd7f7\n", + " aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3Rmcy9sYWtldHJhbnNpdC1jYS11cy9sYWtldHJhbnNpdC1jYS11cy56aXA=\n", + " recPwXKbGLL4aIqXV\n", + " Lake Transit Authority\n", + " 01 - Eureka\n", + " \n", " \n", "\n", "" @@ -2054,31 +2378,31 @@ "text/plain": [ " schedule_gtfs_dataset_key \\\n", "0 015d67d5b75b5cf2b710bbadadfb75f5 \n", - "1 07d3b79f14cec8099119e1eb649f065b \n", - "2 0881af3822466784992a49f1cc57d38f \n", - "3 09a703757d1ed14ca9580b1385e39315 \n", - "4 09e16227fc42c4fe90204a9d11581034 \n", - "\n", - " base64_url \\\n", - "0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", - "1 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3Rmcy90YWhvZS1jYS11cy90YWhvZS1jYS11cy56aXA= \n", - "2 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TQQ== \n", - "3 aHR0cHM6Ly9yaWRlbGF3bmRhbGViZWF0LmNvbS9ndGZz \n", - "4 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TTw== \n", + "1 0881af3822466784992a49f1cc57d38f \n", + "2 09a703757d1ed14ca9580b1385e39315 \n", + "3 09e16227fc42c4fe90204a9d11581034 \n", + "4 0a3c0b21c85fb09f8db91599e14dd7f7 \n", + "\n", + " base64_url \\\n", + "0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", + "1 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TQQ== \n", + "2 aHR0cHM6Ly9yaWRlbGF3bmRhbGViZWF0LmNvbS9ndGZz \n", + "3 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TTw== \n", + "4 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3Rmcy9sYWtldHJhbnNpdC1jYS11cy9sYWtldHJhbnNpdC1jYS11cy56aXA= \n", "\n", " organization_source_record_id organization_name \\\n", "0 recNOb7pqBRlQVG5e Marin County Transit District \n", - "1 rec3u4aMplqObcoTR Tahoe Transportation District \n", - "2 recUmm4gcNXaqrwpn Sonoma-Marin Area Rail Transit District \n", - "3 recj8LXdeSurpSRNU City of Lawndale \n", - "4 recRM3c9Zfaft4V2B Cloverdale Transit \n", + "1 recUmm4gcNXaqrwpn Sonoma-Marin Area Rail Transit District \n", + "2 recj8LXdeSurpSRNU City of Lawndale \n", + "3 recDupUxInMUgxeiz Sonoma County \n", + "4 recPwXKbGLL4aIqXV Lake Transit Authority \n", "\n", " caltrans_district \n", "0 04 - Oakland \n", - "1 03 - Marysville \n", - "2 04 - Oakland \n", - "3 07 - Los Angeles \n", - "4 04 - Oakland " + "1 04 - Oakland \n", + "2 07 - Los Angeles \n", + "3 04 - Oakland \n", + "4 01 - Eureka " ] }, "execution_count": 59, @@ -2092,33 +2416,32 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 60, "id": "71c0d174-736c-4093-a57f-1b39608fb2c2", "metadata": {}, "outputs": [], "source": [ "df_with_org = pd.merge(\n", - " df4,\n", - " org_crosswalk.rename(columns = {\n", - " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"}),\n", - " on = \"gtfs_dataset_key\",\n", - " how = \"inner\"\n", - " )" + " df3,\n", + " org_crosswalk.rename(columns={\"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"}),\n", + " on=\"gtfs_dataset_key\",\n", + " how=\"inner\",\n", + ")" ] }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 61, "id": "275988a7-0b45-4799-b8d5-7826913745f3", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(11229, 18)" + "(11397, 17)" ] }, - "execution_count": 52, + "execution_count": 61, "metadata": {}, "output_type": "execute_result" } @@ -2129,7 +2452,7 @@ }, { "cell_type": "code", - "execution_count": 53, + "execution_count": 62, "id": "3caa9bbc-51d2-4bc4-9d74-040f38ce67d5", "metadata": {}, "outputs": [ @@ -2159,12 +2482,11 @@ " route_id\n", " direction_id\n", " route_name\n", - " common_shape_id\n", " shape_array_key\n", " avg_sched_trip_min\n", " avg_rt_trip_min\n", " speed_mph\n", - " pings_per_min\n", + " avg_pings_per_min\n", " total_vp\n", " vp_in_shape\n", " n_trips\n", @@ -2182,14 +2504,13 @@ " 17\n", " 0\n", " Downtown San Rafael - Sausalito\n", - " 104\n", - " 4362491ca6006e88a076e2eaaf693762\n", - " 58.00\n", - " 62.40\n", - " 22.20\n", - " 2.75\n", - " 205.00\n", - " 162.80\n", + " 15dd7643b1198055544091c267b67397\n", + " 55.60\n", + " 57.46\n", + " 15.17\n", + " 2.76\n", + " 151.00\n", + " 142.80\n", " 5\n", " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==\n", " recNOb7pqBRlQVG5e\n", @@ -2203,14 +2524,13 @@ " 17\n", " 1\n", " Downtown San Rafael - Sausalito\n", - " 109\n", - " fe2f063891238d0132d4543c537cf574\n", - " 58.00\n", - " 67.70\n", - " 21.60\n", - " 2.73\n", - " 181.60\n", - " 166.60\n", + " de1df9489fe7de15f492c9308289102b\n", + " 56.20\n", + " 55.01\n", + " 9.85\n", + " 2.54\n", + " 147.60\n", + " 140.40\n", " 5\n", " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==\n", " recNOb7pqBRlQVG5e\n", @@ -2224,15 +2544,14 @@ " 219\n", " 0\n", " Tiburon - Strawberry\n", - " 56\n", - " 168629b6e90c8b94b78dd60c8eeaf0ca\n", - " 58.00\n", - " 127.40\n", - " 8.10\n", + " 1a27f5f0785ae953f5dfded42e6d4e0e\n", + " 18.00\n", + " 17.33\n", + " 24.76\n", " 2.91\n", - " 119.00\n", - " 113.25\n", - " 4\n", + " 50.50\n", + " 44.00\n", + " 2\n", " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==\n", " recNOb7pqBRlQVG5e\n", " Marin County Transit District\n", @@ -2245,15 +2564,14 @@ " 219\n", " 1\n", " Tiburon - Strawberry\n", - " 58\n", - " 6457a064626d1b0abdce22dd159fbb2d\n", - " 58.00\n", - " 152.00\n", - " 9.40\n", + " a8c9fae8e07d7a553264d4de2ffb704d\n", + " 19.33\n", + " 27.56\n", + " 9.80\n", " 2.93\n", - " 86.75\n", - " 85.75\n", - " 4\n", + " 80.67\n", + " 79.00\n", + " 3\n", " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==\n", " recNOb7pqBRlQVG5e\n", " Marin County Transit District\n", @@ -2266,15 +2584,14 @@ " 22\n", " 0\n", " Downtown San Rafael - Marin City\n", - " 50\n", - " 0e10aded9dede712f3c623c5deae87a4\n", - " 55.00\n", - " 76.30\n", - " 28.60\n", - " 2.95\n", - " 90.33\n", - " 53.33\n", - " 6\n", + " 5c1924a3c980f9ec07d63f216d3de7af\n", + " 37.80\n", + " 42.08\n", + " 10.20\n", + " 2.94\n", + " 105.00\n", + " 72.00\n", + " 5\n", " aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==\n", " recNOb7pqBRlQVG5e\n", " Marin County Transit District\n", @@ -2292,26 +2609,26 @@ "3 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 219 1 \n", "4 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 22 0 \n", "\n", - " route_name common_shape_id \\\n", - "0 Downtown San Rafael - Sausalito 104 \n", - "1 Downtown San Rafael - Sausalito 109 \n", - "2 Tiburon - Strawberry 56 \n", - "3 Tiburon - Strawberry 58 \n", - "4 Downtown San Rafael - Marin City 50 \n", - "\n", - " shape_array_key avg_sched_trip_min avg_rt_trip_min \\\n", - "0 4362491ca6006e88a076e2eaaf693762 58.00 62.40 \n", - "1 fe2f063891238d0132d4543c537cf574 58.00 67.70 \n", - "2 168629b6e90c8b94b78dd60c8eeaf0ca 58.00 127.40 \n", - "3 6457a064626d1b0abdce22dd159fbb2d 58.00 152.00 \n", - "4 0e10aded9dede712f3c623c5deae87a4 55.00 76.30 \n", - "\n", - " speed_mph pings_per_min total_vp vp_in_shape n_trips \\\n", - "0 22.20 2.75 205.00 162.80 5 \n", - "1 21.60 2.73 181.60 166.60 5 \n", - "2 8.10 2.91 119.00 113.25 4 \n", - "3 9.40 2.93 86.75 85.75 4 \n", - "4 28.60 2.95 90.33 53.33 6 \n", + " route_name shape_array_key \\\n", + "0 Downtown San Rafael - Sausalito 15dd7643b1198055544091c267b67397 \n", + "1 Downtown San Rafael - Sausalito de1df9489fe7de15f492c9308289102b \n", + "2 Tiburon - Strawberry 1a27f5f0785ae953f5dfded42e6d4e0e \n", + "3 Tiburon - Strawberry a8c9fae8e07d7a553264d4de2ffb704d \n", + "4 Downtown San Rafael - Marin City 5c1924a3c980f9ec07d63f216d3de7af \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", + "0 55.60 57.46 15.17 2.76 \n", + "1 56.20 55.01 9.85 2.54 \n", + "2 18.00 17.33 24.76 2.91 \n", + "3 19.33 27.56 9.80 2.93 \n", + "4 37.80 42.08 10.20 2.94 \n", + "\n", + " total_vp vp_in_shape n_trips \\\n", + "0 151.00 142.80 5 \n", + "1 147.60 140.40 5 \n", + "2 50.50 44.00 2 \n", + "3 80.67 79.00 3 \n", + "4 105.00 72.00 5 \n", "\n", " base64_url \\\n", "0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", @@ -2335,7 +2652,7 @@ "4 04 - Oakland " ] }, - "execution_count": 53, + "execution_count": 62, "metadata": {}, "output_type": "execute_result" } @@ -2346,33 +2663,32 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 63, "id": "de3449eb-88c2-4d46-ae3a-94f881e8cade", "metadata": {}, "outputs": [], "source": [ "shapes = helpers.import_scheduled_shapes(\n", - " analysis_date,\n", - " columns = [\"shape_array_key\", \"geometry\"],\n", - " get_pandas = True,\n", - " crs = geography_utils.WGS84\n", - " )\n", - " " + " analysis_date,\n", + " columns=[\"shape_array_key\", \"geometry\"],\n", + " get_pandas=True,\n", + " crs=geography_utils.WGS84,\n", + ")" ] }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 64, "id": "091ed4c9-6742-4a06-abdf-7b59abe7a948", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(7303, 2)" + "(7286, 2)" ] }, - "execution_count": 55, + "execution_count": 64, "metadata": {}, "output_type": "execute_result" } @@ -2383,115 +2699,169 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 65, "id": "dc0f152d-19ac-4687-83e8-8d5fd225005b", "metadata": {}, "outputs": [], "source": [ "df_with_shape = pd.merge(\n", - " shapes,\n", - " df_with_org,\n", - " on = \"shape_array_key\", # once merged, can drop shape_array_key\n", - " how = \"inner\"\n", - " )" + " shapes,\n", + " df_with_org,\n", + " on=\"shape_array_key\", # once merged, can drop shape_array_key\n", + " how=\"inner\",\n", + ")" ] }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 66, "id": "1d8bd521-7b7e-438e-9cdd-b23606acd644", "metadata": {}, "outputs": [], "source": [ - "final_df['avg_pct_vp_shape'] = final_df.vp_in_shape / final_df.total_vp * 100" + "df_with_shape[\"avg_pct_vp_shape\"] = (\n", + " df_with_shape.vp_in_shape / df_with_shape.total_vp * 100\n", + ")" ] }, { "cell_type": "code", - "execution_count": 69, + "execution_count": 67, "id": "158662df-1735-4a1a-a4c5-7b3d00469311", "metadata": {}, "outputs": [], "source": [ - "final_df['avg_pct_rt_v_sched'] = (final_df.avg_rt_trip_min / final_df.avg_sched_trip_min - 1) * 100" + "df_with_shape[\"avg_pct_rt_v_sched\"] = (\n", + " df_with_shape.avg_rt_trip_min / df_with_shape.avg_sched_trip_min - 1\n", + ") * 100" ] }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 68, "id": "7020e160-7e18-4099-8a97-9114eef06652", "metadata": {}, "outputs": [], "source": [ - "final_df = final_df.drop(columns = ['total_vp','vp_in_shape'])" + "final_df = df_with_shape.drop(columns=[\"total_vp\", \"vp_in_shape\"])" ] }, { "cell_type": "code", - "execution_count": 71, - "id": "2206947a-debc-4dcc-a459-913ec0b26443", + "execution_count": 69, + "id": "8079fc30-e2f2-459e-8446-120e97369dc5", "metadata": {}, "outputs": [], "source": [ - "final_df = final_df.rename(columns = {'pings_per_min':'avg_pings_per_min'})" + "agency_cols = [\"organization_source_record_id\", \"organization_name\"]\n", + "route_cols = [\n", + " \"route_id\",\n", + " \"route_name\",\n", + " \"direction_id\",\n", + "]\n", + "\n", + "col_order = (\n", + " agency_cols\n", + " + route_cols\n", + " + [\n", + " \"time_of_day\",\n", + " \"speed_mph\",\n", + " \"n_trips\",\n", + " \"avg_sched_trip_min\",\n", + " \"avg_rt_trip_min\",\n", + " \"base64_url\",\n", + " \"caltrans_district\",\n", + " \"geometry\",\n", + " \"avg_pings_per_min\",\n", + " \"avg_pct_vp_shape\",\n", + " \"avg_pct_rt_v_sched\",\n", + " ]\n", + ")" ] }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 70, + "id": "11b580bd-f482-4a11-be00-7ef88068bac9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(11397, 18)" + ] + }, + "execution_count": 70, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 71, "id": "9400eb5b-0ff4-4b6a-ade3-8e0fedc29d5b", "metadata": {}, "outputs": [], "source": [ - "final_df = final_df.rename(\n", - " columns = {\"organization_source_record_id\": \"org_id\",\n", - " \"organization_name\": \"agency\", \n", - " \"caltrans_district\": \"district_name\"\n", - " })" + "final_df = df_with_shape.reindex(columns=col_order).rename(\n", + " columns={\n", + " \"organization_source_record_id\": \"org_id\",\n", + " \"organization_name\": \"agency\",\n", + " \"caltrans_district\": \"district_name\",\n", + " }\n", + ")" ] }, { "cell_type": "code", - "execution_count": 74, - "id": "11b580bd-f482-4a11-be00-7ef88068bac9", + "execution_count": 72, + "id": "fe97ad8a-d0ce-40cd-982b-87877882693a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(11229, 19)" + "Index(['org_id', 'agency', 'route_id', 'route_name', 'direction_id',\n", + " 'time_of_day', 'speed_mph', 'n_trips', 'avg_sched_trip_min',\n", + " 'avg_rt_trip_min', 'base64_url', 'district_name', 'geometry',\n", + " 'avg_pings_per_min', 'avg_pct_vp_shape', 'avg_pct_rt_v_sched'],\n", + " dtype='object')" ] }, - "execution_count": 74, + "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "final_df.shape" + "final_df.columns" ] }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 73, "id": "58165517-e414-4843-8ece-b7631d4d7f27", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "count 11229.00\n", - "mean 2.46\n", - "std 0.55\n", - "min 0.04\n", - "25% 1.97\n", - "50% 2.75\n", - "75% 2.93\n", - "max 3.10\n", + "count 11397.00\n", + "mean 2.38\n", + "std 0.56\n", + "min 0.06\n", + "25% 1.94\n", + "50% 2.54\n", + "75% 2.91\n", + "max 3.45\n", "Name: avg_pings_per_min, dtype: float64" ] }, - "execution_count": 75, + "execution_count": 73, "metadata": {}, "output_type": "execute_result" } @@ -2502,7 +2872,28 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 74, + "id": "e99b22b7-f6c7-4d69-8b65-da3c39c85f33", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(11397, 16)" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 75, "id": "49867873-4a76-49c4-8c95-0918d7468f82", "metadata": {}, "outputs": [ @@ -2527,174 +2918,3173 @@ " \n", " \n", " \n", - " shape_array_key\n", - " gtfs_dataset_key\n", - " time_of_day\n", + " org_id\n", + " agency\n", " route_id\n", - " direction_id\n", " route_name\n", - " common_shape_id\n", - " avg_sched_trip_min\n", - " avg_rt_trip_min\n", + " direction_id\n", + " time_of_day\n", " speed_mph\n", - " avg_pings_per_min\n", " n_trips\n", - " org_id\n", - " agency\n", + " avg_sched_trip_min\n", + " avg_rt_trip_min\n", " district_name\n", + " avg_pings_per_min\n", " avg_pct_vp_shape\n", " avg_pct_rt_v_sched\n", " \n", " \n", " \n", " \n", - " 0\n", - " f73c9e5905f12d7392b4fff9a31c83ce\n", - " f1b35a50955aeb498533c1c6fdafbe44\n", - " AM Peak\n", - " 51\n", - " 0\n", - " LONG BEACH BLVD\n", - " 510078\n", - " 57.00\n", - " 79.20\n", - " 4.90\n", - " 1.62\n", - " 13\n", - " rec00qSzZL8KqiXAo\n", - " Long Beach Transit\n", - " 07 - Los Angeles\n", - " 92.61\n", - " 38.95\n", + " 10425\n", + " recG5aXxDPI645S86\n", + " OmniTrans\n", + " 10950\n", + " CAL STATE-SIERRA WAY-SAN BDNO\n", + " 1\n", + " Midday\n", + " 9.56\n", + " 10\n", + " 43.80\n", + " 59.57\n", + " 08 - San Bernardino\n", + " 2.53\n", + " 94.52\n", + " 36.00\n", " \n", " \n", - " 1\n", - " f73c9e5905f12d7392b4fff9a31c83ce\n", - " f1b35a50955aeb498533c1c6fdafbe44\n", + " 7407\n", + " recPnGkwdpnr8jmHB\n", + " Los Angeles County Metropolitan Transportation Authority\n", + " 236-13172\n", + " SYLMAR STA-ENCINO VIA BALBOA BL RINALDI ST\n", + " 1\n", " Early AM\n", - " 51\n", - " 0\n", - " LONG BEACH BLVD\n", - " 510078\n", - " 47.00\n", - " 52.40\n", - " 6.00\n", - " 1.83\n", - " 8\n", - " rec00qSzZL8KqiXAo\n", - " Long Beach Transit\n", + " 11.77\n", + " 3\n", + " 62.67\n", + " 84.16\n", " 07 - Los Angeles\n", - " 87.61\n", - " 11.49\n", + " 2.66\n", + " 70.88\n", + " 34.29\n", " \n", " \n", - " 2\n", - " f73c9e5905f12d7392b4fff9a31c83ce\n", - " f1b35a50955aeb498533c1c6fdafbe44\n", - " Evening\n", - " 51\n", - " 0\n", - " LONG BEACH BLVD\n", - " 510078\n", - " 44.00\n", - " 68.60\n", - " 4.80\n", - " 1.87\n", - " 4\n", - " rec00qSzZL8KqiXAo\n", - " Long Beach Transit\n", - " 07 - Los Angeles\n", - " 96.53\n", - " 55.91\n", + " 10210\n", + " recZgWVXkpix390of\n", + " San Joaquin Regional Transit District\n", + " 580\n", + " DTC - SECTION/ORO\n", + " 1\n", + " PM Peak\n", + " 9.62\n", + " 5\n", + " 22.00\n", + " 40.15\n", + " 10 - Stockton\n", + " 2.95\n", + " 97.64\n", + " 82.52\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " org_id \\\n", + "10425 recG5aXxDPI645S86 \n", + "7407 recPnGkwdpnr8jmHB \n", + "10210 recZgWVXkpix390of \n", + "\n", + " agency route_id \\\n", + "10425 OmniTrans 10950 \n", + "7407 Los Angeles County Metropolitan Transportation Authority 236-13172 \n", + "10210 San Joaquin Regional Transit District 580 \n", + "\n", + " route_name direction_id time_of_day \\\n", + "10425 CAL STATE-SIERRA WAY-SAN BDNO 1 Midday \n", + "7407 SYLMAR STA-ENCINO VIA BALBOA BL RINALDI ST 1 Early AM \n", + "10210 DTC - SECTION/ORO 1 PM Peak \n", + "\n", + " speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "10425 9.56 10 43.80 59.57 \n", + "7407 11.77 3 62.67 84.16 \n", + "10210 9.62 5 22.00 40.15 \n", + "\n", + " district_name avg_pings_per_min avg_pct_vp_shape \\\n", + "10425 08 - San Bernardino 2.53 94.52 \n", + "7407 07 - Los Angeles 2.66 70.88 \n", + "10210 10 - Stockton 2.95 97.64 \n", + "\n", + " avg_pct_rt_v_sched \n", + "10425 36.00 \n", + "7407 34.29 \n", + "10210 82.52 " + ] + }, + "execution_count": 75, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "final_df.drop(columns=[\"geometry\", \"base64_url\"]).sample(3)" + ] + }, + { + "cell_type": "markdown", + "id": "a3acfeb1-54bf-4cee-9810-51c7e5fe0aa6", + "metadata": {}, + "source": [ + "### Check results after aggregating up to route\n", + "* How are the results sooo wrong with `265-13172`" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "aa2cc0c2-da7b-4ae1-9ac4-84c8d6d60e95", + "metadata": {}, + "outputs": [], + "source": [ + "def checkout_route(route_id: str, time_of_day: str, direction_id: int):\n", + " print(\"final\")\n", + " display(\n", + " final_df.loc[\n", + " (final_df.route_id == route_id)\n", + " & (final_df.time_of_day == time_of_day)\n", + " & (final_df.direction_id == direction_id)\n", + " ].drop(columns = ['geometry','base64_url'])\n", + " )\n", + " df2_cols = [\n", + " \"trip_instance_key\",\n", + " \"time_of_day\",\n", + " \"speed_mph\",\n", + " \"rt_service_min\",\n", + " \"service_minutes\",\n", + " \"pings_per_min\",\n", + " ]\n", + " print(\"df3\")\n", + " display(df3.loc[\n", + " (df3.route_id == route_id)\n", + " & (df3.time_of_day == time_of_day)\n", + " & (df3.direction_id == direction_id)\n", + " ])\n", + " \n", + " print(\"df2\")\n", + " df2_filtered = df2.loc[\n", + " (df2.route_id == route_id)\n", + " & (df2.time_of_day == time_of_day)\n", + " & (df2.direction_id == direction_id)\n", + " ]\n", + " \n", + " display(df2_filtered.pings_per_min.mean())\n", + " display(df2_filtered.speed_mph.mean())\n", + " display(df2_filtered.total_vp.mean())\n", + " display(df2_filtered.vp_in_shape.mean())\n", + " display(df2_filtered[df2_cols])\n", + " #print(\"original\")\n", + " #trip_instance_keys_keep = list(df2_filtered.trip_instance_key.unique())\n", + " #display(dec_df.loc[dec_df.trip_instance_key.isin(trip_instance_keys_keep)])\n", + " " + ] + }, + { + "cell_type": "markdown", + "id": "c8aae146-3b5f-4189-ad0f-b011221b5442", + "metadata": {}, + "source": [ + "#### scheduled trip min (renamed from service_mins) is completely lower." + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "04ee1397-318c-4bb7-9f80-2a55b9c75055", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idtime_of_dayspeed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_sched
3f73c9e5905f12d7392b4fff9a31c83cef1b35a50955aeb498533c1c6fdafbe44Midday516533recPnGkwdpnr8jmHBLos Angeles County Metropolitan Transportation Authority265-13172PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL0LONG BEACH BLVD51007851.0045.5016.801.6920rec00qSzZL8KqiXAoLong Beach TransitEarly AM6.89259.0099.5807 - Los Angeles96.03-10.782.6870.0968.77
\n", + "
" + ], + "text/plain": [ + " org_id \\\n", + "6533 recPnGkwdpnr8jmHB \n", + "\n", + " agency route_id \\\n", + "6533 Los Angeles County Metropolitan Transportation Authority 265-13172 \n", + "\n", + " route_name direction_id \\\n", + "6533 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL 0 \n", + "\n", + " time_of_day speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "6533 Early AM 6.89 2 59.00 99.58 \n", + "\n", + " district_name avg_pings_per_min avg_pct_vp_shape \\\n", + "6533 07 - Los Angeles 2.68 70.09 \n", + "\n", + " avg_pct_rt_v_sched \n", + "6533 68.77 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df3\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
4f73c9e5905f12d7392b4fff9a31c83cef1b35a50955aeb498533c1c6fdafbe44PM Peak5117333f3f36b4c41cc6b5df3eb7f5d8ea6e3cEarly AM265-131720LONG BEACH BLVD51007875.0072.4020.301.6516rec00qSzZL8KqiXAoLong Beach Transit07 - Los Angeles95.01-3.47PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL5c3d4b372420edebe33416aa0c6abe7059.0099.586.892.68267.50187.502
\n", "
" ], "text/plain": [ - " shape_array_key gtfs_dataset_key \\\n", - "0 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", - "1 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", - "2 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", - "3 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", - "4 f73c9e5905f12d7392b4fff9a31c83ce f1b35a50955aeb498533c1c6fdafbe44 \n", - "\n", - " time_of_day route_id direction_id route_name common_shape_id \\\n", - "0 AM Peak 51 0 LONG BEACH BLVD 510078 \n", - "1 Early AM 51 0 LONG BEACH BLVD 510078 \n", - "2 Evening 51 0 LONG BEACH BLVD 510078 \n", - "3 Midday 51 0 LONG BEACH BLVD 510078 \n", - "4 PM Peak 51 0 LONG BEACH BLVD 510078 \n", - "\n", - " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min n_trips \\\n", - "0 57.00 79.20 4.90 1.62 13 \n", - "1 47.00 52.40 6.00 1.83 8 \n", - "2 44.00 68.60 4.80 1.87 4 \n", - "3 51.00 45.50 16.80 1.69 20 \n", - "4 75.00 72.40 20.30 1.65 16 \n", - "\n", - " org_id agency district_name avg_pct_vp_shape \\\n", - "0 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 92.61 \n", - "1 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 87.61 \n", - "2 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 96.53 \n", - "3 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 96.03 \n", - "4 rec00qSzZL8KqiXAo Long Beach Transit 07 - Los Angeles 95.01 \n", - "\n", - " avg_pct_rt_v_sched \n", - "0 38.95 \n", - "1 11.49 \n", - "2 55.91 \n", - "3 -10.78 \n", - "4 -3.47 " - ] - }, - "execution_count": 76, + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "1733 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c Early AM 265-13172 0 \n", + "\n", + " route_name \\\n", + "1733 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL \n", + "\n", + " shape_array_key avg_sched_trip_min avg_rt_trip_min \\\n", + "1733 5c3d4b372420edebe33416aa0c6abe70 59.00 99.58 \n", + "\n", + " speed_mph avg_pings_per_min total_vp vp_in_shape n_trips \n", + "1733 6.89 2.68 267.50 187.50 2 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df2\n" + ] + }, + { + "data": { + "text/plain": [ + "2.6849179704528776" + ] + }, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "6.887376703252869" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "267.5" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "187.5" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_min
2486648a01217589c2faa46db395d6cf8317dEarly AM9.4095.0258.002.65
2486770674803a1c4416fc49f883bc3b2c18bEarly AM4.38104.1360.002.72
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "24866 48a01217589c2faa46db395d6cf8317d Early AM 9.40 \n", + "24867 70674803a1c4416fc49f883bc3b2c18b Early AM 4.38 \n", + "\n", + " rt_service_min service_minutes pings_per_min \n", + "24866 95.02 58.00 2.65 \n", + "24867 104.13 60.00 2.72 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "route_265 = checkout_route(\"265-13172\", \"Early AM\", 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "aa775538-5a7f-4500-bc5b-867e9b54d626", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idtime_of_dayspeed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_sched
193rec3u4aMplqObcoTRTahoe Transportation District5671Valley Express Daily1Early AM2.16138.00313.4803 - Marysville1.4222.65724.96
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id \\\n", + "193 rec3u4aMplqObcoTR Tahoe Transportation District 5671 \n", + "\n", + " route_name direction_id time_of_day speed_mph n_trips \\\n", + "193 Valley Express Daily 1 Early AM 2.16 1 \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min district_name avg_pings_per_min \\\n", + "193 38.00 313.48 03 - Marysville 1.42 \n", + "\n", + " avg_pct_vp_shape avg_pct_rt_v_sched \n", + "193 22.65 724.96 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df3\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
6840c3499b856c717e5706299664fb1c5261Early AM56711Valley Express Dailyd99540208939e6a891e7ec67e3bf896438.00313.482.161.42446.00101.001
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "6840 c3499b856c717e5706299664fb1c5261 Early AM 5671 1 \n", + "\n", + " route_name shape_array_key \\\n", + "6840 Valley Express Daily d99540208939e6a891e7ec67e3bf8964 \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", + "6840 38.00 313.48 2.16 1.42 \n", + "\n", + " total_vp vp_in_shape n_trips \n", + "6840 446.00 101.00 1 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df2\n" + ] + }, + { + "data": { + "text/plain": [ + "1.422723164442554" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "2.158633017384419" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "446.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "101.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_min
277586fadf197f5bb105ed916de0a337386eeEarly AM2.16313.4838.001.42
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "27758 6fadf197f5bb105ed916de0a337386ee Early AM 2.16 \n", + "\n", + " rt_service_min service_minutes pings_per_min \n", + "27758 313.48 38.00 1.42 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_route(\"5671\", \"Early AM\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "a3c23900-fdaa-476c-a490-dbc703df0c28", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idtime_of_dayspeed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_sched
9012rechaapWbeffO33OXCity and County of San Francisco38RWeekdays 5am-10pm Weekends 6am-9pm1AM Peak6.683043.9358.9204 - Oakland2.9688.3334.10
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id \\\n", + "9012 rechaapWbeffO33OX City and County of San Francisco 38R \n", + "\n", + " route_name direction_id time_of_day speed_mph \\\n", + "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 AM Peak 6.68 \n", + "\n", + " n_trips avg_sched_trip_min avg_rt_trip_min district_name \\\n", + "9012 30 43.93 58.92 04 - Oakland \n", + "\n", + " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \n", + "9012 2.96 88.33 34.10 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df3\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
41027cc0cb1871dfd558f11a2885c145d144AM Peak38R1Weekdays 5am-10pm Weekends 6am-9pm6e78cef03e15c5a71751030fc65e09b443.9358.926.682.96174.57154.2030
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "4102 7cc0cb1871dfd558f11a2885c145d144 AM Peak 38R 1 \n", + "\n", + " route_name shape_array_key \\\n", + "4102 Weekdays 5am-10pm Weekends 6am-9pm 6e78cef03e15c5a71751030fc65e09b4 \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", + "4102 43.93 58.92 6.68 2.96 \n", + "\n", + " total_vp vp_in_shape n_trips \n", + "4102 174.57 154.20 30 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df2\n" + ] + }, + { + "data": { + "text/plain": [ + "2.961175851535439" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "6.682525584870673" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "174.56666666666666" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "154.2" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_min
6161076fa3ed3fd8ef28a446eedb4c1e94e6aAM Peak9.3948.8239.002.97
6161147d8da2afbc3e4c78f9be4c6c53a7776AM Peak10.0049.3839.002.96
6161240590ba21f73ae1775a4538e34e67cc3AM Peak5.3497.6741.002.98
61613aa1b90f05357a29abc97fae90cd5bafeAM Peak9.7349.4541.002.95
616144d7cb7ddea2191f062c1ea3165df67c8AM Peak8.5857.9541.002.97
61615d4e16623b6cf749d8e389b905ab44089AM Peak4.4993.4342.002.99
61616ac9822b2d06b84790c10b7cbbf694410AM Peak7.1067.8342.002.96
61617092c4f20368b13bc5c92fda4fb16bd93AM Peak3.7754.9343.002.97
6161888cf8d5d7cd5d0c74a9b78d5e62fd8a3AM Peak7.0652.6243.002.96
616192845f0ae70ae06c3618d58fa99a1cddeAM Peak8.5751.5843.002.97
61620118feb9aedabd1b9bb354ce493c07735AM Peak5.0741.2744.002.96
61621adea377ad6e2ee6fb01e515724547b17AM Peak8.7256.7544.002.96
6162208797b2f64712704fd8fff8e3c3d1dc2AM Peak8.7252.3845.002.96
616236366ccc0f17673ccd753214d9cb433edAM Peak8.6184.3845.002.97
61624bda28536257ad187c4469c6f188635a6AM Peak4.1249.5745.002.97
61625585d84b16b4a418da661644e3a2314d1AM Peak8.2870.5045.002.98
616266789beb9af8dcb7d8b76afd9cce184a4AM Peak8.9147.7745.002.97
61627f36bb7386e5681abb33b97cf5c69b9dbAM Peak8.8155.5345.002.97
61628d2a71fa9b476e583315b6f203ccf8c67AM Peak3.3959.9545.002.97
6162982d0a21cfae4aa608861e3bb0172110cAM Peak7.3654.8045.002.81
6163018792d0d435c7d031ea66115a3e985eeAM Peak4.5145.5045.002.97
61631401e0c1fed455778f28834b60b66a4ffAM Peak3.2363.0745.002.97
61632abb36fd3b7d6d95dccf23ed5709bce69AM Peak8.4147.8246.002.97
61633743153cc91ad7ce0ff294a451347d87eAM Peak9.0148.3246.002.96
61634addecf9d30d8193bae40b35b00ced394AM Peak3.3959.9347.002.97
61635a95f1835bc54bdf7cf77b5bac056e103AM Peak3.6056.5747.002.97
616367dec1b76c7e8e141e19ecc2b780b202cAM Peak8.6152.4347.002.96
6163702efbb740cfeced30c8fd237ab981723AM Peak4.2947.7747.002.95
61638ff8e7fa939ecb40d43443bc1777eea65AM Peak3.3759.9047.002.95
668586058e1a8b5072b0ab2a6a6275aa125e7AM Peak6.0789.6339.002.98
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "61610 76fa3ed3fd8ef28a446eedb4c1e94e6a AM Peak 9.39 \n", + "61611 47d8da2afbc3e4c78f9be4c6c53a7776 AM Peak 10.00 \n", + "61612 40590ba21f73ae1775a4538e34e67cc3 AM Peak 5.34 \n", + "61613 aa1b90f05357a29abc97fae90cd5bafe AM Peak 9.73 \n", + "61614 4d7cb7ddea2191f062c1ea3165df67c8 AM Peak 8.58 \n", + "61615 d4e16623b6cf749d8e389b905ab44089 AM Peak 4.49 \n", + "61616 ac9822b2d06b84790c10b7cbbf694410 AM Peak 7.10 \n", + "61617 092c4f20368b13bc5c92fda4fb16bd93 AM Peak 3.77 \n", + "61618 88cf8d5d7cd5d0c74a9b78d5e62fd8a3 AM Peak 7.06 \n", + "61619 2845f0ae70ae06c3618d58fa99a1cdde AM Peak 8.57 \n", + "61620 118feb9aedabd1b9bb354ce493c07735 AM Peak 5.07 \n", + "61621 adea377ad6e2ee6fb01e515724547b17 AM Peak 8.72 \n", + "61622 08797b2f64712704fd8fff8e3c3d1dc2 AM Peak 8.72 \n", + "61623 6366ccc0f17673ccd753214d9cb433ed AM Peak 8.61 \n", + "61624 bda28536257ad187c4469c6f188635a6 AM Peak 4.12 \n", + "61625 585d84b16b4a418da661644e3a2314d1 AM Peak 8.28 \n", + "61626 6789beb9af8dcb7d8b76afd9cce184a4 AM Peak 8.91 \n", + "61627 f36bb7386e5681abb33b97cf5c69b9db AM Peak 8.81 \n", + "61628 d2a71fa9b476e583315b6f203ccf8c67 AM Peak 3.39 \n", + "61629 82d0a21cfae4aa608861e3bb0172110c AM Peak 7.36 \n", + "61630 18792d0d435c7d031ea66115a3e985ee AM Peak 4.51 \n", + "61631 401e0c1fed455778f28834b60b66a4ff AM Peak 3.23 \n", + "61632 abb36fd3b7d6d95dccf23ed5709bce69 AM Peak 8.41 \n", + "61633 743153cc91ad7ce0ff294a451347d87e AM Peak 9.01 \n", + "61634 addecf9d30d8193bae40b35b00ced394 AM Peak 3.39 \n", + "61635 a95f1835bc54bdf7cf77b5bac056e103 AM Peak 3.60 \n", + "61636 7dec1b76c7e8e141e19ecc2b780b202c AM Peak 8.61 \n", + "61637 02efbb740cfeced30c8fd237ab981723 AM Peak 4.29 \n", + "61638 ff8e7fa939ecb40d43443bc1777eea65 AM Peak 3.37 \n", + "66858 6058e1a8b5072b0ab2a6a6275aa125e7 AM Peak 6.07 \n", + "\n", + " rt_service_min service_minutes pings_per_min \n", + "61610 48.82 39.00 2.97 \n", + "61611 49.38 39.00 2.96 \n", + "61612 97.67 41.00 2.98 \n", + "61613 49.45 41.00 2.95 \n", + "61614 57.95 41.00 2.97 \n", + "61615 93.43 42.00 2.99 \n", + "61616 67.83 42.00 2.96 \n", + "61617 54.93 43.00 2.97 \n", + "61618 52.62 43.00 2.96 \n", + "61619 51.58 43.00 2.97 \n", + "61620 41.27 44.00 2.96 \n", + "61621 56.75 44.00 2.96 \n", + "61622 52.38 45.00 2.96 \n", + "61623 84.38 45.00 2.97 \n", + "61624 49.57 45.00 2.97 \n", + "61625 70.50 45.00 2.98 \n", + "61626 47.77 45.00 2.97 \n", + "61627 55.53 45.00 2.97 \n", + "61628 59.95 45.00 2.97 \n", + "61629 54.80 45.00 2.81 \n", + "61630 45.50 45.00 2.97 \n", + "61631 63.07 45.00 2.97 \n", + "61632 47.82 46.00 2.97 \n", + "61633 48.32 46.00 2.96 \n", + "61634 59.93 47.00 2.97 \n", + "61635 56.57 47.00 2.97 \n", + "61636 52.43 47.00 2.96 \n", + "61637 47.77 47.00 2.95 \n", + "61638 59.90 47.00 2.95 \n", + "66858 89.63 39.00 2.98 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_route(\"38R\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 80, + "id": "8168d872-49c3-44c2-bdc7-fa499124c5af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idtime_of_dayspeed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_sched
5973recIKnsnTdKQ0vsivWestern Contra Costa Transit AuthorityLynxRodeo/Hercules/San Francisco Transbay Terminal1AM Peak13.40650.0060.6904 - Oakland2.83NaN21.39
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id \\\n", + "5973 recIKnsnTdKQ0vsiv Western Contra Costa Transit Authority Lynx \n", + "\n", + " route_name direction_id \\\n", + "5973 Rodeo/Hercules/San Francisco Transbay Terminal 1 \n", + "\n", + " time_of_day speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "5973 AM Peak 13.40 6 50.00 60.69 \n", + "\n", + " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \n", + "5973 04 - Oakland 2.83 NaN 21.39 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df3\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
399078b44303c1714f6c6a4801637c2a5c9dAM PeakLynx1Rodeo/Hercules/San Francisco Transbay Terminal810696deb677bfeb1d0b09047031a9c850.0060.6913.402.83NaNNaN6
\n", + "
" + ], + "text/plain": [ + " gtfs_dataset_key time_of_day route_id direction_id \\\n", + "3990 78b44303c1714f6c6a4801637c2a5c9d AM Peak Lynx 1 \n", + "\n", + " route_name \\\n", + "3990 Rodeo/Hercules/San Francisco Transbay Terminal \n", + "\n", + " shape_array_key avg_sched_trip_min avg_rt_trip_min \\\n", + "3990 810696deb677bfeb1d0b09047031a9c8 50.00 60.69 \n", + "\n", + " speed_mph avg_pings_per_min total_vp vp_in_shape n_trips \n", + "3990 13.40 2.83 NaN NaN 6 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df2\n" + ] + }, + { + "data": { + "text/plain": [ + "2.8253065827801014" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "13.403813542450534" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "nan" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "nan" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_min
1298714c9fc6b379e1fe869ba37bfd79a8245AM Peak13.3161.4050.002.72
129902ee02fe17c9acad17ccb44eaaea4debeAM Peak12.6963.1850.002.90
1299178a402dfe7d89e0b919a193ac59c69aeAM Peak12.6163.1850.002.82
129942bb4857e894a94d48a79620858c8384eAM Peak12.4464.4550.002.84
12997b97a1995cd54253c58e82bb7c9ad3414AM Peak15.5952.8250.002.82
13004baeeed7c3d6ab74ad9ff40f42a2f1da3AM Peak13.7859.1350.002.86
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "12987 14c9fc6b379e1fe869ba37bfd79a8245 AM Peak 13.31 \n", + "12990 2ee02fe17c9acad17ccb44eaaea4debe AM Peak 12.69 \n", + "12991 78a402dfe7d89e0b919a193ac59c69ae AM Peak 12.61 \n", + "12994 2bb4857e894a94d48a79620858c8384e AM Peak 12.44 \n", + "12997 b97a1995cd54253c58e82bb7c9ad3414 AM Peak 15.59 \n", + "13004 baeeed7c3d6ab74ad9ff40f42a2f1da3 AM Peak 13.78 \n", + "\n", + " rt_service_min service_minutes pings_per_min \n", + "12987 61.40 50.00 2.72 \n", + "12990 63.18 50.00 2.90 \n", + "12991 63.18 50.00 2.82 \n", + "12994 64.45 50.00 2.84 \n", + "12997 52.82 50.00 2.82 \n", + "13004 59.13 50.00 2.86 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_route(\"Lynx\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "markdown", + "id": "470e444a-41ce-47c9-8e70-9866904e936e", + "metadata": {}, + "source": [ + "#### Test grouping" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b8a1b06b-0e81-4641-8545-b5af34f6b47e", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 81, + "id": "97d81b5c-5d47-405c-a9bd-2a51ffe00b73", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", + " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", + " 'speed_mph', 'service_minutes', 'pings_per_min', 'spatial_accuracy_pct',\n", + " 'rt_triptime_w_gtfs_pct', 'rt_v_scheduled_trip_time_pct',\n", + " 'schedule_gtfs_dataset_key', 'direction_id', 'route_id',\n", + " 'common_shape_id', 'shape_array_key', 'route_name_used',\n", + " 'service_hours', 'trip_first_departure_datetime_pacific',\n", + " 'time_of_day'],\n", + " dtype='object')" + ] + }, + "execution_count": 81, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df2.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 82, + "id": "4e6e3f60-5e14-4fe4-96b4-9df6be9db761", + "metadata": {}, + "outputs": [], + "source": [ + "route_groupby_cols = [\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"time_of_day\",\n", + " \"route_id\",\n", + " \"direction_id\",\n", + " \"route_name_used\",\n", + " \"shape_array_key\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "e931b56b-1048-49f8-b0c8-4d9ea4fd34db", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "['schedule_gtfs_dataset_key',\n", + " 'time_of_day',\n", + " 'route_id',\n", + " 'direction_id',\n", + " 'route_name_used',\n", + " 'shape_array_key']" + ] + }, + "execution_count": 83, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "route_groupby_cols" + ] + }, + { + "cell_type": "code", + "execution_count": 84, + "id": "3b18d789-a84f-42c6-addc-2f97b9e6fafc", + "metadata": {}, + "outputs": [], + "source": [ + "test1 = (\n", + " df2.groupby(route_groupby_cols, observed=False, group_keys=True)\n", + " .agg(\n", + " {\n", + " \"service_minutes\": \"mean\",\n", + " \"rt_service_min\": \"mean\",\n", + " \"speed_mph\": \"mean\",\n", + " \"pings_per_min\": \"mean\",\n", + " \"total_vp\": \"mean\",\n", + " \"vp_in_shape\": \"mean\",\n", + " \"trip_instance_key\": \"count\",\n", + " }\n", + " )\n", + " .reset_index()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 85, + "id": "429f3c5b-e1f6-40a0-86a3-586ad00685fb", + "metadata": {}, + "outputs": [], + "source": [ + "def checkout_test_groups(\n", + " df: pd.DataFrame, route_id: str, time_of_day: str, direction_id: int\n", + "):\n", + " test_cols = [\n", + " \"trip_instance_key\",\n", + " \"service_minutes\",\n", + " \"rt_service_min\",\n", + " \"pings_per_min\",\n", + " \"speed_mph\",\n", + " \"total_vp\",\n", + " \"vp_in_shape\",\n", + " ]\n", + " display(\n", + " df.loc[\n", + " (df.route_id == route_id)\n", + " & (df.time_of_day == time_of_day)\n", + " & (df.direction_id == direction_id)\n", + " ][test_cols]\n", + " )\n", + " df2_cols = [\n", + " \"trip_instance_key\",\n", + " \"time_of_day\",\n", + " \"speed_mph\",\n", + " \"rt_service_min\",\n", + " \"service_minutes\",\n", + " \"pings_per_min\",\n", + " \"total_vp\",\n", + " \"vp_in_shape\",\n", + " ]\n", + " df2_filtered = df2.loc[\n", + " (df2.route_id == route_id)\n", + " & (df2.time_of_day == time_of_day)\n", + " & (df2.direction_id == direction_id)\n", + " ]\n", + " display(df2_filtered.pings_per_min.mean())\n", + " display(df2_filtered.speed_mph.mean())\n", + " display(df2_filtered.total_vp.mean())\n", + " display(df2_filtered.vp_in_shape.mean())\n", + " display(df2_filtered[df2_cols])" + ] + }, + { + "cell_type": "code", + "execution_count": 86, + "id": "ab2cc904-d02c-414b-ba2b-efcb351afa8f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
1733259.0099.582.686.89267.50187.50
\n", + "
" + ], + "text/plain": [ + " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", + "1733 2 59.00 99.58 2.68 \n", + "\n", + " speed_mph total_vp vp_in_shape \n", + "1733 6.89 267.50 187.50 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "2.6849179704528776" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "6.887376703252869" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "267.5" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "187.5" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
2486648a01217589c2faa46db395d6cf8317dEarly AM9.4095.0258.002.65252.00180.00
2486770674803a1c4416fc49f883bc3b2c18bEarly AM4.38104.1360.002.72283.00195.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "24866 48a01217589c2faa46db395d6cf8317d Early AM 9.40 \n", + "24867 70674803a1c4416fc49f883bc3b2c18b Early AM 4.38 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_vp vp_in_shape \n", + "24866 95.02 58.00 2.65 252.00 180.00 \n", + "24867 104.13 60.00 2.72 283.00 195.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_test_groups(test1, \"265-13172\", \"Early AM\", 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "7ad06e7a-f7fa-4604-b28e-c08802ce4883", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
6840138.00313.481.422.16446.00101.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", + "6840 1 38.00 313.48 1.42 \n", + "\n", + " speed_mph total_vp vp_in_shape \n", + "6840 2.16 446.00 101.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "1.422723164442554" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "2.158633017384419" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "446.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "101.0" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
277586fadf197f5bb105ed916de0a337386eeEarly AM2.16313.4838.001.42446.00101.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "27758 6fadf197f5bb105ed916de0a337386ee Early AM 2.16 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_vp vp_in_shape \n", + "27758 313.48 38.00 1.42 446.00 101.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_test_groups(test1, \"5671\", \"Early AM\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "79e3584a-ac1f-4ecc-9b69-8f4551b8b632", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
3990650.0060.692.8313.40NaNNaN
\n", + "
" + ], + "text/plain": [ + " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", + "3990 6 50.00 60.69 2.83 \n", + "\n", + " speed_mph total_vp vp_in_shape \n", + "3990 13.40 NaN NaN " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "2.8253065827801014" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "13.403813542450534" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "nan" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "nan" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
1298714c9fc6b379e1fe869ba37bfd79a8245AM Peak13.3161.4050.002.72NaNNaN
129902ee02fe17c9acad17ccb44eaaea4debeAM Peak12.6963.1850.002.90NaNNaN
1299178a402dfe7d89e0b919a193ac59c69aeAM Peak12.6163.1850.002.82NaNNaN
129942bb4857e894a94d48a79620858c8384eAM Peak12.4464.4550.002.84NaNNaN
12997b97a1995cd54253c58e82bb7c9ad3414AM Peak15.5952.8250.002.82NaNNaN
13004baeeed7c3d6ab74ad9ff40f42a2f1da3AM Peak13.7859.1350.002.86NaNNaN
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "12987 14c9fc6b379e1fe869ba37bfd79a8245 AM Peak 13.31 \n", + "12990 2ee02fe17c9acad17ccb44eaaea4debe AM Peak 12.69 \n", + "12991 78a402dfe7d89e0b919a193ac59c69ae AM Peak 12.61 \n", + "12994 2bb4857e894a94d48a79620858c8384e AM Peak 12.44 \n", + "12997 b97a1995cd54253c58e82bb7c9ad3414 AM Peak 15.59 \n", + "13004 baeeed7c3d6ab74ad9ff40f42a2f1da3 AM Peak 13.78 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_vp vp_in_shape \n", + "12987 61.40 50.00 2.72 NaN NaN \n", + "12990 63.18 50.00 2.90 NaN NaN \n", + "12991 63.18 50.00 2.82 NaN NaN \n", + "12994 64.45 50.00 2.84 NaN NaN \n", + "12997 52.82 50.00 2.82 NaN NaN \n", + "13004 59.13 50.00 2.86 NaN NaN " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_test_groups(test1, \"Lynx\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "f6043d40-9178-4547-97f2-6140585ea418", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
41023043.9358.922.966.68174.57154.20
\n", + "
" + ], + "text/plain": [ + " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", + "4102 30 43.93 58.92 2.96 \n", + "\n", + " speed_mph total_vp vp_in_shape \n", + "4102 6.68 174.57 154.20 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "2.961175851535439" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "6.682525584870673" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "174.56666666666666" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "154.2" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
6161076fa3ed3fd8ef28a446eedb4c1e94e6aAM Peak9.3948.8239.002.97145.00145.00
6161147d8da2afbc3e4c78f9be4c6c53a7776AM Peak10.0049.3839.002.96146.00146.00
6161240590ba21f73ae1775a4538e34e67cc3AM Peak5.3497.6741.002.98291.00192.00
61613aa1b90f05357a29abc97fae90cd5bafeAM Peak9.7349.4541.002.95146.00146.00
616144d7cb7ddea2191f062c1ea3165df67c8AM Peak8.5857.9541.002.97172.00163.00
61615d4e16623b6cf749d8e389b905ab44089AM Peak4.4993.4342.002.99279.00204.00
61616ac9822b2d06b84790c10b7cbbf694410AM Peak7.1067.8342.002.96201.00162.00
61617092c4f20368b13bc5c92fda4fb16bd93AM Peak3.7754.9343.002.97163.00163.00
6161888cf8d5d7cd5d0c74a9b78d5e62fd8a3AM Peak7.0652.6243.002.96156.00156.00
616192845f0ae70ae06c3618d58fa99a1cddeAM Peak8.5751.5843.002.97153.00153.00
61620118feb9aedabd1b9bb354ce493c07735AM Peak5.0741.2744.002.96122.0078.00
61621adea377ad6e2ee6fb01e515724547b17AM Peak8.7256.7544.002.96168.00168.00
6162208797b2f64712704fd8fff8e3c3d1dc2AM Peak8.7252.3845.002.96155.00146.00
616236366ccc0f17673ccd753214d9cb433edAM Peak8.6184.3845.002.97251.00187.00
61624bda28536257ad187c4469c6f188635a6AM Peak4.1249.5745.002.97147.00132.00
61625585d84b16b4a418da661644e3a2314d1AM Peak8.2870.5045.002.98210.00180.00
616266789beb9af8dcb7d8b76afd9cce184a4AM Peak8.9147.7745.002.97142.00142.00
61627f36bb7386e5681abb33b97cf5c69b9dbAM Peak8.8155.5345.002.97165.00165.00
61628d2a71fa9b476e583315b6f203ccf8c67AM Peak3.3959.9545.002.97178.00178.00
6162982d0a21cfae4aa608861e3bb0172110cAM Peak7.3654.8045.002.81154.00154.00
6163018792d0d435c7d031ea66115a3e985eeAM Peak4.5145.5045.002.97135.00135.00
61631401e0c1fed455778f28834b60b66a4ffAM Peak3.2363.0745.002.97187.00135.00
61632abb36fd3b7d6d95dccf23ed5709bce69AM Peak8.4147.8246.002.97142.00141.00
61633743153cc91ad7ce0ff294a451347d87eAM Peak9.0148.3246.002.96143.00143.00
61634addecf9d30d8193bae40b35b00ced394AM Peak3.3959.9347.002.97178.00142.00
61635a95f1835bc54bdf7cf77b5bac056e103AM Peak3.6056.5747.002.97168.00130.00
616367dec1b76c7e8e141e19ecc2b780b202cAM Peak8.6152.4347.002.96155.00155.00
6163702efbb740cfeced30c8fd237ab981723AM Peak4.2947.7747.002.95141.00141.00
61638ff8e7fa939ecb40d43443bc1777eea65AM Peak3.3759.9047.002.95177.00141.00
668586058e1a8b5072b0ab2a6a6275aa125e7AM Peak6.0789.6339.002.98267.00203.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "61610 76fa3ed3fd8ef28a446eedb4c1e94e6a AM Peak 9.39 \n", + "61611 47d8da2afbc3e4c78f9be4c6c53a7776 AM Peak 10.00 \n", + "61612 40590ba21f73ae1775a4538e34e67cc3 AM Peak 5.34 \n", + "61613 aa1b90f05357a29abc97fae90cd5bafe AM Peak 9.73 \n", + "61614 4d7cb7ddea2191f062c1ea3165df67c8 AM Peak 8.58 \n", + "61615 d4e16623b6cf749d8e389b905ab44089 AM Peak 4.49 \n", + "61616 ac9822b2d06b84790c10b7cbbf694410 AM Peak 7.10 \n", + "61617 092c4f20368b13bc5c92fda4fb16bd93 AM Peak 3.77 \n", + "61618 88cf8d5d7cd5d0c74a9b78d5e62fd8a3 AM Peak 7.06 \n", + "61619 2845f0ae70ae06c3618d58fa99a1cdde AM Peak 8.57 \n", + "61620 118feb9aedabd1b9bb354ce493c07735 AM Peak 5.07 \n", + "61621 adea377ad6e2ee6fb01e515724547b17 AM Peak 8.72 \n", + "61622 08797b2f64712704fd8fff8e3c3d1dc2 AM Peak 8.72 \n", + "61623 6366ccc0f17673ccd753214d9cb433ed AM Peak 8.61 \n", + "61624 bda28536257ad187c4469c6f188635a6 AM Peak 4.12 \n", + "61625 585d84b16b4a418da661644e3a2314d1 AM Peak 8.28 \n", + "61626 6789beb9af8dcb7d8b76afd9cce184a4 AM Peak 8.91 \n", + "61627 f36bb7386e5681abb33b97cf5c69b9db AM Peak 8.81 \n", + "61628 d2a71fa9b476e583315b6f203ccf8c67 AM Peak 3.39 \n", + "61629 82d0a21cfae4aa608861e3bb0172110c AM Peak 7.36 \n", + "61630 18792d0d435c7d031ea66115a3e985ee AM Peak 4.51 \n", + "61631 401e0c1fed455778f28834b60b66a4ff AM Peak 3.23 \n", + "61632 abb36fd3b7d6d95dccf23ed5709bce69 AM Peak 8.41 \n", + "61633 743153cc91ad7ce0ff294a451347d87e AM Peak 9.01 \n", + "61634 addecf9d30d8193bae40b35b00ced394 AM Peak 3.39 \n", + "61635 a95f1835bc54bdf7cf77b5bac056e103 AM Peak 3.60 \n", + "61636 7dec1b76c7e8e141e19ecc2b780b202c AM Peak 8.61 \n", + "61637 02efbb740cfeced30c8fd237ab981723 AM Peak 4.29 \n", + "61638 ff8e7fa939ecb40d43443bc1777eea65 AM Peak 3.37 \n", + "66858 6058e1a8b5072b0ab2a6a6275aa125e7 AM Peak 6.07 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_vp vp_in_shape \n", + "61610 48.82 39.00 2.97 145.00 145.00 \n", + "61611 49.38 39.00 2.96 146.00 146.00 \n", + "61612 97.67 41.00 2.98 291.00 192.00 \n", + "61613 49.45 41.00 2.95 146.00 146.00 \n", + "61614 57.95 41.00 2.97 172.00 163.00 \n", + "61615 93.43 42.00 2.99 279.00 204.00 \n", + "61616 67.83 42.00 2.96 201.00 162.00 \n", + "61617 54.93 43.00 2.97 163.00 163.00 \n", + "61618 52.62 43.00 2.96 156.00 156.00 \n", + "61619 51.58 43.00 2.97 153.00 153.00 \n", + "61620 41.27 44.00 2.96 122.00 78.00 \n", + "61621 56.75 44.00 2.96 168.00 168.00 \n", + "61622 52.38 45.00 2.96 155.00 146.00 \n", + "61623 84.38 45.00 2.97 251.00 187.00 \n", + "61624 49.57 45.00 2.97 147.00 132.00 \n", + "61625 70.50 45.00 2.98 210.00 180.00 \n", + "61626 47.77 45.00 2.97 142.00 142.00 \n", + "61627 55.53 45.00 2.97 165.00 165.00 \n", + "61628 59.95 45.00 2.97 178.00 178.00 \n", + "61629 54.80 45.00 2.81 154.00 154.00 \n", + "61630 45.50 45.00 2.97 135.00 135.00 \n", + "61631 63.07 45.00 2.97 187.00 135.00 \n", + "61632 47.82 46.00 2.97 142.00 141.00 \n", + "61633 48.32 46.00 2.96 143.00 143.00 \n", + "61634 59.93 47.00 2.97 178.00 142.00 \n", + "61635 56.57 47.00 2.97 168.00 130.00 \n", + "61636 52.43 47.00 2.96 155.00 155.00 \n", + "61637 47.77 47.00 2.95 141.00 141.00 \n", + "61638 59.90 47.00 2.95 177.00 141.00 \n", + "66858 89.63 39.00 2.98 267.00 203.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_test_groups(test1, \"38R\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "bff53f95-8520-44f0-bc9c-96d5bc5a8323", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
924689.3820.121.8712.8933.1232.75
\n", + "
" + ], + "text/plain": [ + " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", + "9246 8 9.38 20.12 1.87 \n", + "\n", + " speed_mph total_vp vp_in_shape \n", + "9246 12.89 33.12 32.75 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "1.8731670674101006" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "12.889389560897447" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "33.125" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "32.75" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
7794d2c2b948028176c5305d4bf770e75055AM Peak8.5921.379.001.5934.0034.00
782552571020078afa22979c626adb4ce938AM Peak11.5430.689.001.1736.0036.00
7826a5e18c87995eb491eaaa9bf7ac2284bbAM Peak13.2922.789.001.3631.0031.00
78275ff3c2cbe06fee1237a653998d155713AM Peak10.1325.859.001.3234.0033.00
78281da9828bbb28a1b4df2cc8d1e4cfa76fAM Peak7.3724.939.001.3233.0033.00
797615db3e9fa139b2b88aafbc8e3f13133aAM Peak17.6011.7310.002.7332.0032.00
7977683fc69ab6dc56de27d9e7f43244836cAM Peak15.9712.6510.002.7735.0034.00
7978d28a02c5a1956e27033d43ca26bdb868AM Peak18.6311.0010.002.7330.0029.00
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph rt_service_min \\\n", + "7794 d2c2b948028176c5305d4bf770e75055 AM Peak 8.59 21.37 \n", + "7825 52571020078afa22979c626adb4ce938 AM Peak 11.54 30.68 \n", + "7826 a5e18c87995eb491eaaa9bf7ac2284bb AM Peak 13.29 22.78 \n", + "7827 5ff3c2cbe06fee1237a653998d155713 AM Peak 10.13 25.85 \n", + "7828 1da9828bbb28a1b4df2cc8d1e4cfa76f AM Peak 7.37 24.93 \n", + "7976 15db3e9fa139b2b88aafbc8e3f13133a AM Peak 17.60 11.73 \n", + "7977 683fc69ab6dc56de27d9e7f43244836c AM Peak 15.97 12.65 \n", + "7978 d28a02c5a1956e27033d43ca26bdb868 AM Peak 18.63 11.00 \n", + "\n", + " service_minutes pings_per_min total_vp vp_in_shape \n", + "7794 9.00 1.59 34.00 34.00 \n", + "7825 9.00 1.17 36.00 36.00 \n", + "7826 9.00 1.36 31.00 31.00 \n", + "7827 9.00 1.32 34.00 33.00 \n", + "7828 9.00 1.32 33.00 33.00 \n", + "7976 10.00 2.73 32.00 32.00 \n", + "7977 10.00 2.77 35.00 34.00 \n", + "7978 10.00 2.73 30.00 29.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" } ], "source": [ - "final_df.drop(columns = ['geometry', 'base64_url']).head()" + "checkout_test_groups(test1, \"16611\", \"AM Peak\", 0)" ] } ], diff --git a/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log b/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log index be9ccb4f7..e4ecbff91 100644 --- a/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log +++ b/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log @@ -196,3 +196,16 @@ 2023-12-19 13:38:59.052 | INFO | __main__:vp_usable_metrics:317 - Find vps that fall into shapes: 0:13:30.215262 2023-12-19 13:39:01.648 | INFO | __main__:vp_usable_metrics:329 - Spatial accuracy grouping metric: 0:00:02.595941 2023-12-19 13:40:04.373 | INFO | __main__:vp_usable_metrics:352 - Total run time for metrics on 2023-03-15: 0:18:54.290504 +2023-12-28 12:49:00.723 | INFO | __main__:vp_usable_metrics:273 - Rt service min: 0:00:01.472212 +2023-12-28 12:50:57.636 | INFO | __main__:vp_usable_metrics:288 - Grouping by each minute: 0:01:56.913503 +2023-12-28 12:50:57.691 | INFO | __main__:vp_usable_metrics:293 - Spatial accuracy metric: 0:00:00.054926 +2023-12-28 12:53:24.958 | INFO | __main__:vp_usable_metrics:302 - Buffering: 0:02:27.266647 +2023-12-28 13:09:02.926 | INFO | __main__:vp_usable_metrics:314 - Find vps that fall into shapes: 0:15:37.968354 +2023-12-28 13:09:05.964 | INFO | __main__:vp_usable_metrics:326 - Spatial accuracy grouping metric: 0:00:03.037395 +2023-12-28 14:06:06.454 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.851373 +2023-12-28 14:07:55.044 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:01:48.589721 +2023-12-28 14:07:55.069 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.025656 +2023-12-28 14:10:03.766 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:08.696958 +2023-12-28 14:25:39.680 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:15:35.914302 +2023-12-28 14:25:42.477 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.796187 +2023-12-28 14:26:52.690 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-12-13: 0:20:47.087647 diff --git a/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py b/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py index 1d8e40ce0..06025e45a 100644 --- a/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py +++ b/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py @@ -4,7 +4,7 @@ import pandas as pd from calitp_data_analysis.geography_utils import WGS84 import vp_spatial_accuracy -import update_vars2 +import update_vars from segment_speed_utils.project_vars import ( GCS_FILE_PATH, PROJECT_CRS, @@ -28,6 +28,7 @@ def load_trip_speeds(analysis_date): columns=[ "trip_instance_key", "speed_mph", + "service_minutes", ]) return df diff --git a/rt_scheduled_v_ran/scripts/update_vars.py b/rt_scheduled_v_ran/scripts/update_vars.py index b898faed8..abc839fd9 100644 --- a/rt_scheduled_v_ran/scripts/update_vars.py +++ b/rt_scheduled_v_ran/scripts/update_vars.py @@ -1,7 +1,9 @@ from shared_utils import rt_dates -months = ["dec","nov", "oct", "sep", "aug", - "jul", "jun", "may", "apr", "mar"] +months = ["dec"] + +#months = ["dec","nov", "oct", "sep", "aug", +# "jul", "jun", "may", "apr", "mar"] analysis_date_list = [ rt_dates.DATES[f"{m}2023"] for m in months From 113a38188e4632c3b964db6ea5d570fdb72bb3af Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Wed, 3 Jan 2024 20:38:52 +0000 Subject: [PATCH 4/6] more testing for groupby --- .../06_vp_usable_exploration.ipynb | 1951 +++-------------- 1 file changed, 248 insertions(+), 1703 deletions(-) diff --git a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb index 4422618bc..be4f0a5b3 100644 --- a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb +++ b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb @@ -976,10 +976,10 @@ " \n", " \n", " \n", - " 39462\n", - " 1341.02\n", + " 39613\n", + " 1326.03\n", " 12.00\n", - " 11075.14\n", + " 10950.28\n", " \n", " \n", "\n", @@ -987,7 +987,7 @@ ], "text/plain": [ " rt_service_min service_minutes rt_v_scheduled_trip_time_pct\n", - "39462 1341.02 12.00 11075.14" + "39613 1326.03 12.00 10950.28" ] }, "execution_count": 32, @@ -2104,7 +2104,7 @@ "outputs": [], "source": [ "df3 = (\n", - " df2.groupby(route_cols, observed=False, group_keys=True)\n", + " df2.groupby(route_cols)\n", " .agg(\n", " {\n", " \"service_minutes\": \"mean\",\n", @@ -2125,24 +2125,13 @@ "execution_count": 55, "id": "f086985e-076e-4b22-825e-bf230cecf2f9", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'df3 = df3.assign(\\n rt_service_min=df2.rt_service_min.round(1),\\n service_minutes=df2.service_minutes.round(1),\\n speed_mph=df2.speed_mph.round(1),\\n pings_per_min=df2.pings_per_min.round(1),\\n).rename(\\n columns={\\n \"service_minutes\": \"avg_sched_trip_min\",\\n \"rt_service_min\": \"avg_rt_trip_min\",\\n \"trip_instance_key\": \"n_trips\",\\n \"route_name_used\": \"route_name\",\\n \"pings_per_min\": \"avg_pings_per_min\",\\n \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\\n }\\n)'" - ] - }, - "execution_count": 55, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "\"\"\"df3 = df3.assign(\n", - " rt_service_min=df2.rt_service_min.round(1),\n", - " service_minutes=df2.service_minutes.round(1),\n", - " speed_mph=df2.speed_mph.round(1),\n", - " pings_per_min=df2.pings_per_min.round(1),\n", + "df3 = df3.assign(\n", + " rt_service_min=df3.rt_service_min.round(1),\n", + " service_minutes=df3.service_minutes.round(1),\n", + " speed_mph=df3.speed_mph.round(1),\n", + " pings_per_min=df3.pings_per_min.round(1),\n", ").rename(\n", " columns={\n", " \"service_minutes\": \"avg_sched_trip_min\",\n", @@ -2152,7 +2141,7 @@ " \"pings_per_min\": \"avg_pings_per_min\",\n", " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", " }\n", - ")\"\"\"" + ")" ] }, { @@ -2160,9 +2149,20 @@ "execution_count": 56, "id": "8236bf29-abaa-466c-978c-10199ffa840c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'df3 = df3.rename(\\n columns={\\n \"service_minutes\": \"avg_sched_trip_min\",\\n \"rt_service_min\": \"avg_rt_trip_min\",\\n \"trip_instance_key\": \"n_trips\",\\n \"route_name_used\": \"route_name\",\\n \"pings_per_min\": \"avg_pings_per_min\",\\n \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\\n }\\n)'" + ] + }, + "execution_count": 56, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "df3 = df3.rename(\n", + "\"\"\"df3 = df3.rename(\n", " columns={\n", " \"service_minutes\": \"avg_sched_trip_min\",\n", " \"rt_service_min\": \"avg_rt_trip_min\",\n", @@ -2171,7 +2171,7 @@ " \"pings_per_min\": \"avg_pings_per_min\",\n", " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", " }\n", - ")" + ")\"\"\"" ] }, { @@ -2226,9 +2226,9 @@ " Downtown San Rafael - Sausalito\n", " 15dd7643b1198055544091c267b67397\n", " 55.60\n", - " 57.46\n", - " 15.17\n", - " 2.76\n", + " 57.50\n", + " 15.20\n", + " 2.80\n", " 151.00\n", " 142.80\n", " 5\n", @@ -2242,9 +2242,9 @@ " Downtown San Rafael - Sausalito\n", " de1df9489fe7de15f492c9308289102b\n", " 56.20\n", - " 55.01\n", - " 9.85\n", - " 2.54\n", + " 55.00\n", + " 9.80\n", + " 2.50\n", " 147.60\n", " 140.40\n", " 5\n", @@ -2263,8 +2263,8 @@ "1 Downtown San Rafael - Sausalito de1df9489fe7de15f492c9308289102b \n", "\n", " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", - "0 55.60 57.46 15.17 2.76 \n", - "1 56.20 55.01 9.85 2.54 \n", + "0 55.60 57.50 15.20 2.80 \n", + "1 56.20 55.00 9.80 2.50 \n", "\n", " total_vp vp_in_shape n_trips \n", "0 151.00 142.80 5 \n", @@ -2506,9 +2506,9 @@ " Downtown San Rafael - Sausalito\n", " 15dd7643b1198055544091c267b67397\n", " 55.60\n", - " 57.46\n", - " 15.17\n", - " 2.76\n", + " 57.50\n", + " 15.20\n", + " 2.80\n", " 151.00\n", " 142.80\n", " 5\n", @@ -2526,9 +2526,9 @@ " Downtown San Rafael - Sausalito\n", " de1df9489fe7de15f492c9308289102b\n", " 56.20\n", - " 55.01\n", - " 9.85\n", - " 2.54\n", + " 55.00\n", + " 9.80\n", + " 2.50\n", " 147.60\n", " 140.40\n", " 5\n", @@ -2546,9 +2546,9 @@ " Tiburon - Strawberry\n", " 1a27f5f0785ae953f5dfded42e6d4e0e\n", " 18.00\n", - " 17.33\n", - " 24.76\n", - " 2.91\n", + " 17.30\n", + " 24.80\n", + " 2.90\n", " 50.50\n", " 44.00\n", " 2\n", @@ -2565,10 +2565,10 @@ " 1\n", " Tiburon - Strawberry\n", " a8c9fae8e07d7a553264d4de2ffb704d\n", - " 19.33\n", - " 27.56\n", + " 19.30\n", + " 27.60\n", " 9.80\n", - " 2.93\n", + " 2.90\n", " 80.67\n", " 79.00\n", " 3\n", @@ -2586,9 +2586,9 @@ " Downtown San Rafael - Marin City\n", " 5c1924a3c980f9ec07d63f216d3de7af\n", " 37.80\n", - " 42.08\n", + " 42.10\n", " 10.20\n", - " 2.94\n", + " 2.90\n", " 105.00\n", " 72.00\n", " 5\n", @@ -2617,11 +2617,11 @@ "4 Downtown San Rafael - Marin City 5c1924a3c980f9ec07d63f216d3de7af \n", "\n", " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", - "0 55.60 57.46 15.17 2.76 \n", - "1 56.20 55.01 9.85 2.54 \n", - "2 18.00 17.33 24.76 2.91 \n", - "3 19.33 27.56 9.80 2.93 \n", - "4 37.80 42.08 10.20 2.94 \n", + "0 55.60 57.50 15.20 2.80 \n", + "1 56.20 55.00 9.80 2.50 \n", + "2 18.00 17.30 24.80 2.90 \n", + "3 19.30 27.60 9.80 2.90 \n", + "4 37.80 42.10 10.20 2.90 \n", "\n", " total_vp vp_in_shape n_trips \\\n", "0 151.00 142.80 5 \n", @@ -2853,11 +2853,11 @@ "count 11397.00\n", "mean 2.38\n", "std 0.56\n", - "min 0.06\n", - "25% 1.94\n", - "50% 2.54\n", - "75% 2.91\n", - "max 3.45\n", + "min 0.10\n", + "25% 1.90\n", + "50% 2.50\n", + "75% 2.90\n", + "max 3.50\n", "Name: avg_pings_per_min, dtype: float64" ] }, @@ -2936,90 +2936,85 @@ " \n", " \n", " \n", - " 10425\n", - " recG5aXxDPI645S86\n", - " OmniTrans\n", - " 10950\n", - " CAL STATE-SIERRA WAY-SAN BDNO\n", + " 5645\n", + " rec43oyrfhtPDdRHj\n", + " City of Rancho Cordova\n", + " 084\n", + " WATT\n", " 1\n", - " Midday\n", - " 9.56\n", - " 10\n", - " 43.80\n", - " 59.57\n", - " 08 - San Bernardino\n", - " 2.53\n", - " 94.52\n", - " 36.00\n", + " PM Peak\n", + " 6.40\n", + " 8\n", + " 65.20\n", + " 87.60\n", + " 03 - Marysville\n", + " 1.80\n", + " 100.00\n", + " 34.36\n", + " \n", + " \n", + " 9678\n", + " receZJ9sEnP9vy3g0\n", + " Monterey-Salinas Transit\n", + " 020-131\n", + " Monterey - Salinas\n", + " 0\n", + " AM Peak\n", + " 10.90\n", + " 6\n", + " 50.20\n", + " 56.50\n", + " 05 - San Luis Obispo\n", + " 1.90\n", + " 99.69\n", + " 12.55\n", " \n", " \n", - " 7407\n", + " 7041\n", " recPnGkwdpnr8jmHB\n", " Los Angeles County Metropolitan Transportation Authority\n", - " 236-13172\n", - " SYLMAR STA-ENCINO VIA BALBOA BL RINALDI ST\n", - " 1\n", - " Early AM\n", - " 11.77\n", - " 3\n", - " 62.67\n", - " 84.16\n", + " 754-13172\n", + " HOLLYWOOD - ATHENS VIA VERMONT AV\n", + " 0\n", + " Midday\n", + " 7.30\n", + " 31\n", + " 69.80\n", + " 91.00\n", " 07 - Los Angeles\n", - " 2.66\n", - " 70.88\n", - " 34.29\n", - " \n", - " \n", - " 10210\n", - " recZgWVXkpix390of\n", - " San Joaquin Regional Transit District\n", - " 580\n", - " DTC - SECTION/ORO\n", - " 1\n", - " PM Peak\n", - " 9.62\n", - " 5\n", - " 22.00\n", - " 40.15\n", - " 10 - Stockton\n", - " 2.95\n", - " 97.64\n", - " 82.52\n", + " 2.40\n", + " 96.69\n", + " 30.37\n", " \n", " \n", "\n", "" ], "text/plain": [ - " org_id \\\n", - "10425 recG5aXxDPI645S86 \n", - "7407 recPnGkwdpnr8jmHB \n", - "10210 recZgWVXkpix390of \n", - "\n", - " agency route_id \\\n", - "10425 OmniTrans 10950 \n", - "7407 Los Angeles County Metropolitan Transportation Authority 236-13172 \n", - "10210 San Joaquin Regional Transit District 580 \n", + " org_id \\\n", + "5645 rec43oyrfhtPDdRHj \n", + "9678 receZJ9sEnP9vy3g0 \n", + "7041 recPnGkwdpnr8jmHB \n", "\n", - " route_name direction_id time_of_day \\\n", - "10425 CAL STATE-SIERRA WAY-SAN BDNO 1 Midday \n", - "7407 SYLMAR STA-ENCINO VIA BALBOA BL RINALDI ST 1 Early AM \n", - "10210 DTC - SECTION/ORO 1 PM Peak \n", + " agency route_id \\\n", + "5645 City of Rancho Cordova 084 \n", + "9678 Monterey-Salinas Transit 020-131 \n", + "7041 Los Angeles County Metropolitan Transportation Authority 754-13172 \n", "\n", - " speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "10425 9.56 10 43.80 59.57 \n", - "7407 11.77 3 62.67 84.16 \n", - "10210 9.62 5 22.00 40.15 \n", + " route_name direction_id time_of_day speed_mph \\\n", + "5645 WATT 1 PM Peak 6.40 \n", + "9678 Monterey - Salinas 0 AM Peak 10.90 \n", + "7041 HOLLYWOOD - ATHENS VIA VERMONT AV 0 Midday 7.30 \n", "\n", - " district_name avg_pings_per_min avg_pct_vp_shape \\\n", - "10425 08 - San Bernardino 2.53 94.52 \n", - "7407 07 - Los Angeles 2.66 70.88 \n", - "10210 10 - Stockton 2.95 97.64 \n", + " n_trips avg_sched_trip_min avg_rt_trip_min district_name \\\n", + "5645 8 65.20 87.60 03 - Marysville \n", + "9678 6 50.20 56.50 05 - San Luis Obispo \n", + "7041 31 69.80 91.00 07 - Los Angeles \n", "\n", - " avg_pct_rt_v_sched \n", - "10425 36.00 \n", - "7407 34.29 \n", - "10210 82.52 " + " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \n", + "5645 1.80 100.00 34.36 \n", + "9678 1.90 99.69 12.55 \n", + "7041 2.40 96.69 30.37 " ] }, "execution_count": 75, @@ -3078,10 +3073,10 @@ " & (df2.direction_id == direction_id)\n", " ]\n", " \n", - " display(df2_filtered.pings_per_min.mean())\n", - " display(df2_filtered.speed_mph.mean())\n", - " display(df2_filtered.total_vp.mean())\n", - " display(df2_filtered.vp_in_shape.mean())\n", + " print(f\"pings per min {df2_filtered.pings_per_min.mean()}\")\n", + " print(f\"speed_mph {df2_filtered.speed_mph.mean()}\")\n", + " print(f\"total_vp {df2_filtered.total_vp.mean()}\")\n", + " print(f\"vp_in_shape {df2_filtered.vp_in_shape.mean()}\")\n", " display(df2_filtered[df2_cols])\n", " #print(\"original\")\n", " #trip_instance_keys_keep = list(df2_filtered.trip_instance_key.unique())\n", @@ -3156,14 +3151,14 @@ " PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL\n", " 0\n", " Early AM\n", - " 6.89\n", + " 6.90\n", " 2\n", " 59.00\n", - " 99.58\n", + " 99.60\n", " 07 - Los Angeles\n", - " 2.68\n", + " 2.70\n", " 70.09\n", - " 68.77\n", + " 68.81\n", " \n", " \n", "\n", @@ -3180,13 +3175,13 @@ "6533 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL 0 \n", "\n", " time_of_day speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "6533 Early AM 6.89 2 59.00 99.58 \n", + "6533 Early AM 6.90 2 59.00 99.60 \n", "\n", " district_name avg_pings_per_min avg_pct_vp_shape \\\n", - "6533 07 - Los Angeles 2.68 70.09 \n", + "6533 07 - Los Angeles 2.70 70.09 \n", "\n", " avg_pct_rt_v_sched \n", - "6533 68.77 " + "6533 68.81 " ] }, "metadata": {}, @@ -3245,9 +3240,9 @@ " PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL\n", " 5c3d4b372420edebe33416aa0c6abe70\n", " 59.00\n", - " 99.58\n", - " 6.89\n", - " 2.68\n", + " 99.60\n", + " 6.90\n", + " 2.70\n", " 267.50\n", " 187.50\n", " 2\n", @@ -3264,10 +3259,10 @@ "1733 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL \n", "\n", " shape_array_key avg_sched_trip_min avg_rt_trip_min \\\n", - "1733 5c3d4b372420edebe33416aa0c6abe70 59.00 99.58 \n", + "1733 5c3d4b372420edebe33416aa0c6abe70 59.00 99.60 \n", "\n", " speed_mph avg_pings_per_min total_vp vp_in_shape n_trips \n", - "1733 6.89 2.68 267.50 187.50 2 " + "1733 6.90 2.70 267.50 187.50 2 " ] }, "metadata": {}, @@ -3277,45 +3272,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "df2\n" + "df2\n", + "pings per min 2.6849179704528776\n", + "speed_mph 6.887376703252869\n", + "total_vp 267.5\n", + "vp_in_shape 187.5\n" ] }, - { - "data": { - "text/plain": [ - "2.6849179704528776" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "6.887376703252869" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "267.5" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "187.5" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -3445,14 +3408,14 @@ " Valley Express Daily\n", " 1\n", " Early AM\n", - " 2.16\n", + " 2.20\n", " 1\n", " 38.00\n", - " 313.48\n", + " 313.50\n", " 03 - Marysville\n", - " 1.42\n", + " 1.40\n", " 22.65\n", - " 724.96\n", + " 725.00\n", " \n", " \n", "\n", @@ -3463,13 +3426,13 @@ "193 rec3u4aMplqObcoTR Tahoe Transportation District 5671 \n", "\n", " route_name direction_id time_of_day speed_mph n_trips \\\n", - "193 Valley Express Daily 1 Early AM 2.16 1 \n", + "193 Valley Express Daily 1 Early AM 2.20 1 \n", "\n", " avg_sched_trip_min avg_rt_trip_min district_name avg_pings_per_min \\\n", - "193 38.00 313.48 03 - Marysville 1.42 \n", + "193 38.00 313.50 03 - Marysville 1.40 \n", "\n", " avg_pct_vp_shape avg_pct_rt_v_sched \n", - "193 22.65 724.96 " + "193 22.65 725.00 " ] }, "metadata": {}, @@ -3528,9 +3491,9 @@ " Valley Express Daily\n", " d99540208939e6a891e7ec67e3bf8964\n", " 38.00\n", - " 313.48\n", - " 2.16\n", - " 1.42\n", + " 313.50\n", + " 2.20\n", + " 1.40\n", " 446.00\n", " 101.00\n", " 1\n", @@ -3547,7 +3510,7 @@ "6840 Valley Express Daily d99540208939e6a891e7ec67e3bf8964 \n", "\n", " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", - "6840 38.00 313.48 2.16 1.42 \n", + "6840 38.00 313.50 2.20 1.40 \n", "\n", " total_vp vp_in_shape n_trips \n", "6840 446.00 101.00 1 " @@ -3560,45 +3523,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "df2\n" + "df2\n", + "pings per min 1.422723164442554\n", + "speed_mph 2.158633017384419\n", + "total_vp 446.0\n", + "vp_in_shape 101.0\n" ] }, - { - "data": { - "text/plain": [ - "1.422723164442554" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "2.158633017384419" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "446.0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "101.0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -3717,14 +3648,14 @@ " Weekdays 5am-10pm Weekends 6am-9pm\n", " 1\n", " AM Peak\n", - " 6.68\n", + " 6.70\n", " 30\n", - " 43.93\n", - " 58.92\n", + " 43.90\n", + " 58.90\n", " 04 - Oakland\n", - " 2.96\n", + " 3.00\n", " 88.33\n", - " 34.10\n", + " 34.17\n", " \n", " \n", "\n", @@ -3735,13 +3666,13 @@ "9012 rechaapWbeffO33OX City and County of San Francisco 38R \n", "\n", " route_name direction_id time_of_day speed_mph \\\n", - "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 AM Peak 6.68 \n", + "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 AM Peak 6.70 \n", "\n", " n_trips avg_sched_trip_min avg_rt_trip_min district_name \\\n", - "9012 30 43.93 58.92 04 - Oakland \n", + "9012 30 43.90 58.90 04 - Oakland \n", "\n", " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \n", - "9012 2.96 88.33 34.10 " + "9012 3.00 88.33 34.17 " ] }, "metadata": {}, @@ -3799,10 +3730,10 @@ " 1\n", " Weekdays 5am-10pm Weekends 6am-9pm\n", " 6e78cef03e15c5a71751030fc65e09b4\n", - " 43.93\n", - " 58.92\n", - " 6.68\n", - " 2.96\n", + " 43.90\n", + " 58.90\n", + " 6.70\n", + " 3.00\n", " 174.57\n", " 154.20\n", " 30\n", @@ -3819,7 +3750,7 @@ "4102 Weekdays 5am-10pm Weekends 6am-9pm 6e78cef03e15c5a71751030fc65e09b4 \n", "\n", " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", - "4102 43.93 58.92 6.68 2.96 \n", + "4102 43.90 58.90 6.70 3.00 \n", "\n", " total_vp vp_in_shape n_trips \n", "4102 174.57 154.20 30 " @@ -3832,45 +3763,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "df2\n" + "df2\n", + "pings per min 2.961175851535439\n", + "speed_mph 6.682525584870673\n", + "total_vp 174.56666666666666\n", + "vp_in_shape 154.2\n" ] }, - { - "data": { - "text/plain": [ - "2.961175851535439" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "6.682525584870673" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "174.56666666666666" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "154.2" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -4311,11 +4210,11 @@ " 13.40\n", " 6\n", " 50.00\n", - " 60.69\n", + " 60.70\n", " 04 - Oakland\n", - " 2.83\n", + " 2.80\n", " NaN\n", - " 21.39\n", + " 21.40\n", " \n", " \n", "\n", @@ -4329,10 +4228,10 @@ "5973 Rodeo/Hercules/San Francisco Transbay Terminal 1 \n", "\n", " time_of_day speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "5973 AM Peak 13.40 6 50.00 60.69 \n", + "5973 AM Peak 13.40 6 50.00 60.70 \n", "\n", " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \n", - "5973 04 - Oakland 2.83 NaN 21.39 " + "5973 04 - Oakland 2.80 NaN 21.40 " ] }, "metadata": {}, @@ -4391,9 +4290,9 @@ " Rodeo/Hercules/San Francisco Transbay Terminal\n", " 810696deb677bfeb1d0b09047031a9c8\n", " 50.00\n", - " 60.69\n", + " 60.70\n", " 13.40\n", - " 2.83\n", + " 2.80\n", " NaN\n", " NaN\n", " 6\n", @@ -4410,10 +4309,10 @@ "3990 Rodeo/Hercules/San Francisco Transbay Terminal \n", "\n", " shape_array_key avg_sched_trip_min avg_rt_trip_min \\\n", - "3990 810696deb677bfeb1d0b09047031a9c8 50.00 60.69 \n", + "3990 810696deb677bfeb1d0b09047031a9c8 50.00 60.70 \n", "\n", " speed_mph avg_pings_per_min total_vp vp_in_shape n_trips \n", - "3990 13.40 2.83 NaN NaN 6 " + "3990 13.40 2.80 NaN NaN 6 " ] }, "metadata": {}, @@ -4423,45 +4322,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "df2\n" + "df2\n", + "pings per min 2.8253065827801014\n", + "speed_mph 13.403813542450534\n", + "total_vp nan\n", + "vp_in_shape nan\n" ] }, - { - "data": { - "text/plain": [ - "2.8253065827801014" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "13.403813542450534" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "nan" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "nan" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, { "data": { "text/html": [ @@ -4586,44 +4453,39 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 81, "id": "b8a1b06b-0e81-4641-8545-b5af34f6b47e", "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'stop' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[81], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mstop\u001b[49m\n", + "\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined" + ] + } + ], + "source": [ + "stop" + ] }, { "cell_type": "code", - "execution_count": 81, + "execution_count": null, "id": "97d81b5c-5d47-405c-a9bd-2a51ffe00b73", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", - " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", - " 'speed_mph', 'service_minutes', 'pings_per_min', 'spatial_accuracy_pct',\n", - " 'rt_triptime_w_gtfs_pct', 'rt_v_scheduled_trip_time_pct',\n", - " 'schedule_gtfs_dataset_key', 'direction_id', 'route_id',\n", - " 'common_shape_id', 'shape_array_key', 'route_name_used',\n", - " 'service_hours', 'trip_first_departure_datetime_pacific',\n", - " 'time_of_day'],\n", - " dtype='object')" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df2.columns" ] }, { "cell_type": "code", - "execution_count": 82, + "execution_count": null, "id": "4e6e3f60-5e14-4fe4-96b4-9df6be9db761", "metadata": {}, "outputs": [], @@ -4640,33 +4502,17 @@ }, { "cell_type": "code", - "execution_count": 83, + "execution_count": null, "id": "e931b56b-1048-49f8-b0c8-4d9ea4fd34db", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "['schedule_gtfs_dataset_key',\n", - " 'time_of_day',\n", - " 'route_id',\n", - " 'direction_id',\n", - " 'route_name_used',\n", - " 'shape_array_key']" - ] - }, - "execution_count": 83, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "route_groupby_cols" ] }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "id": "3b18d789-a84f-42c6-addc-2f97b9e6fafc", "metadata": {}, "outputs": [], @@ -4690,7 +4536,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": null, "id": "429f3c5b-e1f6-40a0-86a3-586ad00685fb", "metadata": {}, "outputs": [], @@ -4738,1351 +4584,50 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": null, "id": "ab2cc904-d02c-414b-ba2b-efcb351afa8f", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
1733259.0099.582.686.89267.50187.50
\n", - "
" - ], - "text/plain": [ - " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", - "1733 2 59.00 99.58 2.68 \n", - "\n", - " speed_mph total_vp vp_in_shape \n", - "1733 6.89 267.50 187.50 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "2.6849179704528776" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "6.887376703252869" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "267.5" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "187.5" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
2486648a01217589c2faa46db395d6cf8317dEarly AM9.4095.0258.002.65252.00180.00
2486770674803a1c4416fc49f883bc3b2c18bEarly AM4.38104.1360.002.72283.00195.00
\n", - "
" - ], - "text/plain": [ - " trip_instance_key time_of_day speed_mph \\\n", - "24866 48a01217589c2faa46db395d6cf8317d Early AM 9.40 \n", - "24867 70674803a1c4416fc49f883bc3b2c18b Early AM 4.38 \n", - "\n", - " rt_service_min service_minutes pings_per_min total_vp vp_in_shape \n", - "24866 95.02 58.00 2.65 252.00 180.00 \n", - "24867 104.13 60.00 2.72 283.00 195.00 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "checkout_test_groups(test1, \"265-13172\", \"Early AM\", 0)" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "id": "7ad06e7a-f7fa-4604-b28e-c08802ce4883", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
6840138.00313.481.422.16446.00101.00
\n", - "
" - ], - "text/plain": [ - " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", - "6840 1 38.00 313.48 1.42 \n", - "\n", - " speed_mph total_vp vp_in_shape \n", - "6840 2.16 446.00 101.00 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "1.422723164442554" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "2.158633017384419" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "446.0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "101.0" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
277586fadf197f5bb105ed916de0a337386eeEarly AM2.16313.4838.001.42446.00101.00
\n", - "
" - ], - "text/plain": [ - " trip_instance_key time_of_day speed_mph \\\n", - "27758 6fadf197f5bb105ed916de0a337386ee Early AM 2.16 \n", - "\n", - " rt_service_min service_minutes pings_per_min total_vp vp_in_shape \n", - "27758 313.48 38.00 1.42 446.00 101.00 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "checkout_test_groups(test1, \"5671\", \"Early AM\", 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "id": "79e3584a-ac1f-4ecc-9b69-8f4551b8b632", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
3990650.0060.692.8313.40NaNNaN
\n", - "
" - ], - "text/plain": [ - " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", - "3990 6 50.00 60.69 2.83 \n", - "\n", - " speed_mph total_vp vp_in_shape \n", - "3990 13.40 NaN NaN " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "2.8253065827801014" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "13.403813542450534" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "nan" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "nan" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
1298714c9fc6b379e1fe869ba37bfd79a8245AM Peak13.3161.4050.002.72NaNNaN
129902ee02fe17c9acad17ccb44eaaea4debeAM Peak12.6963.1850.002.90NaNNaN
1299178a402dfe7d89e0b919a193ac59c69aeAM Peak12.6163.1850.002.82NaNNaN
129942bb4857e894a94d48a79620858c8384eAM Peak12.4464.4550.002.84NaNNaN
12997b97a1995cd54253c58e82bb7c9ad3414AM Peak15.5952.8250.002.82NaNNaN
13004baeeed7c3d6ab74ad9ff40f42a2f1da3AM Peak13.7859.1350.002.86NaNNaN
\n", - "
" - ], - "text/plain": [ - " trip_instance_key time_of_day speed_mph \\\n", - "12987 14c9fc6b379e1fe869ba37bfd79a8245 AM Peak 13.31 \n", - "12990 2ee02fe17c9acad17ccb44eaaea4debe AM Peak 12.69 \n", - "12991 78a402dfe7d89e0b919a193ac59c69ae AM Peak 12.61 \n", - "12994 2bb4857e894a94d48a79620858c8384e AM Peak 12.44 \n", - "12997 b97a1995cd54253c58e82bb7c9ad3414 AM Peak 15.59 \n", - "13004 baeeed7c3d6ab74ad9ff40f42a2f1da3 AM Peak 13.78 \n", - "\n", - " rt_service_min service_minutes pings_per_min total_vp vp_in_shape \n", - "12987 61.40 50.00 2.72 NaN NaN \n", - "12990 63.18 50.00 2.90 NaN NaN \n", - "12991 63.18 50.00 2.82 NaN NaN \n", - "12994 64.45 50.00 2.84 NaN NaN \n", - "12997 52.82 50.00 2.82 NaN NaN \n", - "13004 59.13 50.00 2.86 NaN NaN " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "checkout_test_groups(test1, \"Lynx\", \"AM Peak\", 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 89, - "id": "f6043d40-9178-4547-97f2-6140585ea418", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
41023043.9358.922.966.68174.57154.20
\n", - "
" - ], - "text/plain": [ - " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", - "4102 30 43.93 58.92 2.96 \n", - "\n", - " speed_mph total_vp vp_in_shape \n", - "4102 6.68 174.57 154.20 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "2.961175851535439" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "6.682525584870673" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "174.56666666666666" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "154.2" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
6161076fa3ed3fd8ef28a446eedb4c1e94e6aAM Peak9.3948.8239.002.97145.00145.00
6161147d8da2afbc3e4c78f9be4c6c53a7776AM Peak10.0049.3839.002.96146.00146.00
6161240590ba21f73ae1775a4538e34e67cc3AM Peak5.3497.6741.002.98291.00192.00
61613aa1b90f05357a29abc97fae90cd5bafeAM Peak9.7349.4541.002.95146.00146.00
616144d7cb7ddea2191f062c1ea3165df67c8AM Peak8.5857.9541.002.97172.00163.00
61615d4e16623b6cf749d8e389b905ab44089AM Peak4.4993.4342.002.99279.00204.00
61616ac9822b2d06b84790c10b7cbbf694410AM Peak7.1067.8342.002.96201.00162.00
61617092c4f20368b13bc5c92fda4fb16bd93AM Peak3.7754.9343.002.97163.00163.00
6161888cf8d5d7cd5d0c74a9b78d5e62fd8a3AM Peak7.0652.6243.002.96156.00156.00
616192845f0ae70ae06c3618d58fa99a1cddeAM Peak8.5751.5843.002.97153.00153.00
61620118feb9aedabd1b9bb354ce493c07735AM Peak5.0741.2744.002.96122.0078.00
61621adea377ad6e2ee6fb01e515724547b17AM Peak8.7256.7544.002.96168.00168.00
6162208797b2f64712704fd8fff8e3c3d1dc2AM Peak8.7252.3845.002.96155.00146.00
616236366ccc0f17673ccd753214d9cb433edAM Peak8.6184.3845.002.97251.00187.00
61624bda28536257ad187c4469c6f188635a6AM Peak4.1249.5745.002.97147.00132.00
61625585d84b16b4a418da661644e3a2314d1AM Peak8.2870.5045.002.98210.00180.00
616266789beb9af8dcb7d8b76afd9cce184a4AM Peak8.9147.7745.002.97142.00142.00
61627f36bb7386e5681abb33b97cf5c69b9dbAM Peak8.8155.5345.002.97165.00165.00
61628d2a71fa9b476e583315b6f203ccf8c67AM Peak3.3959.9545.002.97178.00178.00
6162982d0a21cfae4aa608861e3bb0172110cAM Peak7.3654.8045.002.81154.00154.00
6163018792d0d435c7d031ea66115a3e985eeAM Peak4.5145.5045.002.97135.00135.00
61631401e0c1fed455778f28834b60b66a4ffAM Peak3.2363.0745.002.97187.00135.00
61632abb36fd3b7d6d95dccf23ed5709bce69AM Peak8.4147.8246.002.97142.00141.00
61633743153cc91ad7ce0ff294a451347d87eAM Peak9.0148.3246.002.96143.00143.00
61634addecf9d30d8193bae40b35b00ced394AM Peak3.3959.9347.002.97178.00142.00
61635a95f1835bc54bdf7cf77b5bac056e103AM Peak3.6056.5747.002.97168.00130.00
616367dec1b76c7e8e141e19ecc2b780b202cAM Peak8.6152.4347.002.96155.00155.00
6163702efbb740cfeced30c8fd237ab981723AM Peak4.2947.7747.002.95141.00141.00
61638ff8e7fa939ecb40d43443bc1777eea65AM Peak3.3759.9047.002.95177.00141.00
668586058e1a8b5072b0ab2a6a6275aa125e7AM Peak6.0789.6339.002.98267.00203.00
\n", - "
" - ], - "text/plain": [ - " trip_instance_key time_of_day speed_mph \\\n", - "61610 76fa3ed3fd8ef28a446eedb4c1e94e6a AM Peak 9.39 \n", - "61611 47d8da2afbc3e4c78f9be4c6c53a7776 AM Peak 10.00 \n", - "61612 40590ba21f73ae1775a4538e34e67cc3 AM Peak 5.34 \n", - "61613 aa1b90f05357a29abc97fae90cd5bafe AM Peak 9.73 \n", - "61614 4d7cb7ddea2191f062c1ea3165df67c8 AM Peak 8.58 \n", - "61615 d4e16623b6cf749d8e389b905ab44089 AM Peak 4.49 \n", - "61616 ac9822b2d06b84790c10b7cbbf694410 AM Peak 7.10 \n", - "61617 092c4f20368b13bc5c92fda4fb16bd93 AM Peak 3.77 \n", - "61618 88cf8d5d7cd5d0c74a9b78d5e62fd8a3 AM Peak 7.06 \n", - "61619 2845f0ae70ae06c3618d58fa99a1cdde AM Peak 8.57 \n", - "61620 118feb9aedabd1b9bb354ce493c07735 AM Peak 5.07 \n", - "61621 adea377ad6e2ee6fb01e515724547b17 AM Peak 8.72 \n", - "61622 08797b2f64712704fd8fff8e3c3d1dc2 AM Peak 8.72 \n", - "61623 6366ccc0f17673ccd753214d9cb433ed AM Peak 8.61 \n", - "61624 bda28536257ad187c4469c6f188635a6 AM Peak 4.12 \n", - "61625 585d84b16b4a418da661644e3a2314d1 AM Peak 8.28 \n", - "61626 6789beb9af8dcb7d8b76afd9cce184a4 AM Peak 8.91 \n", - "61627 f36bb7386e5681abb33b97cf5c69b9db AM Peak 8.81 \n", - "61628 d2a71fa9b476e583315b6f203ccf8c67 AM Peak 3.39 \n", - "61629 82d0a21cfae4aa608861e3bb0172110c AM Peak 7.36 \n", - "61630 18792d0d435c7d031ea66115a3e985ee AM Peak 4.51 \n", - "61631 401e0c1fed455778f28834b60b66a4ff AM Peak 3.23 \n", - "61632 abb36fd3b7d6d95dccf23ed5709bce69 AM Peak 8.41 \n", - "61633 743153cc91ad7ce0ff294a451347d87e AM Peak 9.01 \n", - "61634 addecf9d30d8193bae40b35b00ced394 AM Peak 3.39 \n", - "61635 a95f1835bc54bdf7cf77b5bac056e103 AM Peak 3.60 \n", - "61636 7dec1b76c7e8e141e19ecc2b780b202c AM Peak 8.61 \n", - "61637 02efbb740cfeced30c8fd237ab981723 AM Peak 4.29 \n", - "61638 ff8e7fa939ecb40d43443bc1777eea65 AM Peak 3.37 \n", - "66858 6058e1a8b5072b0ab2a6a6275aa125e7 AM Peak 6.07 \n", - "\n", - " rt_service_min service_minutes pings_per_min total_vp vp_in_shape \n", - "61610 48.82 39.00 2.97 145.00 145.00 \n", - "61611 49.38 39.00 2.96 146.00 146.00 \n", - "61612 97.67 41.00 2.98 291.00 192.00 \n", - "61613 49.45 41.00 2.95 146.00 146.00 \n", - "61614 57.95 41.00 2.97 172.00 163.00 \n", - "61615 93.43 42.00 2.99 279.00 204.00 \n", - "61616 67.83 42.00 2.96 201.00 162.00 \n", - "61617 54.93 43.00 2.97 163.00 163.00 \n", - "61618 52.62 43.00 2.96 156.00 156.00 \n", - "61619 51.58 43.00 2.97 153.00 153.00 \n", - "61620 41.27 44.00 2.96 122.00 78.00 \n", - "61621 56.75 44.00 2.96 168.00 168.00 \n", - "61622 52.38 45.00 2.96 155.00 146.00 \n", - "61623 84.38 45.00 2.97 251.00 187.00 \n", - "61624 49.57 45.00 2.97 147.00 132.00 \n", - "61625 70.50 45.00 2.98 210.00 180.00 \n", - "61626 47.77 45.00 2.97 142.00 142.00 \n", - "61627 55.53 45.00 2.97 165.00 165.00 \n", - "61628 59.95 45.00 2.97 178.00 178.00 \n", - "61629 54.80 45.00 2.81 154.00 154.00 \n", - "61630 45.50 45.00 2.97 135.00 135.00 \n", - "61631 63.07 45.00 2.97 187.00 135.00 \n", - "61632 47.82 46.00 2.97 142.00 141.00 \n", - "61633 48.32 46.00 2.96 143.00 143.00 \n", - "61634 59.93 47.00 2.97 178.00 142.00 \n", - "61635 56.57 47.00 2.97 168.00 130.00 \n", - "61636 52.43 47.00 2.96 155.00 155.00 \n", - "61637 47.77 47.00 2.95 141.00 141.00 \n", - "61638 59.90 47.00 2.95 177.00 141.00 \n", - "66858 89.63 39.00 2.98 267.00 203.00 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], + "source": [ + "checkout_test_groups(test1, \"265-13172\", \"Early AM\", 0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7ad06e7a-f7fa-4604-b28e-c08802ce4883", + "metadata": {}, + "outputs": [], + "source": [ + "checkout_test_groups(test1, \"5671\", \"Early AM\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "79e3584a-ac1f-4ecc-9b69-8f4551b8b632", + "metadata": {}, + "outputs": [], + "source": [ + "checkout_test_groups(test1, \"Lynx\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f6043d40-9178-4547-97f2-6140585ea418", + "metadata": {}, + "outputs": [], "source": [ "checkout_test_groups(test1, \"38R\", \"AM Peak\", 1)" ] }, { "cell_type": "code", - "execution_count": 90, + "execution_count": null, "id": "bff53f95-8520-44f0-bc9c-96d5bc5a8323", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyservice_minutesrt_service_minpings_per_minspeed_mphtotal_vpvp_in_shape
924689.3820.121.8712.8933.1232.75
\n", - "
" - ], - "text/plain": [ - " trip_instance_key service_minutes rt_service_min pings_per_min \\\n", - "9246 8 9.38 20.12 1.87 \n", - "\n", - " speed_mph total_vp vp_in_shape \n", - "9246 12.89 33.12 32.75 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "1.8731670674101006" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "12.889389560897447" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "33.125" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/plain": [ - "32.75" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_vpvp_in_shape
7794d2c2b948028176c5305d4bf770e75055AM Peak8.5921.379.001.5934.0034.00
782552571020078afa22979c626adb4ce938AM Peak11.5430.689.001.1736.0036.00
7826a5e18c87995eb491eaaa9bf7ac2284bbAM Peak13.2922.789.001.3631.0031.00
78275ff3c2cbe06fee1237a653998d155713AM Peak10.1325.859.001.3234.0033.00
78281da9828bbb28a1b4df2cc8d1e4cfa76fAM Peak7.3724.939.001.3233.0033.00
797615db3e9fa139b2b88aafbc8e3f13133aAM Peak17.6011.7310.002.7332.0032.00
7977683fc69ab6dc56de27d9e7f43244836cAM Peak15.9712.6510.002.7735.0034.00
7978d28a02c5a1956e27033d43ca26bdb868AM Peak18.6311.0010.002.7330.0029.00
\n", - "
" - ], - "text/plain": [ - " trip_instance_key time_of_day speed_mph rt_service_min \\\n", - "7794 d2c2b948028176c5305d4bf770e75055 AM Peak 8.59 21.37 \n", - "7825 52571020078afa22979c626adb4ce938 AM Peak 11.54 30.68 \n", - "7826 a5e18c87995eb491eaaa9bf7ac2284bb AM Peak 13.29 22.78 \n", - "7827 5ff3c2cbe06fee1237a653998d155713 AM Peak 10.13 25.85 \n", - "7828 1da9828bbb28a1b4df2cc8d1e4cfa76f AM Peak 7.37 24.93 \n", - "7976 15db3e9fa139b2b88aafbc8e3f13133a AM Peak 17.60 11.73 \n", - "7977 683fc69ab6dc56de27d9e7f43244836c AM Peak 15.97 12.65 \n", - "7978 d28a02c5a1956e27033d43ca26bdb868 AM Peak 18.63 11.00 \n", - "\n", - " service_minutes pings_per_min total_vp vp_in_shape \n", - "7794 9.00 1.59 34.00 34.00 \n", - "7825 9.00 1.17 36.00 36.00 \n", - "7826 9.00 1.36 31.00 31.00 \n", - "7827 9.00 1.32 34.00 33.00 \n", - "7828 9.00 1.32 33.00 33.00 \n", - "7976 10.00 2.73 32.00 32.00 \n", - "7977 10.00 2.77 35.00 34.00 \n", - "7978 10.00 2.73 30.00 29.00 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "checkout_test_groups(test1, \"16611\", \"AM Peak\", 0)" ] From 26b31b8df182d0f946b6b8af3edc41d3831fca9d Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Thu, 4 Jan 2024 23:26:19 +0000 Subject: [PATCH 5/6] put work into a function --- .../06_vp_usable_exploration.ipynb | 2445 ++++------------- 1 file changed, 464 insertions(+), 1981 deletions(-) diff --git a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb index be4f0a5b3..53de4b5f1 100644 --- a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb +++ b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb @@ -976,10 +976,10 @@ " \n", " \n", " \n", - " 39613\n", - " 1326.03\n", - " 12.00\n", - " 10950.28\n", + " 75715\n", + " 7020.27\n", + " 37.00\n", + " 18873.69\n", " \n", " \n", "\n", @@ -987,7 +987,7 @@ ], "text/plain": [ " rt_service_min service_minutes rt_v_scheduled_trip_time_pct\n", - "39613 1326.03 12.00 10950.28" + "75715 7020.27 37.00 18873.69" ] }, "execution_count": 32, @@ -1286,205 +1286,87 @@ { "cell_type": "code", "execution_count": 34, - "id": "84cc7f28-1289-4df1-8073-cd6e71ed77a6", + "id": "989a09e4-2f5a-4065-94f0-fc6cee1b1126", "metadata": {}, "outputs": [], "source": [ - "group_cols = [\"trip_instance_key\"]\n", + "def add_scheduled_trip_columns(\n", + " df: pd.DataFrame, analysis_date: str, group_cols: list = [\"trip_instance_key\"]\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " Merge RT trips (vehicle positions) to scheduled trips.\n", + " Add in the needed scheduled trip columns to take\n", + " route-direction-time_of_day averages.\n", + " \"\"\"\n", + " keep_cols = [\n", + " \"gtfs_dataset_key\",\n", + " \"direction_id\",\n", + " \"route_id\",\n", + " \"route_short_name\",\n", + " \"route_long_name\",\n", + " \"route_desc\",\n", + " ] + group_cols\n", "\n", - "keep_cols = [\n", - " \"gtfs_dataset_key\",\n", - " \"direction_id\",\n", - " \"route_id\",\n", - " \"route_short_name\",\n", - " \"route_long_name\",\n", - " \"route_desc\",\n", - "] + group_cols\n", + " crosswalk = helpers.import_scheduled_trips(\n", + " analysis_date, columns=keep_cols, get_pandas=True\n", + " )\n", "\n", - "crosswalk = helpers.import_scheduled_trips(\n", - " analysis_date, columns=keep_cols, get_pandas=True\n", - ")" + " common_keep_cols = [\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"route_id\",\n", + " \"direction_id\",\n", + " \"shape_array_key\",\n", + " ]\n", + " common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)[\n", + " common_keep_cols\n", + " ]\n", + "\n", + " crosswalk2 = pd.merge(\n", + " crosswalk,\n", + " common_shape,\n", + " on=[\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n", + " how=\"inner\",\n", + " ).astype({\"direction_id\": \"Int64\"})\n", + "\n", + " crosswalk2 = portfolio_utils.add_route_name(crosswalk2).drop(\n", + " columns=[\"route_short_name\", \"route_long_name\", \"route_desc\"]\n", + " )\n", + "\n", + " time_keep_cols = [\n", + " \"trip_instance_key\",\n", + " \"service_hours\",\n", + " \"trip_first_departure_datetime_pacific\",\n", + " \"time_of_day\",\n", + " ]\n", + " time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)[time_keep_cols]\n", + "\n", + " df2 = pd.merge(df, crosswalk2, on=\"trip_instance_key\", how=\"left\").merge(\n", + " time_of_day, on=\"trip_instance_key\", how=\"left\"\n", + " )\n", + "\n", + " return df2" ] }, { "cell_type": "code", "execution_count": 35, - "id": "0b4b0f0d-cc15-4f3e-b8c7-df8579831539", + "id": "7fabab72-c1a4-468f-83d6-b1e066014129", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphservice_minutespings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pct
05d25a4366c173007d9c29fdead0299d774.037321674216.00148.0021.0158.002.9268.5299.9527.64
14b72b80fc9cfe5e613bab95585cbe7e423.4521592359.0019.0054.9558.002.5232.2098.08-59.57
\n", - "
" - ], - "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "0 5d25a4366c173007d9c29fdead0299d7 74.03 \n", - "1 4b72b80fc9cfe5e613bab95585cbe7e4 23.45 \n", - "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "0 73 216 74 \n", - "1 21 59 23 \n", - "\n", - " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", - "0 216.00 148.00 21.01 58.00 2.92 \n", - "1 59.00 19.00 54.95 58.00 2.52 \n", - "\n", - " spatial_accuracy_pct rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \n", - "0 68.52 99.95 27.64 \n", - "1 32.20 98.08 -59.57 " - ] - }, - "execution_count": 35, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "dec_df.head(2)" + "dec_df2 = add_scheduled_trip_columns(dec_df, analysis_date, [\"trip_instance_key\"])" ] }, { "cell_type": "code", "execution_count": 36, - "id": "1e89939b-1bca-4670-955f-013c16949ec9", + "id": "fea7cda5-606c-4054-b189-58a12d250957", "metadata": {}, "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
schedule_gtfs_dataset_keydirection_idroute_idroute_short_nameroute_long_nameroute_desctrip_instance_key
01770249a5a2e770ca90628434d4934b11.003402Route 11Route 11PACIFIC VIEW MALL via TELEPHONE RD595914b0c046d093f4fd5f9e88ab5635
11770249a5a2e770ca90628434d4934b11.003402Route 11Route 11PACIFIC VIEW MALL via TELEPHONE RD5ad8f3475c016f517dcb2611ccd69764
\n", - "
" - ], "text/plain": [ - " schedule_gtfs_dataset_key direction_id route_id route_short_name \\\n", - "0 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", - "1 1770249a5a2e770ca90628434d4934b1 1.00 3402 Route 11 \n", - "\n", - " route_long_name route_desc \\\n", - "0 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", - "1 Route 11 PACIFIC VIEW MALL via TELEPHONE RD \n", - "\n", - " trip_instance_key \n", - "0 595914b0c046d093f4fd5f9e88ab5635 \n", - "1 5ad8f3475c016f517dcb2611ccd69764 " + "pandas.core.frame.DataFrame" ] }, "execution_count": 36, @@ -1493,62 +1375,220 @@ } ], "source": [ - "crosswalk.head(2)" + "type(dec_df2)" + ] + }, + { + "cell_type": "markdown", + "id": "d2ff8a3d-d3f5-42b4-b096-24b33b9842ca", + "metadata": {}, + "source": [ + "#### Aggregate avg speed by route\n", + "* DO I need to use the other functions in the script\n", + "* Do we still drop rows that are above 70 mph?\n", + "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/export.py#L150\n" ] }, { "cell_type": "code", "execution_count": 37, - "id": "7fabab72-c1a4-468f-83d6-b1e066014129", + "id": "21db7580-cf79-4385-b9dc-80cd99206011", "metadata": {}, "outputs": [], "source": [ - "# dec_df = dec_df.drop(columns=[\"service_minutes\"])" + "def average_route_speeds_for_export(\n", + " df: pd.DataFrame,\n", + " analysis_date: str,\n", + " max_speed: int,\n", + ") -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Aggregate trip speeds to route-direction.\n", + " Attach shape geometry to most common shape_id.\n", + " \"\"\"\n", + " df2 = df.loc[df.speed_mph <= 70].reset_index(drop=True)\n", + "\n", + " route_cols = [\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"time_of_day\",\n", + " \"route_id\",\n", + " \"direction_id\",\n", + " \"route_name_used\",\n", + " \"shape_array_key\",\n", + " ]\n", + "\n", + " mean_cols = [\n", + " \"service_minutes\",\n", + " \"rt_service_min\",\n", + " \"speed_mph\",\n", + " \"pings_per_min\",\n", + " \"total_vp\",\n", + " \"vp_in_shape\",\n", + " ]\n", + " count_cols = [\"trip_instance_key\"]\n", + "\n", + " df3 = (\n", + " df2.groupby(route_cols)\n", + " .agg({**{e: \"mean\" for e in mean_cols}, **{e: \"count\" for e in count_cols}})\n", + " .reset_index()\n", + " )\n", + "\n", + " df4 = df3.assign(\n", + " rt_service_min=df3.rt_service_min.round(1),\n", + " service_minutes=df3.service_minutes.round(1),\n", + " speed_mph=df3.speed_mph.round(1),\n", + " pings_per_min=df3.pings_per_min.round(1),\n", + " ).rename(\n", + " columns={\n", + " \"service_minutes\": \"avg_sched_trip_min\",\n", + " \"rt_service_min\": \"avg_rt_trip_min\",\n", + " \"trip_instance_key\": \"n_trips\",\n", + " \"route_name_used\": \"route_name\",\n", + " \"pings_per_min\": \"avg_pings_per_min\",\n", + " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", + " }\n", + " )\n", + "\n", + " org_crosswalk = schedule_rt_utils.sample_gtfs_dataset_key_to_organization_crosswalk(\n", + " df4,\n", + " analysis_date,\n", + " quartet_data=\"schedule\",\n", + " dim_gtfs_dataset_cols=[\"key\", \"base64_url\"],\n", + " dim_organization_cols=[\"source_record_id\", \"name\", \"caltrans_district\"],\n", + " )\n", + "\n", + " df_with_org = pd.merge(\n", + " df4,\n", + " org_crosswalk.rename(columns={\"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"}),\n", + " on=\"gtfs_dataset_key\",\n", + " how=\"inner\",\n", + " )\n", + "\n", + " shapes = helpers.import_scheduled_shapes(\n", + " analysis_date,\n", + " columns=[\"shape_array_key\", \"geometry\"],\n", + " get_pandas=True,\n", + " crs=geography_utils.WGS84,\n", + " )\n", + "\n", + " df_with_shape = pd.merge(\n", + " shapes,\n", + " df_with_org,\n", + " on=\"shape_array_key\", # once merged, can drop shape_array_key\n", + " how=\"inner\",\n", + " )\n", + "\n", + " df_with_shape[\"avg_pct_vp_shape\"] = (\n", + " df_with_shape.vp_in_shape / df_with_shape.total_vp * 100\n", + " )\n", + "\n", + " df_with_shape[\"avg_pct_rt_v_sched\"] = (\n", + " df_with_shape.avg_rt_trip_min / df_with_shape.avg_sched_trip_min - 1\n", + " ) * 100\n", + "\n", + " final_df = df_with_shape.drop(columns=[\"total_vp\", \"vp_in_shape\"])\n", + "\n", + " agency_cols = [\"organization_source_record_id\", \"organization_name\"]\n", + " route_cols = [\n", + " \"route_id\",\n", + " \"route_name\",\n", + " \"direction_id\",\n", + " ]\n", + "\n", + " col_order = (\n", + " agency_cols\n", + " + route_cols\n", + " + [\n", + " \"time_of_day\",\n", + " \"speed_mph\",\n", + " \"n_trips\",\n", + " \"avg_sched_trip_min\",\n", + " \"avg_rt_trip_min\",\n", + " \"base64_url\",\n", + " \"caltrans_district\",\n", + " \"geometry\",\n", + " \"avg_pings_per_min\",\n", + " \"avg_pct_vp_shape\",\n", + " \"avg_pct_rt_v_sched\",\n", + " ]\n", + " )\n", + "\n", + " final_df = df_with_shape.reindex(columns=col_order).rename(\n", + " columns={\n", + " \"organization_source_record_id\": \"org_id\",\n", + " \"organization_name\": \"agency\",\n", + " \"caltrans_district\": \"district_name\",\n", + " }\n", + " )\n", + "\n", + " return df2, final_df" ] }, { "cell_type": "code", "execution_count": 38, - "id": "e12360a1-11fc-4e01-b2c9-2dbe0f49ee3d", + "id": "fe97ad8a-d0ce-40cd-982b-87877882693a", + "metadata": {}, + "outputs": [], + "source": [ + "dec_intermediary, dec_final = average_route_speeds_for_export(\n", + " dec_df2, analysis_date, 70\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "58165517-e414-4843-8ece-b7631d4d7f27", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'2023-12-13'" + "count 11397.00\n", + "mean 2.38\n", + "std 0.56\n", + "min 0.10\n", + "25% 1.90\n", + "50% 2.50\n", + "75% 2.90\n", + "max 3.50\n", + "Name: avg_pings_per_min, dtype: float64" ] }, - "execution_count": 38, + "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "analysis_date" - ] - }, - { - "cell_type": "markdown", - "id": "ec25f2ca-bc4b-44aa-bf9c-4689c47eff29", - "metadata": {}, - "source": [ - "#### Don't need `common_shape_id`" + "dec_final.avg_pings_per_min.describe()" ] }, { "cell_type": "code", - "execution_count": 39, - "id": "e862de9a-47dc-42e0-aec3-81645a09afea", + "execution_count": 40, + "id": "e99b22b7-f6c7-4d69-8b65-da3c39c85f33", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(11397, 16)" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)" + "dec_final.shape" ] }, { "cell_type": "code", - "execution_count": 40, - "id": "d6578b42-82b3-4edf-b97b-b4092c6f17ba", + "execution_count": 41, + "id": "49867873-4a76-49c4-8c95-0918d7468f82", "metadata": {}, "outputs": [ { @@ -1572,213 +1612,118 @@ " \n", " \n", " \n", - " schedule_gtfs_dataset_key\n", + " org_id\n", + " agency\n", " route_id\n", + " route_name\n", " direction_id\n", - " common_shape_id\n", - " shape_array_key\n", + " time_of_day\n", + " speed_mph\n", + " n_trips\n", + " avg_sched_trip_min\n", + " avg_rt_trip_min\n", + " district_name\n", + " avg_pings_per_min\n", + " avg_pct_vp_shape\n", + " avg_pct_rt_v_sched\n", " \n", " \n", " \n", " \n", - " 0\n", - " 014d0998350083249a9eb310635548c2\n", - " 10866826\n", - " 1.00\n", - " 10866826:1\n", - " a7f294e50a9a8ff179d4c82cd9136625\n", + " 31\n", + " rec8zhnCPETu6qEiH\n", + " City of Redondo Beach\n", + " 4819\n", + " Redondo Beach Pier / Greenline Station\n", + " 1\n", + " AM Peak\n", + " 7.20\n", + " 6\n", + " 46.50\n", + " 119.70\n", + " 07 - Los Angeles\n", + " 2.40\n", + " 99.61\n", + " 157.42\n", + " \n", + " \n", + " 8468\n", + " recSiaaMmBXW7fUZS\n", + " Stanislaus Regional Transit Authority\n", + " 29\n", + " \n", + " 1\n", + " PM Peak\n", + " 8.50\n", + " 5\n", + " 27.00\n", + " 29.40\n", + " 10 - Stockton\n", + " 3.00\n", + " 83.92\n", + " 8.89\n", " \n", " \n", - " 1\n", - " 014d0998350083249a9eb310635548c2\n", - " 10866849\n", - " 1.00\n", - " 10866849:1\n", - " af9aea31d387f59024bf25fb1d9334a3\n", + " 2440\n", + " recOZgevYf7Jimm9L\n", + " Alameda-Contra Costa Transit District\n", + " 6\n", + " Berkeley - Telegraph - Oakland\n", + " 1\n", + " AM Peak\n", + " 6.30\n", + " 15\n", + " 32.20\n", + " 46.90\n", + " 04 - Oakland\n", + " 2.80\n", + " 87.65\n", + " 45.65\n", " \n", " \n", "\n", "" ], "text/plain": [ - " schedule_gtfs_dataset_key route_id direction_id common_shape_id \\\n", - "0 014d0998350083249a9eb310635548c2 10866826 1.00 10866826:1 \n", - "1 014d0998350083249a9eb310635548c2 10866849 1.00 10866849:1 \n", + " org_id agency route_id \\\n", + "31 rec8zhnCPETu6qEiH City of Redondo Beach 4819 \n", + "8468 recSiaaMmBXW7fUZS Stanislaus Regional Transit Authority 29 \n", + "2440 recOZgevYf7Jimm9L Alameda-Contra Costa Transit District 6 \n", + "\n", + " route_name direction_id time_of_day \\\n", + "31 Redondo Beach Pier / Greenline Station 1 AM Peak \n", + "8468 1 PM Peak \n", + "2440 Berkeley - Telegraph - Oakland 1 AM Peak \n", "\n", - " shape_array_key \n", - "0 a7f294e50a9a8ff179d4c82cd9136625 \n", - "1 af9aea31d387f59024bf25fb1d9334a3 " + " speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "31 7.20 6 46.50 119.70 \n", + "8468 8.50 5 27.00 29.40 \n", + "2440 6.30 15 32.20 46.90 \n", + "\n", + " district_name avg_pings_per_min avg_pct_vp_shape \\\n", + "31 07 - Los Angeles 2.40 99.61 \n", + "8468 10 - Stockton 3.00 83.92 \n", + "2440 04 - Oakland 2.80 87.65 \n", + "\n", + " avg_pct_rt_v_sched \n", + "31 157.42 \n", + "8468 8.89 \n", + "2440 45.65 " ] }, - "execution_count": 40, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "common_shape.head(2)" + "dec_final.drop(columns=[\"geometry\", \"base64_url\"]).sample(3)" ] }, { "cell_type": "code", - "execution_count": 41, - "id": "aba49709-75e1-43e2-84a0-953fc3219206", - "metadata": {}, - "outputs": [], - "source": [ - "crosswalk2 = pd.merge(\n", - " crosswalk,\n", - " common_shape,\n", - " on=[\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n", - " how=\"inner\",\n", - ").astype({\"direction_id\": \"Int64\"})" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "f23a0e8a-e7f3-411e-a6c0-784c87e559ed", - "metadata": {}, - "outputs": [], - "source": [ - "time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "51be9b61-db22-4c1f-8f96-6b339a16e991", - "metadata": {}, - "outputs": [], - "source": [ - "# Drop service mins since we already ahve this and it matches\n", - "time_of_day = time_of_day.drop(columns=[\"service_minutes\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 44, - "id": "99381d4c-b338-4e04-a351-b077c6b94224", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyservice_hourstrip_first_departure_datetime_pacifictime_of_day
0595914b0c046d093f4fd5f9e88ab56350.552023-12-13 18:35:00PM Peak
15ad8f3475c016f517dcb2611ccd697640.552023-12-13 19:05:00PM Peak
\n", - "
" - ], - "text/plain": [ - " trip_instance_key service_hours \\\n", - "0 595914b0c046d093f4fd5f9e88ab5635 0.55 \n", - "1 5ad8f3475c016f517dcb2611ccd69764 0.55 \n", - "\n", - " trip_first_departure_datetime_pacific time_of_day \n", - "0 2023-12-13 18:35:00 PM Peak \n", - "1 2023-12-13 19:05:00 PM Peak " - ] - }, - "execution_count": 44, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "time_of_day.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 45, - "id": "3defcf73-8930-48c7-ae05-7e3ec263887a", - "metadata": {}, - "outputs": [], - "source": [ - "crosswalk2 = portfolio_utils.add_route_name(crosswalk2).drop(\n", - " columns=[\"route_short_name\", \"route_long_name\", \"route_desc\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "67e1a70f-d21c-4a9d-b4a3-2ebde459b4e1", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.merge(\n", - " dec_df,\n", - " crosswalk2,\n", - " on=\"trip_instance_key\",\n", - " how=\"left\",\n", - ").merge(time_of_day, on=\"trip_instance_key\", how=\"left\")" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "deb99704-52a5-4119-8d09-ddb4bc3ca13d", - "metadata": {}, - "outputs": [], - "source": [ - "# Test if servicve minutes match\n", - "# df['matching_service_mins'] = df.service_minutes_x - df.service_minutes_y" - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "777c0915-acc0-4fac-83e0-7ddc7a396e86", - "metadata": {}, - "outputs": [], - "source": [ - "# Yes it matchesp perfectly\n", - "# df['matching_service_mins'].describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 49, - "id": "4576ab1e-2bde-4420-8254-7c8e2a2700d5", + "execution_count": 42, + "id": "3844b20a-6af8-4d23-a42e-166b7330907d", "metadata": {}, "outputs": [ { @@ -1815,1383 +1760,156 @@ " spatial_accuracy_pct\n", " rt_triptime_w_gtfs_pct\n", " rt_v_scheduled_trip_time_pct\n", - " schedule_gtfs_dataset_key\n", - " direction_id\n", - " route_id\n", - " common_shape_id\n", - " shape_array_key\n", - " route_name_used\n", - " service_hours\n", - " trip_first_departure_datetime_pacific\n", - " time_of_day\n", " \n", " \n", " \n", " \n", - " 0\n", - " 5d25a4366c173007d9c29fdead0299d7\n", - " 74.03\n", - " 73\n", - " 216\n", - " 74\n", - " 216.00\n", - " 148.00\n", - " 21.01\n", - " 58.00\n", - " 2.92\n", - " 68.52\n", - " 99.95\n", - " 27.64\n", - " 63029a23cb0e73f2a5d98a345c5e2e40\n", - " 1\n", - " 3428\n", - " 8371\n", - " 0d0ca5bc40fb6266a03f400c3aa7e6cb\n", - " \n", - " 0.97\n", - " 2023-12-13 05:34:00\n", - " Early AM\n", - " \n", - " \n", - " 1\n", - " 4b72b80fc9cfe5e613bab95585cbe7e4\n", - " 23.45\n", - " 21\n", - " 59\n", - " 23\n", - " 59.00\n", - " 19.00\n", - " 54.95\n", - " 58.00\n", - " 2.52\n", - " 32.20\n", - " 98.08\n", - " -59.57\n", - " 63029a23cb0e73f2a5d98a345c5e2e40\n", - " 1\n", - " 3428\n", - " 8371\n", - " 0d0ca5bc40fb6266a03f400c3aa7e6cb\n", - " \n", - " 0.97\n", - " 2023-12-13 06:34:00\n", - " Early AM\n", - " \n", - " \n", - " 2\n", - " 2f061fce31ec5f20f55a81177996db89\n", - " 104.37\n", - " 102\n", - " 309\n", - " 104\n", - " 309.00\n", - " 199.00\n", - " 8.94\n", - " 58.00\n", + " 29113\n", + " 220b6e20e6957e20bceb14947d71d367\n", + " 68.02\n", + " 67\n", + " 201\n", + " 68\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " 2.96\n", - " 64.40\n", - " 99.65\n", - " 79.94\n", - " 63029a23cb0e73f2a5d98a345c5e2e40\n", - " 0\n", - " 3428\n", - " 13407\n", - " f05fbd11b7b08a3a09b24593b7a83497\n", - " \n", - " 0.97\n", - " 2023-12-13 15:37:00\n", - " PM Peak\n", - " \n", - " \n", - " 3\n", - " 3732269c8694ba9a0bd4c44aed97abe0\n", - " 141.95\n", - " 140\n", - " 422\n", - " 142\n", - " 422.00\n", - " 188.00\n", - " 9.38\n", - " 58.00\n", - " 2.97\n", - " 44.55\n", - " 100.00\n", - " 144.74\n", - " 63029a23cb0e73f2a5d98a345c5e2e40\n", - " 0\n", - " 3428\n", - " 13407\n", - " f05fbd11b7b08a3a09b24593b7a83497\n", - " \n", - " 0.97\n", - " 2023-12-13 16:37:00\n", - " PM Peak\n", - " \n", - " \n", - " 4\n", - " 101556fdfe31b5849787220373f21ed8\n", - " 72.78\n", - " 72\n", - " 210\n", - " 73\n", - " 210.00\n", - " 109.00\n", - " 31.65\n", - " 55.00\n", - " 2.89\n", - " 51.90\n", - " 100.00\n", - " 32.33\n", - " 63029a23cb0e73f2a5d98a345c5e2e40\n", - " 1\n", - " 3429\n", - " 8373\n", - " 0f9dd50e2356a5299046fb0ed1a00a89\n", - " \n", - " 0.92\n", - " 2023-12-13 05:55:00\n", - " Early AM\n", + " NaN\n", + " 99.98\n", + " NaN\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "0 5d25a4366c173007d9c29fdead0299d7 74.03 \n", - "1 4b72b80fc9cfe5e613bab95585cbe7e4 23.45 \n", - "2 2f061fce31ec5f20f55a81177996db89 104.37 \n", - "3 3732269c8694ba9a0bd4c44aed97abe0 141.95 \n", - "4 101556fdfe31b5849787220373f21ed8 72.78 \n", + " trip_instance_key rt_service_min \\\n", + "29113 220b6e20e6957e20bceb14947d71d367 68.02 \n", "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "0 73 216 74 \n", - "1 21 59 23 \n", - "2 102 309 104 \n", - "3 140 422 142 \n", - "4 72 210 73 \n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "29113 67 201 68 \n", "\n", - " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", - "0 216.00 148.00 21.01 58.00 2.92 \n", - "1 59.00 19.00 54.95 58.00 2.52 \n", - "2 309.00 199.00 8.94 58.00 2.96 \n", - "3 422.00 188.00 9.38 58.00 2.97 \n", - "4 210.00 109.00 31.65 55.00 2.89 \n", + " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", + "29113 NaN NaN NaN NaN 2.96 \n", "\n", - " spatial_accuracy_pct rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \\\n", - "0 68.52 99.95 27.64 \n", - "1 32.20 98.08 -59.57 \n", - "2 64.40 99.65 79.94 \n", - "3 44.55 100.00 144.74 \n", - "4 51.90 100.00 32.33 \n", + " spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", + "29113 NaN 99.98 \n", "\n", - " schedule_gtfs_dataset_key direction_id route_id common_shape_id \\\n", - "0 63029a23cb0e73f2a5d98a345c5e2e40 1 3428 8371 \n", - "1 63029a23cb0e73f2a5d98a345c5e2e40 1 3428 8371 \n", - "2 63029a23cb0e73f2a5d98a345c5e2e40 0 3428 13407 \n", - "3 63029a23cb0e73f2a5d98a345c5e2e40 0 3428 13407 \n", - "4 63029a23cb0e73f2a5d98a345c5e2e40 1 3429 8373 \n", - "\n", - " shape_array_key route_name_used service_hours \\\n", - "0 0d0ca5bc40fb6266a03f400c3aa7e6cb 0.97 \n", - "1 0d0ca5bc40fb6266a03f400c3aa7e6cb 0.97 \n", - "2 f05fbd11b7b08a3a09b24593b7a83497 0.97 \n", - "3 f05fbd11b7b08a3a09b24593b7a83497 0.97 \n", - "4 0f9dd50e2356a5299046fb0ed1a00a89 0.92 \n", - "\n", - " trip_first_departure_datetime_pacific time_of_day \n", - "0 2023-12-13 05:34:00 Early AM \n", - "1 2023-12-13 06:34:00 Early AM \n", - "2 2023-12-13 15:37:00 PM Peak \n", - "3 2023-12-13 16:37:00 PM Peak \n", - "4 2023-12-13 05:55:00 Early AM " - ] - }, - "execution_count": 49, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 50, - "id": "94f4812c-8166-4f8c-a3c9-aa4d6f34df9c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "((86128, 22),)" + " rt_v_scheduled_trip_time_pct \n", + "29113 NaN " ] }, - "execution_count": 50, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "df.shape," + "dec_df.sample()" ] }, { "cell_type": "markdown", - "id": "d2ff8a3d-d3f5-42b4-b096-24b33b9842ca", - "metadata": {}, - "source": [ - "#### Aggregate avg speed by route\n", - "* DO I need to use the other functions in the script\n", - "* Do we still drop rows that are above 70 mph?\n", - "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/export.py#L150\n" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "6f9d6077-0551-42e0-830c-a262364770f0", - "metadata": {}, - "outputs": [], - "source": [ - "df2 = df.loc[df.speed_mph <= 70].reset_index(drop=True)" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "e7ce443a-b466-4d07-a613-cfc05a1b764d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(76838, 86128)" - ] - }, - "execution_count": 52, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(df2), len(df)" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "b3695f80-f817-4e3f-b882-5f37e0c277ae", + "id": "a3acfeb1-54bf-4cee-9810-51c7e5fe0aa6", "metadata": {}, - "outputs": [], "source": [ - "route_cols = [\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"time_of_day\",\n", - " \"route_id\",\n", - " \"direction_id\",\n", - " \"route_name_used\",\n", - " \"shape_array_key\",\n", - "]" + "### Check results after aggregating up to route\n", + "* How are the results sooo wrong with `265-13172`" ] }, { "cell_type": "code", - "execution_count": 54, - "id": "e964a1f0-b66c-4d7f-92d0-c72e7c7bf39c", + "execution_count": 46, + "id": "aa2cc0c2-da7b-4ae1-9ac4-84c8d6d60e95", "metadata": {}, "outputs": [], "source": [ - "df3 = (\n", - " df2.groupby(route_cols)\n", - " .agg(\n", - " {\n", - " \"service_minutes\": \"mean\",\n", - " \"rt_service_min\": \"mean\",\n", - " \"speed_mph\": \"mean\",\n", - " \"pings_per_min\": \"mean\",\n", - " \"total_vp\": \"mean\",\n", - " \"vp_in_shape\": \"mean\",\n", - " \"trip_instance_key\": \"count\",\n", - " }\n", + "def checkout_route(\n", + " og_df: pd.DataFrame,\n", + " route_agg: gpd.GeoDataFrame,\n", + " route_id: str,\n", + " time_of_day: str,\n", + " direction_id: int,\n", + "):\n", + " print(\"final\")\n", + " display(\n", + " route_agg.loc[\n", + " (route_agg.route_id == route_id)\n", + " & (route_agg.time_of_day == time_of_day)\n", + " & (route_agg.direction_id == direction_id)\n", + " ].drop(columns=[\"geometry\", \"base64_url\"])\n", " )\n", - " .reset_index()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "f086985e-076e-4b22-825e-bf230cecf2f9", - "metadata": {}, - "outputs": [], - "source": [ - "df3 = df3.assign(\n", - " rt_service_min=df3.rt_service_min.round(1),\n", - " service_minutes=df3.service_minutes.round(1),\n", - " speed_mph=df3.speed_mph.round(1),\n", - " pings_per_min=df3.pings_per_min.round(1),\n", - ").rename(\n", - " columns={\n", - " \"service_minutes\": \"avg_sched_trip_min\",\n", - " \"rt_service_min\": \"avg_rt_trip_min\",\n", - " \"trip_instance_key\": \"n_trips\",\n", - " \"route_name_used\": \"route_name\",\n", - " \"pings_per_min\": \"avg_pings_per_min\",\n", - " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", - " }\n", - ")" + " cols = [\n", + " \"trip_instance_key\",\n", + " \"time_of_day\",\n", + " \"speed_mph\",\n", + " \"rt_service_min\",\n", + " \"service_minutes\",\n", + " \"pings_per_min\",\n", + " ]\n", + "\n", + " print(\"original\")\n", + " og_df2 = og_df.loc[\n", + " (og_df.route_id == route_id)\n", + " & (og_df.time_of_day == time_of_day)\n", + " & (og_df.direction_id == direction_id)\n", + " ]\n", + "\n", + " print(f\"pings per min {og_df2.pings_per_min.mean()}\")\n", + " print(f\"speed_mph {og_df2.speed_mph.mean()}\")\n", + " print(f\"total_vp {og_df2.total_vp.mean()}\")\n", + " print(f\"vp_in_shape {og_df2.vp_in_shape.mean()}\")\n", + " display(og_df2[cols])" ] }, { "cell_type": "code", - "execution_count": 56, - "id": "8236bf29-abaa-466c-978c-10199ffa840c", + "execution_count": 47, + "id": "10b8676a-783b-4df9-84cb-013785f0c1ca", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'df3 = df3.rename(\\n columns={\\n \"service_minutes\": \"avg_sched_trip_min\",\\n \"rt_service_min\": \"avg_rt_trip_min\",\\n \"trip_instance_key\": \"n_trips\",\\n \"route_name_used\": \"route_name\",\\n \"pings_per_min\": \"avg_pings_per_min\",\\n \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\\n }\\n)'" + "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", + " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", + " 'speed_mph', 'service_minutes', 'pings_per_min', 'spatial_accuracy_pct',\n", + " 'rt_triptime_w_gtfs_pct', 'rt_v_scheduled_trip_time_pct',\n", + " 'schedule_gtfs_dataset_key', 'direction_id', 'route_id',\n", + " 'shape_array_key', 'route_name_used', 'service_hours',\n", + " 'trip_first_departure_datetime_pacific', 'time_of_day'],\n", + " dtype='object')" ] }, - "execution_count": 56, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "\"\"\"df3 = df3.rename(\n", - " columns={\n", - " \"service_minutes\": \"avg_sched_trip_min\",\n", - " \"rt_service_min\": \"avg_rt_trip_min\",\n", - " \"trip_instance_key\": \"n_trips\",\n", - " \"route_name_used\": \"route_name\",\n", - " \"pings_per_min\": \"avg_pings_per_min\",\n", - " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", - " }\n", - ")\"\"\"" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "2a6ff425-643d-426b-803b-b96399f986d3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
0015d67d5b75b5cf2b710bbadadfb75f5AM Peak170Downtown San Rafael - Sausalito15dd7643b1198055544091c267b6739755.6057.5015.202.80151.00142.805
1015d67d5b75b5cf2b710bbadadfb75f5AM Peak171Downtown San Rafael - Sausalitode1df9489fe7de15f492c9308289102b56.2055.009.802.50147.60140.405
\n", - "
" - ], - "text/plain": [ - " gtfs_dataset_key time_of_day route_id direction_id \\\n", - "0 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 17 0 \n", - "1 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 17 1 \n", - "\n", - " route_name shape_array_key \\\n", - "0 Downtown San Rafael - Sausalito 15dd7643b1198055544091c267b67397 \n", - "1 Downtown San Rafael - Sausalito de1df9489fe7de15f492c9308289102b \n", - "\n", - " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", - "0 55.60 57.50 15.20 2.80 \n", - "1 56.20 55.00 9.80 2.50 \n", - "\n", - " total_vp vp_in_shape n_trips \n", - "0 151.00 142.80 5 \n", - "1 147.60 140.40 5 " - ] - }, - "execution_count": 57, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df3.head(2)" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "id": "0660f4e2-0c92-4656-a967-95ac7a26b440", - "metadata": {}, - "outputs": [], - "source": [ - "org_crosswalk = schedule_rt_utils.sample_gtfs_dataset_key_to_organization_crosswalk(\n", - " df3,\n", - " analysis_date,\n", - " quartet_data=\"schedule\",\n", - " dim_gtfs_dataset_cols=[\"key\", \"base64_url\"],\n", - " dim_organization_cols=[\"source_record_id\", \"name\", \"caltrans_district\"],\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "b0e1f34e-94b6-417e-ba35-0aebeccd3cee", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
schedule_gtfs_dataset_keybase64_urlorganization_source_record_idorganization_namecaltrans_district
0015d67d5b75b5cf2b710bbadadfb75f5aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
10881af3822466784992a49f1cc57d38faHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TQQ==recUmm4gcNXaqrwpnSonoma-Marin Area Rail Transit District04 - Oakland
209a703757d1ed14ca9580b1385e39315aHR0cHM6Ly9yaWRlbGF3bmRhbGViZWF0LmNvbS9ndGZzrecj8LXdeSurpSRNUCity of Lawndale07 - Los Angeles
309e16227fc42c4fe90204a9d11581034aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TTw==recDupUxInMUgxeizSonoma County04 - Oakland
40a3c0b21c85fb09f8db91599e14dd7f7aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3Rmcy9sYWtldHJhbnNpdC1jYS11cy9sYWtldHJhbnNpdC1jYS11cy56aXA=recPwXKbGLL4aIqXVLake Transit Authority01 - Eureka
\n", - "
" - ], - "text/plain": [ - " schedule_gtfs_dataset_key \\\n", - "0 015d67d5b75b5cf2b710bbadadfb75f5 \n", - "1 0881af3822466784992a49f1cc57d38f \n", - "2 09a703757d1ed14ca9580b1385e39315 \n", - "3 09e16227fc42c4fe90204a9d11581034 \n", - "4 0a3c0b21c85fb09f8db91599e14dd7f7 \n", - "\n", - " base64_url \\\n", - "0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", - "1 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TQQ== \n", - "2 aHR0cHM6Ly9yaWRlbGF3bmRhbGViZWF0LmNvbS9ndGZz \n", - "3 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1TTw== \n", - "4 aHR0cHM6Ly9kYXRhLnRyaWxsaXVtdHJhbnNpdC5jb20vZ3Rmcy9sYWtldHJhbnNpdC1jYS11cy9sYWtldHJhbnNpdC1jYS11cy56aXA= \n", - "\n", - " organization_source_record_id organization_name \\\n", - "0 recNOb7pqBRlQVG5e Marin County Transit District \n", - "1 recUmm4gcNXaqrwpn Sonoma-Marin Area Rail Transit District \n", - "2 recj8LXdeSurpSRNU City of Lawndale \n", - "3 recDupUxInMUgxeiz Sonoma County \n", - "4 recPwXKbGLL4aIqXV Lake Transit Authority \n", - "\n", - " caltrans_district \n", - "0 04 - Oakland \n", - "1 04 - Oakland \n", - "2 07 - Los Angeles \n", - "3 04 - Oakland \n", - "4 01 - Eureka " - ] - }, - "execution_count": 59, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "org_crosswalk.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "id": "71c0d174-736c-4093-a57f-1b39608fb2c2", - "metadata": {}, - "outputs": [], - "source": [ - "df_with_org = pd.merge(\n", - " df3,\n", - " org_crosswalk.rename(columns={\"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"}),\n", - " on=\"gtfs_dataset_key\",\n", - " how=\"inner\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "275988a7-0b45-4799-b8d5-7826913745f3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(11397, 17)" - ] - }, - "execution_count": 61, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_with_org.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 62, - "id": "3caa9bbc-51d2-4bc4-9d74-040f38ce67d5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_tripsbase64_urlorganization_source_record_idorganization_namecaltrans_district
0015d67d5b75b5cf2b710bbadadfb75f5AM Peak170Downtown San Rafael - Sausalito15dd7643b1198055544091c267b6739755.6057.5015.202.80151.00142.805aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
1015d67d5b75b5cf2b710bbadadfb75f5AM Peak171Downtown San Rafael - Sausalitode1df9489fe7de15f492c9308289102b56.2055.009.802.50147.60140.405aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
2015d67d5b75b5cf2b710bbadadfb75f5AM Peak2190Tiburon - Strawberry1a27f5f0785ae953f5dfded42e6d4e0e18.0017.3024.802.9050.5044.002aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
3015d67d5b75b5cf2b710bbadadfb75f5AM Peak2191Tiburon - Strawberrya8c9fae8e07d7a553264d4de2ffb704d19.3027.609.802.9080.6779.003aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
4015d67d5b75b5cf2b710bbadadfb75f5AM Peak220Downtown San Rafael - Marin City5c1924a3c980f9ec07d63f216d3de7af37.8042.1010.202.90105.0072.005aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ==recNOb7pqBRlQVG5eMarin County Transit District04 - Oakland
\n", - "
" - ], - "text/plain": [ - " gtfs_dataset_key time_of_day route_id direction_id \\\n", - "0 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 17 0 \n", - "1 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 17 1 \n", - "2 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 219 0 \n", - "3 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 219 1 \n", - "4 015d67d5b75b5cf2b710bbadadfb75f5 AM Peak 22 0 \n", - "\n", - " route_name shape_array_key \\\n", - "0 Downtown San Rafael - Sausalito 15dd7643b1198055544091c267b67397 \n", - "1 Downtown San Rafael - Sausalito de1df9489fe7de15f492c9308289102b \n", - "2 Tiburon - Strawberry 1a27f5f0785ae953f5dfded42e6d4e0e \n", - "3 Tiburon - Strawberry a8c9fae8e07d7a553264d4de2ffb704d \n", - "4 Downtown San Rafael - Marin City 5c1924a3c980f9ec07d63f216d3de7af \n", - "\n", - " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", - "0 55.60 57.50 15.20 2.80 \n", - "1 56.20 55.00 9.80 2.50 \n", - "2 18.00 17.30 24.80 2.90 \n", - "3 19.30 27.60 9.80 2.90 \n", - "4 37.80 42.10 10.20 2.90 \n", - "\n", - " total_vp vp_in_shape n_trips \\\n", - "0 151.00 142.80 5 \n", - "1 147.60 140.40 5 \n", - "2 50.50 44.00 2 \n", - "3 80.67 79.00 3 \n", - "4 105.00 72.00 5 \n", - "\n", - " base64_url \\\n", - "0 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", - "1 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", - "2 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", - "3 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", - "4 aHR0cHM6Ly9hcGkuNTExLm9yZy90cmFuc2l0L2RhdGFmZWVkcz9vcGVyYXRvcl9pZD1NQQ== \n", - "\n", - " organization_source_record_id organization_name \\\n", - "0 recNOb7pqBRlQVG5e Marin County Transit District \n", - "1 recNOb7pqBRlQVG5e Marin County Transit District \n", - "2 recNOb7pqBRlQVG5e Marin County Transit District \n", - "3 recNOb7pqBRlQVG5e Marin County Transit District \n", - "4 recNOb7pqBRlQVG5e Marin County Transit District \n", - "\n", - " caltrans_district \n", - "0 04 - Oakland \n", - "1 04 - Oakland \n", - "2 04 - Oakland \n", - "3 04 - Oakland \n", - "4 04 - Oakland " - ] - }, - "execution_count": 62, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df_with_org.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 63, - "id": "de3449eb-88c2-4d46-ae3a-94f881e8cade", - "metadata": {}, - "outputs": [], - "source": [ - "shapes = helpers.import_scheduled_shapes(\n", - " analysis_date,\n", - " columns=[\"shape_array_key\", \"geometry\"],\n", - " get_pandas=True,\n", - " crs=geography_utils.WGS84,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "id": "091ed4c9-6742-4a06-abdf-7b59abe7a948", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(7286, 2)" - ] - }, - "execution_count": 64, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "shapes.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "dc0f152d-19ac-4687-83e8-8d5fd225005b", - "metadata": {}, - "outputs": [], - "source": [ - "df_with_shape = pd.merge(\n", - " shapes,\n", - " df_with_org,\n", - " on=\"shape_array_key\", # once merged, can drop shape_array_key\n", - " how=\"inner\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 66, - "id": "1d8bd521-7b7e-438e-9cdd-b23606acd644", - "metadata": {}, - "outputs": [], - "source": [ - "df_with_shape[\"avg_pct_vp_shape\"] = (\n", - " df_with_shape.vp_in_shape / df_with_shape.total_vp * 100\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 67, - "id": "158662df-1735-4a1a-a4c5-7b3d00469311", - "metadata": {}, - "outputs": [], - "source": [ - "df_with_shape[\"avg_pct_rt_v_sched\"] = (\n", - " df_with_shape.avg_rt_trip_min / df_with_shape.avg_sched_trip_min - 1\n", - ") * 100" - ] - }, - { - "cell_type": "code", - "execution_count": 68, - "id": "7020e160-7e18-4099-8a97-9114eef06652", - "metadata": {}, - "outputs": [], - "source": [ - "final_df = df_with_shape.drop(columns=[\"total_vp\", \"vp_in_shape\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 69, - "id": "8079fc30-e2f2-459e-8446-120e97369dc5", - "metadata": {}, - "outputs": [], - "source": [ - "agency_cols = [\"organization_source_record_id\", \"organization_name\"]\n", - "route_cols = [\n", - " \"route_id\",\n", - " \"route_name\",\n", - " \"direction_id\",\n", - "]\n", - "\n", - "col_order = (\n", - " agency_cols\n", - " + route_cols\n", - " + [\n", - " \"time_of_day\",\n", - " \"speed_mph\",\n", - " \"n_trips\",\n", - " \"avg_sched_trip_min\",\n", - " \"avg_rt_trip_min\",\n", - " \"base64_url\",\n", - " \"caltrans_district\",\n", - " \"geometry\",\n", - " \"avg_pings_per_min\",\n", - " \"avg_pct_vp_shape\",\n", - " \"avg_pct_rt_v_sched\",\n", - " ]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "11b580bd-f482-4a11-be00-7ef88068bac9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(11397, 18)" - ] - }, - "execution_count": 70, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "9400eb5b-0ff4-4b6a-ade3-8e0fedc29d5b", - "metadata": {}, - "outputs": [], - "source": [ - "final_df = df_with_shape.reindex(columns=col_order).rename(\n", - " columns={\n", - " \"organization_source_record_id\": \"org_id\",\n", - " \"organization_name\": \"agency\",\n", - " \"caltrans_district\": \"district_name\",\n", - " }\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 72, - "id": "fe97ad8a-d0ce-40cd-982b-87877882693a", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['org_id', 'agency', 'route_id', 'route_name', 'direction_id',\n", - " 'time_of_day', 'speed_mph', 'n_trips', 'avg_sched_trip_min',\n", - " 'avg_rt_trip_min', 'base64_url', 'district_name', 'geometry',\n", - " 'avg_pings_per_min', 'avg_pct_vp_shape', 'avg_pct_rt_v_sched'],\n", - " dtype='object')" - ] - }, - "execution_count": 72, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "id": "58165517-e414-4843-8ece-b7631d4d7f27", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 11397.00\n", - "mean 2.38\n", - "std 0.56\n", - "min 0.10\n", - "25% 1.90\n", - "50% 2.50\n", - "75% 2.90\n", - "max 3.50\n", - "Name: avg_pings_per_min, dtype: float64" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_df.avg_pings_per_min.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "id": "e99b22b7-f6c7-4d69-8b65-da3c39c85f33", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(11397, 16)" - ] - }, - "execution_count": 74, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "49867873-4a76-49c4-8c95-0918d7468f82", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
org_idagencyroute_idroute_namedirection_idtime_of_dayspeed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_sched
5645rec43oyrfhtPDdRHjCity of Rancho Cordova084WATT1PM Peak6.40865.2087.6003 - Marysville1.80100.0034.36
9678receZJ9sEnP9vy3g0Monterey-Salinas Transit020-131Monterey - Salinas0AM Peak10.90650.2056.5005 - San Luis Obispo1.9099.6912.55
7041recPnGkwdpnr8jmHBLos Angeles County Metropolitan Transportation Authority754-13172HOLLYWOOD - ATHENS VIA VERMONT AV0Midday7.303169.8091.0007 - Los Angeles2.4096.6930.37
\n", - "
" - ], - "text/plain": [ - " org_id \\\n", - "5645 rec43oyrfhtPDdRHj \n", - "9678 receZJ9sEnP9vy3g0 \n", - "7041 recPnGkwdpnr8jmHB \n", - "\n", - " agency route_id \\\n", - "5645 City of Rancho Cordova 084 \n", - "9678 Monterey-Salinas Transit 020-131 \n", - "7041 Los Angeles County Metropolitan Transportation Authority 754-13172 \n", - "\n", - " route_name direction_id time_of_day speed_mph \\\n", - "5645 WATT 1 PM Peak 6.40 \n", - "9678 Monterey - Salinas 0 AM Peak 10.90 \n", - "7041 HOLLYWOOD - ATHENS VIA VERMONT AV 0 Midday 7.30 \n", - "\n", - " n_trips avg_sched_trip_min avg_rt_trip_min district_name \\\n", - "5645 8 65.20 87.60 03 - Marysville \n", - "9678 6 50.20 56.50 05 - San Luis Obispo \n", - "7041 31 69.80 91.00 07 - Los Angeles \n", - "\n", - " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \n", - "5645 1.80 100.00 34.36 \n", - "9678 1.90 99.69 12.55 \n", - "7041 2.40 96.69 30.37 " - ] - }, - "execution_count": 75, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "final_df.drop(columns=[\"geometry\", \"base64_url\"]).sample(3)" - ] - }, - { - "cell_type": "markdown", - "id": "a3acfeb1-54bf-4cee-9810-51c7e5fe0aa6", - "metadata": {}, - "source": [ - "### Check results after aggregating up to route\n", - "* How are the results sooo wrong with `265-13172`" - ] - }, - { - "cell_type": "code", - "execution_count": 76, - "id": "aa2cc0c2-da7b-4ae1-9ac4-84c8d6d60e95", - "metadata": {}, - "outputs": [], - "source": [ - "def checkout_route(route_id: str, time_of_day: str, direction_id: int):\n", - " print(\"final\")\n", - " display(\n", - " final_df.loc[\n", - " (final_df.route_id == route_id)\n", - " & (final_df.time_of_day == time_of_day)\n", - " & (final_df.direction_id == direction_id)\n", - " ].drop(columns = ['geometry','base64_url'])\n", - " )\n", - " df2_cols = [\n", - " \"trip_instance_key\",\n", - " \"time_of_day\",\n", - " \"speed_mph\",\n", - " \"rt_service_min\",\n", - " \"service_minutes\",\n", - " \"pings_per_min\",\n", - " ]\n", - " print(\"df3\")\n", - " display(df3.loc[\n", - " (df3.route_id == route_id)\n", - " & (df3.time_of_day == time_of_day)\n", - " & (df3.direction_id == direction_id)\n", - " ])\n", - " \n", - " print(\"df2\")\n", - " df2_filtered = df2.loc[\n", - " (df2.route_id == route_id)\n", - " & (df2.time_of_day == time_of_day)\n", - " & (df2.direction_id == direction_id)\n", - " ]\n", - " \n", - " print(f\"pings per min {df2_filtered.pings_per_min.mean()}\")\n", - " print(f\"speed_mph {df2_filtered.speed_mph.mean()}\")\n", - " print(f\"total_vp {df2_filtered.total_vp.mean()}\")\n", - " print(f\"vp_in_shape {df2_filtered.vp_in_shape.mean()}\")\n", - " display(df2_filtered[df2_cols])\n", - " #print(\"original\")\n", - " #trip_instance_keys_keep = list(df2_filtered.trip_instance_key.unique())\n", - " #display(dec_df.loc[dec_df.trip_instance_key.isin(trip_instance_keys_keep)])\n", - " " + "dec_intermediary.columns" ] }, { "cell_type": "markdown", "id": "c8aae146-3b5f-4189-ad0f-b011221b5442", - "metadata": {}, - "source": [ - "#### scheduled trip min (renamed from service_mins) is completely lower." - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "04ee1397-318c-4bb7-9f80-2a55b9c75055", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "final\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
org_idagencyroute_idroute_namedirection_idtime_of_dayspeed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_sched
6533recPnGkwdpnr8jmHBLos Angeles County Metropolitan Transportation Authority265-13172PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL0Early AM6.90259.0099.6007 - Los Angeles2.7070.0968.81
\n", - "
" - ], - "text/plain": [ - " org_id \\\n", - "6533 recPnGkwdpnr8jmHB \n", - "\n", - " agency route_id \\\n", - "6533 Los Angeles County Metropolitan Transportation Authority 265-13172 \n", - "\n", - " route_name direction_id \\\n", - "6533 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL 0 \n", - "\n", - " time_of_day speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "6533 Early AM 6.90 2 59.00 99.60 \n", - "\n", - " district_name avg_pings_per_min avg_pct_vp_shape \\\n", - "6533 07 - Los Angeles 2.70 70.09 \n", - "\n", - " avg_pct_rt_v_sched \n", - "6533 68.81 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, + "metadata": {}, + "source": [ + "#### scheduled trip min (renamed from service_mins) is completely lower." + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "04ee1397-318c-4bb7-9f80-2a55b9c75055", + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "df3\n" + "final\n" ] }, { @@ -3215,54 +1933,62 @@ " \n", " \n", " \n", - " gtfs_dataset_key\n", - " time_of_day\n", + " org_id\n", + " agency\n", " route_id\n", - " direction_id\n", " route_name\n", - " shape_array_key\n", + " direction_id\n", + " time_of_day\n", + " speed_mph\n", + " n_trips\n", " avg_sched_trip_min\n", " avg_rt_trip_min\n", - " speed_mph\n", + " district_name\n", " avg_pings_per_min\n", - " total_vp\n", - " vp_in_shape\n", - " n_trips\n", + " avg_pct_vp_shape\n", + " avg_pct_rt_v_sched\n", " \n", " \n", " \n", " \n", - " 1733\n", - " 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c\n", - " Early AM\n", + " 6533\n", + " recPnGkwdpnr8jmHB\n", + " Los Angeles County Metropolitan Transportation Authority\n", " 265-13172\n", - " 0\n", " PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL\n", - " 5c3d4b372420edebe33416aa0c6abe70\n", + " 0\n", + " Early AM\n", + " 6.90\n", + " 2\n", " 59.00\n", " 99.60\n", - " 6.90\n", + " 07 - Los Angeles\n", " 2.70\n", - " 267.50\n", - " 187.50\n", - " 2\n", + " 70.09\n", + " 68.81\n", " \n", " \n", "\n", "" ], "text/plain": [ - " gtfs_dataset_key time_of_day route_id direction_id \\\n", - "1733 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c Early AM 265-13172 0 \n", + " org_id \\\n", + "6533 recPnGkwdpnr8jmHB \n", + "\n", + " agency route_id \\\n", + "6533 Los Angeles County Metropolitan Transportation Authority 265-13172 \n", + "\n", + " route_name direction_id \\\n", + "6533 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL 0 \n", "\n", - " route_name \\\n", - "1733 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL \n", + " time_of_day speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "6533 Early AM 6.90 2 59.00 99.60 \n", "\n", - " shape_array_key avg_sched_trip_min avg_rt_trip_min \\\n", - "1733 5c3d4b372420edebe33416aa0c6abe70 59.00 99.60 \n", + " district_name avg_pings_per_min avg_pct_vp_shape \\\n", + "6533 07 - Los Angeles 2.70 70.09 \n", "\n", - " speed_mph avg_pings_per_min total_vp vp_in_shape n_trips \n", - "1733 6.90 2.70 267.50 187.50 2 " + " avg_pct_rt_v_sched \n", + "6533 68.81 " ] }, "metadata": {}, @@ -3272,7 +1998,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "df2\n", + "original\n", "pings per min 2.6849179704528776\n", "speed_mph 6.887376703252869\n", "total_vp 267.5\n", @@ -3346,12 +2072,12 @@ } ], "source": [ - "route_265 = checkout_route(\"265-13172\", \"Early AM\", 0)" + "route_265 = checkout_route(dec_intermediary, dec_final, \"265-13172\", \"Early AM\", 0)" ] }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 49, "id": "aa775538-5a7f-4500-bc5b-867e9b54d626", "metadata": {}, "outputs": [ @@ -3442,88 +2168,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "df3\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
6840c3499b856c717e5706299664fb1c5261Early AM56711Valley Express Dailyd99540208939e6a891e7ec67e3bf896438.00313.502.201.40446.00101.001
\n", - "
" - ], - "text/plain": [ - " gtfs_dataset_key time_of_day route_id direction_id \\\n", - "6840 c3499b856c717e5706299664fb1c5261 Early AM 5671 1 \n", - "\n", - " route_name shape_array_key \\\n", - "6840 Valley Express Daily d99540208939e6a891e7ec67e3bf8964 \n", - "\n", - " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", - "6840 38.00 313.50 2.20 1.40 \n", - "\n", - " total_vp vp_in_shape n_trips \n", - "6840 446.00 101.00 1 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df2\n", + "original\n", "pings per min 1.422723164442554\n", "speed_mph 2.158633017384419\n", "total_vp 446.0\n", @@ -3586,12 +2231,12 @@ } ], "source": [ - "checkout_route(\"5671\", \"Early AM\", 1)" + "checkout_route(dec_intermediary, dec_final, \"5671\", \"Early AM\", 1)" ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": 50, "id": "a3c23900-fdaa-476c-a490-dbc703df0c28", "metadata": {}, "outputs": [ @@ -3682,88 +2327,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "df3\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
41027cc0cb1871dfd558f11a2885c145d144AM Peak38R1Weekdays 5am-10pm Weekends 6am-9pm6e78cef03e15c5a71751030fc65e09b443.9058.906.703.00174.57154.2030
\n", - "
" - ], - "text/plain": [ - " gtfs_dataset_key time_of_day route_id direction_id \\\n", - "4102 7cc0cb1871dfd558f11a2885c145d144 AM Peak 38R 1 \n", - "\n", - " route_name shape_array_key \\\n", - "4102 Weekdays 5am-10pm Weekends 6am-9pm 6e78cef03e15c5a71751030fc65e09b4 \n", - "\n", - " avg_sched_trip_min avg_rt_trip_min speed_mph avg_pings_per_min \\\n", - "4102 43.90 58.90 6.70 3.00 \n", - "\n", - " total_vp vp_in_shape n_trips \n", - "4102 174.57 154.20 30 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df2\n", + "original\n", "pings per min 2.961175851535439\n", "speed_mph 6.682525584870673\n", "total_vp 174.56666666666666\n", @@ -4145,12 +2709,12 @@ } ], "source": [ - "checkout_route(\"38R\", \"AM Peak\", 1)" + "checkout_route(dec_intermediary, dec_final, \"38R\", \"AM Peak\", 1)" ] }, { "cell_type": "code", - "execution_count": 80, + "execution_count": 51, "id": "8168d872-49c3-44c2-bdc7-fa499124c5af", "metadata": {}, "outputs": [ @@ -4241,88 +2805,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "df3\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
gtfs_dataset_keytime_of_dayroute_iddirection_idroute_nameshape_array_keyavg_sched_trip_minavg_rt_trip_minspeed_mphavg_pings_per_mintotal_vpvp_in_shapen_trips
399078b44303c1714f6c6a4801637c2a5c9dAM PeakLynx1Rodeo/Hercules/San Francisco Transbay Terminal810696deb677bfeb1d0b09047031a9c850.0060.7013.402.80NaNNaN6
\n", - "
" - ], - "text/plain": [ - " gtfs_dataset_key time_of_day route_id direction_id \\\n", - "3990 78b44303c1714f6c6a4801637c2a5c9d AM Peak Lynx 1 \n", - "\n", - " route_name \\\n", - "3990 Rodeo/Hercules/San Francisco Transbay Terminal \n", - "\n", - " shape_array_key avg_sched_trip_min avg_rt_trip_min \\\n", - "3990 810696deb677bfeb1d0b09047031a9c8 50.00 60.70 \n", - "\n", - " speed_mph avg_pings_per_min total_vp vp_in_shape n_trips \n", - "3990 13.40 2.80 NaN NaN 6 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "df2\n", + "original\n", "pings per min 2.8253065827801014\n", "speed_mph 13.403813542450534\n", "total_vp nan\n", @@ -4440,7 +2923,7 @@ } ], "source": [ - "checkout_route(\"Lynx\", \"AM Peak\", 1)" + "checkout_route(dec_intermediary, dec_final, \"Lynx\", \"AM Peak\", 1)" ] }, { @@ -4453,7 +2936,7 @@ }, { "cell_type": "code", - "execution_count": 81, + "execution_count": 52, "id": "b8a1b06b-0e81-4641-8545-b5af34f6b47e", "metadata": {}, "outputs": [ @@ -4464,7 +2947,7 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[81], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mstop\u001b[49m\n", + "Cell \u001b[0;32mIn[52], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mstop\u001b[49m\n", "\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined" ] } From 626d7f6aa117a274296eb51dc654b0c26fe01f0d Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Fri, 5 Jan 2024 23:03:12 +0000 Subject: [PATCH 6/6] reran the months for trips bc i del some cols, added in metrics for route agg --- .../06_vp_usable_exploration.ipynb | 3970 ++++++++++------- .../logs/rt_v_scheduled_trip_metrics.log | 63 + rt_scheduled_v_ran/scripts/update_vars.py | 4 +- 3 files changed, 2312 insertions(+), 1725 deletions(-) diff --git a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb index 53de4b5f1..a56971a02 100644 --- a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb +++ b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb @@ -49,16 +49,6 @@ "pd.set_option(\"display.max_colwidth\", None)" ] }, - { - "cell_type": "code", - "execution_count": 4, - "id": "2ebc71d7-342d-4b8f-b261-709b0cbfe013", - "metadata": {}, - "outputs": [], - "source": [ - "GCS_PATH = \"gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/\"" - ] - }, { "cell_type": "code", "execution_count": 5, @@ -103,43 +93,162 @@ }, { "cell_type": "code", - "execution_count": 7, - "id": "26185ced-9f3b-4266-a080-0eeed0c0a825", + "execution_count": null, + "id": "73a27c22-ba75-4804-9ce6-f0d114962b0c", "metadata": {}, "outputs": [], "source": [ - "def check_out(df: pd.DataFrame):\n", - " display(df.spatial_accuracy_pct.describe())\n", - " display(df.pings_per_min.describe())\n", - " display(df.rt_triptime_w_gtfs_pct.describe())\n", - " display(df.rt_v_scheduled_trip_time_pct.describe())" + "\"gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/2023-12-13_metrics.parquet\"" + ] + }, + { + "cell_type": "markdown", + "id": "45cd052f-1f2a-4434-8346-2c5a1e82683f", + "metadata": {}, + "source": [ + "### Open all the files" ] }, { "cell_type": "code", - "execution_count": 8, - "id": "aba941a7-bc28-4501-9505-0fd064c24a0c", + "execution_count": 37, + "id": "05e483c3-b620-42f1-9243-d711777c1903", "metadata": {}, "outputs": [], "source": [ - "mar_df = pd.read_parquet(f\"{GCS_PATH}2023-03-15_metrics.parquet\")" + "def read_files_into_dataframes(dates: list)-> list:\n", + " \"\"\"\n", + " Read files with given dates into separate pandas DataFrames.\n", + "\n", + " Parameters:\n", + " - dates (list): List of date strings in the format 'YYYY-MM-DD'.\n", + "\n", + " Returns:\n", + " - dfs (dict): A dictionary of DataFrames with keys as DataFrame names.\n", + " \"\"\"\n", + " dfs = {}\n", + " GCS_PATH = (\n", + " \"gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/\"\n", + " )\n", + "\n", + " for date in dates:\n", + " file_path = (\n", + " f\"{GCS_PATH}{date}_metrics.parquet\" \n", + " )\n", + "\n", + " # Read the file into a DataFrame\n", + " df = pd.read_parquet(\n", + " file_path\n", + " ) \n", + "\n", + " df_name = f\"df_{date.replace('-', '_')}\"\n", + " # Store the DataFrame in the dictionary\n", + " dfs[df_name] = df\n", + "\n", + " return dfs" ] }, { "cell_type": "code", - "execution_count": 9, - "id": "af7c69e1-e363-42f8-bf1c-f9ef2ba141e2", + "execution_count": 38, + "id": "5703f90e-9ec1-4253-8392-e4df3a9dfda3", + "metadata": {}, + "outputs": [], + "source": [ + "dataframes = read_files_into_dataframes(analysis_date_list)" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "cccf51bb-4980-4d96-af0f-44259147f1fa", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "count 69494.00\n", - "mean 94.10\n", - "std 12.34\n", + "dict" + ] + }, + "execution_count": 40, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "type(dataframes)" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "077b3ff7-6707-43e6-b228-6d7fc174b306", + "metadata": {}, + "outputs": [], + "source": [ + "df_2023_12_13 = dataframes[\"df_2023_12_13\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "531c6112-c633-412f-8350-ab01fc0a4c45", + "metadata": {}, + "outputs": [], + "source": [ + "df_2023_11_15 = dataframes[\"df_2023_11_15\"]\n", + "df_2023_10_11 = dataframes[\"df_2023_10_11\"]\n", + "df_2023_09_13 = dataframes[\"df_2023_09_13\"]\n", + "df_2023_08_15 = dataframes[\"df_2023_08_15\"]\n", + "df_2023_07_12 = dataframes[\"df_2023_07_12\"]\n", + "df_2023_06_14 = dataframes[\"df_2023_06_14\"]\n", + "df_2023_05_17 = dataframes[\"df_2023_05_17\"]\n", + "df_2023_04_12 = dataframes[\"df_2023_04_12\"]\n", + "df_2023_03_15 = dataframes[\"df_2023_03_15\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "e4247b70-422f-466f-af12-060e92eb7d70", + "metadata": {}, + "outputs": [], + "source": [ + "all_dfs = [df_2023_12_13, df_2023_11_15, df_2023_10_11, df_2023_09_13, df_2023_08_15, df_2023_07_12, df_2023_06_14, df_2023_05_17, df_2023_04_12, df_2023_03_15]" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "26185ced-9f3b-4266-a080-0eeed0c0a825", + "metadata": {}, + "outputs": [], + "source": [ + "def check_out(df: pd.DataFrame):\n", + " display(df.spatial_accuracy_pct.describe())\n", + " display(df.pings_per_min.describe())\n", + " display(df.rt_triptime_w_gtfs_pct.describe())\n", + " display(df.rt_v_scheduled_trip_time_pct.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "af7c69e1-e363-42f8-bf1c-f9ef2ba141e2", + "metadata": { + "scrolled": true, + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "count 74609.00\n", + "mean 93.55\n", + "std 13.20\n", "min 0.00\n", - "25% 95.24\n", - "50% 100.00\n", + "25% 94.23\n", + "50% 99.64\n", "75% 100.00\n", "max 100.00\n", "Name: spatial_accuracy_pct, dtype: float64" @@ -151,14 +260,14 @@ { "data": { "text/plain": [ - "count 83620.00\n", - "mean 2.46\n", - "std 0.68\n", + "count 86128.00\n", + "mean 2.44\n", + "std 0.60\n", "min 0.00\n", - "25% 1.91\n", - "50% 2.86\n", - "75% 2.96\n", - "max 4.75\n", + "25% 2.05\n", + "50% 2.67\n", + "75% 2.94\n", + "max 5.15\n", "Name: pings_per_min, dtype: float64" ] }, @@ -168,14 +277,14 @@ { "data": { "text/plain": [ - "count 83620.00\n", - "mean 95.20\n", - "std 14.55\n", - "min 0.28\n", - "25% 97.99\n", - "50% 99.58\n", - "75% 100.25\n", - "max 108.43\n", + "count 86128.00\n", + "mean 95.94\n", + "std 11.70\n", + "min 0.10\n", + "25% 98.36\n", + "50% 99.65\n", + "75% 100.00\n", + "max 100.00\n", "Name: rt_triptime_w_gtfs_pct, dtype: float64" ] }, @@ -185,50 +294,29 @@ { "data": { "text/plain": [ - "count 71797.00\n", - "mean 60.68\n", - "std 333.76\n", - "min -87.37\n", - "25% 10.76\n", - "50% 25.19\n", - "75% 44.44\n", - "max 17909.79\n", + "count 76878.00\n", + "mean 44.40\n", + "std 272.99\n", + "min -86.89\n", + "25% 11.36\n", + "50% 25.56\n", + "75% 45.00\n", + "max 18873.69\n", "Name: rt_v_scheduled_trip_time_pct, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "check_out(mar_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "26701c9c-669d-4e09-8c38-5ece391f5889", - "metadata": {}, - "outputs": [], - "source": [ - "apr_df = pd.read_parquet(f\"{GCS_PATH}2023-04-12_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "509ec31f-c521-4149-be35-b4548a9e1666", - "metadata": {}, - "outputs": [ + }, { "data": { "text/plain": [ - "count 71094.00\n", - "mean 94.06\n", - "std 12.64\n", + "count 74891.00\n", + "mean 93.53\n", + "std 13.12\n", "min 0.00\n", - "25% 95.45\n", - "50% 100.00\n", + "25% 93.94\n", + "50% 99.68\n", "75% 100.00\n", "max 100.00\n", "Name: spatial_accuracy_pct, dtype: float64" @@ -240,14 +328,14 @@ { "data": { "text/plain": [ - "count 84516.00\n", - "mean 2.45\n", - "std 0.67\n", - "min 0.01\n", - "25% 1.91\n", - "50% 2.83\n", - "75% 2.95\n", - "max 5.18\n", + "count 86832.00\n", + "mean 2.51\n", + "std 0.63\n", + "min 0.00\n", + "25% 2.09\n", + "50% 2.86\n", + "75% 2.96\n", + "max 5.29\n", "Name: pings_per_min, dtype: float64" ] }, @@ -257,14 +345,14 @@ { "data": { "text/plain": [ - "count 84516.00\n", - "mean 95.23\n", - "std 14.48\n", - "min 0.56\n", - "25% 97.88\n", - "50% 99.53\n", - "75% 100.20\n", - "max 108.11\n", + "count 86832.00\n", + "mean 95.68\n", + "std 12.05\n", + "min 0.45\n", + "25% 98.40\n", + "50% 99.67\n", + "75% 100.00\n", + "max 100.00\n", "Name: rt_triptime_w_gtfs_pct, dtype: float64" ] }, @@ -274,49 +362,28 @@ { "data": { "text/plain": [ - "count 73471.00\n", - "mean 61.42\n", - "std 349.18\n", - "min -86.02\n", - "25% 10.46\n", - "50% 25.10\n", - "75% 44.38\n", - "max 15903.70\n", + "count 77194.00\n", + "mean 42.53\n", + "std 196.80\n", + "min -88.16\n", + "25% 11.44\n", + "50% 26.10\n", + "75% 46.17\n", + "max 11797.08\n", "Name: rt_v_scheduled_trip_time_pct, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "check_out(apr_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "dcf7252b-eb12-4d76-bacf-ceb34d1e0c0a", - "metadata": {}, - "outputs": [], - "source": [ - "may_df = pd.read_parquet(f\"{GCS_PATH}2023-05-17_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "151c9d6f-ce92-4ab2-ad1c-525ca627dbfc", - "metadata": {}, - "outputs": [ + }, { "data": { "text/plain": [ - "count 65385.00\n", - "mean 94.07\n", - "std 12.45\n", + "count 73902.00\n", + "mean 94.06\n", + "std 12.62\n", "min 0.00\n", - "25% 95.00\n", + "25% 95.22\n", "50% 100.00\n", "75% 100.00\n", "max 100.00\n", @@ -329,14 +396,14 @@ { "data": { "text/plain": [ - "count 83606.00\n", - "mean 2.47\n", - "std 0.68\n", + "count 86486.00\n", + "mean 2.48\n", + "std 0.66\n", "min 0.00\n", - "25% 1.95\n", - "50% 2.86\n", - "75% 2.95\n", - "max 5.14\n", + "25% 1.93\n", + "50% 2.87\n", + "75% 2.97\n", + "max 4.75\n", "Name: pings_per_min, dtype: float64" ] }, @@ -346,14 +413,14 @@ { "data": { "text/plain": [ - "count 83606.00\n", - "mean 95.47\n", - "std 13.78\n", - "min 0.27\n", - "25% 97.76\n", - "50% 99.51\n", - "75% 100.21\n", - "max 108.43\n", + "count 86486.00\n", + "mean 96.36\n", + "std 10.91\n", + "min 0.14\n", + "25% 98.51\n", + "50% 99.69\n", + "75% 100.00\n", + "max 100.00\n", "Name: rt_triptime_w_gtfs_pct, dtype: float64" ] }, @@ -363,49 +430,96 @@ { "data": { "text/plain": [ - "count 67864.00\n", - "mean 60.61\n", - "std 329.38\n", - "min -90.09\n", - "25% 10.96\n", - "50% 25.64\n", - "75% 44.61\n", - "max 17907.92\n", + "count 76255.00\n", + "mean 43.72\n", + "std 232.88\n", + "min -86.20\n", + "25% 11.80\n", + "50% 25.92\n", + "75% 45.07\n", + "max 11905.42\n", "Name: rt_v_scheduled_trip_time_pct, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "check_out(may_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "976f786d-c3e5-44ae-991c-db7d95d5169a", - "metadata": {}, - "outputs": [], - "source": [ - "jun_df = pd.read_parquet(f\"{GCS_PATH}2023-06-14_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "c87d9f5e-ae39-4c4a-b313-037fc118dce4", - "metadata": {}, - "outputs": [ + }, { "data": { "text/plain": [ - "count 64270.00\n", + "count 65175.00\n", + "mean 93.89\n", + "std 13.05\n", + "min 0.00\n", + "25% 94.83\n", + "50% 100.00\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 86133.00\n", + "mean 2.46\n", + "std 0.68\n", + "min 0.01\n", + "25% 1.92\n", + "50% 2.85\n", + "75% 2.96\n", + "max 4.86\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 86133.00\n", + "mean 96.03\n", + "std 11.53\n", + "min 0.71\n", + "25% 98.36\n", + "50% 99.66\n", + "75% 100.00\n", + "max 100.00\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 67169.00\n", + "mean 42.21\n", + "std 219.14\n", + "min -85.54\n", + "25% 11.04\n", + "50% 24.77\n", + "75% 42.61\n", + "max 11905.69\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 72938.00\n", "mean 93.92\n", - "std 12.67\n", + "std 12.79\n", "min 0.00\n", - "25% 94.77\n", + "25% 94.89\n", "50% 100.00\n", "75% 100.00\n", "max 100.00\n", @@ -418,14 +532,14 @@ { "data": { "text/plain": [ - "count 80331.00\n", - "mean 2.51\n", + "count 84422.00\n", + "mean 2.44\n", "std 0.67\n", - "min 0.01\n", - "25% 1.98\n", - "50% 2.90\n", + "min 0.00\n", + "25% 1.92\n", + "50% 2.81\n", "75% 2.96\n", - "max 4.93\n", + "max 5.48\n", "Name: pings_per_min, dtype: float64" ] }, @@ -435,14 +549,14 @@ { "data": { "text/plain": [ - "count 80331.00\n", - "mean 95.92\n", - "std 13.48\n", - "min 0.28\n", - "25% 98.43\n", - "50% 99.67\n", - "75% 100.28\n", - "max 107.78\n", + "count 84422.00\n", + "mean 95.73\n", + "std 11.76\n", + "min 0.42\n", + "25% 98.10\n", + "50% 99.62\n", + "75% 100.00\n", + "max 100.00\n", "Name: rt_triptime_w_gtfs_pct, dtype: float64" ] }, @@ -452,41 +566,20 @@ { "data": { "text/plain": [ - "count 66748.00\n", - "mean 63.20\n", - "std 345.10\n", - "min -90.25\n", - "25% 11.69\n", - "50% 25.91\n", - "75% 45.15\n", - "max 12997.42\n", + "count 75286.00\n", + "mean 43.46\n", + "std 242.02\n", + "min -87.18\n", + "25% 11.11\n", + "50% 25.43\n", + "75% 44.25\n", + "max 11905.42\n", "Name: rt_v_scheduled_trip_time_pct, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "check_out(jun_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "e55cb702-f2fa-4350-bcd9-cb057a7edbdc", - "metadata": {}, - "outputs": [], - "source": [ - "jul_df = pd.read_parquet(f\"{GCS_PATH}2023-07-12_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "a287bc73-76ff-4235-8a72-671045bac935", - "metadata": {}, - "outputs": [ + }, { "data": { "text/plain": [ @@ -525,13 +618,13 @@ "data": { "text/plain": [ "count 82044.00\n", - "mean 96.42\n", - "std 11.07\n", + "mean 96.17\n", + "std 10.97\n", "min 0.43\n", "25% 98.28\n", "50% 99.65\n", - "75% 100.25\n", - "max 108.60\n", + "75% 100.00\n", + "max 100.00\n", "Name: rt_triptime_w_gtfs_pct, dtype: float64" ] }, @@ -554,67 +647,16 @@ }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "check_out(jul_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "21b4d1b8-598d-410a-9c73-69fb92d88c1b", - "metadata": {}, - "outputs": [], - "source": [ - "aug_df = pd.read_parquet(f\"{GCS_PATH}2023-08-15_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "1cda1e4d-25f7-4836-b8ab-7b7997edb446", - "metadata": {}, - "outputs": [], - "source": [ - "sept_df = pd.read_parquet(f\"{GCS_PATH}2023-09-13_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "497e04c0-1c33-4fdf-9cfc-94e2a925ebd3", - "metadata": {}, - "outputs": [], - "source": [ - "oct_df = pd.read_parquet(f\"{GCS_PATH}2023-10-11_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "07e73d1c-5b24-43a8-852e-35cff4f416e6", - "metadata": {}, - "outputs": [], - "source": [ - "nov_df = pd.read_parquet(f\"{GCS_PATH}2023-11-15_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "38f00976-c428-4c3c-a153-eb9035a709ef", - "metadata": {}, - "outputs": [ + }, { "data": { "text/plain": [ - "count 74891.00\n", - "mean 93.53\n", - "std 13.12\n", + "count 64270.00\n", + "mean 93.92\n", + "std 12.67\n", "min 0.00\n", - "25% 93.94\n", - "50% 99.68\n", + "25% 94.77\n", + "50% 100.00\n", "75% 100.00\n", "max 100.00\n", "Name: spatial_accuracy_pct, dtype: float64" @@ -626,14 +668,14 @@ { "data": { "text/plain": [ - "count 86832.00\n", + "count 80331.00\n", "mean 2.51\n", - "std 0.63\n", - "min 0.00\n", - "25% 2.09\n", - "50% 2.86\n", + "std 0.67\n", + "min 0.01\n", + "25% 1.98\n", + "50% 2.90\n", "75% 2.96\n", - "max 5.29\n", + "max 4.93\n", "Name: pings_per_min, dtype: float64" ] }, @@ -643,14 +685,14 @@ { "data": { "text/plain": [ - "count 86832.00\n", - "mean 95.93\n", - "std 12.15\n", - "min 0.45\n", - "25% 98.40\n", + "count 80331.00\n", + "mean 95.66\n", + "std 13.38\n", + "min 0.28\n", + "25% 98.43\n", "50% 99.67\n", - "75% 100.26\n", - "max 108.93\n", + "75% 100.00\n", + "max 100.00\n", "Name: rt_triptime_w_gtfs_pct, dtype: float64" ] }, @@ -660,152 +702,97 @@ { "data": { "text/plain": [ - "count 77194.00\n", - "mean 42.53\n", - "std 196.80\n", - "min -88.16\n", - "25% 11.44\n", - "50% 26.10\n", - "75% 46.17\n", - "max 11797.08\n", + "count 66748.00\n", + "mean 63.20\n", + "std 345.10\n", + "min -90.25\n", + "25% 11.69\n", + "50% 25.91\n", + "75% 45.15\n", + "max 12997.42\n", "Name: rt_v_scheduled_trip_time_pct, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "check_out(nov_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 23, - "id": "8762a9c1-9a3b-490a-a447-4481b59d930c", - "metadata": {}, - "outputs": [ + }, { "data": { "text/plain": [ - "30287" + "count 65385.00\n", + "mean 94.07\n", + "std 12.45\n", + "min 0.00\n", + "25% 95.00\n", + "50% 100.00\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" ] }, - "execution_count": 23, "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(nov_df[nov_df.rt_triptime_w_gtfs_pct > 100])" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "e3d9b4a9-70e1-40aa-b2eb-f2e23b3a4d1f", - "metadata": {}, - "outputs": [ + "output_type": "display_data" + }, { "data": { "text/plain": [ - "86832" + "count 83606.00\n", + "mean 2.47\n", + "std 0.68\n", + "min 0.00\n", + "25% 1.95\n", + "50% 2.86\n", + "75% 2.95\n", + "max 5.14\n", + "Name: pings_per_min, dtype: float64" ] }, - "execution_count": 24, "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(nov_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "id": "c2963bc8-5535-4dce-af95-767317c2201c", - "metadata": {}, - "outputs": [ + "output_type": "display_data" + }, { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 86832 entries, 0 to 86831\n", - "Data columns (total 15 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 trip_instance_key 86832 non-null object \n", - " 1 rt_service_min 86832 non-null float64\n", - " 2 min_w_atleast2_trip_updates 86832 non-null int64 \n", - " 3 total_pings_for_trip 86832 non-null int64 \n", - " 4 total_min_w_gtfs 86832 non-null int64 \n", - " 5 total_vp 74891 non-null float64\n", - " 6 vp_in_shape 74891 non-null float64\n", - " 7 speed_mph 77194 non-null float64\n", - " 8 route_id 75619 non-null object \n", - " 9 time_of_day 77194 non-null object \n", - " 10 service_minutes 77194 non-null float64\n", - " 11 pings_per_min 86832 non-null float64\n", - " 12 spatial_accuracy_pct 74891 non-null float64\n", - " 13 rt_triptime_w_gtfs_pct 86832 non-null float64\n", - " 14 rt_v_scheduled_trip_time_pct 77194 non-null float64\n", - "dtypes: float64(9), int64(3), object(3)\n", - "memory usage: 10.6+ MB\n" - ] - } - ], - "source": [ - "nov_df.info()" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "b38de56d-57a5-4f5f-a214-5bcd9dde42f7", - "metadata": {}, - "outputs": [ + "data": { + "text/plain": [ + "count 83606.00\n", + "mean 95.22\n", + "std 13.69\n", + "min 0.27\n", + "25% 97.76\n", + "50% 99.51\n", + "75% 100.00\n", + "max 100.00\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, { "data": { "text/plain": [ - "'2023-12-13'" + "count 67864.00\n", + "mean 60.61\n", + "std 329.38\n", + "min -90.09\n", + "25% 10.96\n", + "50% 25.64\n", + "75% 44.61\n", + "max 17907.92\n", + "Name: rt_v_scheduled_trip_time_pct, dtype: float64" ] }, - "execution_count": 26, "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "analysis_date" - ] - }, - { - "cell_type": "code", - "execution_count": 27, - "id": "873ac459-c77f-4887-a003-01b572facbda", - "metadata": {}, - "outputs": [], - "source": [ - "dec_df = pd.read_parquet(f\"{GCS_PATH}2023-12-13_metrics.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "bdc1e636-c102-43d6-a2b6-4150d0c8c4db", - "metadata": {}, - "outputs": [ + "output_type": "display_data" + }, { "data": { "text/plain": [ - "count 74609.00\n", - "mean 93.55\n", - "std 13.20\n", + "count 71094.00\n", + "mean 94.06\n", + "std 12.64\n", "min 0.00\n", - "25% 94.23\n", - "50% 99.64\n", + "25% 95.45\n", + "50% 100.00\n", "75% 100.00\n", "max 100.00\n", "Name: spatial_accuracy_pct, dtype: float64" @@ -817,14 +804,14 @@ { "data": { "text/plain": [ - "count 86128.00\n", - "mean 2.44\n", - "std 0.60\n", - "min 0.00\n", - "25% 2.05\n", - "50% 2.67\n", - "75% 2.94\n", - "max 5.15\n", + "count 84516.00\n", + "mean 2.45\n", + "std 0.67\n", + "min 0.01\n", + "25% 1.91\n", + "50% 2.83\n", + "75% 2.95\n", + "max 5.18\n", "Name: pings_per_min, dtype: float64" ] }, @@ -834,12 +821,12 @@ { "data": { "text/plain": [ - "count 86128.00\n", - "mean 95.94\n", - "std 11.70\n", - "min 0.10\n", - "25% 98.36\n", - "50% 99.65\n", + "count 84516.00\n", + "mean 94.99\n", + "std 14.38\n", + "min 0.56\n", + "25% 97.88\n", + "50% 99.53\n", "75% 100.00\n", "max 100.00\n", "Name: rt_triptime_w_gtfs_pct, dtype: float64" @@ -851,110 +838,347 @@ { "data": { "text/plain": [ - "count 76878.00\n", - "mean 44.40\n", - "std 272.99\n", - "min -86.89\n", - "25% 11.36\n", - "50% 25.56\n", - "75% 45.00\n", - "max 18873.69\n", + "count 73471.00\n", + "mean 61.42\n", + "std 349.18\n", + "min -86.02\n", + "25% 10.46\n", + "50% 25.10\n", + "75% 44.38\n", + "max 15903.70\n", "Name: rt_v_scheduled_trip_time_pct, dtype: float64" ] }, "metadata": {}, "output_type": "display_data" - } - ], - "source": [ - "check_out(dec_df)" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "26e59c78-1e6b-428e-acd2-746b34be8311", - "metadata": {}, - "outputs": [ + }, { "data": { "text/plain": [ - "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", - " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", - " 'speed_mph', 'service_minutes', 'pings_per_min', 'spatial_accuracy_pct',\n", - " 'rt_triptime_w_gtfs_pct', 'rt_v_scheduled_trip_time_pct'],\n", - " dtype='object')" + "count 69494.00\n", + "mean 94.10\n", + "std 12.34\n", + "min 0.00\n", + "25% 95.24\n", + "50% 100.00\n", + "75% 100.00\n", + "max 100.00\n", + "Name: spatial_accuracy_pct, dtype: float64" ] }, - "execution_count": 29, "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dec_df.columns" - ] - }, - { - "cell_type": "code", - "execution_count": 30, - "id": "cb1def78-76b0-4d8f-9a74-258987c62ce2", - "metadata": {}, - "outputs": [ + "output_type": "display_data" + }, { "data": { "text/plain": [ - "count 76878.00\n", - "mean 44.40\n", - "std 272.99\n", - "min -86.89\n", - "25% 11.36\n", - "50% 25.56\n", - "75% 45.00\n", - "max 18873.69\n", + "count 83620.00\n", + "mean 2.46\n", + "std 0.68\n", + "min 0.00\n", + "25% 1.91\n", + "50% 2.86\n", + "75% 2.96\n", + "max 4.75\n", + "Name: pings_per_min, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 83620.00\n", + "mean 94.96\n", + "std 14.45\n", + "min 0.28\n", + "25% 97.99\n", + "50% 99.58\n", + "75% 100.00\n", + "max 100.00\n", + "Name: rt_triptime_w_gtfs_pct, dtype: float64" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "count 71797.00\n", + "mean 60.68\n", + "std 333.76\n", + "min -87.37\n", + "25% 10.76\n", + "50% 25.19\n", + "75% 44.44\n", + "max 17909.79\n", "Name: rt_v_scheduled_trip_time_pct, dtype: float64" ] }, - "execution_count": 30, "metadata": {}, - "output_type": "execute_result" + "output_type": "display_data" } ], "source": [ - "dec_df.rt_v_scheduled_trip_time_pct.describe()" + "for i in all_dfs:\n", + " check_out(i)" ] }, { "cell_type": "markdown", - "id": "857348c6-fcfd-4569-a80d-fd1e400c84f9", + "id": "b71376f9-2343-4d14-99f0-025c05b7c7b0", "metadata": {}, "source": [ - "### See why some trips have such crazy rt_v_scheduled_trip_time_pct" + "### Aggregating up to the route level" + ] + }, + { + "cell_type": "markdown", + "id": "90958a55-27ca-447f-8304-37773081c973", + "metadata": { + "tags": [] + }, + "source": [ + "#### Step 1: add missing cols\n", + "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/avg_speeds_by_segment.py#L135" ] }, { "cell_type": "code", - "execution_count": 31, - "id": "6bffdcf2-29cf-4c55-8465-75ee7e87fa1a", + "execution_count": 57, + "id": "989a09e4-2f5a-4065-94f0-fc6cee1b1126", "metadata": {}, "outputs": [], "source": [ - "preview_cols = [\"rt_service_min\", \"service_minutes\", \"rt_v_scheduled_trip_time_pct\"]" + "def add_scheduled_trip_columns(\n", + " df: pd.DataFrame, analysis_date: str, group_cols: list = [\"trip_instance_key\"]\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " Merge RT trips (vehicle positions) to scheduled trips.\n", + " Add in the needed scheduled trip columns to take\n", + " route-direction-time_of_day averages.\n", + " \"\"\"\n", + " keep_cols = [\n", + " \"gtfs_dataset_key\",\n", + " \"direction_id\",\n", + " \"route_id\",\n", + " \"route_short_name\",\n", + " \"route_long_name\",\n", + " \"route_desc\",\n", + " ] + group_cols\n", + "\n", + " crosswalk = helpers.import_scheduled_trips(\n", + " analysis_date, columns=keep_cols, get_pandas=True\n", + " )\n", + "\n", + " common_keep_cols = [\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"route_id\",\n", + " \"direction_id\",\n", + " \"shape_array_key\",\n", + " ]\n", + " common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)[\n", + " common_keep_cols\n", + " ]\n", + "\n", + " crosswalk2 = pd.merge(\n", + " crosswalk,\n", + " common_shape,\n", + " on=[\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n", + " how=\"inner\",\n", + " ).astype({\"direction_id\": \"Int64\"})\n", + "\n", + " crosswalk2 = portfolio_utils.add_route_name(crosswalk2).drop(\n", + " columns=[\"route_short_name\", \"route_long_name\", \"route_desc\"]\n", + " )\n", + "\n", + " time_keep_cols = [\n", + " \"trip_instance_key\",\n", + " \"service_hours\",\n", + " \"trip_first_departure_datetime_pacific\",\n", + " \"time_of_day\",\n", + " ]\n", + " time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)[time_keep_cols]\n", + "\n", + " df2 = pd.merge(df, crosswalk2, on=\"trip_instance_key\", how=\"left\").merge(\n", + " time_of_day, on=\"trip_instance_key\", how=\"left\"\n", + " )\n", + "\n", + " return df2" ] }, { "cell_type": "code", - "execution_count": 32, - "id": "a92ed528-9953-4cdb-8801-4b89e39b8feb", + "execution_count": 58, + "id": "21db7580-cf79-4385-b9dc-80cd99206011", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphservice_minutespings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pct
75367bdaf00fefb641fad797f64761bade1df55.975616656166.00165.005.7823.002.9799.40100.00143.33
\n", + "
" + ], + "text/plain": [ + " trip_instance_key rt_service_min \\\n", + "75367 bdaf00fefb641fad797f64761bade1df 55.97 \n", + "\n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "75367 56 166 56 \n", + "\n", + " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", + "75367 166.00 165.00 5.78 23.00 2.97 \n", + "\n", + " spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", + "75367 99.40 100.00 \n", + "\n", + " rt_v_scheduled_trip_time_pct \n", + "75367 143.33 " + ] + }, + "execution_count": 66, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "### Test aggregating with Dec" + "dec_df.sample()" ] }, { "cell_type": "markdown", - "id": "90958a55-27ca-447f-8304-37773081c973", + "id": "a3acfeb1-54bf-4cee-9810-51c7e5fe0aa6", "metadata": {}, "source": [ - "#### Step 1: add missing cols\n", - "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/avg_speeds_by_segment.py#L135" + "### Check results after aggregating up to route\n", + "* How are the results sooo wrong with `265-13172`" ] }, { "cell_type": "code", - "execution_count": 34, - "id": "989a09e4-2f5a-4065-94f0-fc6cee1b1126", + "execution_count": 67, + "id": "aa2cc0c2-da7b-4ae1-9ac4-84c8d6d60e95", "metadata": {}, "outputs": [], "source": [ - "def add_scheduled_trip_columns(\n", - " df: pd.DataFrame, analysis_date: str, group_cols: list = [\"trip_instance_key\"]\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " Merge RT trips (vehicle positions) to scheduled trips.\n", - " Add in the needed scheduled trip columns to take\n", - " route-direction-time_of_day averages.\n", - " \"\"\"\n", - " keep_cols = [\n", - " \"gtfs_dataset_key\",\n", - " \"direction_id\",\n", - " \"route_id\",\n", - " \"route_short_name\",\n", - " \"route_long_name\",\n", - " \"route_desc\",\n", - " ] + group_cols\n", - "\n", - " crosswalk = helpers.import_scheduled_trips(\n", - " analysis_date, columns=keep_cols, get_pandas=True\n", - " )\n", - "\n", - " common_keep_cols = [\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"route_id\",\n", - " \"direction_id\",\n", - " \"shape_array_key\",\n", - " ]\n", - " common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)[\n", - " common_keep_cols\n", - " ]\n", - "\n", - " crosswalk2 = pd.merge(\n", - " crosswalk,\n", - " common_shape,\n", - " on=[\"schedule_gtfs_dataset_key\", \"route_id\", \"direction_id\"],\n", - " how=\"inner\",\n", - " ).astype({\"direction_id\": \"Int64\"})\n", - "\n", - " crosswalk2 = portfolio_utils.add_route_name(crosswalk2).drop(\n", - " columns=[\"route_short_name\", \"route_long_name\", \"route_desc\"]\n", + "def checkout_route(\n", + " og_df: pd.DataFrame,\n", + " route_agg: gpd.GeoDataFrame,\n", + " route_id: str,\n", + " time_of_day: str,\n", + " direction_id: int,\n", + "):\n", + " print(\"final\")\n", + " display(\n", + " route_agg.loc[\n", + " (route_agg.route_id == route_id)\n", + " & (route_agg.time_of_day == time_of_day)\n", + " & (route_agg.direction_id == direction_id)\n", + " ].drop(columns=[\"geometry\", \"base64_url\"])\n", " )\n", - "\n", - " time_keep_cols = [\n", + " cols = [\n", " \"trip_instance_key\",\n", - " \"service_hours\",\n", - " \"trip_first_departure_datetime_pacific\",\n", " \"time_of_day\",\n", + " \"speed_mph\",\n", + " \"rt_service_min\",\n", + " \"service_minutes\",\n", + " \"pings_per_min\",\n", + " \"total_min_w_gtfs\",\n", + " \"min_w_atleast2_trip_updates\",\n", " ]\n", - " time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)[time_keep_cols]\n", "\n", - " df2 = pd.merge(df, crosswalk2, on=\"trip_instance_key\", how=\"left\").merge(\n", - " time_of_day, on=\"trip_instance_key\", how=\"left\"\n", - " )\n", + " print(\"original\")\n", + " og_df2 = og_df.loc[\n", + " (og_df.route_id == route_id)\n", + " & (og_df.time_of_day == time_of_day)\n", + " & (og_df.direction_id == direction_id)\n", + " ]\n", "\n", - " return df2" + " print(f\"pings per min {og_df2.pings_per_min.mean()}\")\n", + " print(f\"speed_mph {og_df2.speed_mph.mean()}\")\n", + " print(f\"total_vp {og_df2.total_vp.mean()}\")\n", + " print(f\"vp_in_shape {og_df2.vp_in_shape.mean()}\")\n", + " print(f\"min w gtfs {og_df2.total_min_w_gtfs.mean()}\")\n", + " print(f\"min w at least 2 pings {og_df2.min_w_atleast2_trip_updates.mean()}\")\n", + " display(og_df2[cols])" ] }, { - "cell_type": "code", - "execution_count": 35, - "id": "7fabab72-c1a4-468f-83d6-b1e066014129", + "cell_type": "markdown", + "id": "c8aae146-3b5f-4189-ad0f-b011221b5442", "metadata": {}, - "outputs": [], "source": [ - "dec_df2 = add_scheduled_trip_columns(dec_df, analysis_date, [\"trip_instance_key\"])" + "#### scheduled trip min (renamed from service_mins) is completely lower." ] }, { "cell_type": "code", - "execution_count": 36, - "id": "fea7cda5-606c-4054-b189-58a12d250957", + "execution_count": 68, + "id": "04ee1397-318c-4bb7-9f80-2a55b9c75055", "metadata": {}, "outputs": [ { - "data": { - "text/plain": [ - "pandas.core.frame.DataFrame" - ] - }, - "execution_count": 36, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "type(dec_df2)" - ] - }, - { - "cell_type": "markdown", - "id": "d2ff8a3d-d3f5-42b4-b096-24b33b9842ca", - "metadata": {}, - "source": [ - "#### Aggregate avg speed by route\n", - "* DO I need to use the other functions in the script\n", - "* Do we still drop rows that are above 70 mph?\n", - "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/export.py#L150\n" - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "21db7580-cf79-4385-b9dc-80cd99206011", - "metadata": {}, - "outputs": [], - "source": [ - "def average_route_speeds_for_export(\n", - " df: pd.DataFrame,\n", - " analysis_date: str,\n", - " max_speed: int,\n", - ") -> gpd.GeoDataFrame:\n", - " \"\"\"\n", - " Aggregate trip speeds to route-direction.\n", - " Attach shape geometry to most common shape_id.\n", - " \"\"\"\n", - " df2 = df.loc[df.speed_mph <= 70].reset_index(drop=True)\n", - "\n", - " route_cols = [\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"time_of_day\",\n", - " \"route_id\",\n", - " \"direction_id\",\n", - " \"route_name_used\",\n", - " \"shape_array_key\",\n", - " ]\n", - "\n", - " mean_cols = [\n", - " \"service_minutes\",\n", - " \"rt_service_min\",\n", - " \"speed_mph\",\n", - " \"pings_per_min\",\n", - " \"total_vp\",\n", - " \"vp_in_shape\",\n", - " ]\n", - " count_cols = [\"trip_instance_key\"]\n", - "\n", - " df3 = (\n", - " df2.groupby(route_cols)\n", - " .agg({**{e: \"mean\" for e in mean_cols}, **{e: \"count\" for e in count_cols}})\n", - " .reset_index()\n", - " )\n", - "\n", - " df4 = df3.assign(\n", - " rt_service_min=df3.rt_service_min.round(1),\n", - " service_minutes=df3.service_minutes.round(1),\n", - " speed_mph=df3.speed_mph.round(1),\n", - " pings_per_min=df3.pings_per_min.round(1),\n", - " ).rename(\n", - " columns={\n", - " \"service_minutes\": \"avg_sched_trip_min\",\n", - " \"rt_service_min\": \"avg_rt_trip_min\",\n", - " \"trip_instance_key\": \"n_trips\",\n", - " \"route_name_used\": \"route_name\",\n", - " \"pings_per_min\": \"avg_pings_per_min\",\n", - " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", - " }\n", - " )\n", - "\n", - " org_crosswalk = schedule_rt_utils.sample_gtfs_dataset_key_to_organization_crosswalk(\n", - " df4,\n", - " analysis_date,\n", - " quartet_data=\"schedule\",\n", - " dim_gtfs_dataset_cols=[\"key\", \"base64_url\"],\n", - " dim_organization_cols=[\"source_record_id\", \"name\", \"caltrans_district\"],\n", - " )\n", - "\n", - " df_with_org = pd.merge(\n", - " df4,\n", - " org_crosswalk.rename(columns={\"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"}),\n", - " on=\"gtfs_dataset_key\",\n", - " how=\"inner\",\n", - " )\n", - "\n", - " shapes = helpers.import_scheduled_shapes(\n", - " analysis_date,\n", - " columns=[\"shape_array_key\", \"geometry\"],\n", - " get_pandas=True,\n", - " crs=geography_utils.WGS84,\n", - " )\n", - "\n", - " df_with_shape = pd.merge(\n", - " shapes,\n", - " df_with_org,\n", - " on=\"shape_array_key\", # once merged, can drop shape_array_key\n", - " how=\"inner\",\n", - " )\n", - "\n", - " df_with_shape[\"avg_pct_vp_shape\"] = (\n", - " df_with_shape.vp_in_shape / df_with_shape.total_vp * 100\n", - " )\n", - "\n", - " df_with_shape[\"avg_pct_rt_v_sched\"] = (\n", - " df_with_shape.avg_rt_trip_min / df_with_shape.avg_sched_trip_min - 1\n", - " ) * 100\n", - "\n", - " final_df = df_with_shape.drop(columns=[\"total_vp\", \"vp_in_shape\"])\n", - "\n", - " agency_cols = [\"organization_source_record_id\", \"organization_name\"]\n", - " route_cols = [\n", - " \"route_id\",\n", - " \"route_name\",\n", - " \"direction_id\",\n", - " ]\n", - "\n", - " col_order = (\n", - " agency_cols\n", - " + route_cols\n", - " + [\n", - " \"time_of_day\",\n", - " \"speed_mph\",\n", - " \"n_trips\",\n", - " \"avg_sched_trip_min\",\n", - " \"avg_rt_trip_min\",\n", - " \"base64_url\",\n", - " \"caltrans_district\",\n", - " \"geometry\",\n", - " \"avg_pings_per_min\",\n", - " \"avg_pct_vp_shape\",\n", - " \"avg_pct_rt_v_sched\",\n", - " ]\n", - " )\n", - "\n", - " final_df = df_with_shape.reindex(columns=col_order).rename(\n", - " columns={\n", - " \"organization_source_record_id\": \"org_id\",\n", - " \"organization_name\": \"agency\",\n", - " \"caltrans_district\": \"district_name\",\n", - " }\n", - " )\n", - "\n", - " return df2, final_df" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "id": "fe97ad8a-d0ce-40cd-982b-87877882693a", - "metadata": {}, - "outputs": [], - "source": [ - "dec_intermediary, dec_final = average_route_speeds_for_export(\n", - " dec_df2, analysis_date, 70\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "id": "58165517-e414-4843-8ece-b7631d4d7f27", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 11397.00\n", - "mean 2.38\n", - "std 0.56\n", - "min 0.10\n", - "25% 1.90\n", - "50% 2.50\n", - "75% 2.90\n", - "max 3.50\n", - "Name: avg_pings_per_min, dtype: float64" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dec_final.avg_pings_per_min.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "id": "e99b22b7-f6c7-4d69-8b65-da3c39c85f33", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(11397, 16)" - ] - }, - "execution_count": 40, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dec_final.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "49867873-4a76-49c4-8c95-0918d7468f82", - "metadata": {}, - "outputs": [ + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, { "data": { "text/html": [ @@ -1618,7 +1729,7 @@ " route_name\n", " direction_id\n", " time_of_day\n", - " speed_mph\n", + " avg_speed_mph\n", " n_trips\n", " avg_sched_trip_min\n", " avg_rt_trip_min\n", @@ -1626,346 +1737,29 @@ " avg_pings_per_min\n", " avg_pct_vp_shape\n", " avg_pct_rt_v_sched\n", + " avg_rt_triptime_w_gtfs_pct\n", + " avg_min_w_atleast2_trip_updates\n", " \n", " \n", " \n", " \n", - " 31\n", - " rec8zhnCPETu6qEiH\n", - " City of Redondo Beach\n", - " 4819\n", - " Redondo Beach Pier / Greenline Station\n", - " 1\n", - " AM Peak\n", - " 7.20\n", - " 6\n", - " 46.50\n", - " 119.70\n", + " 6533\n", + " recPnGkwdpnr8jmHB\n", + " Los Angeles County Metropolitan Transportation Authority\n", + " 265-13172\n", + " PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL\n", + " 0\n", + " Early AM\n", + " 6.90\n", + " 2\n", + " 59.00\n", + " 99.60\n", " 07 - Los Angeles\n", - " 2.40\n", - " 99.61\n", - " 157.42\n", - " \n", - " \n", - " 8468\n", - " recSiaaMmBXW7fUZS\n", - " Stanislaus Regional Transit Authority\n", - " 29\n", - " \n", - " 1\n", - " PM Peak\n", - " 8.50\n", - " 5\n", - " 27.00\n", - " 29.40\n", - " 10 - Stockton\n", - " 3.00\n", - " 83.92\n", - " 8.89\n", - " \n", - " \n", - " 2440\n", - " recOZgevYf7Jimm9L\n", - " Alameda-Contra Costa Transit District\n", - " 6\n", - " Berkeley - Telegraph - Oakland\n", - " 1\n", - " AM Peak\n", - " 6.30\n", - " 15\n", - " 32.20\n", - " 46.90\n", - " 04 - Oakland\n", - " 2.80\n", - " 87.65\n", - " 45.65\n", - " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - " org_id agency route_id \\\n", - "31 rec8zhnCPETu6qEiH City of Redondo Beach 4819 \n", - "8468 recSiaaMmBXW7fUZS Stanislaus Regional Transit Authority 29 \n", - "2440 recOZgevYf7Jimm9L Alameda-Contra Costa Transit District 6 \n", - "\n", - " route_name direction_id time_of_day \\\n", - "31 Redondo Beach Pier / Greenline Station 1 AM Peak \n", - "8468 1 PM Peak \n", - "2440 Berkeley - Telegraph - Oakland 1 AM Peak \n", - "\n", - " speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "31 7.20 6 46.50 119.70 \n", - "8468 8.50 5 27.00 29.40 \n", - "2440 6.30 15 32.20 46.90 \n", - "\n", - " district_name avg_pings_per_min avg_pct_vp_shape \\\n", - "31 07 - Los Angeles 2.40 99.61 \n", - "8468 10 - Stockton 3.00 83.92 \n", - "2440 04 - Oakland 2.80 87.65 \n", - "\n", - " avg_pct_rt_v_sched \n", - "31 157.42 \n", - "8468 8.89 \n", - "2440 45.65 " - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dec_final.drop(columns=[\"geometry\", \"base64_url\"]).sample(3)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "3844b20a-6af8-4d23-a42e-166b7330907d", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphservice_minutespings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pct
29113220b6e20e6957e20bceb14947d71d36768.026720168NaNNaNNaNNaN2.96NaN99.98NaN
\n", - "
" - ], - "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "29113 220b6e20e6957e20bceb14947d71d367 68.02 \n", - "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "29113 67 201 68 \n", - "\n", - " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", - "29113 NaN NaN NaN NaN 2.96 \n", - "\n", - " spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", - "29113 NaN 99.98 \n", - "\n", - " rt_v_scheduled_trip_time_pct \n", - "29113 NaN " - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dec_df.sample()" - ] - }, - { - "cell_type": "markdown", - "id": "a3acfeb1-54bf-4cee-9810-51c7e5fe0aa6", - "metadata": {}, - "source": [ - "### Check results after aggregating up to route\n", - "* How are the results sooo wrong with `265-13172`" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "id": "aa2cc0c2-da7b-4ae1-9ac4-84c8d6d60e95", - "metadata": {}, - "outputs": [], - "source": [ - "def checkout_route(\n", - " og_df: pd.DataFrame,\n", - " route_agg: gpd.GeoDataFrame,\n", - " route_id: str,\n", - " time_of_day: str,\n", - " direction_id: int,\n", - "):\n", - " print(\"final\")\n", - " display(\n", - " route_agg.loc[\n", - " (route_agg.route_id == route_id)\n", - " & (route_agg.time_of_day == time_of_day)\n", - " & (route_agg.direction_id == direction_id)\n", - " ].drop(columns=[\"geometry\", \"base64_url\"])\n", - " )\n", - " cols = [\n", - " \"trip_instance_key\",\n", - " \"time_of_day\",\n", - " \"speed_mph\",\n", - " \"rt_service_min\",\n", - " \"service_minutes\",\n", - " \"pings_per_min\",\n", - " ]\n", - "\n", - " print(\"original\")\n", - " og_df2 = og_df.loc[\n", - " (og_df.route_id == route_id)\n", - " & (og_df.time_of_day == time_of_day)\n", - " & (og_df.direction_id == direction_id)\n", - " ]\n", - "\n", - " print(f\"pings per min {og_df2.pings_per_min.mean()}\")\n", - " print(f\"speed_mph {og_df2.speed_mph.mean()}\")\n", - " print(f\"total_vp {og_df2.total_vp.mean()}\")\n", - " print(f\"vp_in_shape {og_df2.vp_in_shape.mean()}\")\n", - " display(og_df2[cols])" - ] - }, - { - "cell_type": "code", - "execution_count": 47, - "id": "10b8676a-783b-4df9-84cb-013785f0c1ca", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", - " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", - " 'speed_mph', 'service_minutes', 'pings_per_min', 'spatial_accuracy_pct',\n", - " 'rt_triptime_w_gtfs_pct', 'rt_v_scheduled_trip_time_pct',\n", - " 'schedule_gtfs_dataset_key', 'direction_id', 'route_id',\n", - " 'shape_array_key', 'route_name_used', 'service_hours',\n", - " 'trip_first_departure_datetime_pacific', 'time_of_day'],\n", - " dtype='object')" - ] - }, - "execution_count": 47, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "dec_intermediary.columns" - ] - }, - { - "cell_type": "markdown", - "id": "c8aae146-3b5f-4189-ad0f-b011221b5442", - "metadata": {}, - "source": [ - "#### scheduled trip min (renamed from service_mins) is completely lower." - ] - }, - { - "cell_type": "code", - "execution_count": 48, - "id": "04ee1397-318c-4bb7-9f80-2a55b9c75055", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "final\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
org_idagencyroute_idroute_namedirection_idtime_of_dayspeed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_sched
6533recPnGkwdpnr8jmHBLos Angeles County Metropolitan Transportation Authority265-13172PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL0Early AM6.90259.0099.6007 - Los Angeles2.7070.0968.812.7070.0968.8199.9092.00
\n", @@ -1981,14 +1775,17 @@ " route_name direction_id \\\n", "6533 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL 0 \n", "\n", - " time_of_day speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "6533 Early AM 6.90 2 59.00 99.60 \n", + " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "6533 Early AM 6.90 2 59.00 99.60 \n", "\n", " district_name avg_pings_per_min avg_pct_vp_shape \\\n", "6533 07 - Los Angeles 2.70 70.09 \n", "\n", - " avg_pct_rt_v_sched \n", - "6533 68.81 " + " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", + "6533 68.81 99.90 \n", + "\n", + " avg_min_w_atleast2_trip_updates \n", + "6533 92.00 " ] }, "metadata": {}, @@ -2002,7 +1799,9 @@ "pings per min 2.6849179704528776\n", "speed_mph 6.887376703252869\n", "total_vp 267.5\n", - "vp_in_shape 187.5\n" + "vp_in_shape 187.5\n", + "min w gtfs 99.5\n", + "min w at least 2 pings 92.0\n" ] }, { @@ -2032,6 +1831,8 @@ " rt_service_min\n", " service_minutes\n", " pings_per_min\n", + " total_min_w_gtfs\n", + " min_w_atleast2_trip_updates\n", " \n", " \n", " \n", @@ -2043,6 +1844,8 @@ " 95.02\n", " 58.00\n", " 2.65\n", + " 95\n", + " 87\n", " \n", " \n", " 24867\n", @@ -2052,6 +1855,8 @@ " 104.13\n", " 60.00\n", " 2.72\n", + " 104\n", + " 97\n", " \n", " \n", "\n", @@ -2062,9 +1867,13 @@ "24866 48a01217589c2faa46db395d6cf8317d Early AM 9.40 \n", "24867 70674803a1c4416fc49f883bc3b2c18b Early AM 4.38 \n", "\n", - " rt_service_min service_minutes pings_per_min \n", - "24866 95.02 58.00 2.65 \n", - "24867 104.13 60.00 2.72 " + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "24866 95.02 58.00 2.65 95 \n", + "24867 104.13 60.00 2.72 104 \n", + "\n", + " min_w_atleast2_trip_updates \n", + "24866 87 \n", + "24867 97 " ] }, "metadata": {}, @@ -2077,7 +1886,7 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 69, "id": "aa775538-5a7f-4500-bc5b-867e9b54d626", "metadata": {}, "outputs": [ @@ -2115,7 +1924,7 @@ " route_name\n", " direction_id\n", " time_of_day\n", - " speed_mph\n", + " avg_speed_mph\n", " n_trips\n", " avg_sched_trip_min\n", " avg_rt_trip_min\n", @@ -2123,6 +1932,8 @@ " avg_pings_per_min\n", " avg_pct_vp_shape\n", " avg_pct_rt_v_sched\n", + " avg_rt_triptime_w_gtfs_pct\n", + " avg_min_w_atleast2_trip_updates\n", " \n", " \n", " \n", @@ -2142,6 +1953,8 @@ " 1.40\n", " 22.65\n", " 725.00\n", + " 50.08\n", + " 152.00\n", " \n", " \n", "\n", @@ -2151,14 +1964,17 @@ " org_id agency route_id \\\n", "193 rec3u4aMplqObcoTR Tahoe Transportation District 5671 \n", "\n", - " route_name direction_id time_of_day speed_mph n_trips \\\n", - "193 Valley Express Daily 1 Early AM 2.20 1 \n", + " route_name direction_id time_of_day avg_speed_mph n_trips \\\n", + "193 Valley Express Daily 1 Early AM 2.20 1 \n", "\n", " avg_sched_trip_min avg_rt_trip_min district_name avg_pings_per_min \\\n", "193 38.00 313.50 03 - Marysville 1.40 \n", "\n", - " avg_pct_vp_shape avg_pct_rt_v_sched \n", - "193 22.65 725.00 " + " avg_pct_vp_shape avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", + "193 22.65 725.00 50.08 \n", + "\n", + " avg_min_w_atleast2_trip_updates \n", + "193 152.00 " ] }, "metadata": {}, @@ -2172,7 +1988,9 @@ "pings per min 1.422723164442554\n", "speed_mph 2.158633017384419\n", "total_vp 446.0\n", - "vp_in_shape 101.0\n" + "vp_in_shape 101.0\n", + "min w gtfs 157.0\n", + "min w at least 2 pings 152.0\n" ] }, { @@ -2202,6 +2020,8 @@ " rt_service_min\n", " service_minutes\n", " pings_per_min\n", + " total_min_w_gtfs\n", + " min_w_atleast2_trip_updates\n", " \n", " \n", " \n", @@ -2213,6 +2033,8 @@ " 313.48\n", " 38.00\n", " 1.42\n", + " 157\n", + " 152\n", " \n", " \n", "\n", @@ -2222,8 +2044,11 @@ " trip_instance_key time_of_day speed_mph \\\n", "27758 6fadf197f5bb105ed916de0a337386ee Early AM 2.16 \n", "\n", - " rt_service_min service_minutes pings_per_min \n", - "27758 313.48 38.00 1.42 " + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "27758 313.48 38.00 1.42 157 \n", + "\n", + " min_w_atleast2_trip_updates \n", + "27758 152 " ] }, "metadata": {}, @@ -2236,7 +2061,7 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 70, "id": "a3c23900-fdaa-476c-a490-dbc703df0c28", "metadata": {}, "outputs": [ @@ -2274,7 +2099,7 @@ " route_name\n", " direction_id\n", " time_of_day\n", - " speed_mph\n", + " avg_speed_mph\n", " n_trips\n", " avg_sched_trip_min\n", " avg_rt_trip_min\n", @@ -2282,6 +2107,8 @@ " avg_pings_per_min\n", " avg_pct_vp_shape\n", " avg_pct_rt_v_sched\n", + " avg_rt_triptime_w_gtfs_pct\n", + " avg_min_w_atleast2_trip_updates\n", " \n", " \n", " \n", @@ -2301,6 +2128,8 @@ " 3.00\n", " 88.33\n", " 34.17\n", + " 100.17\n", + " 58.30\n", " \n", " \n", "\n", @@ -2310,14 +2139,17 @@ " org_id agency route_id \\\n", "9012 rechaapWbeffO33OX City and County of San Francisco 38R \n", "\n", - " route_name direction_id time_of_day speed_mph \\\n", - "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 AM Peak 6.70 \n", + " route_name direction_id time_of_day \\\n", + "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 AM Peak \n", + "\n", + " avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "9012 6.70 30 43.90 58.90 \n", "\n", - " n_trips avg_sched_trip_min avg_rt_trip_min district_name \\\n", - "9012 30 43.90 58.90 04 - Oakland \n", + " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", + "9012 04 - Oakland 3.00 88.33 34.17 \n", "\n", - " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \n", - "9012 3.00 88.33 34.17 " + " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", + "9012 100.17 58.30 " ] }, "metadata": {}, @@ -2331,7 +2163,9 @@ "pings per min 2.961175851535439\n", "speed_mph 6.682525584870673\n", "total_vp 174.56666666666666\n", - "vp_in_shape 154.2\n" + "vp_in_shape 154.2\n", + "min w gtfs 58.96666666666667\n", + "min w at least 2 pings 58.3\n" ] }, { @@ -2361,6 +2195,8 @@ " rt_service_min\n", " service_minutes\n", " pings_per_min\n", + " total_min_w_gtfs\n", + " min_w_atleast2_trip_updates\n", " \n", " \n", " \n", @@ -2372,6 +2208,8 @@ " 48.82\n", " 39.00\n", " 2.97\n", + " 49\n", + " 49\n", " \n", " \n", " 61611\n", @@ -2381,6 +2219,8 @@ " 49.38\n", " 39.00\n", " 2.96\n", + " 50\n", + " 49\n", " \n", " \n", " 61612\n", @@ -2390,6 +2230,8 @@ " 97.67\n", " 41.00\n", " 2.98\n", + " 98\n", + " 97\n", " \n", " \n", " 61613\n", @@ -2399,6 +2241,8 @@ " 49.45\n", " 41.00\n", " 2.95\n", + " 49\n", + " 49\n", " \n", " \n", " 61614\n", @@ -2408,6 +2252,8 @@ " 57.95\n", " 41.00\n", " 2.97\n", + " 58\n", + " 58\n", " \n", " \n", " 61615\n", @@ -2417,6 +2263,8 @@ " 93.43\n", " 42.00\n", " 2.99\n", + " 94\n", + " 93\n", " \n", " \n", " 61616\n", @@ -2426,6 +2274,8 @@ " 67.83\n", " 42.00\n", " 2.96\n", + " 68\n", + " 67\n", " \n", " \n", " 61617\n", @@ -2435,6 +2285,8 @@ " 54.93\n", " 43.00\n", " 2.97\n", + " 55\n", + " 54\n", " \n", " \n", " 61618\n", @@ -2444,6 +2296,8 @@ " 52.62\n", " 43.00\n", " 2.96\n", + " 52\n", + " 52\n", " \n", " \n", " 61619\n", @@ -2453,6 +2307,8 @@ " 51.58\n", " 43.00\n", " 2.97\n", + " 52\n", + " 51\n", " \n", " \n", " 61620\n", @@ -2462,6 +2318,8 @@ " 41.27\n", " 44.00\n", " 2.96\n", + " 41\n", + " 41\n", " \n", " \n", " 61621\n", @@ -2471,6 +2329,8 @@ " 56.75\n", " 44.00\n", " 2.96\n", + " 57\n", + " 57\n", " \n", " \n", " 61622\n", @@ -2480,6 +2340,8 @@ " 52.38\n", " 45.00\n", " 2.96\n", + " 52\n", + " 52\n", " \n", " \n", " 61623\n", @@ -2489,6 +2351,8 @@ " 84.38\n", " 45.00\n", " 2.97\n", + " 84\n", + " 84\n", " \n", " \n", " 61624\n", @@ -2498,6 +2362,8 @@ " 49.57\n", " 45.00\n", " 2.97\n", + " 50\n", + " 49\n", " \n", " \n", " 61625\n", @@ -2507,6 +2373,8 @@ " 70.50\n", " 45.00\n", " 2.98\n", + " 71\n", + " 70\n", " \n", " \n", " 61626\n", @@ -2516,6 +2384,8 @@ " 47.77\n", " 45.00\n", " 2.97\n", + " 48\n", + " 47\n", " \n", " \n", " 61627\n", @@ -2525,6 +2395,8 @@ " 55.53\n", " 45.00\n", " 2.97\n", + " 56\n", + " 55\n", " \n", " \n", " 61628\n", @@ -2534,6 +2406,8 @@ " 59.95\n", " 45.00\n", " 2.97\n", + " 59\n", + " 59\n", " \n", " \n", " 61629\n", @@ -2543,6 +2417,8 @@ " 54.80\n", " 45.00\n", " 2.81\n", + " 52\n", + " 52\n", " \n", " \n", " 61630\n", @@ -2552,6 +2428,8 @@ " 45.50\n", " 45.00\n", " 2.97\n", + " 46\n", + " 45\n", " \n", " \n", " 61631\n", @@ -2561,6 +2439,8 @@ " 63.07\n", " 45.00\n", " 2.97\n", + " 63\n", + " 63\n", " \n", " \n", " 61632\n", @@ -2570,6 +2450,8 @@ " 47.82\n", " 46.00\n", " 2.97\n", + " 48\n", + " 47\n", " \n", " \n", " 61633\n", @@ -2579,6 +2461,8 @@ " 48.32\n", " 46.00\n", " 2.96\n", + " 49\n", + " 47\n", " \n", " \n", " 61634\n", @@ -2588,6 +2472,8 @@ " 59.93\n", " 47.00\n", " 2.97\n", + " 60\n", + " 60\n", " \n", " \n", " 61635\n", @@ -2597,6 +2483,8 @@ " 56.57\n", " 47.00\n", " 2.97\n", + " 57\n", + " 56\n", " \n", " \n", " 61636\n", @@ -2606,6 +2494,8 @@ " 52.43\n", " 47.00\n", " 2.96\n", + " 53\n", + " 51\n", " \n", " \n", " 61637\n", @@ -2615,6 +2505,8 @@ " 47.77\n", " 47.00\n", " 2.95\n", + " 48\n", + " 47\n", " \n", " \n", " 61638\n", @@ -2624,15 +2516,905 @@ " 59.90\n", " 47.00\n", " 2.95\n", + " 60\n", + " 59\n", + " \n", + " \n", + " 66858\n", + " 6058e1a8b5072b0ab2a6a6275aa125e7\n", + " AM Peak\n", + " 6.07\n", + " 89.63\n", + " 39.00\n", + " 2.98\n", + " 90\n", + " 89\n", + " \n", + " \n", + "\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "61610 76fa3ed3fd8ef28a446eedb4c1e94e6a AM Peak 9.39 \n", + "61611 47d8da2afbc3e4c78f9be4c6c53a7776 AM Peak 10.00 \n", + "61612 40590ba21f73ae1775a4538e34e67cc3 AM Peak 5.34 \n", + "61613 aa1b90f05357a29abc97fae90cd5bafe AM Peak 9.73 \n", + "61614 4d7cb7ddea2191f062c1ea3165df67c8 AM Peak 8.58 \n", + "61615 d4e16623b6cf749d8e389b905ab44089 AM Peak 4.49 \n", + "61616 ac9822b2d06b84790c10b7cbbf694410 AM Peak 7.10 \n", + "61617 092c4f20368b13bc5c92fda4fb16bd93 AM Peak 3.77 \n", + "61618 88cf8d5d7cd5d0c74a9b78d5e62fd8a3 AM Peak 7.06 \n", + "61619 2845f0ae70ae06c3618d58fa99a1cdde AM Peak 8.57 \n", + "61620 118feb9aedabd1b9bb354ce493c07735 AM Peak 5.07 \n", + "61621 adea377ad6e2ee6fb01e515724547b17 AM Peak 8.72 \n", + "61622 08797b2f64712704fd8fff8e3c3d1dc2 AM Peak 8.72 \n", + "61623 6366ccc0f17673ccd753214d9cb433ed AM Peak 8.61 \n", + "61624 bda28536257ad187c4469c6f188635a6 AM Peak 4.12 \n", + "61625 585d84b16b4a418da661644e3a2314d1 AM Peak 8.28 \n", + "61626 6789beb9af8dcb7d8b76afd9cce184a4 AM Peak 8.91 \n", + "61627 f36bb7386e5681abb33b97cf5c69b9db AM Peak 8.81 \n", + "61628 d2a71fa9b476e583315b6f203ccf8c67 AM Peak 3.39 \n", + "61629 82d0a21cfae4aa608861e3bb0172110c AM Peak 7.36 \n", + "61630 18792d0d435c7d031ea66115a3e985ee AM Peak 4.51 \n", + "61631 401e0c1fed455778f28834b60b66a4ff AM Peak 3.23 \n", + "61632 abb36fd3b7d6d95dccf23ed5709bce69 AM Peak 8.41 \n", + "61633 743153cc91ad7ce0ff294a451347d87e AM Peak 9.01 \n", + "61634 addecf9d30d8193bae40b35b00ced394 AM Peak 3.39 \n", + "61635 a95f1835bc54bdf7cf77b5bac056e103 AM Peak 3.60 \n", + "61636 7dec1b76c7e8e141e19ecc2b780b202c AM Peak 8.61 \n", + "61637 02efbb740cfeced30c8fd237ab981723 AM Peak 4.29 \n", + "61638 ff8e7fa939ecb40d43443bc1777eea65 AM Peak 3.37 \n", + "66858 6058e1a8b5072b0ab2a6a6275aa125e7 AM Peak 6.07 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "61610 48.82 39.00 2.97 49 \n", + "61611 49.38 39.00 2.96 50 \n", + "61612 97.67 41.00 2.98 98 \n", + "61613 49.45 41.00 2.95 49 \n", + "61614 57.95 41.00 2.97 58 \n", + "61615 93.43 42.00 2.99 94 \n", + "61616 67.83 42.00 2.96 68 \n", + "61617 54.93 43.00 2.97 55 \n", + "61618 52.62 43.00 2.96 52 \n", + "61619 51.58 43.00 2.97 52 \n", + "61620 41.27 44.00 2.96 41 \n", + "61621 56.75 44.00 2.96 57 \n", + "61622 52.38 45.00 2.96 52 \n", + "61623 84.38 45.00 2.97 84 \n", + "61624 49.57 45.00 2.97 50 \n", + "61625 70.50 45.00 2.98 71 \n", + "61626 47.77 45.00 2.97 48 \n", + "61627 55.53 45.00 2.97 56 \n", + "61628 59.95 45.00 2.97 59 \n", + "61629 54.80 45.00 2.81 52 \n", + "61630 45.50 45.00 2.97 46 \n", + "61631 63.07 45.00 2.97 63 \n", + "61632 47.82 46.00 2.97 48 \n", + "61633 48.32 46.00 2.96 49 \n", + "61634 59.93 47.00 2.97 60 \n", + "61635 56.57 47.00 2.97 57 \n", + "61636 52.43 47.00 2.96 53 \n", + "61637 47.77 47.00 2.95 48 \n", + "61638 59.90 47.00 2.95 60 \n", + "66858 89.63 39.00 2.98 90 \n", + "\n", + " min_w_atleast2_trip_updates \n", + "61610 49 \n", + "61611 49 \n", + "61612 97 \n", + "61613 49 \n", + "61614 58 \n", + "61615 93 \n", + "61616 67 \n", + "61617 54 \n", + "61618 52 \n", + "61619 51 \n", + "61620 41 \n", + "61621 57 \n", + "61622 52 \n", + "61623 84 \n", + "61624 49 \n", + "61625 70 \n", + "61626 47 \n", + "61627 55 \n", + "61628 59 \n", + "61629 52 \n", + "61630 45 \n", + "61631 63 \n", + "61632 47 \n", + "61633 47 \n", + "61634 60 \n", + "61635 56 \n", + "61636 51 \n", + "61637 47 \n", + "61638 59 \n", + "66858 89 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_route(dec_intermediary, dec_final, \"38R\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "8168d872-49c3-44c2-bdc7-fa499124c5af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idtime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
5973recIKnsnTdKQ0vsivWestern Contra Costa Transit AuthorityLynxRodeo/Hercules/San Francisco Transbay Terminal1AM Peak13.40650.0060.7004 - Oakland2.80NaN21.4098.0257.70
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id \\\n", + "5973 recIKnsnTdKQ0vsiv Western Contra Costa Transit Authority Lynx \n", + "\n", + " route_name direction_id \\\n", + "5973 Rodeo/Hercules/San Francisco Transbay Terminal 1 \n", + "\n", + " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "5973 AM Peak 13.40 6 50.00 60.70 \n", + "\n", + " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", + "5973 04 - Oakland 2.80 NaN 21.40 \n", + "\n", + " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", + "5973 98.02 57.70 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "original\n", + "pings per min 2.8253065827801014\n", + "speed_mph 13.403813542450534\n", + "total_vp nan\n", + "vp_in_shape nan\n", + "min w gtfs 59.5\n", + "min w at least 2 pings 57.666666666666664\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
1298714c9fc6b379e1fe869ba37bfd79a8245AM Peak13.3161.4050.002.725756
129902ee02fe17c9acad17ccb44eaaea4debeAM Peak12.6963.1850.002.906362
1299178a402dfe7d89e0b919a193ac59c69aeAM Peak12.6163.1850.002.826260
129942bb4857e894a94d48a79620858c8384eAM Peak12.4464.4550.002.846362
12997b97a1995cd54253c58e82bb7c9ad3414AM Peak15.5952.8250.002.825350
13004baeeed7c3d6ab74ad9ff40f42a2f1da3AM Peak13.7859.1350.002.865956
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "12987 14c9fc6b379e1fe869ba37bfd79a8245 AM Peak 13.31 \n", + "12990 2ee02fe17c9acad17ccb44eaaea4debe AM Peak 12.69 \n", + "12991 78a402dfe7d89e0b919a193ac59c69ae AM Peak 12.61 \n", + "12994 2bb4857e894a94d48a79620858c8384e AM Peak 12.44 \n", + "12997 b97a1995cd54253c58e82bb7c9ad3414 AM Peak 15.59 \n", + "13004 baeeed7c3d6ab74ad9ff40f42a2f1da3 AM Peak 13.78 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "12987 61.40 50.00 2.72 57 \n", + "12990 63.18 50.00 2.90 63 \n", + "12991 63.18 50.00 2.82 62 \n", + "12994 64.45 50.00 2.84 63 \n", + "12997 52.82 50.00 2.82 53 \n", + "13004 59.13 50.00 2.86 59 \n", + "\n", + " min_w_atleast2_trip_updates \n", + "12987 56 \n", + "12990 62 \n", + "12991 60 \n", + "12994 62 \n", + "12997 50 \n", + "13004 56 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_route(dec_intermediary, dec_final, \"Lynx\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "markdown", + "id": "bfc98904-dbed-4302-8a9c-55adea3676b9", + "metadata": {}, + "source": [ + "### Test aggregating with March" + ] + }, + { + "cell_type": "code", + "execution_count": 72, + "id": "0fd73539-ea23-4b0a-9509-ae0162f512af", + "metadata": {}, + "outputs": [], + "source": [ + "mar_date = \"2023-03-15\"" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "d491e2ca-9da1-4ccc-a86e-ff535b5d2ace", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphservice_minutespings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pct
638313d8b42c5ef16df6405029c9c87f9161538.7228673767.0036.009.3819.001.7353.7395.57103.77
\n", + "
" + ], + "text/plain": [ + " trip_instance_key rt_service_min \\\n", + "63831 3d8b42c5ef16df6405029c9c87f91615 38.72 \n", + "\n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "63831 28 67 37 \n", + "\n", + " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", + "63831 67.00 36.00 9.38 19.00 1.73 \n", + "\n", + " spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", + "63831 53.73 95.57 \n", + "\n", + " rt_v_scheduled_trip_time_pct \n", + "63831 103.77 " + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mar_df.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "cd2a6ffd-03a9-432a-ae0d-ac15f795278e", + "metadata": {}, + "outputs": [], + "source": [ + "mar_df2 = add_scheduled_trip_columns(df_2023_03_15, mar_date, [\"trip_instance_key\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "f9a78bb6-f91e-42ad-85da-b954b606c050", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 83620 entries, 0 to 83619\n", + "Data columns (total 21 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trip_instance_key 83620 non-null object \n", + " 1 rt_service_min 83620 non-null float64 \n", + " 2 min_w_atleast2_trip_updates 83620 non-null int64 \n", + " 3 total_pings_for_trip 83620 non-null int64 \n", + " 4 total_min_w_gtfs 83620 non-null int64 \n", + " 5 total_vp 69494 non-null float64 \n", + " 6 vp_in_shape 69494 non-null float64 \n", + " 7 speed_mph 71797 non-null float64 \n", + " 8 service_minutes 71797 non-null float64 \n", + " 9 pings_per_min 83620 non-null float64 \n", + " 10 spatial_accuracy_pct 69494 non-null float64 \n", + " 11 rt_triptime_w_gtfs_pct 83620 non-null float64 \n", + " 12 rt_v_scheduled_trip_time_pct 71797 non-null float64 \n", + " 13 schedule_gtfs_dataset_key 71867 non-null object \n", + " 14 direction_id 71867 non-null Int64 \n", + " 15 route_id 71867 non-null object \n", + " 16 shape_array_key 71867 non-null object \n", + " 17 route_name_used 71867 non-null object \n", + " 18 service_hours 72874 non-null float64 \n", + " 19 trip_first_departure_datetime_pacific 72874 non-null datetime64[ns]\n", + " 20 time_of_day 72874 non-null object \n", + "dtypes: Int64(1), datetime64[ns](1), float64(10), int64(3), object(6)\n", + "memory usage: 14.1+ MB\n" + ] + } + ], + "source": [ + "mar_df2.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "988731d1-b062-43e5-896d-8bad990fdf46", + "metadata": {}, + "outputs": [], + "source": [ + "mar_intermediary, mar_final = average_route_speeds_for_export(mar_df2, mar_date, 70)" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "64960b35-b4ac-43e4-9e6f-594e349fe7e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['org_id', 'agency', 'route_id', 'route_name', 'direction_id',\n", + " 'time_of_day', 'avg_speed_mph', 'n_trips', 'avg_sched_trip_min',\n", + " 'avg_rt_trip_min', 'base64_url', 'district_name', 'geometry',\n", + " 'avg_pings_per_min', 'avg_pct_vp_shape', 'avg_pct_rt_v_sched',\n", + " 'avg_rt_triptime_w_gtfs_pct', 'avg_min_w_atleast2_trip_updates'],\n", + " dtype='object')" + ] + }, + "execution_count": 77, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mar_final.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 78, + "id": "c70a9b62-f855-405b-9271-6b129b70cab9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idtime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
1780recfma7GNR5lQTTTgOrange County Transportation Authority54Garden Grove - Orange via Chapman Ave1Midday11.701075.50112.5012 - Irvine2.9099.0449.0196.62106.90
8979recJcXMNC5MUm2uDeVictor Valley Transit Authority3215Victor Valley Mall - Victor Valley College0Evening10.00119.0043.0008 - San Bernardino2.80100.00126.3297.6740.00
4227recANs4M9yDhvDyobLivermore / Amador Valley Transit Authority611Ruby Hill1PM Peak13.40142.0047.2004 - Oakland3.0087.1412.3899.5847.00
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id \\\n", + "1780 recfma7GNR5lQTTTg Orange County Transportation Authority 54 \n", + "8979 recJcXMNC5MUm2uDe Victor Valley Transit Authority 3215 \n", + "4227 recANs4M9yDhvDyob Livermore / Amador Valley Transit Authority 611 \n", + "\n", + " route_name direction_id time_of_day \\\n", + "1780 Garden Grove - Orange via Chapman Ave 1 Midday \n", + "8979 Victor Valley Mall - Victor Valley College 0 Evening \n", + "4227 Ruby Hill 1 PM Peak \n", + "\n", + " avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "1780 11.70 10 75.50 112.50 \n", + "8979 10.00 1 19.00 43.00 \n", + "4227 13.40 1 42.00 47.20 \n", + "\n", + " district_name avg_pings_per_min avg_pct_vp_shape \\\n", + "1780 12 - Irvine 2.90 99.04 \n", + "8979 08 - San Bernardino 2.80 100.00 \n", + "4227 04 - Oakland 3.00 87.14 \n", + "\n", + " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", + "1780 49.01 96.62 \n", + "8979 126.32 97.67 \n", + "4227 12.38 99.58 \n", + "\n", + " avg_min_w_atleast2_trip_updates \n", + "1780 106.90 \n", + "8979 40.00 \n", + "4227 47.00 " + ] + }, + "execution_count": 78, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mar_final.sample(3).drop(columns=[\"base64_url\", \"geometry\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "bf6c84be-eb6d-4142-9ec7-2c737cd96517", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idtime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
3720reckQmUdXUzHFmlVfCity of Ojai4763200PM Peak26.10449.0033.0007 - Los Angeles2.90100.00-32.65100.6132.20
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id route_name direction_id \\\n", + "3720 reckQmUdXUzHFmlVf City of Ojai 4763 20 0 \n", + "\n", + " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "3720 PM Peak 26.10 4 49.00 33.00 \n", + "\n", + " district_name avg_pings_per_min avg_pct_vp_shape \\\n", + "3720 07 - Los Angeles 2.90 100.00 \n", + "\n", + " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", + "3720 -32.65 100.61 \n", + "\n", + " avg_min_w_atleast2_trip_updates \n", + "3720 32.20 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "original\n", + "pings per min 2.909125279321039\n", + "speed_mph 26.116502281263557\n", + "total_vp 96.75\n", + "vp_in_shape 96.75\n", + "min w gtfs 33.25\n", + "min w at least 2 pings 32.25\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
364527ea7312469de416b5c96c2df46b07c85PM Peak46.1216.6734.002.881616
364533d6760f1db43f24d68d2e780ac2be0dbPM Peak15.0449.0554.002.945048
372601af731c948711c3f047f29f922d24ee6PM Peak14.2651.6854.002.965251
668586058e1a8b5072b0ab2a6a6275aa125e7AM Peak6.0789.6339.002.983727849ce553a741ac1fcd734f035f299b81bPM Peak29.0614.6854.002.861514
\n", @@ -2640,68 +3422,22 @@ ], "text/plain": [ " trip_instance_key time_of_day speed_mph \\\n", - "61610 76fa3ed3fd8ef28a446eedb4c1e94e6a AM Peak 9.39 \n", - "61611 47d8da2afbc3e4c78f9be4c6c53a7776 AM Peak 10.00 \n", - "61612 40590ba21f73ae1775a4538e34e67cc3 AM Peak 5.34 \n", - "61613 aa1b90f05357a29abc97fae90cd5bafe AM Peak 9.73 \n", - "61614 4d7cb7ddea2191f062c1ea3165df67c8 AM Peak 8.58 \n", - "61615 d4e16623b6cf749d8e389b905ab44089 AM Peak 4.49 \n", - "61616 ac9822b2d06b84790c10b7cbbf694410 AM Peak 7.10 \n", - "61617 092c4f20368b13bc5c92fda4fb16bd93 AM Peak 3.77 \n", - "61618 88cf8d5d7cd5d0c74a9b78d5e62fd8a3 AM Peak 7.06 \n", - "61619 2845f0ae70ae06c3618d58fa99a1cdde AM Peak 8.57 \n", - "61620 118feb9aedabd1b9bb354ce493c07735 AM Peak 5.07 \n", - "61621 adea377ad6e2ee6fb01e515724547b17 AM Peak 8.72 \n", - "61622 08797b2f64712704fd8fff8e3c3d1dc2 AM Peak 8.72 \n", - "61623 6366ccc0f17673ccd753214d9cb433ed AM Peak 8.61 \n", - "61624 bda28536257ad187c4469c6f188635a6 AM Peak 4.12 \n", - "61625 585d84b16b4a418da661644e3a2314d1 AM Peak 8.28 \n", - "61626 6789beb9af8dcb7d8b76afd9cce184a4 AM Peak 8.91 \n", - "61627 f36bb7386e5681abb33b97cf5c69b9db AM Peak 8.81 \n", - "61628 d2a71fa9b476e583315b6f203ccf8c67 AM Peak 3.39 \n", - "61629 82d0a21cfae4aa608861e3bb0172110c AM Peak 7.36 \n", - "61630 18792d0d435c7d031ea66115a3e985ee AM Peak 4.51 \n", - "61631 401e0c1fed455778f28834b60b66a4ff AM Peak 3.23 \n", - "61632 abb36fd3b7d6d95dccf23ed5709bce69 AM Peak 8.41 \n", - "61633 743153cc91ad7ce0ff294a451347d87e AM Peak 9.01 \n", - "61634 addecf9d30d8193bae40b35b00ced394 AM Peak 3.39 \n", - "61635 a95f1835bc54bdf7cf77b5bac056e103 AM Peak 3.60 \n", - "61636 7dec1b76c7e8e141e19ecc2b780b202c AM Peak 8.61 \n", - "61637 02efbb740cfeced30c8fd237ab981723 AM Peak 4.29 \n", - "61638 ff8e7fa939ecb40d43443bc1777eea65 AM Peak 3.37 \n", - "66858 6058e1a8b5072b0ab2a6a6275aa125e7 AM Peak 6.07 \n", + "36452 7ea7312469de416b5c96c2df46b07c85 PM Peak 46.12 \n", + "36453 3d6760f1db43f24d68d2e780ac2be0db PM Peak 15.04 \n", + "37260 1af731c948711c3f047f29f922d24ee6 PM Peak 14.26 \n", + "37278 49ce553a741ac1fcd734f035f299b81b PM Peak 29.06 \n", "\n", - " rt_service_min service_minutes pings_per_min \n", - "61610 48.82 39.00 2.97 \n", - "61611 49.38 39.00 2.96 \n", - "61612 97.67 41.00 2.98 \n", - "61613 49.45 41.00 2.95 \n", - "61614 57.95 41.00 2.97 \n", - "61615 93.43 42.00 2.99 \n", - "61616 67.83 42.00 2.96 \n", - "61617 54.93 43.00 2.97 \n", - "61618 52.62 43.00 2.96 \n", - "61619 51.58 43.00 2.97 \n", - "61620 41.27 44.00 2.96 \n", - "61621 56.75 44.00 2.96 \n", - "61622 52.38 45.00 2.96 \n", - "61623 84.38 45.00 2.97 \n", - "61624 49.57 45.00 2.97 \n", - "61625 70.50 45.00 2.98 \n", - "61626 47.77 45.00 2.97 \n", - "61627 55.53 45.00 2.97 \n", - "61628 59.95 45.00 2.97 \n", - "61629 54.80 45.00 2.81 \n", - "61630 45.50 45.00 2.97 \n", - "61631 63.07 45.00 2.97 \n", - "61632 47.82 46.00 2.97 \n", - "61633 48.32 46.00 2.96 \n", - "61634 59.93 47.00 2.97 \n", - "61635 56.57 47.00 2.97 \n", - "61636 52.43 47.00 2.96 \n", - "61637 47.77 47.00 2.95 \n", - "61638 59.90 47.00 2.95 \n", - "66858 89.63 39.00 2.98 " + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "36452 16.67 34.00 2.88 16 \n", + "36453 49.05 54.00 2.94 50 \n", + "37260 51.68 54.00 2.96 52 \n", + "37278 14.68 54.00 2.86 15 \n", + "\n", + " min_w_atleast2_trip_updates \n", + "36452 16 \n", + "36453 48 \n", + "37260 51 \n", + "37278 14 " ] }, "metadata": {}, @@ -2709,13 +3445,13 @@ } ], "source": [ - "checkout_route(dec_intermediary, dec_final, \"38R\", \"AM Peak\", 1)" + "checkout_route(mar_intermediary, mar_final, \"4763\", \"PM Peak\", 0)" ] }, { "cell_type": "code", - "execution_count": 51, - "id": "8168d872-49c3-44c2-bdc7-fa499124c5af", + "execution_count": 80, + "id": "1993266f-05df-4613-9573-29402822b9da", "metadata": {}, "outputs": [ { @@ -2752,7 +3488,7 @@ " route_name\n", " direction_id\n", " time_of_day\n", - " speed_mph\n", + " avg_speed_mph\n", " n_trips\n", " avg_sched_trip_min\n", " avg_rt_trip_min\n", @@ -2760,42 +3496,49 @@ " avg_pings_per_min\n", " avg_pct_vp_shape\n", " avg_pct_rt_v_sched\n", + " avg_rt_triptime_w_gtfs_pct\n", + " avg_min_w_atleast2_trip_updates\n", " \n", " \n", " \n", " \n", - " 5973\n", - " recIKnsnTdKQ0vsiv\n", - " Western Contra Costa Transit Authority\n", - " Lynx\n", - " Rodeo/Hercules/San Francisco Transbay Terminal\n", - " 1\n", - " AM Peak\n", - " 13.40\n", - " 6\n", - " 50.00\n", - " 60.70\n", - " 04 - Oakland\n", - " 2.80\n", - " NaN\n", - " 21.40\n", + " 1285\n", + " recRBcrX4ZvTyvSnm\n", + " North County Transit District\n", + " 332\n", + " Vista TC - Buena Creek Station\n", + " 0\n", + " Evening\n", + " 16.00\n", + " 2\n", + " 32.00\n", + " 46.20\n", + " 11 - San Diego\n", + " 2.60\n", + " 100.00\n", + " 44.38\n", + " 99.57\n", + " 43.00\n", " \n", " \n", "\n", "
" ], "text/plain": [ - " org_id agency route_id \\\n", - "5973 recIKnsnTdKQ0vsiv Western Contra Costa Transit Authority Lynx \n", + " org_id agency route_id \\\n", + "1285 recRBcrX4ZvTyvSnm North County Transit District 332 \n", "\n", - " route_name direction_id \\\n", - "5973 Rodeo/Hercules/San Francisco Transbay Terminal 1 \n", + " route_name direction_id time_of_day avg_speed_mph \\\n", + "1285 Vista TC - Buena Creek Station 0 Evening 16.00 \n", + "\n", + " n_trips avg_sched_trip_min avg_rt_trip_min district_name \\\n", + "1285 2 32.00 46.20 11 - San Diego \n", "\n", - " time_of_day speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "5973 AM Peak 13.40 6 50.00 60.70 \n", + " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", + "1285 2.60 100.00 44.38 \n", "\n", - " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \n", - "5973 04 - Oakland 2.80 NaN 21.40 " + " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", + "1285 99.57 43.00 " ] }, "metadata": {}, @@ -2806,10 +3549,12 @@ "output_type": "stream", "text": [ "original\n", - "pings per min 2.8253065827801014\n", - "speed_mph 13.403813542450534\n", - "total_vp nan\n", - "vp_in_shape nan\n" + "pings per min 2.5985041071389485\n", + "speed_mph 15.952292627153918\n", + "total_vp 120.0\n", + "vp_in_shape 120.0\n", + "min w gtfs 46.0\n", + "min w at least 2 pings 43.0\n" ] }, { @@ -2839,62 +3584,32 @@ " rt_service_min\n", " service_minutes\n", " pings_per_min\n", + " total_min_w_gtfs\n", + " min_w_atleast2_trip_updates\n", " \n", " \n", " \n", " \n", - " 12987\n", - " 14c9fc6b379e1fe869ba37bfd79a8245\n", - " AM Peak\n", - " 13.31\n", - " 61.40\n", - " 50.00\n", - " 2.72\n", - " \n", - " \n", - " 12990\n", - " 2ee02fe17c9acad17ccb44eaaea4debe\n", - " AM Peak\n", - " 12.69\n", - " 63.18\n", - " 50.00\n", - " 2.90\n", - " \n", - " \n", - " 12991\n", - " 78a402dfe7d89e0b919a193ac59c69ae\n", - " AM Peak\n", - " 12.61\n", - " 63.18\n", - " 50.00\n", - " 2.82\n", - " \n", - " \n", - " 12994\n", - " 2bb4857e894a94d48a79620858c8384e\n", - " AM Peak\n", - " 12.44\n", - " 64.45\n", - " 50.00\n", - " 2.84\n", - " \n", - " \n", - " 12997\n", - " b97a1995cd54253c58e82bb7c9ad3414\n", - " AM Peak\n", - " 15.59\n", - " 52.82\n", - " 50.00\n", - " 2.82\n", - " \n", - " \n", - " 13004\n", - " baeeed7c3d6ab74ad9ff40f42a2f1da3\n", - " AM Peak\n", - " 13.78\n", - " 59.13\n", - " 50.00\n", - " 2.86\n", + " 41418\n", + " e54c1ac191dc0b57df34834df825d0ad\n", + " Evening\n", + " 10.83\n", + " 47.10\n", + " 31.00\n", + " 2.59\n", + " 47\n", + " 43\n", + " \n", + " \n", + " 41433\n", + " e1d2ca1ad28f5fe1a3ec9a772cfae369\n", + " Evening\n", + " 21.07\n", + " 45.27\n", + " 33.00\n", + " 2.61\n", + " 45\n", + " 43\n", " \n", " \n", "\n", @@ -2902,20 +3617,16 @@ ], "text/plain": [ " trip_instance_key time_of_day speed_mph \\\n", - "12987 14c9fc6b379e1fe869ba37bfd79a8245 AM Peak 13.31 \n", - "12990 2ee02fe17c9acad17ccb44eaaea4debe AM Peak 12.69 \n", - "12991 78a402dfe7d89e0b919a193ac59c69ae AM Peak 12.61 \n", - "12994 2bb4857e894a94d48a79620858c8384e AM Peak 12.44 \n", - "12997 b97a1995cd54253c58e82bb7c9ad3414 AM Peak 15.59 \n", - "13004 baeeed7c3d6ab74ad9ff40f42a2f1da3 AM Peak 13.78 \n", + "41418 e54c1ac191dc0b57df34834df825d0ad Evening 10.83 \n", + "41433 e1d2ca1ad28f5fe1a3ec9a772cfae369 Evening 21.07 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "41418 47.10 31.00 2.59 47 \n", + "41433 45.27 33.00 2.61 45 \n", "\n", - " rt_service_min service_minutes pings_per_min \n", - "12987 61.40 50.00 2.72 \n", - "12990 63.18 50.00 2.90 \n", - "12991 63.18 50.00 2.82 \n", - "12994 64.45 50.00 2.84 \n", - "12997 52.82 50.00 2.82 \n", - "13004 59.13 50.00 2.86 " + " min_w_atleast2_trip_updates \n", + "41418 43 \n", + "41433 43 " ] }, "metadata": {}, @@ -2923,196 +3634,7 @@ } ], "source": [ - "checkout_route(dec_intermediary, dec_final, \"Lynx\", \"AM Peak\", 1)" - ] - }, - { - "cell_type": "markdown", - "id": "470e444a-41ce-47c9-8e70-9866904e936e", - "metadata": {}, - "source": [ - "#### Test grouping" - ] - }, - { - "cell_type": "code", - "execution_count": 52, - "id": "b8a1b06b-0e81-4641-8545-b5af34f6b47e", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'stop' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[52], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mstop\u001b[49m\n", - "\u001b[0;31mNameError\u001b[0m: name 'stop' is not defined" - ] - } - ], - "source": [ - "stop" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "97d81b5c-5d47-405c-a9bd-2a51ffe00b73", - "metadata": {}, - "outputs": [], - "source": [ - "df2.columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4e6e3f60-5e14-4fe4-96b4-9df6be9db761", - "metadata": {}, - "outputs": [], - "source": [ - "route_groupby_cols = [\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"time_of_day\",\n", - " \"route_id\",\n", - " \"direction_id\",\n", - " \"route_name_used\",\n", - " \"shape_array_key\",\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e931b56b-1048-49f8-b0c8-4d9ea4fd34db", - "metadata": {}, - "outputs": [], - "source": [ - "route_groupby_cols" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3b18d789-a84f-42c6-addc-2f97b9e6fafc", - "metadata": {}, - "outputs": [], - "source": [ - "test1 = (\n", - " df2.groupby(route_groupby_cols, observed=False, group_keys=True)\n", - " .agg(\n", - " {\n", - " \"service_minutes\": \"mean\",\n", - " \"rt_service_min\": \"mean\",\n", - " \"speed_mph\": \"mean\",\n", - " \"pings_per_min\": \"mean\",\n", - " \"total_vp\": \"mean\",\n", - " \"vp_in_shape\": \"mean\",\n", - " \"trip_instance_key\": \"count\",\n", - " }\n", - " )\n", - " .reset_index()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "429f3c5b-e1f6-40a0-86a3-586ad00685fb", - "metadata": {}, - "outputs": [], - "source": [ - "def checkout_test_groups(\n", - " df: pd.DataFrame, route_id: str, time_of_day: str, direction_id: int\n", - "):\n", - " test_cols = [\n", - " \"trip_instance_key\",\n", - " \"service_minutes\",\n", - " \"rt_service_min\",\n", - " \"pings_per_min\",\n", - " \"speed_mph\",\n", - " \"total_vp\",\n", - " \"vp_in_shape\",\n", - " ]\n", - " display(\n", - " df.loc[\n", - " (df.route_id == route_id)\n", - " & (df.time_of_day == time_of_day)\n", - " & (df.direction_id == direction_id)\n", - " ][test_cols]\n", - " )\n", - " df2_cols = [\n", - " \"trip_instance_key\",\n", - " \"time_of_day\",\n", - " \"speed_mph\",\n", - " \"rt_service_min\",\n", - " \"service_minutes\",\n", - " \"pings_per_min\",\n", - " \"total_vp\",\n", - " \"vp_in_shape\",\n", - " ]\n", - " df2_filtered = df2.loc[\n", - " (df2.route_id == route_id)\n", - " & (df2.time_of_day == time_of_day)\n", - " & (df2.direction_id == direction_id)\n", - " ]\n", - " display(df2_filtered.pings_per_min.mean())\n", - " display(df2_filtered.speed_mph.mean())\n", - " display(df2_filtered.total_vp.mean())\n", - " display(df2_filtered.vp_in_shape.mean())\n", - " display(df2_filtered[df2_cols])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ab2cc904-d02c-414b-ba2b-efcb351afa8f", - "metadata": {}, - "outputs": [], - "source": [ - "checkout_test_groups(test1, \"265-13172\", \"Early AM\", 0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7ad06e7a-f7fa-4604-b28e-c08802ce4883", - "metadata": {}, - "outputs": [], - "source": [ - "checkout_test_groups(test1, \"5671\", \"Early AM\", 1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "79e3584a-ac1f-4ecc-9b69-8f4551b8b632", - "metadata": {}, - "outputs": [], - "source": [ - "checkout_test_groups(test1, \"Lynx\", \"AM Peak\", 1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f6043d40-9178-4547-97f2-6140585ea418", - "metadata": {}, - "outputs": [], - "source": [ - "checkout_test_groups(test1, \"38R\", \"AM Peak\", 1)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bff53f95-8520-44f0-bc9c-96d5bc5a8323", - "metadata": {}, - "outputs": [], - "source": [ - "checkout_test_groups(test1, \"16611\", \"AM Peak\", 0)" + "checkout_route(mar_intermediary, mar_final, \"332\", \"Evening\", 0)" ] } ], diff --git a/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log b/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log index e4ecbff91..870e85ea8 100644 --- a/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log +++ b/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log @@ -209,3 +209,66 @@ 2023-12-28 14:25:39.680 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:15:35.914302 2023-12-28 14:25:42.477 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.796187 2023-12-28 14:26:52.690 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-12-13: 0:20:47.087647 +2024-01-05 11:03:41.382 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.999016 +2024-01-05 11:05:42.554 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:02:01.171415 +2024-01-05 11:05:42.569 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.015383 +2024-01-05 11:08:14.846 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:32.276862 +2024-01-05 11:23:46.118 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:15:31.272475 +2024-01-05 11:23:49.128 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:03.009932 +2024-01-05 11:24:52.108 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-11-15: 0:21:11.725005 +2024-01-05 11:24:52.597 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.412462 +2024-01-05 11:26:55.371 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:02:02.774083 +2024-01-05 11:26:55.387 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.015792 +2024-01-05 11:29:04.800 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:09.413010 +2024-01-05 11:44:29.583 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:15:24.783640 +2024-01-05 11:44:32.258 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.674567 +2024-01-05 11:45:35.779 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-10-11: 0:20:43.594600 +2024-01-05 11:45:36.293 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.420491 +2024-01-05 11:47:23.192 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:01:46.899004 +2024-01-05 11:47:23.211 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.019127 +2024-01-05 11:49:32.796 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:09.585046 +2024-01-05 12:03:47.580 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:14:14.783944 +2024-01-05 12:03:50.322 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.741292 +2024-01-05 12:04:52.959 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-09-13: 0:19:17.086066 +2024-01-05 12:04:53.533 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.511619 +2024-01-05 12:06:39.520 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:01:45.986997 +2024-01-05 12:06:39.536 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.016369 +2024-01-05 12:08:41.105 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:01.569323 +2024-01-05 12:23:39.400 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:14:58.294782 +2024-01-05 12:23:42.180 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.780027 +2024-01-05 12:24:44.946 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-08-15: 0:19:51.925519 +2024-01-05 12:24:45.545 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.526637 +2024-01-05 12:26:32.644 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:01:47.098508 +2024-01-05 12:26:32.663 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.019457 +2024-01-05 12:28:49.599 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:16.935979 +2024-01-05 12:43:19.563 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:14:29.964329 +2024-01-05 12:43:22.303 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.739075 +2024-01-05 12:44:28.711 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-07-12: 0:19:43.692713 +2024-01-05 12:44:29.206 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.417839 +2024-01-05 12:46:13.252 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:01:44.045936 +2024-01-05 12:46:13.275 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.022944 +2024-01-05 12:48:16.284 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:03.008759 +2024-01-05 13:01:13.035 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:12:56.751552 +2024-01-05 13:01:15.751 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.715483 +2024-01-05 13:02:15.811 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-06-14: 0:17:47.022998 +2024-01-05 13:02:16.400 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.451781 +2024-01-05 13:04:00.897 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:01:44.496557 +2024-01-05 13:04:00.914 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.017863 +2024-01-05 13:06:00.788 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:01:59.873825 +2024-01-05 13:19:26.211 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:13:25.422956 +2024-01-05 13:19:28.546 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.334467 +2024-01-05 13:20:25.610 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-05-17: 0:18:09.661479 +2024-01-05 13:20:26.146 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.429638 +2024-01-05 13:22:07.005 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:01:40.859106 +2024-01-05 13:22:07.019 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.014126 +2024-01-05 13:24:17.361 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:10.342300 +2024-01-05 13:38:29.251 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:14:11.889550 +2024-01-05 13:38:31.778 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.526908 +2024-01-05 13:39:31.416 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-04-12: 0:19:05.699879 +2024-01-05 13:39:32.121 | INFO | __main__:vp_usable_metrics:274 - Rt service min: 0:00:00.556966 +2024-01-05 13:41:15.962 | INFO | __main__:vp_usable_metrics:289 - Grouping by each minute: 0:01:43.840271 +2024-01-05 13:41:15.978 | INFO | __main__:vp_usable_metrics:294 - Spatial accuracy metric: 0:00:00.016722 +2024-01-05 13:43:34.187 | INFO | __main__:vp_usable_metrics:303 - Buffering: 0:02:18.208294 +2024-01-05 13:57:17.060 | INFO | __main__:vp_usable_metrics:315 - Find vps that fall into shapes: 0:13:42.873109 +2024-01-05 13:57:19.748 | INFO | __main__:vp_usable_metrics:327 - Spatial accuracy grouping metric: 0:00:02.688274 +2024-01-05 13:58:20.064 | INFO | __main__:vp_usable_metrics:353 - Total run time for metrics on 2023-03-15: 0:18:48.500023 diff --git a/rt_scheduled_v_ran/scripts/update_vars.py b/rt_scheduled_v_ran/scripts/update_vars.py index abc839fd9..910a872c6 100644 --- a/rt_scheduled_v_ran/scripts/update_vars.py +++ b/rt_scheduled_v_ran/scripts/update_vars.py @@ -1,6 +1,8 @@ from shared_utils import rt_dates -months = ["dec"] +months = ["nov", "oct", "sep", "aug", + "jul", "jun", "may", "apr", "mar"] + #months = ["dec","nov", "oct", "sep", "aug", # "jul", "jun", "may", "apr", "mar"]