From 725f49deaa913daef8ff1fd6dd9da67131bbbda9 Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Thu, 1 Feb 2024 00:25:25 +0000 Subject: [PATCH 1/2] change segment speed utils ref, add avg freq --- .../06_vp_usable_exploration.ipynb | 1871 +++++++---------- 1 file changed, 815 insertions(+), 1056 deletions(-) diff --git a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb index a56971a02..f0a7bd06b 100644 --- a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb +++ b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb @@ -21,15 +21,17 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 139, "id": "67351837-e385-464b-96b3-48fc13703af7", "metadata": {}, "outputs": [], "source": [ - "from segment_speed_utils import helpers, sched_rt_utils, wrangle_shapes\n", + "# https://github.com/cal-itp/data-analyses/tree/main/rt_segment_speeds/segment_speed_utils\n", + "from segment_speed_utils import gtfs_schedule_wrangling, helpers, wrangle_shapes\n", "from segment_speed_utils.project_vars import (\n", " GCS_FILE_PATH,\n", " PROJECT_CRS,\n", + " RT_SCHED_GCS,\n", " SEGMENT_GCS,\n", " analysis_date,\n", ")\n", @@ -51,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "be14001d-8599-4496-b267-d028174ebc78", "metadata": {}, "outputs": [], @@ -63,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "id": "78669969-8d99-4049-99cb-2f933016b2d9", "metadata": {}, "outputs": [ @@ -82,7 +84,7 @@ " '2023-03-15']" ] }, - "execution_count": 6, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -93,10 +95,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "id": "73a27c22-ba75-4804-9ce6-f0d114962b0c", "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/2023-12-13_metrics.parquet'" + ] + }, + 
"execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "\"gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/2023-12-13_metrics.parquet\"" ] @@ -111,12 +124,12 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 7, "id": "05e483c3-b620-42f1-9243-d711777c1903", "metadata": {}, "outputs": [], "source": [ - "def read_files_into_dataframes(dates: list)-> list:\n", + "def read_files_into_dataframes(dates: list) -> list:\n", " \"\"\"\n", " Read files with given dates into separate pandas DataFrames.\n", "\n", @@ -132,14 +145,10 @@ " )\n", "\n", " for date in dates:\n", - " file_path = (\n", - " f\"{GCS_PATH}{date}_metrics.parquet\" \n", - " )\n", + " file_path = f\"{GCS_PATH}{date}_metrics.parquet\"\n", "\n", " # Read the file into a DataFrame\n", - " df = pd.read_parquet(\n", - " file_path\n", - " ) \n", + " df = pd.read_parquet(file_path)\n", "\n", " df_name = f\"df_{date.replace('-', '_')}\"\n", " # Store the DataFrame in the dictionary\n", @@ -150,7 +159,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 8, "id": "5703f90e-9ec1-4253-8392-e4df3a9dfda3", "metadata": {}, "outputs": [], @@ -160,7 +169,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 9, "id": "cccf51bb-4980-4d96-af0f-44259147f1fa", "metadata": {}, "outputs": [ @@ -170,7 +179,7 @@ "dict" ] }, - "execution_count": 40, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -181,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 10, "id": "077b3ff7-6707-43e6-b228-6d7fc174b306", "metadata": {}, "outputs": [], @@ -191,7 +200,7 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 11, "id": "531c6112-c633-412f-8350-ab01fc0a4c45", "metadata": {}, "outputs": [], @@ -209,17 +218,28 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": 12, "id": 
"e4247b70-422f-466f-af12-060e92eb7d70", "metadata": {}, "outputs": [], "source": [ - "all_dfs = [df_2023_12_13, df_2023_11_15, df_2023_10_11, df_2023_09_13, df_2023_08_15, df_2023_07_12, df_2023_06_14, df_2023_05_17, df_2023_04_12, df_2023_03_15]" + "all_dfs = [\n", + " df_2023_12_13,\n", + " df_2023_11_15,\n", + " df_2023_10_11,\n", + " df_2023_09_13,\n", + " df_2023_08_15,\n", + " df_2023_07_12,\n", + " df_2023_06_14,\n", + " df_2023_05_17,\n", + " df_2023_04_12,\n", + " df_2023_03_15,\n", + "]" ] }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 13, "id": "26185ced-9f3b-4266-a080-0eeed0c0a825", "metadata": {}, "outputs": [], @@ -233,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 55, + "execution_count": 14, "id": "af7c69e1-e363-42f8-bf1c-f9ef2ba141e2", "metadata": { "scrolled": true, @@ -941,13 +961,112 @@ "tags": [] }, "source": [ - "#### Step 1: add missing cols\n", - "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/scripts/avg_speeds_by_segment.py#L135" + "#### Add missing cols\n", + "* https://github.com/cal-itp/data-analyses/tree/main/rt_segment_speeds/segment_speed_utils" + ] + }, + { + "cell_type": "code", + "execution_count": 77, + "id": "ed0f790a-f132-40d1-9daf-bafeecee82fd", + "metadata": {}, + "outputs": [], + "source": [ + "analysis_date = \"2023-12-13\"" + ] + }, + { + "cell_type": "markdown", + "id": "3d08933b-cc48-46d1-b217-e5c07fc5685f", + "metadata": {}, + "source": [ + "#### Why is shape_array_key deleted `most_common_shape_by_route_direction`" + ] + }, + { + "cell_type": "code", + "execution_count": 83, + "id": "ac3e285d-6011-4da6-903a-d114d6587547", + "metadata": {}, + "outputs": [], + "source": [ + "def most_common_shape_by_route_direction(analysis_date: str) -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Find shape_id with most trips for that route-direction.\n", + " Merge in shape geometry.\n", + " \"\"\"\n", + " route_dir_cols = [\"gtfs_dataset_key\", \"route_id\", 
\"direction_id\"]\n", + "\n", + " keep_trip_cols = route_dir_cols + [\n", + " \"trip_instance_key\",\n", + " \"shape_id\",\n", + " \"shape_array_key\",\n", + " ]\n", + "\n", + " trips = helpers.import_scheduled_trips(\n", + " analysis_date, columns=keep_trip_cols, get_pandas=True\n", + " ).rename(columns={\"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"})\n", + " sorting_order = [True for i in route_dir_cols]\n", + "\n", + " most_common_shape = (\n", + " trips.groupby(\n", + " route_dir_cols + [\"shape_id\", \"shape_array_key\"],\n", + " observed=True,\n", + " group_keys=False,\n", + " )\n", + " .agg({\"trip_instance_key\": \"count\"})\n", + " .reset_index()\n", + " .sort_values(\n", + " route_dir_cols + [\"trip_instance_key\"], ascending=sorting_order + [False]\n", + " )\n", + " .drop_duplicates(subset=route_dir_cols)\n", + " .reset_index(drop=True)[route_dir_cols + [\"shape_id\", \"shape_array_key\"]]\n", + " ).rename(\n", + " columns={\n", + " \"gtfs_dataset_key\": \"schedule_gtfs_dataset_key\",\n", + " \"shape_id\": \"common_shape_id\",\n", + " }\n", + " )\n", + "\n", + " shape_geom = helpers.import_scheduled_shapes(\n", + " analysis_date,\n", + " columns=[\"shape_array_key\", \"geometry\"],\n", + " )\n", + "\n", + " common_shape_geom = pd.merge(\n", + " shape_geom, most_common_shape, on=\"shape_array_key\", how=\"inner\"\n", + " )\n", + "\n", + " route_info = (\n", + " helpers.import_scheduled_trips(\n", + " analysis_date,\n", + " columns=[\n", + " \"gtfs_dataset_key\",\n", + " \"route_id\",\n", + " \"route_long_name\",\n", + " \"route_short_name\",\n", + " \"route_desc\",\n", + " ],\n", + " )\n", + " .drop_duplicates()\n", + " .pipe(portfolio_utils.add_route_name)\n", + " .drop(columns=[\"route_long_name\", \"route_short_name\", \"route_desc\"])\n", + " )\n", + "\n", + " del shape_geom, most_common_shape\n", + "\n", + " common_shape_geom2 = pd.merge(\n", + " common_shape_geom,\n", + " route_info.rename(columns={\"route_name_used\": \"route_name\"}),\n", + 
" on=[\"schedule_gtfs_dataset_key\", \"route_id\"],\n", + " )\n", + "\n", + " return common_shape_geom2" ] }, { "cell_type": "code", - "execution_count": 57, + "execution_count": 84, "id": "989a09e4-2f5a-4065-94f0-fc6cee1b1126", "metadata": {}, "outputs": [], @@ -979,9 +1098,10 @@ " \"direction_id\",\n", " \"shape_array_key\",\n", " ]\n", - " common_shape = sched_rt_utils.most_common_shape_by_route_direction(analysis_date)[\n", - " common_keep_cols\n", - " ]\n", + "\n", + " # Drops shape_array_key gtfs_schedule_wrangling.most_common_shape_by_route_direction\n", + " # common_shape = gtfs_schedule_wrangling.most_common_shape_by_route_direction(analysis_date)[common_keep_cols]\n", + " common_shape = most_common_shape_by_route_direction(analysis_date)[common_keep_cols]\n", "\n", " crosswalk2 = pd.merge(\n", " crosswalk,\n", @@ -1000,7 +1120,9 @@ " \"trip_first_departure_datetime_pacific\",\n", " \"time_of_day\",\n", " ]\n", - " time_of_day = sched_rt_utils.get_trip_time_buckets(analysis_date)[time_keep_cols]\n", + " time_of_day = gtfs_schedule_wrangling.get_trip_time_buckets(analysis_date)[\n", + " time_keep_cols\n", + " ]\n", "\n", " df2 = pd.merge(df, crosswalk2, on=\"trip_instance_key\", how=\"left\").merge(\n", " time_of_day, on=\"trip_instance_key\", how=\"left\"\n", @@ -1011,7 +1133,167 @@ }, { "cell_type": "code", - "execution_count": 58, + "execution_count": 85, + "id": "1fd1ba09-df71-4595-bddf-6649233cf02e", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'rt_service_min', 'min_w_atleast2_trip_updates',\n", + " 'total_pings_for_trip', 'total_min_w_gtfs', 'total_vp', 'vp_in_shape',\n", + " 'speed_mph', 'service_minutes', 'pings_per_min', 'spatial_accuracy_pct',\n", + " 'rt_triptime_w_gtfs_pct', 'rt_v_scheduled_trip_time_pct'],\n", + " dtype='object')" + ] + }, + "execution_count": 85, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df_2023_12_13.columns" + ] + }, + { + "cell_type": 
"code", + "execution_count": 86, + "id": "6bde7bdc-56f3-465f-97d1-aaae357aea56", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2023-12-13'" + ] + }, + "execution_count": 86, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "analysis_date" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "7fabab72-c1a4-468f-83d6-b1e066014129", + "metadata": {}, + "outputs": [], + "source": [ + "dec_df2 = add_scheduled_trip_columns(\n", + " df_2023_12_13, analysis_date, [\"trip_instance_key\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 88, + "id": "f04ec3f8-517a-4d2a-aad2-f1636838d98a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Int64Index: 86128 entries, 0 to 86127\n", + "Data columns (total 21 columns):\n", + " # Column Non-Null Count Dtype \n", + "--- ------ -------------- ----- \n", + " 0 trip_instance_key 86128 non-null object \n", + " 1 rt_service_min 86128 non-null float64 \n", + " 2 min_w_atleast2_trip_updates 86128 non-null int64 \n", + " 3 total_pings_for_trip 86128 non-null int64 \n", + " 4 total_min_w_gtfs 86128 non-null int64 \n", + " 5 total_vp 74609 non-null float64 \n", + " 6 vp_in_shape 74609 non-null float64 \n", + " 7 speed_mph 76878 non-null float64 \n", + " 8 service_minutes 76878 non-null float64 \n", + " 9 pings_per_min 86128 non-null float64 \n", + " 10 spatial_accuracy_pct 74609 non-null float64 \n", + " 11 rt_triptime_w_gtfs_pct 86128 non-null float64 \n", + " 12 rt_v_scheduled_trip_time_pct 76878 non-null float64 \n", + " 13 schedule_gtfs_dataset_key 76442 non-null object \n", + " 14 direction_id 76442 non-null Int64 \n", + " 15 route_id 76442 non-null object \n", + " 16 shape_array_key 76442 non-null object \n", + " 17 route_name_used 76442 non-null object \n", + " 18 service_hours 77977 non-null float64 \n", + " 19 trip_first_departure_datetime_pacific 77977 non-null datetime64[ns]\n", + " 20 
time_of_day 77977 non-null object \n", + "dtypes: Int64(1), datetime64[ns](1), float64(10), int64(3), object(6)\n", + "memory usage: 14.5+ MB\n" + ] + } + ], + "source": [ + "dec_df2.info()" + ] + }, + { + "cell_type": "code", + "execution_count": 89, + "id": "f78ec95e-86af-4722-8dd5-ab228148c337", + "metadata": {}, + "outputs": [], + "source": [ + "shapes = helpers.import_scheduled_shapes(\n", + " analysis_date,\n", + " columns=[\"shape_array_key\", \"geometry\"],\n", + " get_pandas=True,\n", + " crs=geography_utils.WGS84,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 90, + "id": "9dbd05bb-9ead-41b8-ab43-b561bb276c92", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['shape_array_key', 'geometry'], dtype='object')" + ] + }, + "execution_count": 90, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "shapes.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 125, + "id": "52b9f3c3-429a-42cc-968d-6ef380baf5fc", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(7286, 7285)" + ] + }, + "execution_count": 125, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(shapes), shapes.shape_array_key.nunique()" + ] + }, + { + "cell_type": "code", + "execution_count": 91, "id": "21db7580-cf79-4385-b9dc-80cd99206011", "metadata": {}, "outputs": [], @@ -1158,17 +1440,7 @@ }, { "cell_type": "code", - "execution_count": 59, - "id": "7fabab72-c1a4-468f-83d6-b1e066014129", - "metadata": {}, - "outputs": [], - "source": [ - "dec_df2 = add_scheduled_trip_columns(df_2023_12_13, analysis_date, [\"trip_instance_key\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 60, + "execution_count": 92, "id": "cdbf76f5-bd0b-4e2b-8fc8-15843c7dc3b6", "metadata": {}, "outputs": [ @@ -1299,7 +1571,7 @@ "1 2023-12-13 06:34:00 Early AM " ] }, - "execution_count": 60, + "execution_count": 92, "metadata": {}, "output_type": "execute_result" } @@ -1310,7 
+1582,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 93, "id": "fea7cda5-606c-4054-b189-58a12d250957", "metadata": {}, "outputs": [ @@ -1320,7 +1592,7 @@ "pandas.core.frame.DataFrame" ] }, - "execution_count": 61, + "execution_count": 93, "metadata": {}, "output_type": "execute_result" } @@ -1331,7 +1603,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": 94, "id": "fe97ad8a-d0ce-40cd-982b-87877882693a", "metadata": {}, "outputs": [], @@ -1343,7 +1615,7 @@ }, { "cell_type": "code", - "execution_count": 63, + "execution_count": 95, "id": "58165517-e414-4843-8ece-b7631d4d7f27", "metadata": {}, "outputs": [ @@ -1361,7 +1633,7 @@ "Name: avg_pings_per_min, dtype: float64" ] }, - "execution_count": 63, + "execution_count": 95, "metadata": {}, "output_type": "execute_result" } @@ -1372,7 +1644,7 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 96, "id": "e99b22b7-f6c7-4d69-8b65-da3c39c85f33", "metadata": {}, "outputs": [ @@ -1382,7 +1654,7 @@ "(11397, 18)" ] }, - "execution_count": 64, + "execution_count": 96, "metadata": {}, "output_type": "execute_result" } @@ -1393,7 +1665,7 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 97, "id": "49867873-4a76-49c4-8c95-0918d7468f82", "metadata": {}, "outputs": [ @@ -1438,94 +1710,104 @@ " \n", " \n", " \n", - " 3370\n", - " rec4pgjrmdhCh4z01\n", - " City of Los Angeles\n", - " 576\n", - " Midtown\n", + " 2868\n", + " recfma7GNR5lQTTTg\n", + " Orange County Transportation Authority\n", + " 43\n", + " Fullerton - Costa Mesa via Harbor Blvd\n", " 0\n", - " PM Peak\n", - " 8.60\n", - " 7\n", - " 38.00\n", - " 47.60\n", - " 07 - Los Angeles\n", - " 2.60\n", - " 95.21\n", - " 25.26\n", - " 84.66\n", - " 38.30\n", + " Owl\n", + " 12.00\n", + " 1\n", + " 79.00\n", + " 91.00\n", + " 12 - Irvine\n", + " 3.00\n", + " 98.52\n", + " 15.19\n", + " 100.00\n", + " 90.00\n", " \n", " \n", - " 1173\n", - " recaJnArpFEk5QooE\n", - " City of Elk 
Grove\n", - " 3425\n", - " \n", + " 375\n", + " recPnGkwdpnr8jmHB\n", + " Los Angeles County Metropolitan Transportation Authority\n", + " 801\n", + " Metro A-Line\n", " 0\n", - " AM Peak\n", - " 19.90\n", - " 5\n", - " 36.20\n", - " 46.00\n", - " 03 - Marysville\n", - " 2.90\n", - " 100.00\n", - " 27.07\n", - " 99.13\n", - " 45.00\n", + " Early AM\n", + " 19.50\n", + " 20\n", + " 109.20\n", + " 136.20\n", + " 07 - Los Angeles\n", + " 2.00\n", + " 92.28\n", + " 24.73\n", + " 89.43\n", + " 90.60\n", " \n", " \n", - " 9120\n", - " rechaapWbeffO33OX\n", - " City and County of San Francisco\n", - " S\n", - " Additional Weekday Service\n", - " 0\n", - " Midday\n", - " 15.40\n", - " 9\n", - " 15.00\n", - " 24.80\n", - " 04 - Oakland\n", - " 2.70\n", - " 96.78\n", - " 65.33\n", - " 85.89\n", - " 20.80\n", + " 7173\n", + " recPnGkwdpnr8jmHB\n", + " Los Angeles County Metropolitan Transportation Authority\n", + " 92-13172\n", + " DWNTWN LA- SYLMAR STA VIA GLENDALE - GLENOAKS BLS\n", + " 1\n", + " PM Peak\n", + " 12.80\n", + " 12\n", + " 125.50\n", + " 134.90\n", + " 07 - Los Angeles\n", + " 2.20\n", + " 86.25\n", + " 7.49\n", + " 93.25\n", + " 104.50\n", " \n", " \n", "\n", "" ], "text/plain": [ - " org_id agency route_id \\\n", - "3370 rec4pgjrmdhCh4z01 City of Los Angeles 576 \n", - "1173 recaJnArpFEk5QooE City of Elk Grove 3425 \n", - "9120 rechaapWbeffO33OX City and County of San Francisco S \n", + " org_id \\\n", + "2868 recfma7GNR5lQTTTg \n", + "375 recPnGkwdpnr8jmHB \n", + "7173 recPnGkwdpnr8jmHB \n", + "\n", + " agency route_id \\\n", + "2868 Orange County Transportation Authority 43 \n", + "375 Los Angeles County Metropolitan Transportation Authority 801 \n", + "7173 Los Angeles County Metropolitan Transportation Authority 92-13172 \n", "\n", - " route_name direction_id time_of_day avg_speed_mph \\\n", - "3370 Midtown 0 PM Peak 8.60 \n", - "1173 0 AM Peak 19.90 \n", - "9120 Additional Weekday Service 0 Midday 15.40 \n", + " route_name direction_id \\\n", + "2868 
Fullerton - Costa Mesa via Harbor Blvd 0 \n", + "375 Metro A-Line 0 \n", + "7173 DWNTWN LA- SYLMAR STA VIA GLENDALE - GLENOAKS BLS 1 \n", "\n", - " n_trips avg_sched_trip_min avg_rt_trip_min district_name \\\n", - "3370 7 38.00 47.60 07 - Los Angeles \n", - "1173 5 36.20 46.00 03 - Marysville \n", - "9120 9 15.00 24.80 04 - Oakland \n", + " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "2868 Owl 12.00 1 79.00 91.00 \n", + "375 Early AM 19.50 20 109.20 136.20 \n", + "7173 PM Peak 12.80 12 125.50 134.90 \n", "\n", - " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", - "3370 2.60 95.21 25.26 \n", - "1173 2.90 100.00 27.07 \n", - "9120 2.70 96.78 65.33 \n", + " district_name avg_pings_per_min avg_pct_vp_shape \\\n", + "2868 12 - Irvine 3.00 98.52 \n", + "375 07 - Los Angeles 2.00 92.28 \n", + "7173 07 - Los Angeles 2.20 86.25 \n", "\n", - " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", - "3370 84.66 38.30 \n", - "1173 99.13 45.00 \n", - "9120 85.89 20.80 " + " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", + "2868 15.19 100.00 \n", + "375 24.73 89.43 \n", + "7173 7.49 93.25 \n", + "\n", + " avg_min_w_atleast2_trip_updates \n", + "2868 90.00 \n", + "375 90.60 \n", + "7173 104.50 " ] }, - "execution_count": 65, + "execution_count": 97, "metadata": {}, "output_type": "execute_result" } @@ -1534,36 +1816,280 @@ "dec_final.drop(columns=[\"geometry\", \"base64_url\"]).sample(3)" ] }, + { + "cell_type": "markdown", + "id": "e2f3b80c-b018-4216-806b-30c0733b728f", + "metadata": {}, + "source": [ + "#### Add new columns" + ] + }, + { + "cell_type": "markdown", + "id": "620e4a8e-87c0-4a10-b27a-15a1d938f770", + "metadata": {}, + "source": [ + "##### Test peak_frequency and offpeak_frequency" + ] + }, { "cell_type": "code", - "execution_count": 66, - "id": "3844b20a-6af8-4d23-a42e-166b7330907d", + "execution_count": 140, + "id": "3bbf297f-e58c-4697-8a31-7ec88425144e", "metadata": {}, "outputs": [ { "data": { - 
"text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + "text/plain": [ + "'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/'" + ] + }, + "execution_count": 140, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "RT_SCHED_GCS" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "id": "f10a3c2c-4952-4add-9509-45c527bcac4c", + "metadata": {}, + "outputs": [], + "source": [ + "def load_frequency(analysis_date: str) -> pd.DataFrame:\n", + " freq_cols = [\n", + " \"shape_array_key\",\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"offpeak_n_trips\",\n", + " \"peak_n_trips\",\n", + " \"offpeak_frequency\",\n", + " \"peak_frequency\",\n", + " \"pct_dense\",\n", + " ]\n", + " df = pd.read_parquet(\n", + " f\"{RT_SCHED_GCS}schedule_route_direction_metrics_{analysis_date}.parquet\"\n", + " )[freq_cols]\n", + " df[\"avg_frequency\"] = (\n", + " (df.offpeak_n_trips * df.offpeak_frequency)\n", + " + (df.peak_n_trips * df.peak_frequency)\n", + " ) / (df.offpeak_n_trips + df.peak_n_trips)\n", + " df = df.drop(\n", + " columns=[\n", + " \"offpeak_n_trips\",\n", + " \"peak_n_trips\",\n", + " \"offpeak_frequency\",\n", + " \"peak_frequency\",\n", + " ]\n", + " )\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "id": "f7b6c61a-47f7-4734-91be-499f80c1f795", + "metadata": {}, + "outputs": [], + "source": [ + "freq_dec = load_frequency(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "id": "ca10f951-bbc8-4474-acc8-6e4da8f43f6c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updates
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
shape_array_keyschedule_gtfs_dataset_keypct_denseavg_frequency
308552e5a5b703231861cf3c2d7853e6505b4c105bd9f414afe82dba2c3687cc1d880.031.10
\n", + "
" + ], + "text/plain": [ + " shape_array_key schedule_gtfs_dataset_key \\\n", + "3085 52e5a5b703231861cf3c2d7853e6505b 4c105bd9f414afe82dba2c3687cc1d88 \n", + "\n", + " pct_dense avg_frequency \n", + "3085 0.03 1.10 " + ] + }, + "execution_count": 161, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "freq_dec.sample()" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "id": "e995b96b-3e44-4bc6-b5c0-e0216da651f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "3560" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(freq_dec)" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "id": "166da9c3-dde0-48b0-892a-2f36c442d2eb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "76838" + ] + }, + "execution_count": 162, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "len(dec_intermediary)" + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "id": "4aeaa128-a41e-4c33-9aa5-7bdeb7abda07", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "_merge \n", + "left_only 51049\n", + "both 25789\n", + "right_only 2672\n", + "dtype: int64" + ] + }, + "execution_count": 164, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(\n", + " dec_intermediary, freq_dec, on=[\"shape_array_key\"], how=\"outer\", indicator=True\n", + ")[[\"_merge\"]].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "id": "76a88eaf-ede2-4276-9578-032d499bc388", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "_merge \n", + "left_only 51049\n", + "both 25789\n", + "right_only 2672\n", + "dtype: int64" + ] + }, + "execution_count": 166, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pd.merge(\n", + " dec_intermediary,\n", + " freq_dec,\n", + " on=[\"schedule_gtfs_dataset_key\", 
\"shape_array_key\"],\n", + " how=\"outer\",\n", + " indicator=True,\n", + ")[[\"_merge\"]].value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "id": "a6bfec15-d0e5-4ec2-a51f-e6ff5c6dfef8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -1574,24 +2100,40 @@ " \n", " \n", " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pctschedule_gtfs_dataset_keydirection_idroute_idshape_array_keyroute_name_usedservice_hourstrip_first_departure_datetime_pacifictime_of_day
75367bdaf00fefb641fad797f64761bade1df55.975616656166.00165.005.7823.002.9799.40100.00143.331088730b526c933599a93473e74523b6a546286.836515078150.00145.0019.2380.001.7396.6789.838.549809d3f8121513057bc5cb8de7b54ce20023-131216cf9d9e3f57e245d4c3654865847b6Salinas - King City1.332023-12-13 10:50:00Midday
\n", @@ -1599,28 +2141,37 @@ ], "text/plain": [ " trip_instance_key rt_service_min \\\n", - "75367 bdaf00fefb641fad797f64761bade1df 55.97 \n", + "10887 30b526c933599a93473e74523b6a5462 86.83 \n", "\n", " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "75367 56 166 56 \n", + "10887 65 150 78 \n", "\n", " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", - "75367 166.00 165.00 5.78 23.00 2.97 \n", + "10887 150.00 145.00 19.23 80.00 1.73 \n", "\n", " spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", - "75367 99.40 100.00 \n", + "10887 96.67 89.83 \n", + "\n", + " rt_v_scheduled_trip_time_pct schedule_gtfs_dataset_key \\\n", + "10887 8.54 9809d3f8121513057bc5cb8de7b54ce2 \n", "\n", - " rt_v_scheduled_trip_time_pct \n", - "75367 143.33 " + " direction_id route_id shape_array_key \\\n", + "10887 0 023-131 216cf9d9e3f57e245d4c3654865847b6 \n", + "\n", + " route_name_used service_hours \\\n", + "10887 Salinas - King City 1.33 \n", + "\n", + " trip_first_departure_datetime_pacific time_of_day \n", + "10887 2023-12-13 10:50:00 Midday " ] }, - "execution_count": 66, + "execution_count": 167, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dec_df.sample()" + "dec_intermediary.sample()" ] }, { @@ -1634,7 +2185,7 @@ }, { "cell_type": "code", - "execution_count": 67, + "execution_count": 116, "id": "aa2cc0c2-da7b-4ae1-9ac4-84c8d6d60e95", "metadata": {}, "outputs": [], @@ -1678,6 +2229,7 @@ " print(f\"vp_in_shape {og_df2.vp_in_shape.mean()}\")\n", " print(f\"min w gtfs {og_df2.total_min_w_gtfs.mean()}\")\n", " print(f\"min w at least 2 pings {og_df2.min_w_atleast2_trip_updates.mean()}\")\n", + " print(f\"avg vp in pct {og_df2.vp_in_shape.mean()/og_df2.total_vp.mean()}\")\n", " display(og_df2[cols])" ] }, @@ -1691,7 +2243,7 @@ }, { "cell_type": "code", - "execution_count": 68, + "execution_count": 117, "id": "04ee1397-318c-4bb7-9f80-2a55b9c75055", "metadata": {}, "outputs": [ @@ -1801,7 +2353,8 @@ "total_vp 
267.5\n", "vp_in_shape 187.5\n", "min w gtfs 99.5\n", - "min w at least 2 pings 92.0\n" + "min w at least 2 pings 92.0\n", + "avg vp in pct 0.7009345794392523\n" ] }, { @@ -1886,8 +2439,29 @@ }, { "cell_type": "code", - "execution_count": 69, - "id": "aa775538-5a7f-4500-bc5b-867e9b54d626", + "execution_count": 118, + "id": "7b878898-29f6-4b86-bbfd-a586682df078", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7009345794392523" + ] + }, + "execution_count": 118, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "187.5 / 267.5" + ] + }, + { + "cell_type": "code", + "execution_count": 120, + "id": "a3c23900-fdaa-476c-a490-dbc703df0c28", "metadata": {}, "outputs": [ { @@ -1938,43 +2512,43 @@ " \n", " \n", " \n", - " 193\n", - " rec3u4aMplqObcoTR\n", - " Tahoe Transportation District\n", - " 5671\n", - " Valley Express Daily\n", - " 1\n", - " Early AM\n", - " 2.20\n", + " 9012\n", + " rechaapWbeffO33OX\n", + " City and County of San Francisco\n", + " 38R\n", + " Weekdays 5am-10pm Weekends 6am-9pm\n", " 1\n", - " 38.00\n", - " 313.50\n", - " 03 - Marysville\n", - " 1.40\n", - " 22.65\n", - " 725.00\n", - " 50.08\n", - " 152.00\n", + " AM Peak\n", + " 6.70\n", + " 30\n", + " 43.90\n", + " 58.90\n", + " 04 - Oakland\n", + " 3.00\n", + " 88.33\n", + " 34.17\n", + " 100.17\n", + " 58.30\n", " \n", " \n", "\n", "
" ], "text/plain": [ - " org_id agency route_id \\\n", - "193 rec3u4aMplqObcoTR Tahoe Transportation District 5671 \n", + " org_id agency route_id \\\n", + "9012 rechaapWbeffO33OX City and County of San Francisco 38R \n", "\n", - " route_name direction_id time_of_day avg_speed_mph n_trips \\\n", - "193 Valley Express Daily 1 Early AM 2.20 1 \n", + " route_name direction_id time_of_day \\\n", + "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 AM Peak \n", "\n", - " avg_sched_trip_min avg_rt_trip_min district_name avg_pings_per_min \\\n", - "193 38.00 313.50 03 - Marysville 1.40 \n", + " avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "9012 6.70 30 43.90 58.90 \n", "\n", - " avg_pct_vp_shape avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", - "193 22.65 725.00 50.08 \n", + " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", + "9012 04 - Oakland 3.00 88.33 34.17 \n", "\n", - " avg_min_w_atleast2_trip_updates \n", - "193 152.00 " + " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", + "9012 100.17 58.30 " ] }, "metadata": {}, @@ -1985,12 +2559,13 @@ "output_type": "stream", "text": [ "original\n", - "pings per min 1.422723164442554\n", - "speed_mph 2.158633017384419\n", - "total_vp 446.0\n", - "vp_in_shape 101.0\n", - "min w gtfs 157.0\n", - "min w at least 2 pings 152.0\n" + "pings per min 2.961175851535439\n", + "speed_mph 6.682525584870673\n", + "total_vp 174.56666666666666\n", + "vp_in_shape 154.2\n", + "min w gtfs 58.96666666666667\n", + "min w at least 2 pings 58.3\n", + "avg vp in pct 0.8833301508497231\n" ] }, { @@ -2026,190 +2601,15 @@ " \n", " \n", " \n", - " 27758\n", - " 6fadf197f5bb105ed916de0a337386ee\n", - " Early AM\n", - " 2.16\n", - " 313.48\n", - " 38.00\n", - " 1.42\n", - " 157\n", - " 152\n", - " \n", - " \n", - "\n", - "" - ], - "text/plain": [ - " trip_instance_key time_of_day speed_mph \\\n", - "27758 6fadf197f5bb105ed916de0a337386ee Early AM 2.16 \n", - "\n", - " rt_service_min 
service_minutes pings_per_min total_min_w_gtfs \\\n", - "27758 313.48 38.00 1.42 157 \n", - "\n", - " min_w_atleast2_trip_updates \n", - "27758 152 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "checkout_route(dec_intermediary, dec_final, \"5671\", \"Early AM\", 1)" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "id": "a3c23900-fdaa-476c-a490-dbc703df0c28", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "final\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
org_idagencyroute_idroute_namedirection_idtime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
9012rechaapWbeffO33OXCity and County of San Francisco38RWeekdays 5am-10pm Weekends 6am-9pm1AM Peak6.703043.9058.9004 - Oakland3.0088.3334.17100.1758.30
\n", - "
" - ], - "text/plain": [ - " org_id agency route_id \\\n", - "9012 rechaapWbeffO33OX City and County of San Francisco 38R \n", - "\n", - " route_name direction_id time_of_day \\\n", - "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 AM Peak \n", - "\n", - " avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "9012 6.70 30 43.90 58.90 \n", - "\n", - " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", - "9012 04 - Oakland 3.00 88.33 34.17 \n", - "\n", - " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", - "9012 100.17 58.30 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "original\n", - "pings per min 2.961175851535439\n", - "speed_mph 6.682525584870673\n", - "total_vp 174.56666666666666\n", - "vp_in_shape 154.2\n", - "min w gtfs 58.96666666666667\n", - "min w at least 2 pings 58.3\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2642,7 +3042,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 121, "id": "8168d872-49c3-44c2-bdc7-fa499124c5af", "metadata": {}, "outputs": [ @@ -2746,7 +3146,8 @@ "total_vp nan\n", "vp_in_shape nan\n", "min w gtfs 59.5\n", - "min w at least 2 pings 57.666666666666664\n" + "min w at least 2 pings 57.666666666666664\n", + "avg vp in pct nan\n" ] }, { @@ -2895,7 +3296,7 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 122, "id": "0fd73539-ea23-4b0a-9509-ae0162f512af", "metadata": {}, "outputs": [], @@ -2905,87 +3306,20 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 123, "id": "d491e2ca-9da1-4ccc-a86e-ff535b5d2ace", "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
\n", - "\n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
6161076fa3ed3fd8ef28a446eedb4c1e94e6aAM Peak9.3948.8239.002.9749496161076fa3ed3fd8ef28a446eedb4c1e94e6aAM Peak9.3948.8239.002.974949
61611
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyrt_service_minmin_w_atleast2_trip_updatestotal_pings_for_triptotal_min_w_gtfstotal_vpvp_in_shapespeed_mphservice_minutespings_per_minspatial_accuracy_pctrt_triptime_w_gtfs_pctrt_v_scheduled_trip_time_pct
638313d8b42c5ef16df6405029c9c87f9161538.7228673767.0036.009.3819.001.7353.7395.57103.77
\n", - "
" - ], - "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "63831 3d8b42c5ef16df6405029c9c87f91615 38.72 \n", - "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "63831 28 67 37 \n", - "\n", - " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", - "63831 67.00 36.00 9.38 19.00 1.73 \n", - "\n", - " spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", - "63831 53.73 95.57 \n", - "\n", - " rt_v_scheduled_trip_time_pct \n", - "63831 103.77 " - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" + "ename": "NameError", + "evalue": "name 'mar_df' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[123], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmar_df\u001b[49m\u001b[38;5;241m.\u001b[39msample()\n", + "\u001b[0;31mNameError\u001b[0m: name 'mar_df' is not defined" + ] } ], "source": [ @@ -2994,7 +3328,7 @@ }, { "cell_type": "code", - "execution_count": 74, + "execution_count": null, "id": "cd2a6ffd-03a9-432a-ae0d-ac15f795278e", "metadata": {}, "outputs": [], @@ -3004,52 +3338,17 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": null, "id": "f9a78bb6-f91e-42ad-85da-b954b606c050", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 83620 entries, 0 to 83619\n", - "Data columns (total 21 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 trip_instance_key 83620 non-null object \n", - " 1 rt_service_min 83620 non-null float64 \n", - " 2 min_w_atleast2_trip_updates 83620 non-null int64 \n", - " 3 total_pings_for_trip 83620 non-null int64 \n", - " 4 total_min_w_gtfs 83620 non-null int64 \n", - " 5 total_vp 69494 non-null float64 \n", - 
" 6 vp_in_shape 69494 non-null float64 \n", - " 7 speed_mph 71797 non-null float64 \n", - " 8 service_minutes 71797 non-null float64 \n", - " 9 pings_per_min 83620 non-null float64 \n", - " 10 spatial_accuracy_pct 69494 non-null float64 \n", - " 11 rt_triptime_w_gtfs_pct 83620 non-null float64 \n", - " 12 rt_v_scheduled_trip_time_pct 71797 non-null float64 \n", - " 13 schedule_gtfs_dataset_key 71867 non-null object \n", - " 14 direction_id 71867 non-null Int64 \n", - " 15 route_id 71867 non-null object \n", - " 16 shape_array_key 71867 non-null object \n", - " 17 route_name_used 71867 non-null object \n", - " 18 service_hours 72874 non-null float64 \n", - " 19 trip_first_departure_datetime_pacific 72874 non-null datetime64[ns]\n", - " 20 time_of_day 72874 non-null object \n", - "dtypes: Int64(1), datetime64[ns](1), float64(10), int64(3), object(6)\n", - "memory usage: 14.1+ MB\n" - ] - } - ], + "outputs": [], "source": [ "mar_df2.info()" ] }, { "cell_type": "code", - "execution_count": 76, + "execution_count": null, "id": "988731d1-b062-43e5-896d-8bad990fdf46", "metadata": {}, "outputs": [], @@ -3059,580 +3358,40 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": null, "id": "64960b35-b4ac-43e4-9e6f-594e349fe7e2", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['org_id', 'agency', 'route_id', 'route_name', 'direction_id',\n", - " 'time_of_day', 'avg_speed_mph', 'n_trips', 'avg_sched_trip_min',\n", - " 'avg_rt_trip_min', 'base64_url', 'district_name', 'geometry',\n", - " 'avg_pings_per_min', 'avg_pct_vp_shape', 'avg_pct_rt_v_sched',\n", - " 'avg_rt_triptime_w_gtfs_pct', 'avg_min_w_atleast2_trip_updates'],\n", - " dtype='object')" - ] - }, - "execution_count": 77, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mar_final.columns" ] }, { "cell_type": "code", - "execution_count": 78, + "execution_count": null, "id": "c70a9b62-f855-405b-9271-6b129b70cab9", "metadata": {}, - 
"outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
org_idagencyroute_idroute_namedirection_idtime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
1780recfma7GNR5lQTTTgOrange County Transportation Authority54Garden Grove - Orange via Chapman Ave1Midday11.701075.50112.5012 - Irvine2.9099.0449.0196.62106.90
8979recJcXMNC5MUm2uDeVictor Valley Transit Authority3215Victor Valley Mall - Victor Valley College0Evening10.00119.0043.0008 - San Bernardino2.80100.00126.3297.6740.00
4227recANs4M9yDhvDyobLivermore / Amador Valley Transit Authority611Ruby Hill1PM Peak13.40142.0047.2004 - Oakland3.0087.1412.3899.5847.00
\n", - "
" - ], - "text/plain": [ - " org_id agency route_id \\\n", - "1780 recfma7GNR5lQTTTg Orange County Transportation Authority 54 \n", - "8979 recJcXMNC5MUm2uDe Victor Valley Transit Authority 3215 \n", - "4227 recANs4M9yDhvDyob Livermore / Amador Valley Transit Authority 611 \n", - "\n", - " route_name direction_id time_of_day \\\n", - "1780 Garden Grove - Orange via Chapman Ave 1 Midday \n", - "8979 Victor Valley Mall - Victor Valley College 0 Evening \n", - "4227 Ruby Hill 1 PM Peak \n", - "\n", - " avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "1780 11.70 10 75.50 112.50 \n", - "8979 10.00 1 19.00 43.00 \n", - "4227 13.40 1 42.00 47.20 \n", - "\n", - " district_name avg_pings_per_min avg_pct_vp_shape \\\n", - "1780 12 - Irvine 2.90 99.04 \n", - "8979 08 - San Bernardino 2.80 100.00 \n", - "4227 04 - Oakland 3.00 87.14 \n", - "\n", - " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", - "1780 49.01 96.62 \n", - "8979 126.32 97.67 \n", - "4227 12.38 99.58 \n", - "\n", - " avg_min_w_atleast2_trip_updates \n", - "1780 106.90 \n", - "8979 40.00 \n", - "4227 47.00 " - ] - }, - "execution_count": 78, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mar_final.sample(3).drop(columns=[\"base64_url\", \"geometry\"])" ] }, { "cell_type": "code", - "execution_count": 79, + "execution_count": null, "id": "bf6c84be-eb6d-4142-9ec7-2c737cd96517", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "final\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
org_idagencyroute_idroute_namedirection_idtime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
3720reckQmUdXUzHFmlVfCity of Ojai4763200PM Peak26.10449.0033.0007 - Los Angeles2.90100.00-32.65100.6132.20
\n", - "
" - ], - "text/plain": [ - " org_id agency route_id route_name direction_id \\\n", - "3720 reckQmUdXUzHFmlVf City of Ojai 4763 20 0 \n", - "\n", - " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "3720 PM Peak 26.10 4 49.00 33.00 \n", - "\n", - " district_name avg_pings_per_min avg_pct_vp_shape \\\n", - "3720 07 - Los Angeles 2.90 100.00 \n", - "\n", - " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", - "3720 -32.65 100.61 \n", - "\n", - " avg_min_w_atleast2_trip_updates \n", - "3720 32.20 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "original\n", - "pings per min 2.909125279321039\n", - "speed_mph 26.116502281263557\n", - "total_vp 96.75\n", - "vp_in_shape 96.75\n", - "min w gtfs 33.25\n", - "min w at least 2 pings 32.25\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
364527ea7312469de416b5c96c2df46b07c85PM Peak46.1216.6734.002.881616
364533d6760f1db43f24d68d2e780ac2be0dbPM Peak15.0449.0554.002.945048
372601af731c948711c3f047f29f922d24ee6PM Peak14.2651.6854.002.965251
3727849ce553a741ac1fcd734f035f299b81bPM Peak29.0614.6854.002.861514
\n", - "
" - ], - "text/plain": [ - " trip_instance_key time_of_day speed_mph \\\n", - "36452 7ea7312469de416b5c96c2df46b07c85 PM Peak 46.12 \n", - "36453 3d6760f1db43f24d68d2e780ac2be0db PM Peak 15.04 \n", - "37260 1af731c948711c3f047f29f922d24ee6 PM Peak 14.26 \n", - "37278 49ce553a741ac1fcd734f035f299b81b PM Peak 29.06 \n", - "\n", - " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", - "36452 16.67 34.00 2.88 16 \n", - "36453 49.05 54.00 2.94 50 \n", - "37260 51.68 54.00 2.96 52 \n", - "37278 14.68 54.00 2.86 15 \n", - "\n", - " min_w_atleast2_trip_updates \n", - "36452 16 \n", - "36453 48 \n", - "37260 51 \n", - "37278 14 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "checkout_route(mar_intermediary, mar_final, \"4763\", \"PM Peak\", 0)" ] }, { "cell_type": "code", - "execution_count": 80, + "execution_count": null, "id": "1993266f-05df-4613-9573-29402822b9da", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "final\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
org_idagencyroute_idroute_namedirection_idtime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
1285recRBcrX4ZvTyvSnmNorth County Transit District332Vista TC - Buena Creek Station0Evening16.00232.0046.2011 - San Diego2.60100.0044.3899.5743.00
\n", - "
" - ], - "text/plain": [ - " org_id agency route_id \\\n", - "1285 recRBcrX4ZvTyvSnm North County Transit District 332 \n", - "\n", - " route_name direction_id time_of_day avg_speed_mph \\\n", - "1285 Vista TC - Buena Creek Station 0 Evening 16.00 \n", - "\n", - " n_trips avg_sched_trip_min avg_rt_trip_min district_name \\\n", - "1285 2 32.00 46.20 11 - San Diego \n", - "\n", - " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", - "1285 2.60 100.00 44.38 \n", - "\n", - " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", - "1285 99.57 43.00 " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "original\n", - "pings per min 2.5985041071389485\n", - "speed_mph 15.952292627153918\n", - "total_vp 120.0\n", - "vp_in_shape 120.0\n", - "min w gtfs 46.0\n", - "min w at least 2 pings 43.0\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
41418e54c1ac191dc0b57df34834df825d0adEvening10.8347.1031.002.594743
41433e1d2ca1ad28f5fe1a3ec9a772cfae369Evening21.0745.2733.002.614543
\n", - "
" - ], - "text/plain": [ - " trip_instance_key time_of_day speed_mph \\\n", - "41418 e54c1ac191dc0b57df34834df825d0ad Evening 10.83 \n", - "41433 e1d2ca1ad28f5fe1a3ec9a772cfae369 Evening 21.07 \n", - "\n", - " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", - "41418 47.10 31.00 2.59 47 \n", - "41433 45.27 33.00 2.61 45 \n", - "\n", - " min_w_atleast2_trip_updates \n", - "41418 43 \n", - "41433 43 " - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "checkout_route(mar_intermediary, mar_final, \"332\", \"Evening\", 0)" ] From ecfeb50fb942cebb3e7b60db761c219a2e18a79f Mon Sep 17 00:00:00 2001 From: amandaha8 Date: Fri, 2 Feb 2024 18:31:48 +0000 Subject: [PATCH 2/2] broke out freq func to understand, applied it within actual func --- .../06_vp_usable_exploration.ipynb | 3224 ++++++++++------- 1 file changed, 1963 insertions(+), 1261 deletions(-) diff --git a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb index f0a7bd06b..6d805b68e 100644 --- a/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb +++ b/rt_scheduled_v_ran/06_vp_usable_exploration.ipynb @@ -21,12 +21,13 @@ }, { "cell_type": "code", - "execution_count": 139, + "execution_count": 2, "id": "67351837-e385-464b-96b3-48fc13703af7", "metadata": {}, "outputs": [], "source": [ "# https://github.com/cal-itp/data-analyses/tree/main/rt_segment_speeds/segment_speed_utils\n", + "# cd rt_segment_speeds && pip install -r requirements.txt && cd ../_shared_utils && make setup_env\n", "from segment_speed_utils import gtfs_schedule_wrangling, helpers, wrangle_shapes\n", "from segment_speed_utils.project_vars import (\n", " GCS_FILE_PATH,\n", @@ -93,27 +94,6 @@ "analysis_date_list" ] }, - { - "cell_type": "code", - "execution_count": 6, - "id": "73a27c22-ba75-4804-9ce6-f0d114962b0c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - 
"'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/2023-12-13_metrics.parquet'" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "\"gs://calitp-analytics-data/data-analyses/rt_vs_schedule/trip_level_metrics/2023-12-13_metrics.parquet\"" - ] - }, { "cell_type": "markdown", "id": "45cd052f-1f2a-4434-8346-2c5a1e82683f", @@ -124,7 +104,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "id": "05e483c3-b620-42f1-9243-d711777c1903", "metadata": {}, "outputs": [], @@ -159,7 +139,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "id": "5703f90e-9ec1-4253-8392-e4df3a9dfda3", "metadata": {}, "outputs": [], @@ -169,7 +149,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "id": "cccf51bb-4980-4d96-af0f-44259147f1fa", "metadata": {}, "outputs": [ @@ -179,7 +159,7 @@ "dict" ] }, - "execution_count": 9, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -190,7 +170,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "id": "077b3ff7-6707-43e6-b228-6d7fc174b306", "metadata": {}, "outputs": [], @@ -200,7 +180,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "531c6112-c633-412f-8350-ab01fc0a4c45", "metadata": {}, "outputs": [], @@ -218,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "id": "e4247b70-422f-466f-af12-060e92eb7d70", "metadata": {}, "outputs": [], @@ -239,7 +219,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 12, "id": "26185ced-9f3b-4266-a080-0eeed0c0a825", "metadata": {}, "outputs": [], @@ -253,7 +233,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 13, "id": "af7c69e1-e363-42f8-bf1c-f9ef2ba141e2", "metadata": { "scrolled": true, @@ -951,7 +931,8 @@ "id": "b71376f9-2343-4d14-99f0-025c05b7c7b0", "metadata": {}, 
"source": [ - "### Aggregating up to the route level" + "### Aggregating up to the route level\n", + "* Should these functions be added to my script `rt_v_scheduled_trip.py` so everything is done in one swoop?" ] }, { @@ -967,7 +948,7 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 14, "id": "ed0f790a-f132-40d1-9daf-bafeecee82fd", "metadata": {}, "outputs": [], @@ -980,12 +961,45 @@ "id": "3d08933b-cc48-46d1-b217-e5c07fc5685f", "metadata": {}, "source": [ - "#### Why is shape_array_key deleted `most_common_shape_by_route_direction`" + "#### Function #1 \n", + "##### Why is shape_array_key deleted `most_common_shape_by_route_direction`\n", + "* https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py#L281-L286" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "0ba547e6-9d62-45be-9b57-1e86e9b2fcb2", + "metadata": {}, + "outputs": [], + "source": [ + " route_dir_cols = [\"gtfs_dataset_key\", \"route_id\", \"direction_id\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "419a6e43-ab09-469d-9f48-ff99019f0ad8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "[True, True, True]" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "[True for i in route_dir_cols]" ] }, { "cell_type": "code", - "execution_count": 83, + "execution_count": 17, "id": "ac3e285d-6011-4da6-903a-d114d6587547", "metadata": {}, "outputs": [], @@ -1006,6 +1020,7 @@ " trips = helpers.import_scheduled_trips(\n", " analysis_date, columns=keep_trip_cols, get_pandas=True\n", " ).rename(columns={\"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"})\n", + " \n", " sorting_order = [True for i in route_dir_cols]\n", "\n", " most_common_shape = (\n", @@ -1066,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": 18, "id": "989a09e4-2f5a-4065-94f0-fc6cee1b1126", 
"metadata": {}, "outputs": [], @@ -1133,7 +1148,7 @@ }, { "cell_type": "code", - "execution_count": 85, + "execution_count": 19, "id": "1fd1ba09-df71-4595-bddf-6649233cf02e", "metadata": {}, "outputs": [ @@ -1147,7 +1162,7 @@ " dtype='object')" ] }, - "execution_count": 85, + "execution_count": 19, "metadata": {}, "output_type": "execute_result" } @@ -1158,7 +1173,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 20, "id": "6bde7bdc-56f3-465f-97d1-aaae357aea56", "metadata": {}, "outputs": [ @@ -1168,7 +1183,7 @@ "'2023-12-13'" ] }, - "execution_count": 86, + "execution_count": 20, "metadata": {}, "output_type": "execute_result" } @@ -1179,7 +1194,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 44, "id": "7fabab72-c1a4-468f-83d6-b1e066014129", "metadata": {}, "outputs": [], @@ -1189,259 +1204,261 @@ ")" ] }, + { + "cell_type": "markdown", + "id": "e7fa491a-21c9-4c1c-9f70-4a36aacfb9a2", + "metadata": {}, + "source": [ + "#### Function #2\n", + "* Takes result from function #1" + ] + }, + { + "cell_type": "markdown", + "id": "e2f3b80c-b018-4216-806b-30c0733b728f", + "metadata": { + "tags": [] + }, + "source": [ + "##### Add avg frequency columns\n", + "* https://github.com/cal-itp/data-analyses/blob/main/gtfs_funnel/route_typologies.py\n", + "* Frequency = avg trips per hour" + ] + }, { "cell_type": "code", - "execution_count": 88, - "id": "f04ec3f8-517a-4d2a-aad2-f1636838d98a", + "execution_count": 23, + "id": "f10a3c2c-4952-4add-9509-45c527bcac4c", "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "Int64Index: 86128 entries, 0 to 86127\n", - "Data columns (total 21 columns):\n", - " # Column Non-Null Count Dtype \n", - "--- ------ -------------- ----- \n", - " 0 trip_instance_key 86128 non-null object \n", - " 1 rt_service_min 86128 non-null float64 \n", - " 2 min_w_atleast2_trip_updates 86128 non-null int64 \n", - " 3 total_pings_for_trip 86128 non-null 
int64 \n", - " 4 total_min_w_gtfs 86128 non-null int64 \n", - " 5 total_vp 74609 non-null float64 \n", - " 6 vp_in_shape 74609 non-null float64 \n", - " 7 speed_mph 76878 non-null float64 \n", - " 8 service_minutes 76878 non-null float64 \n", - " 9 pings_per_min 86128 non-null float64 \n", - " 10 spatial_accuracy_pct 74609 non-null float64 \n", - " 11 rt_triptime_w_gtfs_pct 86128 non-null float64 \n", - " 12 rt_v_scheduled_trip_time_pct 76878 non-null float64 \n", - " 13 schedule_gtfs_dataset_key 76442 non-null object \n", - " 14 direction_id 76442 non-null Int64 \n", - " 15 route_id 76442 non-null object \n", - " 16 shape_array_key 76442 non-null object \n", - " 17 route_name_used 76442 non-null object \n", - " 18 service_hours 77977 non-null float64 \n", - " 19 trip_first_departure_datetime_pacific 77977 non-null datetime64[ns]\n", - " 20 time_of_day 77977 non-null object \n", - "dtypes: Int64(1), datetime64[ns](1), float64(10), int64(3), object(6)\n", - "memory usage: 14.5+ MB\n" - ] - } - ], + "outputs": [], + "source": [ + "def load_frequency(analysis_date: str) -> pd.DataFrame:\n", + " freq_cols = [\n", + " \"shape_array_key\",\n", + " \"schedule_gtfs_dataset_key\",\n", + " \"offpeak_n_trips\",\n", + " \"peak_n_trips\",\n", + " \"offpeak_frequency\",\n", + " \"peak_frequency\",\n", + " \"pct_dense\",\n", + " ]\n", + " df = pd.read_parquet(\n", + " f\"{RT_SCHED_GCS}schedule_route_direction_metrics_{analysis_date}.parquet\"\n", + " )[freq_cols]\n", + " df[\"avg_frequency\"] = (\n", + " (df.offpeak_n_trips * df.offpeak_frequency)\n", + " + (df.peak_n_trips * df.peak_frequency)\n", + " ) / (df.offpeak_n_trips + df.peak_n_trips)\n", + " df = df.drop(\n", + " columns=[\n", + " \"offpeak_n_trips\",\n", + " \"peak_n_trips\",\n", + " \"offpeak_frequency\",\n", + " \"peak_frequency\",\n", + " ]\n", + " )\n", + " return df" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "f7b6c61a-47f7-4734-91be-499f80c1f795", + "metadata": {}, + "outputs": [], 
"source": [ - "dec_df2.info()" + "freq_dec = load_frequency(analysis_date)" ] }, { "cell_type": "code", - "execution_count": 89, - "id": "f78ec95e-86af-4722-8dd5-ab228148c337", + "execution_count": 48, + "id": "ca10f951-bbc8-4474-acc8-6e4da8f43f6c", "metadata": {}, "outputs": [], "source": [ - "shapes = helpers.import_scheduled_shapes(\n", - " analysis_date,\n", - " columns=[\"shape_array_key\", \"geometry\"],\n", - " get_pandas=True,\n", - " crs=geography_utils.WGS84,\n", - ")" + "og_freq = pd.read_parquet(\n", + " f\"{RT_SCHED_GCS}schedule_route_direction_metrics_{analysis_date}.parquet\"\n", + " ).drop(columns = ['geometry'])" ] }, { "cell_type": "code", - "execution_count": 90, - "id": "9dbd05bb-9ead-41b8-ab43-b561bb276c92", + "execution_count": 50, + "id": "245a9426-913d-45dd-bf40-9d6f93ec4d78", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['shape_array_key', 'geometry'], dtype='object')" + "1.69" ] }, - "execution_count": 90, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "shapes.columns" + ".75+1.88/2" ] }, { "cell_type": "code", - "execution_count": 125, - "id": "52b9f3c3-429a-42cc-968d-6ef380baf5fc", + "execution_count": 49, + "id": "1b302325-0fb7-41ce-8cfa-f23e03055e50", "metadata": {}, "outputs": [ { "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
shape_array_keyschedule_gtfs_dataset_keyroute_iddirection_idcommon_shape_idavg_stop_metersavg_sched_service_minoffpeak_n_tripspeak_n_tripsoffpeak_frequencypeak_frequencypct_dense
14529f3130b3a733883c0132b1031a424b78cc53a0dbf5df90e3009b9cb5d89d80ba5630.0013869278.4920.0012.0015.000.751.880.70
\n", + "
" + ], "text/plain": [ - "(7286, 7285)" + " shape_array_key schedule_gtfs_dataset_key \\\n", + "1452 9f3130b3a733883c0132b1031a424b78 cc53a0dbf5df90e3009b9cb5d89d80ba \n", + "\n", + " route_id direction_id common_shape_id avg_stop_meters \\\n", + "1452 563 0.00 13869 278.49 \n", + "\n", + " avg_sched_service_min offpeak_n_trips peak_n_trips offpeak_frequency \\\n", + "1452 20.00 12.00 15.00 0.75 \n", + "\n", + " peak_frequency pct_dense \n", + "1452 1.88 0.70 " ] }, - "execution_count": 125, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "len(shapes), shapes.shape_array_key.nunique()" + "og_freq.sample()" + ] + }, + { + "cell_type": "markdown", + "id": "5603e1fb-8eb8-417c-90cc-3588d147a40c", + "metadata": {}, + "source": [ + "##### Try to understand how frequency was pieced together" ] }, { "cell_type": "code", - "execution_count": 91, - "id": "21db7580-cf79-4385-b9dc-80cd99206011", + "execution_count": 51, + "id": "9368cfe5-bed5-4ac3-aac2-2dd75c730ac0", "metadata": {}, "outputs": [], "source": [ - "def average_route_speeds_for_export(\n", - " df: pd.DataFrame,\n", - " analysis_date: str,\n", - " max_speed: int,\n", - ") -> gpd.GeoDataFrame:\n", - " \"\"\"\n", - " Aggregate trip speeds to route-direction.\n", - " Attach shape geometry to most common shape_id.\n", - " \"\"\"\n", - " df2 = df.loc[df.speed_mph <= 70].reset_index(drop=True)\n", - "\n", - " route_cols = [\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"time_of_day\",\n", - " \"route_id\",\n", - " \"direction_id\",\n", - " \"route_name_used\",\n", - " \"shape_array_key\",\n", - " ]\n", - "\n", - " mean_cols = [\n", - " \"service_minutes\",\n", - " \"rt_service_min\",\n", - " \"speed_mph\",\n", - " \"pings_per_min\",\n", - " \"total_vp\",\n", - " \"vp_in_shape\",\n", - " \"total_min_w_gtfs\",\n", - " \"min_w_atleast2_trip_updates\",\n", - " ]\n", - " count_cols = [\"trip_instance_key\"]\n", - "\n", - " df3 = (\n", - " df2.groupby(route_cols)\n", - " 
.agg({**{e: \"mean\" for e in mean_cols}, **{e: \"count\" for e in count_cols}})\n", - " .reset_index()\n", - " )\n", - "\n", - " df4 = df3.assign(\n", - " rt_service_min=df3.rt_service_min.round(1),\n", - " service_minutes=df3.service_minutes.round(1),\n", - " speed_mph=df3.speed_mph.round(1),\n", - " pings_per_min=df3.pings_per_min.round(1),\n", - " min_w_atleast2_trip_updates=df3.min_w_atleast2_trip_updates.round(1),\n", - " total_min_w_gtfs=df3.total_min_w_gtfs.round(1),\n", - " ).rename(\n", - " columns={\n", - " \"service_minutes\": \"avg_sched_trip_min\",\n", - " \"rt_service_min\": \"avg_rt_trip_min\",\n", - " \"trip_instance_key\": \"n_trips\",\n", - " \"route_name_used\": \"route_name\",\n", - " \"pings_per_min\": \"avg_pings_per_min\",\n", - " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", - " \"total_min_w_gtfs\": \"avg_total_min_w_gtfs\",\n", - " \"min_w_atleast2_trip_updates\": \"avg_min_w_atleast2_trip_updates\",\n", - " \"speed_mph\": \"avg_speed_mph\",\n", - " }\n", + "def assemble_scheduled_trip_metrics(analysis_date: str):\n", + " \n", + " df = gpd.read_parquet(\n", + " f\"{RT_SCHED_GCS}stop_times_direction_{analysis_date}.parquet\"\n", " )\n", "\n", - " org_crosswalk = schedule_rt_utils.sample_gtfs_dataset_key_to_organization_crosswalk(\n", - " df4,\n", - " analysis_date,\n", - " quartet_data=\"schedule\",\n", - " dim_gtfs_dataset_cols=[\"key\", \"base64_url\"],\n", - " dim_organization_cols=[\"source_record_id\", \"name\", \"caltrans_district\"],\n", - " )\n", - "\n", - " df_with_org = pd.merge(\n", - " df4,\n", - " org_crosswalk.rename(columns={\"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"}),\n", - " on=\"gtfs_dataset_key\",\n", - " how=\"inner\",\n", - " )\n", - "\n", - " shapes = helpers.import_scheduled_shapes(\n", + " trips_to_route = helpers.import_scheduled_trips(\n", " analysis_date,\n", - " columns=[\"shape_array_key\", \"geometry\"],\n", - " get_pandas=True,\n", - " crs=geography_utils.WGS84,\n", + " columns = 
[\"trip_instance_key\", \"route_id\", \"direction_id\"],\n", + " get_pandas = True\n", " )\n", + " \n", + " time_of_day = (gtfs_schedule_wrangling.get_trip_time_buckets(analysis_date) \n", + " [[\"trip_instance_key\", \"time_of_day\", \n", + " \"service_minutes\"]]\n", + " .rename(columns = {\"service_minutes\": \"sched_service_min\"})\n", + " )\n", + " \n", + " trip_cols = [\"schedule_gtfs_dataset_key\", \"trip_instance_key\"]\n", + " \n", + " grouped_df = df.groupby(trip_cols, observed=True, group_keys=False)\n", "\n", - " df_with_shape = pd.merge(\n", - " shapes,\n", - " df_with_org,\n", - " on=\"shape_array_key\", # once merged, can drop shape_array_key\n", - " how=\"inner\",\n", + " # Get median / mean stop meters for the trip\n", + " # Attach time-of-day and route_id and direction_id\n", + " df2 = pd.merge(\n", + " grouped_df.agg({\"stop_meters\": \"median\"}).reset_index().rename(\n", + " columns = {\"stop_meters\": \"median_stop_meters\"}),\n", + " time_of_day,\n", + " on = \"trip_instance_key\",\n", + " how = \"left\"\n", + " ).merge(\n", + " trips_to_route,\n", + " on = \"trip_instance_key\",\n", + " how = \"inner\"\n", " )\n", - "\n", - " df_with_shape[\"avg_pct_vp_shape\"] = (\n", - " df_with_shape.vp_in_shape / df_with_shape.total_vp * 100\n", + " \n", + " df2 = df2.assign(\n", + " median_stop_meters = df2.median_stop_meters.round(2)\n", " )\n", - "\n", - " df_with_shape[\"avg_pct_rt_v_sched\"] = (\n", - " df_with_shape.avg_rt_trip_min / df_with_shape.avg_sched_trip_min - 1\n", - " ) * 100\n", - "\n", - " df_with_shape[\"avg_rt_triptime_w_gtfs_pct\"] = (\n", - " df_with_shape.avg_total_min_w_gtfs / df_with_shape.avg_rt_trip_min\n", - " ) * 100\n", - "\n", - " final_df = df_with_shape.drop(columns=[\"total_vp\", \"vp_in_shape\"])\n", - "\n", - " agency_cols = [\"organization_source_record_id\", \"organization_name\"]\n", - " route_cols = [\n", - " \"route_id\",\n", - " \"route_name\",\n", - " \"direction_id\",\n", - " ]\n", - "\n", - " col_order = (\n", 
- " agency_cols\n", - " + route_cols\n", - " + [\n", - " \"time_of_day\",\n", - " \"avg_speed_mph\",\n", - " \"n_trips\",\n", - " \"avg_sched_trip_min\",\n", - " \"avg_rt_trip_min\",\n", - " \"base64_url\",\n", - " \"caltrans_district\",\n", - " \"geometry\",\n", - " \"avg_pings_per_min\",\n", - " \"avg_pct_vp_shape\",\n", - " \"avg_pct_rt_v_sched\",\n", - " \"avg_rt_triptime_w_gtfs_pct\",\n", - " \"avg_min_w_atleast2_trip_updates\",\n", - " ]\n", - " )\n", - "\n", - " final_df = df_with_shape.reindex(columns=col_order).rename(\n", - " columns={\n", - " \"organization_source_record_id\": \"org_id\",\n", - " \"organization_name\": \"agency\",\n", - " \"caltrans_district\": \"district_name\",\n", - " }\n", - " )\n", - "\n", - " return df2, final_df" + " \n", + " return df2" ] }, { "cell_type": "code", - "execution_count": 92, - "id": "cdbf76f5-bd0b-4e2b-8fc8-15843c7dc3b6", + "execution_count": 52, + "id": "a6650834-18fb-4997-9f14-9587e63a7222", + "metadata": {}, + "outputs": [], + "source": [ + "assemble_scheduled_trip_metrics_df = assemble_scheduled_trip_metrics(analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "034bdae7-4832-4b6c-b872-3d9f875e35a7", "metadata": {}, "outputs": [ { @@ -1465,208 +1482,150 @@ " \n", " \n", " \n", - " trip_instance_key\n", - " rt_service_min\n", - " min_w_atleast2_trip_updates\n", - " total_pings_for_trip\n", - " total_min_w_gtfs\n", - " total_vp\n", - " vp_in_shape\n", - " speed_mph\n", - " service_minutes\n", - " pings_per_min\n", - " spatial_accuracy_pct\n", - " rt_triptime_w_gtfs_pct\n", - " rt_v_scheduled_trip_time_pct\n", " schedule_gtfs_dataset_key\n", - " direction_id\n", - " route_id\n", - " shape_array_key\n", - " route_name_used\n", - " service_hours\n", - " trip_first_departure_datetime_pacific\n", + " trip_instance_key\n", + " median_stop_meters\n", " time_of_day\n", + " sched_service_min\n", + " route_id\n", + " direction_id\n", " \n", " \n", " \n", " \n", - " 0\n", - " 
5d25a4366c173007d9c29fdead0299d7\n", - " 74.03\n", - " 73\n", - " 216\n", - " 74\n", - " 216.00\n", - " 148.00\n", - " 21.01\n", - " 58.00\n", - " 2.92\n", - " 68.52\n", - " 99.95\n", - " 27.64\n", - " 63029a23cb0e73f2a5d98a345c5e2e40\n", - " 1\n", - " 3428\n", - " 0d0ca5bc40fb6266a03f400c3aa7e6cb\n", - " \n", - " 0.97\n", - " 2023-12-13 05:34:00\n", - " Early AM\n", - " \n", - " \n", - " 1\n", - " 4b72b80fc9cfe5e613bab95585cbe7e4\n", - " 23.45\n", - " 21\n", - " 59\n", - " 23\n", - " 59.00\n", - " 19.00\n", - " 54.95\n", - " 58.00\n", - " 2.52\n", - " 32.20\n", - " 98.08\n", - " -59.57\n", - " 63029a23cb0e73f2a5d98a345c5e2e40\n", - " 1\n", - " 3428\n", - " 0d0ca5bc40fb6266a03f400c3aa7e6cb\n", - " \n", - " 0.97\n", - " 2023-12-13 06:34:00\n", - " Early AM\n", + " 14049\n", + " 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c\n", + " 33408393e7418ac53c9b114518c93d18\n", + " 322.57\n", + " Owl\n", + " 84.00\n", + " 4-13172\n", + " 1.00\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "0 5d25a4366c173007d9c29fdead0299d7 74.03 \n", - "1 4b72b80fc9cfe5e613bab95585cbe7e4 23.45 \n", - "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "0 73 216 74 \n", - "1 21 59 23 \n", - "\n", - " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", - "0 216.00 148.00 21.01 58.00 2.92 \n", - "1 59.00 19.00 54.95 58.00 2.52 \n", - "\n", - " spatial_accuracy_pct rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \\\n", - "0 68.52 99.95 27.64 \n", - "1 32.20 98.08 -59.57 \n", - "\n", - " schedule_gtfs_dataset_key direction_id route_id \\\n", - "0 63029a23cb0e73f2a5d98a345c5e2e40 1 3428 \n", - "1 63029a23cb0e73f2a5d98a345c5e2e40 1 3428 \n", + " schedule_gtfs_dataset_key trip_instance_key \\\n", + "14049 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c 33408393e7418ac53c9b114518c93d18 \n", "\n", - " shape_array_key route_name_used service_hours \\\n", - "0 0d0ca5bc40fb6266a03f400c3aa7e6cb 0.97 \n", - "1 
0d0ca5bc40fb6266a03f400c3aa7e6cb 0.97 \n", + " median_stop_meters time_of_day sched_service_min route_id \\\n", + "14049 322.57 Owl 84.00 4-13172 \n", "\n", - " trip_first_departure_datetime_pacific time_of_day \n", - "0 2023-12-13 05:34:00 Early AM \n", - "1 2023-12-13 06:34:00 Early AM " + " direction_id \n", + "14049 1.00 " ] }, - "execution_count": 92, + "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dec_df2.head(2)" + "assemble_scheduled_trip_metrics_df.sample()" ] }, { "cell_type": "code", - "execution_count": 93, - "id": "fea7cda5-606c-4054-b189-58a12d250957", + "execution_count": 55, + "id": "3684749f-c9f2-4233-a411-9b2422182d3d", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "pandas.core.frame.DataFrame" - ] - }, - "execution_count": 93, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "type(dec_df2)" + "def add_common_shape(analysis_date: str):\n", + " \"\"\"\n", + " For route-direction df, add common_shape_id (most frequent shape)\n", + " and attach that shape geometry\n", + " \"\"\"\n", + " common_shape = gtfs_schedule_wrangling.most_common_shape_by_route_direction(\n", + " analysis_date).pipe(\n", + " helpers.remove_shapes_outside_ca\n", + " )\n", + " \n", + " return common_shape" ] }, { "cell_type": "code", - "execution_count": 94, - "id": "fe97ad8a-d0ce-40cd-982b-87877882693a", + "execution_count": 54, + "id": "57efca22-e512-4a38-b048-20e253765854", "metadata": {}, "outputs": [], "source": [ - "dec_intermediary, dec_final = average_route_speeds_for_export(\n", - " dec_df2, analysis_date, 70\n", - ")" + "def schedule_metrics_by_route_direction(\n", + " df: pd.DataFrame,\n", + " analysis_date: str,\n", + " group_cols: list\n", + ") -> pd.DataFrame:\n", + " \"\"\"\n", + " Aggregate trip-level metrics to route-direction, and \n", + " attach shape geometry for common_shape_id.\n", + " \"\"\"\n", + " \n", + " service_freq_df = 
gtfs_schedule_wrangling.aggregate_time_of_day_to_peak_offpeak(\n", + " df, group_cols)\n", + " \n", + " metrics_df = (df.groupby(group_cols, observed=True, group_keys=False)\n", + " .agg({\n", + " \"median_stop_meters\": \"mean\", \n", + " # take mean of the median stop spacing for trip\n", + " # does this make sense?\n", + " # median is the single boiled down metric at the trip-level\n", + " \"sched_service_min\": \"mean\",\n", + " }).reset_index()\n", + " .rename(columns = {\n", + " \"median_stop_meters\": \"avg_stop_meters\",\n", + " \"sched_service_min\": \"avg_sched_service_min\"\n", + " })\n", + " )\n", + "\n", + " common_shape_for_route_dir = add_common_shape(analysis_date)\n", + "\n", + " df = pd.merge(\n", + " common_shape_for_route_dir,\n", + " metrics_df,\n", + " on = group_cols,\n", + " how = \"inner\"\n", + " ).merge(\n", + " service_freq_df,\n", + " on = group_cols,\n", + " how = \"inner\"\n", + " )\n", + " \n", + " return df\n" ] }, { "cell_type": "code", - "execution_count": 95, - "id": "58165517-e414-4843-8ece-b7631d4d7f27", + "execution_count": 56, + "id": "c4f73a21-4807-4710-8359-814d49a42bb5", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "count 11397.00\n", - "mean 2.38\n", - "std 0.56\n", - "min 0.10\n", - "25% 1.90\n", - "50% 2.50\n", - "75% 2.90\n", - "max 3.50\n", - "Name: avg_pings_per_min, dtype: float64" - ] - }, - "execution_count": 95, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "dec_final.avg_pings_per_min.describe()" + "route_cols = [\n", + " \"schedule_gtfs_dataset_key\", \n", + " \"route_id\", \n", + " \"direction_id\"\n", + " ]" ] }, { "cell_type": "code", - "execution_count": 96, - "id": "e99b22b7-f6c7-4d69-8b65-da3c39c85f33", + "execution_count": 67, + "id": "b841548c-52a5-4035-8bda-d8752bba1f02", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(11397, 18)" - ] - }, - "execution_count": 96, - "metadata": {}, - "output_type": "execute_result" 
- } - ], + "outputs": [], "source": [ - "dec_final.shape" + "# https://github.com/cal-itp/data-analyses/blob/main/rt_segment_speeds/segment_speed_utils/gtfs_schedule_wrangling.py\n", + "service_freq_df = gtfs_schedule_wrangling.aggregate_time_of_day_to_peak_offpeak(\n", + " assemble_scheduled_trip_metrics_df, route_cols)" ] }, { "cell_type": "code", - "execution_count": 97, - "id": "49867873-4a76-49c4-8c95-0918d7468f82", + "execution_count": 69, + "id": "198f50de-5345-45dc-81ad-2489c77c7d56", "metadata": {}, "outputs": [ { @@ -1690,218 +1649,135 @@ " \n", " \n", " \n", - " org_id\n", - " agency\n", + " schedule_gtfs_dataset_key\n", " route_id\n", - " route_name\n", " direction_id\n", - " time_of_day\n", - " avg_speed_mph\n", - " n_trips\n", - " avg_sched_trip_min\n", - " avg_rt_trip_min\n", - " district_name\n", - " avg_pings_per_min\n", - " avg_pct_vp_shape\n", - " avg_pct_rt_v_sched\n", - " avg_rt_triptime_w_gtfs_pct\n", - " avg_min_w_atleast2_trip_updates\n", + " offpeak_n_trips\n", + " peak_n_trips\n", + " offpeak_frequency\n", + " peak_frequency\n", " \n", " \n", " \n", " \n", - " 2868\n", - " recfma7GNR5lQTTTg\n", - " Orange County Transportation Authority\n", - " 43\n", - " Fullerton - Costa Mesa via Harbor Blvd\n", - " 0\n", - " Owl\n", - " 12.00\n", - " 1\n", - " 79.00\n", - " 91.00\n", - " 12 - Irvine\n", + " 732\n", + " 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c\n", + " 246-13172\n", + " 0.00\n", + " 24.00\n", + " 16.00\n", + " 1.50\n", + " 2.00\n", + " \n", + " \n", + " 3813\n", + " ecd018ad66f497fb8f188ed5a71b284b\n", + " 480\n", + " 1.00\n", + " NaN\n", " 3.00\n", - " 98.52\n", - " 15.19\n", - " 100.00\n", - " 90.00\n", + " NaN\n", + " 0.38\n", " \n", " \n", - " 375\n", - " recPnGkwdpnr8jmHB\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " 801\n", - " Metro A-Line\n", - " 0\n", - " Early AM\n", - " 19.50\n", - " 20\n", - " 109.20\n", - " 136.20\n", - " 07 - Los Angeles\n", - " 2.00\n", - " 92.28\n", - " 24.73\n", - " 89.43\n", - " 
90.60\n", + " 2926\n", + " c499f905e33929a641f083dad55c521e\n", + " 62\n", + " 1.00\n", + " 33.00\n", + " 24.00\n", + " 2.06\n", + " 3.00\n", " \n", " \n", - " 7173\n", - " recPnGkwdpnr8jmHB\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " 92-13172\n", - " DWNTWN LA- SYLMAR STA VIA GLENDALE - GLENOAKS BLS\n", - " 1\n", - " PM Peak\n", - " 12.80\n", - " 12\n", - " 125.50\n", - " 134.90\n", - " 07 - Los Angeles\n", - " 2.20\n", - " 86.25\n", - " 7.49\n", - " 93.25\n", - " 104.50\n", + " 1882\n", + " 8eecb796518dafd3c1b971a99f8b8252\n", + " 5700\n", + " 1.00\n", + " 1.00\n", + " NaN\n", + " 0.06\n", + " NaN\n", + " \n", + " \n", + " 2043\n", + " 9b4c9cf681dfcc10dffe9523b43598bf\n", + " B7\n", + " 1.00\n", + " 1.00\n", + " 5.00\n", + " 0.06\n", + " 0.62\n", " \n", " \n", "\n", "" ], "text/plain": [ - " org_id \\\n", - "2868 recfma7GNR5lQTTTg \n", - "375 recPnGkwdpnr8jmHB \n", - "7173 recPnGkwdpnr8jmHB \n", - "\n", - " agency route_id \\\n", - "2868 Orange County Transportation Authority 43 \n", - "375 Los Angeles County Metropolitan Transportation Authority 801 \n", - "7173 Los Angeles County Metropolitan Transportation Authority 92-13172 \n", + " schedule_gtfs_dataset_key route_id direction_id \\\n", + "732 3f3f36b4c41cc6b5df3eb7f5d8ea6e3c 246-13172 0.00 \n", + "3813 ecd018ad66f497fb8f188ed5a71b284b 480 1.00 \n", + "2926 c499f905e33929a641f083dad55c521e 62 1.00 \n", + "1882 8eecb796518dafd3c1b971a99f8b8252 5700 1.00 \n", + "2043 9b4c9cf681dfcc10dffe9523b43598bf B7 1.00 \n", "\n", - " route_name direction_id \\\n", - "2868 Fullerton - Costa Mesa via Harbor Blvd 0 \n", - "375 Metro A-Line 0 \n", - "7173 DWNTWN LA- SYLMAR STA VIA GLENDALE - GLENOAKS BLS 1 \n", - "\n", - " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "2868 Owl 12.00 1 79.00 91.00 \n", - "375 Early AM 19.50 20 109.20 136.20 \n", - "7173 PM Peak 12.80 12 125.50 134.90 \n", - "\n", - " district_name avg_pings_per_min avg_pct_vp_shape \\\n", - "2868 12 
- Irvine 3.00 98.52 \n", - "375 07 - Los Angeles 2.00 92.28 \n", - "7173 07 - Los Angeles 2.20 86.25 \n", - "\n", - " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", - "2868 15.19 100.00 \n", - "375 24.73 89.43 \n", - "7173 7.49 93.25 \n", - "\n", - " avg_min_w_atleast2_trip_updates \n", - "2868 90.00 \n", - "375 90.60 \n", - "7173 104.50 " + " offpeak_n_trips peak_n_trips offpeak_frequency peak_frequency \n", + "732 24.00 16.00 1.50 2.00 \n", + "3813 NaN 3.00 NaN 0.38 \n", + "2926 33.00 24.00 2.06 3.00 \n", + "1882 1.00 NaN 0.06 NaN \n", + "2043 1.00 5.00 0.06 0.62 " ] }, - "execution_count": 97, + "execution_count": 69, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dec_final.drop(columns=[\"geometry\", \"base64_url\"]).sample(3)" - ] - }, - { - "cell_type": "markdown", - "id": "e2f3b80c-b018-4216-806b-30c0733b728f", - "metadata": {}, - "source": [ - "#### Add new columns" + "service_freq_df.sample(5)" ] }, { - "cell_type": "markdown", - "id": "620e4a8e-87c0-4a10-b27a-15a1d938f770", + "cell_type": "code", + "execution_count": 57, + "id": "8b4bfe50-6b76-4bed-a749-a32ae906914f", "metadata": {}, + "outputs": [], "source": [ - "##### Test peak_frequency and offpeak_frequency" + "schedule_metrics_by_route_direction_df = schedule_metrics_by_route_direction(assemble_scheduled_trip_metrics_df,\n", + " analysis_date, route_cols)" ] }, { "cell_type": "code", - "execution_count": 140, - "id": "3bbf297f-e58c-4697-8a31-7ec88425144e", + "execution_count": 63, + "id": "13917206-ebb4-4eca-9847-bc2e7dde74bd", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'gs://calitp-analytics-data/data-analyses/rt_vs_schedule/'" + "Index(['geometry', 'schedule_gtfs_dataset_key', 'route_id', 'direction_id',\n", + " 'common_shape_id', 'route_name', 'avg_stop_meters',\n", + " 'avg_sched_service_min', 'offpeak_n_trips', 'peak_n_trips',\n", + " 'offpeak_frequency', 'peak_frequency'],\n", + " dtype='object')" ] }, - "execution_count": 140, + "execution_count": 
63, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "RT_SCHED_GCS" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "id": "f10a3c2c-4952-4add-9509-45c527bcac4c", - "metadata": {}, - "outputs": [], - "source": [ - "def load_frequency(analysis_date: str) -> pd.DataFrame:\n", - " freq_cols = [\n", - " \"shape_array_key\",\n", - " \"schedule_gtfs_dataset_key\",\n", - " \"offpeak_n_trips\",\n", - " \"peak_n_trips\",\n", - " \"offpeak_frequency\",\n", - " \"peak_frequency\",\n", - " \"pct_dense\",\n", - " ]\n", - " df = pd.read_parquet(\n", - " f\"{RT_SCHED_GCS}schedule_route_direction_metrics_{analysis_date}.parquet\"\n", - " )[freq_cols]\n", - " df[\"avg_frequency\"] = (\n", - " (df.offpeak_n_trips * df.offpeak_frequency)\n", - " + (df.peak_n_trips * df.peak_frequency)\n", - " ) / (df.offpeak_n_trips + df.peak_n_trips)\n", - " df = df.drop(\n", - " columns=[\n", - " \"offpeak_n_trips\",\n", - " \"peak_n_trips\",\n", - " \"offpeak_frequency\",\n", - " \"peak_frequency\",\n", - " ]\n", - " )\n", - " return df" - ] - }, - { - "cell_type": "code", - "execution_count": 159, - "id": "f7b6c61a-47f7-4734-91be-499f80c1f795", - "metadata": {}, - "outputs": [], - "source": [ - "freq_dec = load_frequency(analysis_date)" + "schedule_metrics_by_route_direction_df.columns" ] }, { "cell_type": "code", - "execution_count": 161, - "id": "ca10f951-bbc8-4474-acc8-6e4da8f43f6c", + "execution_count": 66, + "id": "d8eb847f-0dab-479e-a2bf-606bd98005cd", "metadata": {}, "outputs": [ { @@ -1925,145 +1801,54 @@ " \n", " \n", " \n", - " shape_array_key\n", " schedule_gtfs_dataset_key\n", - " pct_dense\n", - " avg_frequency\n", + " trip_instance_key\n", + " median_stop_meters\n", + " time_of_day\n", + " sched_service_min\n", + " route_id\n", + " direction_id\n", " \n", " \n", " \n", " \n", - " 3085\n", - " 52e5a5b703231861cf3c2d7853e6505b\n", - " 4c105bd9f414afe82dba2c3687cc1d88\n", - " 0.03\n", - " 1.10\n", + " 35206\n", + " 7228eba069f2a0fad0ed8552410a544d\n", + 
" 897126fba95f1b993ff3a966591916d1\n", + " 810.02\n", + " AM Peak\n", + " 50.00\n", + " 1964\n", + " 0.00\n", " \n", " \n", "\n", "" ], "text/plain": [ - " shape_array_key schedule_gtfs_dataset_key \\\n", - "3085 52e5a5b703231861cf3c2d7853e6505b 4c105bd9f414afe82dba2c3687cc1d88 \n", + " schedule_gtfs_dataset_key trip_instance_key \\\n", + "35206 7228eba069f2a0fad0ed8552410a544d 897126fba95f1b993ff3a966591916d1 \n", "\n", - " pct_dense avg_frequency \n", - "3085 0.03 1.10 " - ] - }, - "execution_count": 161, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "freq_dec.sample()" - ] - }, - { - "cell_type": "code", - "execution_count": 163, - "id": "e995b96b-3e44-4bc6-b5c0-e0216da651f9", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "3560" - ] - }, - "execution_count": 163, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(freq_dec)" - ] - }, - { - "cell_type": "code", - "execution_count": 162, - "id": "166da9c3-dde0-48b0-892a-2f36c442d2eb", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "76838" - ] - }, - "execution_count": 162, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "len(dec_intermediary)" - ] - }, - { - "cell_type": "code", - "execution_count": 164, - "id": "4aeaa128-a41e-4c33-9aa5-7bdeb7abda07", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "_merge \n", - "left_only 51049\n", - "both 25789\n", - "right_only 2672\n", - "dtype: int64" - ] - }, - "execution_count": 164, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "pd.merge(\n", - " dec_intermediary, freq_dec, on=[\"shape_array_key\"], how=\"outer\", indicator=True\n", - ")[[\"_merge\"]].value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": 166, - "id": "76a88eaf-ede2-4276-9578-032d499bc388", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "_merge \n", - "left_only 51049\n", - "both 
25789\n", - "right_only 2672\n", - "dtype: int64" + " median_stop_meters time_of_day sched_service_min route_id \\\n", + "35206 810.02 AM Peak 50.00 1964 \n", + "\n", + " direction_id \n", + "35206 0.00 " ] }, - "execution_count": 166, + "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "pd.merge(\n", - " dec_intermediary,\n", - " freq_dec,\n", - " on=[\"schedule_gtfs_dataset_key\", \"shape_array_key\"],\n", - " how=\"outer\",\n", - " indicator=True,\n", - ")[[\"_merge\"]].value_counts()" + "assemble_scheduled_trip_metrics_df.sample()" ] }, { "cell_type": "code", - "execution_count": 167, - "id": "a6bfec15-d0e5-4ec2-a51f-e6ff5c6dfef8", + "execution_count": 65, + "id": "9897c32e-9cc6-4aa1-a267-ec8c3a3254f9", "metadata": {}, "outputs": [ { @@ -2087,173 +1872,263 @@ " \n", " \n", " \n", - " trip_instance_key\n", - " rt_service_min\n", - " min_w_atleast2_trip_updates\n", - " total_pings_for_trip\n", - " total_min_w_gtfs\n", - " total_vp\n", - " vp_in_shape\n", - " speed_mph\n", - " service_minutes\n", - " pings_per_min\n", - " spatial_accuracy_pct\n", - " rt_triptime_w_gtfs_pct\n", - " rt_v_scheduled_trip_time_pct\n", " schedule_gtfs_dataset_key\n", - " direction_id\n", " route_id\n", - " shape_array_key\n", - " route_name_used\n", - " service_hours\n", - " trip_first_departure_datetime_pacific\n", - " time_of_day\n", + " direction_id\n", + " common_shape_id\n", + " route_name\n", + " avg_stop_meters\n", + " avg_sched_service_min\n", + " offpeak_n_trips\n", + " peak_n_trips\n", + " offpeak_frequency\n", + " peak_frequency\n", " \n", " \n", " \n", " \n", - " 10887\n", - " 30b526c933599a93473e74523b6a5462\n", - " 86.83\n", - " 65\n", - " 150\n", - " 78\n", - " 150.00\n", - " 145.00\n", - " 19.23\n", - " 80.00\n", - " 1.73\n", - " 96.67\n", - " 89.83\n", - " 8.54\n", - " 9809d3f8121513057bc5cb8de7b54ce2\n", - " 0\n", - " 023-131\n", - " 216cf9d9e3f57e245d4c3654865847b6\n", - " Salinas - King City\n", - " 1.33\n", - " 2023-12-13 
10:50:00\n", - " Midday\n", + " 1547\n", + " 43d8d305ee692724a532f30ea63a1cbe\n", + " 35E\n", + " 0.00\n", + " shp-35E-04\n", + " Hwy 9/Scotts Valley via Emeline\n", + " 422.40\n", + " 65.77\n", + " 10.00\n", + " 12.00\n", + " 0.62\n", + " 1.50\n", + " \n", + " \n", + " 767\n", + " fe4aab1717eca5a2935c32c85a35a5bf\n", + " 1\n", + " 0.00\n", + " 1\n", + " \n", + " 590.45\n", + " 70.57\n", + " 11.00\n", + " 12.00\n", + " 0.69\n", + " 1.50\n", + " \n", + " \n", + " 1983\n", + " f30efe350fbd22dcbfa4eeeb0def8e73\n", + " 114\n", + " 1.00\n", + " 50\n", + " Hesperia - Fort Irwin\n", + " 814.56\n", + " 133.00\n", + " NaN\n", + " 3.00\n", + " NaN\n", + " 0.38\n", " \n", " \n", "\n", "" ], "text/plain": [ - " trip_instance_key rt_service_min \\\n", - "10887 30b526c933599a93473e74523b6a5462 86.83 \n", - "\n", - " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", - "10887 65 150 78 \n", - "\n", - " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", - "10887 150.00 145.00 19.23 80.00 1.73 \n", - "\n", - " spatial_accuracy_pct rt_triptime_w_gtfs_pct \\\n", - "10887 96.67 89.83 \n", + " schedule_gtfs_dataset_key route_id direction_id common_shape_id \\\n", + "1547 43d8d305ee692724a532f30ea63a1cbe 35E 0.00 shp-35E-04 \n", + "767 fe4aab1717eca5a2935c32c85a35a5bf 1 0.00 1 \n", + "1983 f30efe350fbd22dcbfa4eeeb0def8e73 114 1.00 50 \n", "\n", - " rt_v_scheduled_trip_time_pct schedule_gtfs_dataset_key \\\n", - "10887 8.54 9809d3f8121513057bc5cb8de7b54ce2 \n", + " route_name avg_stop_meters avg_sched_service_min \\\n", + "1547 Hwy 9/Scotts Valley via Emeline 422.40 65.77 \n", + "767 590.45 70.57 \n", + "1983 Hesperia - Fort Irwin 814.56 133.00 \n", "\n", - " direction_id route_id shape_array_key \\\n", - "10887 0 023-131 216cf9d9e3f57e245d4c3654865847b6 \n", - "\n", - " route_name_used service_hours \\\n", - "10887 Salinas - King City 1.33 \n", - "\n", - " trip_first_departure_datetime_pacific time_of_day \n", - "10887 2023-12-13 10:50:00 Midday " 
+ " offpeak_n_trips peak_n_trips offpeak_frequency peak_frequency \n", + "1547 10.00 12.00 0.62 1.50 \n", + "767 11.00 12.00 0.69 1.50 \n", + "1983 NaN 3.00 NaN 0.38 " ] }, - "execution_count": 167, + "execution_count": 65, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "dec_intermediary.sample()" + "schedule_metrics_by_route_direction_df.sample(3).drop(columns = ['geometry'])" ] }, { "cell_type": "markdown", - "id": "a3acfeb1-54bf-4cee-9810-51c7e5fe0aa6", + "id": "8874ce25-56a7-49d3-96d0-7f1140ec52c1", "metadata": {}, "source": [ - "### Check results after aggregating up to route\n", - "* How are the results sooo wrong with `265-13172`" + "##### Actual Function" ] }, { "cell_type": "code", - "execution_count": 116, - "id": "aa2cc0c2-da7b-4ae1-9ac4-84c8d6d60e95", + "execution_count": 30, + "id": "21db7580-cf79-4385-b9dc-80cd99206011", "metadata": {}, "outputs": [], "source": [ - "def checkout_route(\n", - " og_df: pd.DataFrame,\n", - " route_agg: gpd.GeoDataFrame,\n", - " route_id: str,\n", - " time_of_day: str,\n", - " direction_id: int,\n", - "):\n", - " print(\"final\")\n", - " display(\n", - " route_agg.loc[\n", - " (route_agg.route_id == route_id)\n", - " & (route_agg.time_of_day == time_of_day)\n", - " & (route_agg.direction_id == direction_id)\n", - " ].drop(columns=[\"geometry\", \"base64_url\"])\n", - " )\n", - " cols = [\n", - " \"trip_instance_key\",\n", + "def average_route_speeds_for_export(\n", + " df: pd.DataFrame,\n", + " analysis_date: str,\n", + " max_speed: int,\n", + ") -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " Aggregate trip speeds to route-direction.\n", + " Attach shape geometry to most common shape_id.\n", + " \"\"\"\n", + " df2 = df.loc[df.speed_mph <= 70].reset_index(drop=True)\n", + "\n", + " route_cols = [\n", + " \"schedule_gtfs_dataset_key\",\n", " \"time_of_day\",\n", - " \"speed_mph\",\n", - " \"rt_service_min\",\n", + " \"route_id\",\n", + " \"direction_id\",\n", + " \"route_name_used\",\n", + " 
\"shape_array_key\",\n", + " ]\n", + "\n", + " mean_cols = [\n", " \"service_minutes\",\n", + " \"rt_service_min\",\n", + " \"speed_mph\",\n", " \"pings_per_min\",\n", + " \"total_vp\",\n", + " \"vp_in_shape\",\n", " \"total_min_w_gtfs\",\n", " \"min_w_atleast2_trip_updates\",\n", " ]\n", + " count_cols = [\"trip_instance_key\"]\n", "\n", - " print(\"original\")\n", - " og_df2 = og_df.loc[\n", - " (og_df.route_id == route_id)\n", - " & (og_df.time_of_day == time_of_day)\n", - " & (og_df.direction_id == direction_id)\n", - " ]\n", + " df3 = (\n", + " df2.groupby(route_cols)\n", + " .agg({**{e: \"mean\" for e in mean_cols}, **{e: \"count\" for e in count_cols}})\n", + " .reset_index()\n", + " )\n", "\n", - " print(f\"pings per min {og_df2.pings_per_min.mean()}\")\n", - " print(f\"speed_mph {og_df2.speed_mph.mean()}\")\n", - " print(f\"total_vp {og_df2.total_vp.mean()}\")\n", - " print(f\"vp_in_shape {og_df2.vp_in_shape.mean()}\")\n", - " print(f\"min w gtfs {og_df2.total_min_w_gtfs.mean()}\")\n", - " print(f\"min w at least 2 pings {og_df2.min_w_atleast2_trip_updates.mean()}\")\n", - " print(f\"avg vp in pct {og_df2.vp_in_shape.mean()/og_df2.total_vp.mean()}\")\n", - " display(og_df2[cols])" - ] - }, - { - "cell_type": "markdown", - "id": "c8aae146-3b5f-4189-ad0f-b011221b5442", - "metadata": {}, - "source": [ - "#### scheduled trip min (renamed from service_mins) is completely lower." 
- ] - }, + " df4 = df3.assign(\n", + " rt_service_min=df3.rt_service_min.round(1),\n", + " service_minutes=df3.service_minutes.round(1),\n", + " speed_mph=df3.speed_mph.round(1),\n", + " pings_per_min=df3.pings_per_min.round(1),\n", + " min_w_atleast2_trip_updates=df3.min_w_atleast2_trip_updates.round(1),\n", + " total_min_w_gtfs=df3.total_min_w_gtfs.round(1),\n", + " ).rename(\n", + " columns={\n", + " \"service_minutes\": \"avg_sched_trip_min\",\n", + " \"rt_service_min\": \"avg_rt_trip_min\",\n", + " \"trip_instance_key\": \"n_trips\",\n", + " \"route_name_used\": \"route_name\",\n", + " \"pings_per_min\": \"avg_pings_per_min\",\n", + " \"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\",\n", + " \"total_min_w_gtfs\": \"avg_total_min_w_gtfs\",\n", + " \"min_w_atleast2_trip_updates\": \"avg_min_w_atleast2_trip_updates\",\n", + " \"speed_mph\": \"avg_speed_mph\",\n", + " }\n", + " )\n", + "\n", + " org_crosswalk = schedule_rt_utils.sample_gtfs_dataset_key_to_organization_crosswalk(\n", + " df4,\n", + " analysis_date,\n", + " quartet_data=\"schedule\",\n", + " dim_gtfs_dataset_cols=[\"key\", \"base64_url\"],\n", + " dim_organization_cols=[\"source_record_id\", \"name\", \"caltrans_district\"],\n", + " )\n", + "\n", + " df_with_org = pd.merge(\n", + " df4,\n", + " org_crosswalk.rename(columns={\"schedule_gtfs_dataset_key\": \"gtfs_dataset_key\"}),\n", + " on=\"gtfs_dataset_key\",\n", + " how=\"inner\",\n", + " )\n", + "\n", + " shapes = helpers.import_scheduled_shapes(\n", + " analysis_date,\n", + " columns=[\"shape_array_key\", \"geometry\"],\n", + " get_pandas=True,\n", + " crs=geography_utils.WGS84,\n", + " )\n", + "\n", + " df_with_shape = pd.merge(\n", + " shapes,\n", + " df_with_org,\n", + " on=\"shape_array_key\", # once merged, can drop shape_array_key\n", + " how=\"inner\",\n", + " )\n", + " \n", + " # Frequency of routes\n", + " frequency_routes_df = load_frequency(analysis_date)\n", + " \n", + " # Merge\n", + " df_with_shape = pd.merge(df_with_shape, 
frequency_routes_df, on=[\"shape_array_key\"], how = \"left\")\n", + " \n", + " # Add some metrics\n", + " df_with_shape[\"avg_pct_vp_shape\"] = (\n", + " df_with_shape.vp_in_shape / df_with_shape.total_vp * 100\n", + " )\n", + "\n", + " df_with_shape[\"avg_pct_rt_v_sched\"] = (\n", + " df_with_shape.avg_rt_trip_min / df_with_shape.avg_sched_trip_min - 1\n", + " ) * 100\n", + "\n", + " df_with_shape[\"avg_rt_triptime_w_gtfs_pct\"] = (\n", + " df_with_shape.avg_total_min_w_gtfs / df_with_shape.avg_rt_trip_min\n", + " ) * 100\n", + "\n", + " # final_df = df_with_shape.drop(columns=[\"total_vp\", \"vp_in_shape\"])\n", + "\n", + " agency_cols = [\"organization_source_record_id\", \"organization_name\"]\n", + " route_cols = [\n", + " \"route_id\",\n", + " \"route_name\",\n", + " \"direction_id\",\n", + " \"avg_frequency\"\n", + " ]\n", + "\n", + " col_order = (\n", + " agency_cols\n", + " + route_cols\n", + " + [\n", + " \"time_of_day\",\n", + " \"avg_speed_mph\",\n", + " \"n_trips\",\n", + " \"avg_sched_trip_min\",\n", + " \"avg_rt_trip_min\",\n", + " \"base64_url\",\n", + " \"caltrans_district\",\n", + " \"geometry\",\n", + " \"avg_pings_per_min\",\n", + " \"avg_pct_vp_shape\",\n", + " \"avg_pct_rt_v_sched\",\n", + " \"avg_rt_triptime_w_gtfs_pct\",\n", + " \"avg_min_w_atleast2_trip_updates\",\n", + " ]\n", + " )\n", + "\n", + " final_df = df_with_shape.reindex(columns=col_order).rename(\n", + " columns={\n", + " \"organization_source_record_id\": \"org_id\",\n", + " \"organization_name\": \"agency\",\n", + " \"caltrans_district\": \"district_name\",\n", + " \"avg_frequency\": \"avg_route_frequency\"\n", + " }\n", + " )\n", + " \n", + " # Delete out returning df2 for the final function\n", + " return df2, final_df" + ] + }, { "cell_type": "code", - "execution_count": 117, - "id": "04ee1397-318c-4bb7-9f80-2a55b9c75055", + "execution_count": 31, + "id": "cdbf76f5-bd0b-4e2b-8fc8-15843c7dc3b6", "metadata": {}, "outputs": [ - { - "name": "stdout", - "output_type": "stream", 
- "text": [ - "final\n" - ] - }, { "data": { "text/html": [ @@ -2275,202 +2150,210 @@ " \n", " \n", " \n", - " org_id\n", - " agency\n", - " route_id\n", - " route_name\n", + " trip_instance_key\n", + " rt_service_min\n", + " min_w_atleast2_trip_updates\n", + " total_pings_for_trip\n", + " total_min_w_gtfs\n", + " total_vp\n", + " vp_in_shape\n", + " speed_mph\n", + " service_minutes\n", + " pings_per_min\n", + " spatial_accuracy_pct\n", + " rt_triptime_w_gtfs_pct\n", + " rt_v_scheduled_trip_time_pct\n", + " schedule_gtfs_dataset_key\n", " direction_id\n", + " route_id\n", + " shape_array_key\n", + " route_name_used\n", + " service_hours\n", + " trip_first_departure_datetime_pacific\n", " time_of_day\n", - " avg_speed_mph\n", - " n_trips\n", - " avg_sched_trip_min\n", - " avg_rt_trip_min\n", - " district_name\n", - " avg_pings_per_min\n", - " avg_pct_vp_shape\n", - " avg_pct_rt_v_sched\n", - " avg_rt_triptime_w_gtfs_pct\n", - " avg_min_w_atleast2_trip_updates\n", " \n", " \n", " \n", " \n", - " 6533\n", - " recPnGkwdpnr8jmHB\n", - " Los Angeles County Metropolitan Transportation Authority\n", - " 265-13172\n", - " PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL\n", - " 0\n", + " 0\n", + " 5d25a4366c173007d9c29fdead0299d7\n", + " 74.03\n", + " 73\n", + " 216\n", + " 74\n", + " 216.00\n", + " 148.00\n", + " 21.01\n", + " 58.00\n", + " 2.92\n", + " 68.52\n", + " 99.95\n", + " 27.64\n", + " 63029a23cb0e73f2a5d98a345c5e2e40\n", + " 1\n", + " 3428\n", + " 0d0ca5bc40fb6266a03f400c3aa7e6cb\n", + " \n", + " 0.97\n", + " 2023-12-13 05:34:00\n", " Early AM\n", - " 6.90\n", - " 2\n", + " \n", + " \n", + " 1\n", + " 4b72b80fc9cfe5e613bab95585cbe7e4\n", + " 23.45\n", + " 21\n", + " 59\n", + " 23\n", " 59.00\n", - " 99.60\n", - " 07 - Los Angeles\n", - " 2.70\n", - " 70.09\n", - " 68.81\n", - " 99.90\n", - " 92.00\n", + " 19.00\n", + " 54.95\n", + " 58.00\n", + " 2.52\n", + " 32.20\n", + " 98.08\n", + " -59.57\n", + " 63029a23cb0e73f2a5d98a345c5e2e40\n", + " 1\n", + " 3428\n", + 
" 0d0ca5bc40fb6266a03f400c3aa7e6cb\n", + " \n", + " 0.97\n", + " 2023-12-13 06:34:00\n", + " Early AM\n", " \n", " \n", "\n", "" ], "text/plain": [ - " org_id \\\n", - "6533 recPnGkwdpnr8jmHB \n", + " trip_instance_key rt_service_min \\\n", + "0 5d25a4366c173007d9c29fdead0299d7 74.03 \n", + "1 4b72b80fc9cfe5e613bab95585cbe7e4 23.45 \n", "\n", - " agency route_id \\\n", - "6533 Los Angeles County Metropolitan Transportation Authority 265-13172 \n", + " min_w_atleast2_trip_updates total_pings_for_trip total_min_w_gtfs \\\n", + "0 73 216 74 \n", + "1 21 59 23 \n", "\n", - " route_name direction_id \\\n", - "6533 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL 0 \n", + " total_vp vp_in_shape speed_mph service_minutes pings_per_min \\\n", + "0 216.00 148.00 21.01 58.00 2.92 \n", + "1 59.00 19.00 54.95 58.00 2.52 \n", "\n", - " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "6533 Early AM 6.90 2 59.00 99.60 \n", + " spatial_accuracy_pct rt_triptime_w_gtfs_pct rt_v_scheduled_trip_time_pct \\\n", + "0 68.52 99.95 27.64 \n", + "1 32.20 98.08 -59.57 \n", "\n", - " district_name avg_pings_per_min avg_pct_vp_shape \\\n", - "6533 07 - Los Angeles 2.70 70.09 \n", + " schedule_gtfs_dataset_key direction_id route_id \\\n", + "0 63029a23cb0e73f2a5d98a345c5e2e40 1 3428 \n", + "1 63029a23cb0e73f2a5d98a345c5e2e40 1 3428 \n", "\n", - " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", - "6533 68.81 99.90 \n", + " shape_array_key route_name_used service_hours \\\n", + "0 0d0ca5bc40fb6266a03f400c3aa7e6cb 0.97 \n", + "1 0d0ca5bc40fb6266a03f400c3aa7e6cb 0.97 \n", "\n", - " avg_min_w_atleast2_trip_updates \n", - "6533 92.00 " + " trip_first_departure_datetime_pacific time_of_day \n", + "0 2023-12-13 05:34:00 Early AM \n", + "1 2023-12-13 06:34:00 Early AM " ] }, + "execution_count": 31, "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "original\n", - "pings per min 2.6849179704528776\n", 
- "speed_mph 6.887376703252869\n", - "total_vp 267.5\n", - "vp_in_shape 187.5\n", - "min w gtfs 99.5\n", - "min w at least 2 pings 92.0\n", - "avg vp in pct 0.7009345794392523\n" - ] - }, + "output_type": "execute_result" + } + ], + "source": [ + "dec_df2.head(2)" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "fea7cda5-606c-4054-b189-58a12d250957", + "metadata": {}, + "outputs": [ { "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
2486648a01217589c2faa46db395d6cf8317dEarly AM9.4095.0258.002.659587
2486770674803a1c4416fc49f883bc3b2c18bEarly AM4.38104.1360.002.7210497
\n", - "
" - ], "text/plain": [ - " trip_instance_key time_of_day speed_mph \\\n", - "24866 48a01217589c2faa46db395d6cf8317d Early AM 9.40 \n", - "24867 70674803a1c4416fc49f883bc3b2c18b Early AM 4.38 \n", - "\n", - " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", - "24866 95.02 58.00 2.65 95 \n", - "24867 104.13 60.00 2.72 104 \n", - "\n", - " min_w_atleast2_trip_updates \n", - "24866 87 \n", - "24867 97 " + "pandas.core.frame.DataFrame" ] }, + "execution_count": 32, "metadata": {}, - "output_type": "display_data" + "output_type": "execute_result" } ], "source": [ - "route_265 = checkout_route(dec_intermediary, dec_final, \"265-13172\", \"Early AM\", 0)" + "type(dec_df2)" ] }, { "cell_type": "code", - "execution_count": 118, - "id": "7b878898-29f6-4b86-bbfd-a586682df078", + "execution_count": 33, + "id": "fe97ad8a-d0ce-40cd-982b-87877882693a", + "metadata": {}, + "outputs": [], + "source": [ + "dec_intermediary, dec_final = average_route_speeds_for_export(\n", + " dec_df2, analysis_date, 70\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "58165517-e414-4843-8ece-b7631d4d7f27", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "0.7009345794392523" + "count 11397.00\n", + "mean 2.38\n", + "std 0.56\n", + "min 0.10\n", + "25% 1.90\n", + "50% 2.50\n", + "75% 2.90\n", + "max 3.50\n", + "Name: avg_pings_per_min, dtype: float64" ] }, - "execution_count": 118, + "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "187.5 / 267.5" + "dec_final.avg_pings_per_min.describe()" ] }, { "cell_type": "code", - "execution_count": 120, - "id": "a3c23900-fdaa-476c-a490-dbc703df0c28", + "execution_count": 35, + "id": "e99b22b7-f6c7-4d69-8b65-da3c39c85f33", "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "final\n" - ] - }, + "data": { + "text/plain": [ + "(11397, 19)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + 
} + ], + "source": [ + "dec_final.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "49867873-4a76-49c4-8c95-0918d7468f82", + "metadata": {}, + "outputs": [ { "data": { "text/html": [ @@ -2497,6 +2380,7 @@ " route_id\n", " route_name\n", " direction_id\n", + " avg_route_frequency\n", " time_of_day\n", " avg_speed_mph\n", " n_trips\n", @@ -2512,60 +2396,188 @@ " \n", " \n", " \n", - " 9012\n", - " rechaapWbeffO33OX\n", - " City and County of San Francisco\n", - " 38R\n", - " Weekdays 5am-10pm Weekends 6am-9pm\n", + " 8314\n", + " recSiaaMmBXW7fUZS\n", + " Stanislaus Regional Transit Authority\n", + " 61\n", + " \n", + " 0\n", + " 0.65\n", + " Early AM\n", + " 13.40\n", " 1\n", + " 28.00\n", + " 56.20\n", + " 10 - Stockton\n", + " 3.00\n", + " 81.33\n", + " 100.71\n", + " 99.64\n", + " 56.00\n", + " \n", + " \n", + " 8785\n", + " recJcXMNC5MUm2uDe\n", + " Victor Valley Transit Authority\n", + " 3226\n", + " Barstow - Hinkley - Helendale\n", + " 0\n", + " 0.28\n", " AM Peak\n", - " 6.70\n", - " 30\n", - " 43.90\n", - " 58.90\n", - " 04 - Oakland\n", + " 34.30\n", + " 1\n", + " 78.00\n", + " 96.70\n", + " 08 - San Bernardino\n", " 3.00\n", - " 88.33\n", - " 34.17\n", - " 100.17\n", - " 58.30\n", + " 100.00\n", + " 23.97\n", + " 99.28\n", + " 96.00\n", + " \n", + " \n", + " 2224\n", + " recOZgevYf7Jimm9L\n", + " Alameda-Contra Costa Transit District\n", + " 251\n", + " Paseo Padre - Thornton - Cherry\n", + " 0\n", + " 0.73\n", + " PM Peak\n", + " 15.90\n", + " 5\n", + " 26.20\n", + " 36.10\n", + " 04 - Oakland\n", + " 2.80\n", + " 100.00\n", + " 37.79\n", + " 100.28\n", + " 35.20\n", " \n", " \n", "\n", "" ], "text/plain": [ - " org_id agency route_id \\\n", - "9012 rechaapWbeffO33OX City and County of San Francisco 38R \n", + " org_id agency route_id \\\n", + "8314 recSiaaMmBXW7fUZS Stanislaus Regional Transit Authority 61 \n", + "8785 recJcXMNC5MUm2uDe Victor Valley Transit Authority 3226 \n", + "2224 recOZgevYf7Jimm9L Alameda-Contra Costa 
Transit District 251 \n", "\n", - " route_name direction_id time_of_day \\\n", - "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 AM Peak \n", + " route_name direction_id avg_route_frequency \\\n", + "8314 0 0.65 \n", + "8785 Barstow - Hinkley - Helendale 0 0.28 \n", + "2224 Paseo Padre - Thornton - Cherry 0 0.73 \n", "\n", - " avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "9012 6.70 30 43.90 58.90 \n", + " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "8314 Early AM 13.40 1 28.00 56.20 \n", + "8785 AM Peak 34.30 1 78.00 96.70 \n", + "2224 PM Peak 15.90 5 26.20 36.10 \n", "\n", - " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", - "9012 04 - Oakland 3.00 88.33 34.17 \n", + " district_name avg_pings_per_min avg_pct_vp_shape \\\n", + "8314 10 - Stockton 3.00 81.33 \n", + "8785 08 - San Bernardino 3.00 100.00 \n", + "2224 04 - Oakland 2.80 100.00 \n", "\n", - " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", - "9012 100.17 58.30 " + " avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", + "8314 100.71 99.64 \n", + "8785 23.97 99.28 \n", + "2224 37.79 100.28 \n", + "\n", + " avg_min_w_atleast2_trip_updates \n", + "8314 56.00 \n", + "8785 96.00 \n", + "2224 35.20 " ] }, + "execution_count": 36, "metadata": {}, - "output_type": "display_data" - }, + "output_type": "execute_result" + } + ], + "source": [ + "dec_final.drop(columns=[\"geometry\", \"base64_url\"]).sample(3)" + ] + }, + { + "cell_type": "markdown", + "id": "a3acfeb1-54bf-4cee-9810-51c7e5fe0aa6", + "metadata": {}, + "source": [ + "### Check results after aggregating up to route\n", + "* How are the results sooo wrong with `265-13172`" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "aa2cc0c2-da7b-4ae1-9ac4-84c8d6d60e95", + "metadata": {}, + "outputs": [], + "source": [ + "def checkout_route(\n", + " og_df: pd.DataFrame,\n", + " route_agg: gpd.GeoDataFrame,\n", + " route_id: str,\n", + " time_of_day: 
str,\n", + " direction_id: int,\n", + "):\n", + " print(\"final\")\n", + " display(\n", + " route_agg.loc[\n", + " (route_agg.route_id == route_id)\n", + " & (route_agg.time_of_day == time_of_day)\n", + " & (route_agg.direction_id == direction_id)\n", + " ].drop(columns=[\"geometry\", \"base64_url\"])\n", + " )\n", + " cols = [\n", + " \"trip_instance_key\",\n", + " \"time_of_day\",\n", + " \"speed_mph\",\n", + " \"rt_service_min\",\n", + " \"service_minutes\",\n", + " \"pings_per_min\",\n", + " \"total_min_w_gtfs\",\n", + " \"min_w_atleast2_trip_updates\",\n", + " ]\n", + "\n", + " print(\"original\")\n", + " og_df2 = og_df.loc[\n", + " (og_df.route_id == route_id)\n", + " & (og_df.time_of_day == time_of_day)\n", + " & (og_df.direction_id == direction_id)\n", + " ]\n", + "\n", + " print(f\"pings per min {og_df2.pings_per_min.mean()}\")\n", + " print(f\"speed_mph {og_df2.speed_mph.mean()}\")\n", + " print(f\"total_vp {og_df2.total_vp.mean()}\")\n", + " print(f\"vp_in_shape {og_df2.vp_in_shape.mean()}\")\n", + " print(f\"min w gtfs {og_df2.total_min_w_gtfs.mean()}\")\n", + " print(f\"min w at least 2 pings {og_df2.min_w_atleast2_trip_updates.mean()}\")\n", + " print(f\"avg vp in pct {og_df2.vp_in_shape.mean()/og_df2.total_vp.mean()}\")\n", + " display(og_df2[cols])" + ] + }, + { + "cell_type": "markdown", + "id": "c8aae146-3b5f-4189-ad0f-b011221b5442", + "metadata": {}, + "source": [ + "#### scheduled trip min (renamed from service_mins) is completely lower." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "04ee1397-318c-4bb7-9f80-2a55b9c75055", + "metadata": {}, + "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "original\n", - "pings per min 2.961175851535439\n", - "speed_mph 6.682525584870673\n", - "total_vp 174.56666666666666\n", - "vp_in_shape 154.2\n", - "min w gtfs 58.96666666666667\n", - "min w at least 2 pings 58.3\n", - "avg vp in pct 0.8833301508497231\n" + "final\n" ] }, { @@ -2589,8 +2601,113 @@ " \n", " \n", " \n", - " trip_instance_key\n", - " time_of_day\n", + " org_id\n", + " agency\n", + " route_id\n", + " route_name\n", + " direction_id\n", + " avg_route_frequency\n", + " time_of_day\n", + " avg_speed_mph\n", + " n_trips\n", + " avg_sched_trip_min\n", + " avg_rt_trip_min\n", + " district_name\n", + " avg_pings_per_min\n", + " avg_pct_vp_shape\n", + " avg_pct_rt_v_sched\n", + " avg_rt_triptime_w_gtfs_pct\n", + " avg_min_w_atleast2_trip_updates\n", + " \n", + " \n", + " \n", + " \n", + " 6533\n", + " recPnGkwdpnr8jmHB\n", + " Los Angeles County Metropolitan Transportation Authority\n", + " 265-13172\n", + " PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL\n", + " 0\n", + " 0.83\n", + " Early AM\n", + " 6.90\n", + " 2\n", + " 59.00\n", + " 99.60\n", + " 07 - Los Angeles\n", + " 2.70\n", + " 70.09\n", + " 68.81\n", + " 99.90\n", + " 92.00\n", + " \n", + " \n", + "\n", + "" + ], + "text/plain": [ + " org_id \\\n", + "6533 recPnGkwdpnr8jmHB \n", + "\n", + " agency route_id \\\n", + "6533 Los Angeles County Metropolitan Transportation Authority 265-13172 \n", + "\n", + " route_name direction_id \\\n", + "6533 PICO RIVERA - LAKEWOOD CTR MALL VIA PARAMOUNT BL 0 \n", + "\n", + " avg_route_frequency time_of_day avg_speed_mph n_trips \\\n", + "6533 0.83 Early AM 6.90 2 \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min district_name \\\n", + "6533 59.00 99.60 07 - Los Angeles \n", + "\n", + " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", + "6533 2.70 70.09 
68.81 \n", + "\n", + " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", + "6533 99.90 92.00 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "original\n", + "pings per min 2.6849179704528776\n", + "speed_mph 6.887376703252869\n", + "total_vp 267.5\n", + "vp_in_shape 187.5\n", + "min w gtfs 99.5\n", + "min w at least 2 pings 92.0\n", + "avg vp in pct 0.7009345794392523\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -2601,57 +2718,270 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutes
6161076fa3ed3fd8ef28a446eedb4c1e94e6aAM Peak9.3948.8239.002.974949
6161147d8da2afbc3e4c78f9be4c6c53a7776AM Peak10.0049.3839.002.9650492486648a01217589c2faa46db395d6cf8317dEarly AM9.4095.0258.002.659587
6161240590ba21f73ae1775a4538e34e67cc3AM Peak5.3497.6741.002.98982486770674803a1c4416fc49f883bc3b2c18bEarly AM4.38104.1360.002.7210497
61613aa1b90f05357a29abc97fae90cd5bafeAM Peak9.7349.4541.002.954949
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "24866 48a01217589c2faa46db395d6cf8317d Early AM 9.40 \n", + "24867 70674803a1c4416fc49f883bc3b2c18b Early AM 4.38 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "24866 95.02 58.00 2.65 95 \n", + "24867 104.13 60.00 2.72 104 \n", + "\n", + " min_w_atleast2_trip_updates \n", + "24866 87 \n", + "24867 97 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "route_265 = checkout_route(dec_intermediary, dec_final, \"265-13172\", \"Early AM\", 0)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "7b878898-29f6-4b86-bbfd-a586682df078", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.7009345794392523" + ] + }, + "execution_count": 39, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "187.5 / 267.5" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "a3c23900-fdaa-476c-a490-dbc703df0c28", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idavg_route_frequencytime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
616144d7cb7ddea2191f062c1ea3165df67c8AM Peak8.5857.9541.002.979012rechaapWbeffO33OXCity and County of San Francisco38RWeekdays 5am-10pm Weekends 6am-9pm17.02AM Peak6.703043.9058.9004 - Oakland3.0088.3334.17100.1758.30
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id \\\n", + "9012 rechaapWbeffO33OX City and County of San Francisco 38R \n", + "\n", + " route_name direction_id avg_route_frequency \\\n", + "9012 Weekdays 5am-10pm Weekends 6am-9pm 1 7.02 \n", + "\n", + " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", + "9012 AM Peak 6.70 30 43.90 58.90 \n", + "\n", + " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", + "9012 04 - Oakland 3.00 88.33 34.17 \n", + "\n", + " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", + "9012 100.17 58.30 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "original\n", + "pings per min 2.961175851535439\n", + "speed_mph 6.682525584870673\n", + "total_vp 174.56666666666666\n", + "vp_in_shape 154.2\n", + "min w gtfs 58.96666666666667\n", + "min w at least 2 pings 58.3\n", + "avg vp in pct 0.8833301508497231\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", @@ -3000,36 +3330,572 @@ "66858 89.63 39.00 2.98 90 \n", "\n", " min_w_atleast2_trip_updates \n", - "61610 49 \n", - "61611 49 \n", - "61612 97 \n", - "61613 49 \n", - "61614 58 \n", - "61615 93 \n", - "61616 67 \n", - "61617 54 \n", - "61618 52 \n", - "61619 51 \n", - "61620 41 \n", - "61621 57 \n", - "61622 52 \n", - "61623 84 \n", - "61624 49 \n", - "61625 70 \n", - "61626 47 \n", - "61627 55 \n", - "61628 59 \n", - "61629 52 \n", - "61630 45 \n", - "61631 63 \n", - "61632 47 \n", - "61633 47 \n", - "61634 60 \n", - "61635 56 \n", - "61636 51 \n", - "61637 47 \n", - "61638 59 \n", - "66858 89 " + "61610 49 \n", + "61611 49 \n", + "61612 97 \n", + "61613 49 \n", + "61614 58 \n", + "61615 93 \n", + "61616 67 \n", + "61617 54 \n", + "61618 52 \n", + "61619 51 \n", + "61620 41 \n", + "61621 57 \n", + "61622 52 \n", + "61623 84 \n", + "61624 49 \n", + "61625 70 \n", + "61626 47 \n", + "61627 55 \n", + "61628 59 \n", + "61629 52 \n", + "61630 45 \n", + "61631 63 \n", + "61632 47 \n", + "61633 47 \n", + "61634 60 \n", + "61635 56 \n", + "61636 51 \n", + "61637 47 \n", + "61638 59 \n", + "66858 89 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_route(dec_intermediary, dec_final, \"38R\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": 
"8168d872-49c3-44c2-bdc7-fa499124c5af", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
6161076fa3ed3fd8ef28a446eedb4c1e94e6aAM Peak9.3948.8239.002.974949
6161147d8da2afbc3e4c78f9be4c6c53a7776AM Peak10.0049.3839.002.965049
6161240590ba21f73ae1775a4538e34e67cc3AM Peak5.3497.6741.002.989897
61613aa1b90f05357a29abc97fae90cd5bafeAM Peak9.7349.4541.002.954949
616144d7cb7ddea2191f062c1ea3165df67c8AM Peak8.5857.9541.002.975858
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idavg_route_frequencytime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
5973recIKnsnTdKQ0vsivWestern Contra Costa Transit AuthorityLynxRodeo/Hercules/San Francisco Transbay Terminal1NaNAM Peak13.40650.0060.7004 - Oakland2.80NaN21.4098.0257.70
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id \\\n", + "5973 recIKnsnTdKQ0vsiv Western Contra Costa Transit Authority Lynx \n", + "\n", + " route_name direction_id \\\n", + "5973 Rodeo/Hercules/San Francisco Transbay Terminal 1 \n", + "\n", + " avg_route_frequency time_of_day avg_speed_mph n_trips \\\n", + "5973 NaN AM Peak 13.40 6 \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min district_name avg_pings_per_min \\\n", + "5973 50.00 60.70 04 - Oakland 2.80 \n", + "\n", + " avg_pct_vp_shape avg_pct_rt_v_sched avg_rt_triptime_w_gtfs_pct \\\n", + "5973 NaN 21.40 98.02 \n", + "\n", + " avg_min_w_atleast2_trip_updates \n", + "5973 57.70 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "original\n", + "pings per min 2.8253065827801014\n", + "speed_mph 13.403813542450534\n", + "total_vp nan\n", + "vp_in_shape nan\n", + "min w gtfs 59.5\n", + "min w at least 2 pings 57.666666666666664\n", + "avg vp in pct nan\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
1298714c9fc6b379e1fe869ba37bfd79a8245AM Peak13.3161.4050.002.725756
129902ee02fe17c9acad17ccb44eaaea4debeAM Peak12.6963.1850.002.906362
1299178a402dfe7d89e0b919a193ac59c69aeAM Peak12.6163.1850.002.826260
129942bb4857e894a94d48a79620858c8384eAM Peak12.4464.4550.002.846362
12997b97a1995cd54253c58e82bb7c9ad3414AM Peak15.5952.8250.002.825350
13004baeeed7c3d6ab74ad9ff40f42a2f1da3AM Peak13.7859.1350.002.865956
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "12987 14c9fc6b379e1fe869ba37bfd79a8245 AM Peak 13.31 \n", + "12990 2ee02fe17c9acad17ccb44eaaea4debe AM Peak 12.69 \n", + "12991 78a402dfe7d89e0b919a193ac59c69ae AM Peak 12.61 \n", + "12994 2bb4857e894a94d48a79620858c8384e AM Peak 12.44 \n", + "12997 b97a1995cd54253c58e82bb7c9ad3414 AM Peak 15.59 \n", + "13004 baeeed7c3d6ab74ad9ff40f42a2f1da3 AM Peak 13.78 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "12987 61.40 50.00 2.72 57 \n", + "12990 63.18 50.00 2.90 63 \n", + "12991 63.18 50.00 2.82 62 \n", + "12994 64.45 50.00 2.84 63 \n", + "12997 52.82 50.00 2.82 53 \n", + "13004 59.13 50.00 2.86 59 \n", + "\n", + " min_w_atleast2_trip_updates \n", + "12987 56 \n", + "12990 62 \n", + "12991 60 \n", + "12994 62 \n", + "12997 50 \n", + "13004 56 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "checkout_route(dec_intermediary, dec_final, \"Lynx\", \"AM Peak\", 1)" + ] + }, + { + "cell_type": "markdown", + "id": "bfc98904-dbed-4302-8a9c-55adea3676b9", + "metadata": {}, + "source": [ + "### Test aggregating with March" + ] + }, + { + "cell_type": "code", + "execution_count": 70, + "id": "0fd73539-ea23-4b0a-9509-ae0162f512af", + "metadata": {}, + "outputs": [], + "source": [ + "mar_date = \"2023-03-15\"" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "cd2a6ffd-03a9-432a-ae0d-ac15f795278e", + "metadata": {}, + "outputs": [], + "source": [ + "mar_df2 = add_scheduled_trip_columns(df_2023_03_15, mar_date, [\"trip_instance_key\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "988731d1-b062-43e5-896d-8bad990fdf46", + "metadata": {}, + "outputs": [], + "source": [ + "mar_intermediary, mar_final = average_route_speeds_for_export(mar_df2, mar_date, 70)" + ] + }, + { + "cell_type": "code", + "execution_count": 74, + "id": "64960b35-b4ac-43e4-9e6f-594e349fe7e2", + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "Index(['org_id', 'agency', 'route_id', 'route_name', 'direction_id',\n", + " 'avg_route_frequency', 'time_of_day', 'avg_speed_mph', 'n_trips',\n", + " 'avg_sched_trip_min', 'avg_rt_trip_min', 'base64_url', 'district_name',\n", + " 'geometry', 'avg_pings_per_min', 'avg_pct_vp_shape',\n", + " 'avg_pct_rt_v_sched', 'avg_rt_triptime_w_gtfs_pct',\n", + " 'avg_min_w_atleast2_trip_updates'],\n", + " dtype='object')" + ] + }, + "execution_count": 74, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "mar_final.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "bf6c84be-eb6d-4142-9ec7-2c737cd96517", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "final\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
org_idagencyroute_idroute_namedirection_idavg_route_frequencytime_of_dayavg_speed_mphn_tripsavg_sched_trip_minavg_rt_trip_mindistrict_nameavg_pings_per_minavg_pct_vp_shapeavg_pct_rt_v_schedavg_rt_triptime_w_gtfs_pctavg_min_w_atleast2_trip_updates
3720reckQmUdXUzHFmlVfCity of Ojai47632000.85PM Peak26.10449.0033.0007 - Los Angeles2.90100.00-32.65100.6132.20
\n", + "
" + ], + "text/plain": [ + " org_id agency route_id route_name direction_id \\\n", + "3720 reckQmUdXUzHFmlVf City of Ojai 4763 20 0 \n", + "\n", + " avg_route_frequency time_of_day avg_speed_mph n_trips \\\n", + "3720 0.85 PM Peak 26.10 4 \n", + "\n", + " avg_sched_trip_min avg_rt_trip_min district_name \\\n", + "3720 49.00 33.00 07 - Los Angeles \n", + "\n", + " avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", + "3720 2.90 100.00 -32.65 \n", + "\n", + " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", + "3720 100.61 32.20 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "original\n", + "pings per min 2.909125279321039\n", + "speed_mph 26.116502281263557\n", + "total_vp 96.75\n", + "vp_in_shape 96.75\n", + "min w gtfs 33.25\n", + "min w at least 2 pings 32.25\n", + "avg vp in pct 1.0\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_instance_keytime_of_dayspeed_mphrt_service_minservice_minutespings_per_mintotal_min_w_gtfsmin_w_atleast2_trip_updates
364527ea7312469de416b5c96c2df46b07c85PM Peak46.1216.6734.002.881616
364533d6760f1db43f24d68d2e780ac2be0dbPM Peak15.0449.0554.002.945048
372601af731c948711c3f047f29f922d24ee6PM Peak14.2651.6854.002.965251
3727849ce553a741ac1fcd734f035f299b81bPM Peak29.0614.6854.002.861514
\n", + "
" + ], + "text/plain": [ + " trip_instance_key time_of_day speed_mph \\\n", + "36452 7ea7312469de416b5c96c2df46b07c85 PM Peak 46.12 \n", + "36453 3d6760f1db43f24d68d2e780ac2be0db PM Peak 15.04 \n", + "37260 1af731c948711c3f047f29f922d24ee6 PM Peak 14.26 \n", + "37278 49ce553a741ac1fcd734f035f299b81b PM Peak 29.06 \n", + "\n", + " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", + "36452 16.67 34.00 2.88 16 \n", + "36453 49.05 54.00 2.94 50 \n", + "37260 51.68 54.00 2.96 52 \n", + "37278 14.68 54.00 2.86 15 \n", + "\n", + " min_w_atleast2_trip_updates \n", + "36452 16 \n", + "36453 48 \n", + "37260 51 \n", + "37278 14 " ] }, "metadata": {}, @@ -3037,13 +3903,13 @@ } ], "source": [ - "checkout_route(dec_intermediary, dec_final, \"38R\", \"AM Peak\", 1)" + "checkout_route(mar_intermediary, mar_final, \"4763\", \"PM Peak\", 0)" ] }, { "cell_type": "code", - "execution_count": 121, - "id": "8168d872-49c3-44c2-bdc7-fa499124c5af", + "execution_count": 77, + "id": "1993266f-05df-4613-9573-29402822b9da", "metadata": {}, "outputs": [ { @@ -3079,6 +3945,7 @@ " route_id\n", " route_name\n", " direction_id\n", + " avg_route_frequency\n", " time_of_day\n", " avg_speed_mph\n", " n_trips\n", @@ -3094,43 +3961,44 @@ " \n", " \n", " \n", - " 5973\n", - " recIKnsnTdKQ0vsiv\n", - " Western Contra Costa Transit Authority\n", - " Lynx\n", - " Rodeo/Hercules/San Francisco Transbay Terminal\n", - " 1\n", - " AM Peak\n", - " 13.40\n", - " 6\n", - " 50.00\n", - " 60.70\n", - " 04 - Oakland\n", - " 2.80\n", - " NaN\n", - " 21.40\n", - " 98.02\n", - " 57.70\n", + " 1285\n", + " recRBcrX4ZvTyvSnm\n", + " North County Transit District\n", + " 332\n", + " Vista TC - Buena Creek Station\n", + " 0\n", + " 1.52\n", + " Evening\n", + " 16.00\n", + " 2\n", + " 32.00\n", + " 46.20\n", + " 11 - San Diego\n", + " 2.60\n", + " 100.00\n", + " 44.38\n", + " 99.57\n", + " 43.00\n", " \n", " \n", "\n", "" ], "text/plain": [ - " org_id agency route_id \\\n", - "5973 recIKnsnTdKQ0vsiv 
Western Contra Costa Transit Authority Lynx \n", + " org_id agency route_id \\\n", + "1285 recRBcrX4ZvTyvSnm North County Transit District 332 \n", "\n", - " route_name direction_id \\\n", - "5973 Rodeo/Hercules/San Francisco Transbay Terminal 1 \n", + " route_name direction_id avg_route_frequency \\\n", + "1285 Vista TC - Buena Creek Station 0 1.52 \n", "\n", " time_of_day avg_speed_mph n_trips avg_sched_trip_min avg_rt_trip_min \\\n", - "5973 AM Peak 13.40 6 50.00 60.70 \n", + "1285 Evening 16.00 2 32.00 46.20 \n", "\n", - " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", - "5973 04 - Oakland 2.80 NaN 21.40 \n", + " district_name avg_pings_per_min avg_pct_vp_shape avg_pct_rt_v_sched \\\n", + "1285 11 - San Diego 2.60 100.00 44.38 \n", "\n", " avg_rt_triptime_w_gtfs_pct avg_min_w_atleast2_trip_updates \n", - "5973 98.02 57.70 " + "1285 99.57 43.00 " ] }, "metadata": {}, @@ -3141,13 +4009,13 @@ "output_type": "stream", "text": [ "original\n", - "pings per min 2.8253065827801014\n", - "speed_mph 13.403813542450534\n", - "total_vp nan\n", - "vp_in_shape nan\n", - "min w gtfs 59.5\n", - "min w at least 2 pings 57.666666666666664\n", - "avg vp in pct nan\n" + "pings per min 2.5985041071389485\n", + "speed_mph 15.952292627153918\n", + "total_vp 120.0\n", + "vp_in_shape 120.0\n", + "min w gtfs 46.0\n", + "min w at least 2 pings 43.0\n", + "avg vp in pct 1.0\n" ] }, { @@ -3183,70 +4051,26 @@ " \n", " \n", " \n", - " 12987\n", - " 14c9fc6b379e1fe869ba37bfd79a8245\n", - " AM Peak\n", - " 13.31\n", - " 61.40\n", - " 50.00\n", - " 2.72\n", - " 57\n", - " 56\n", - " \n", - " \n", - " 12990\n", - " 2ee02fe17c9acad17ccb44eaaea4debe\n", - " AM Peak\n", - " 12.69\n", - " 63.18\n", - " 50.00\n", - " 2.90\n", - " 63\n", - " 62\n", - " \n", - " \n", - " 12991\n", - " 78a402dfe7d89e0b919a193ac59c69ae\n", - " AM Peak\n", - " 12.61\n", - " 63.18\n", - " 50.00\n", - " 2.82\n", - " 62\n", - " 60\n", - " \n", - " \n", - " 12994\n", - " 
2bb4857e894a94d48a79620858c8384e\n", - " AM Peak\n", - " 12.44\n", - " 64.45\n", - " 50.00\n", - " 2.84\n", - " 63\n", - " 62\n", - " \n", - " \n", - " 12997\n", - " b97a1995cd54253c58e82bb7c9ad3414\n", - " AM Peak\n", - " 15.59\n", - " 52.82\n", - " 50.00\n", - " 2.82\n", - " 53\n", - " 50\n", + " 41418\n", + " e54c1ac191dc0b57df34834df825d0ad\n", + " Evening\n", + " 10.83\n", + " 47.10\n", + " 31.00\n", + " 2.59\n", + " 47\n", + " 43\n", " \n", " \n", - " 13004\n", - " baeeed7c3d6ab74ad9ff40f42a2f1da3\n", - " AM Peak\n", - " 13.78\n", - " 59.13\n", - " 50.00\n", - " 2.86\n", - " 59\n", - " 56\n", + " 41433\n", + " e1d2ca1ad28f5fe1a3ec9a772cfae369\n", + " Evening\n", + " 21.07\n", + " 45.27\n", + " 33.00\n", + " 2.61\n", + " 45\n", + " 43\n", " \n", " \n", "\n", @@ -3254,144 +4078,22 @@ ], "text/plain": [ " trip_instance_key time_of_day speed_mph \\\n", - "12987 14c9fc6b379e1fe869ba37bfd79a8245 AM Peak 13.31 \n", - "12990 2ee02fe17c9acad17ccb44eaaea4debe AM Peak 12.69 \n", - "12991 78a402dfe7d89e0b919a193ac59c69ae AM Peak 12.61 \n", - "12994 2bb4857e894a94d48a79620858c8384e AM Peak 12.44 \n", - "12997 b97a1995cd54253c58e82bb7c9ad3414 AM Peak 15.59 \n", - "13004 baeeed7c3d6ab74ad9ff40f42a2f1da3 AM Peak 13.78 \n", + "41418 e54c1ac191dc0b57df34834df825d0ad Evening 10.83 \n", + "41433 e1d2ca1ad28f5fe1a3ec9a772cfae369 Evening 21.07 \n", "\n", " rt_service_min service_minutes pings_per_min total_min_w_gtfs \\\n", - "12987 61.40 50.00 2.72 57 \n", - "12990 63.18 50.00 2.90 63 \n", - "12991 63.18 50.00 2.82 62 \n", - "12994 64.45 50.00 2.84 63 \n", - "12997 52.82 50.00 2.82 53 \n", - "13004 59.13 50.00 2.86 59 \n", + "41418 47.10 31.00 2.59 47 \n", + "41433 45.27 33.00 2.61 45 \n", "\n", " min_w_atleast2_trip_updates \n", - "12987 56 \n", - "12990 62 \n", - "12991 60 \n", - "12994 62 \n", - "12997 50 \n", - "13004 56 " + "41418 43 \n", + "41433 43 " ] }, "metadata": {}, "output_type": "display_data" } ], - "source": [ - "checkout_route(dec_intermediary, dec_final, 
\"Lynx\", \"AM Peak\", 1)" - ] - }, - { - "cell_type": "markdown", - "id": "bfc98904-dbed-4302-8a9c-55adea3676b9", - "metadata": {}, - "source": [ - "### Test aggregating with March" - ] - }, - { - "cell_type": "code", - "execution_count": 122, - "id": "0fd73539-ea23-4b0a-9509-ae0162f512af", - "metadata": {}, - "outputs": [], - "source": [ - "mar_date = \"2023-03-15\"" - ] - }, - { - "cell_type": "code", - "execution_count": 123, - "id": "d491e2ca-9da1-4ccc-a86e-ff535b5d2ace", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'mar_df' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[123], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mmar_df\u001b[49m\u001b[38;5;241m.\u001b[39msample()\n", - "\u001b[0;31mNameError\u001b[0m: name 'mar_df' is not defined" - ] - } - ], - "source": [ - "mar_df.sample()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cd2a6ffd-03a9-432a-ae0d-ac15f795278e", - "metadata": {}, - "outputs": [], - "source": [ - "mar_df2 = add_scheduled_trip_columns(df_2023_03_15, mar_date, [\"trip_instance_key\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f9a78bb6-f91e-42ad-85da-b954b606c050", - "metadata": {}, - "outputs": [], - "source": [ - "mar_df2.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "988731d1-b062-43e5-896d-8bad990fdf46", - "metadata": {}, - "outputs": [], - "source": [ - "mar_intermediary, mar_final = average_route_speeds_for_export(mar_df2, mar_date, 70)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "64960b35-b4ac-43e4-9e6f-594e349fe7e2", - "metadata": {}, - "outputs": [], - "source": [ - "mar_final.columns" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": 
"c70a9b62-f855-405b-9271-6b129b70cab9", - "metadata": {}, - "outputs": [], - "source": [ - "mar_final.sample(3).drop(columns=[\"base64_url\", \"geometry\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf6c84be-eb6d-4142-9ec7-2c737cd96517", - "metadata": {}, - "outputs": [], - "source": [ - "checkout_route(mar_intermediary, mar_final, \"4763\", \"PM Peak\", 0)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1993266f-05df-4613-9573-29402822b9da", - "metadata": {}, - "outputs": [], "source": [ "checkout_route(mar_intermediary, mar_final, \"332\", \"Evening\", 0)" ]