diff --git a/rt_segment_speeds/18_speed_distribution.ipynb b/rt_segment_speeds/18_speed_distribution.ipynb new file mode 100644 index 000000000..61a84e8d5 --- /dev/null +++ b/rt_segment_speeds/18_speed_distribution.ipynb @@ -0,0 +1,1003 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "1266be23-2c40-438d-90c0-eef6dcf3c621", + "metadata": {}, + "source": [ + "# Distribution of speeds\n", + "\n", + "Spot-checking Big Blue Bus speeds of existing speed maps and these `p20_mph` speeds.\n", + "\n", + "Average speeds only throws away too-high speeds (above 70 mph), but not anything too low.\n", + "\n", + "It looks like we're keeping way too many observations going into the averages, and we need to be more aggressive in excluding unstable speed calculations (speeds that are derived over a too-short-distance or too-short-time). Too-short-time means our denominator is approaching zero, and the calculations could be wildly unstable as we approach the asymptote. The same is probably happening for too-short-distances. " + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "776b8208-ad41-4c32-a064-99ab87fb371a", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "os.environ['USE_PYGEOS']='0'\n", + "\n", + "import altair as alt\n", + "import dask.dataframe as dd\n", + "import geopandas as gpd\n", + "import numpy as np\n", + "import pandas as pd\n", + "\n", + "from shared_utils import rt_dates, rt_utils\n", + "from segment_speed_utils.project_vars import SEGMENT_GCS" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "e764cc9e-7113-4672-8943-dc4827b80ea2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'2023-07-12'" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "months = [\"mar\", \"apr\", \"may\", \"jun\", \"jul\"]\n", + "\n", + "dates = [\n", + " rt_dates.DATES[f\"{m}2023\"] for m in months\n", + "]\n", + "analysis_date = dates[-1]\n", + "analysis_date" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "43f5ec34-f301-43cc-898a-ed2e31a1778d", + "metadata": {}, + "outputs": [], + "source": [ + "test_operator = \"Big Blue Bus VehiclePositions\"\n", + "test_org = \"City of Santa Monica\"\n", + "\n", + "pub_df = pd.read_parquet(\n", + " f\"{SEGMENT_GCS}export/avg_speeds_stop_segments_{analysis_date}_tabular.parquet\", \n", + " filters = [[(\"agency\", \"==\", test_org)]]\n", + ")\n", + "\n", + "test_operator_key = pub_df.gtfs_dataset_key.iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "a728d1c4-1777-4706-8df6-81670730a2b7", + "metadata": {}, + "outputs": [], + "source": [ + "def import_avg_speeds(date: str, **kwargs) -> gpd.GeoDataFrame: \n", + " avg_speeds = gpd.read_parquet(\n", + " f\"{SEGMENT_GCS}avg_speeds_stop_segments_{date}.parquet\", \n", + " **kwargs\n", + " )\n", + " \n", + " return avg_speeds\n", + "\n", + "\n", + "def import_trip_speeds(date: str, **kwargs) -> pd.DataFrame:\n", + " trips = dd.read_parquet(\n", + " f\"{SEGMENT_GCS}speeds_stop_segments_{analysis_date}\",\n", + " **kwargs\n", + " ).compute()\n", + " \n", + " return trips" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "bfcd3915-8f86-483f-a4ee-7b17b09c8f9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['shape_array_key', 'stop_sequence', 'gtfs_dataset_key', 'stop_id',\n", + " 'loop_or_inlining', 'district', 'district_name', 'p50_mph', 'n_trips',\n", + " 'p20_mph', 'p80_mph', 'time_of_day', 'shape_id', 'base64_url', 'uri',\n", + " 'org_id', 'agency'],\n", + " dtype='object')" + ] + }, + "execution_count": 5, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pub_df.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "89f1bbad-a050-4ed9-a96e-45180a8aced8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['de70089f186a809de6685c056377f892'], dtype=object)" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pub_df[pub_df.shape_id==\"26347\"].shape_array_key.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "bc23981e-6ca9-4339-8064-f01544b873e9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['080f585295228f8c8f52cb373b1685cc'], dtype=object)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pub_df[pub_df.shape_id==\"26348\"].shape_array_key.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "728161ad-de9b-43ec-a2ef-9d6fa981976d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['5d34851ee46adb62216152f8a16fe7d0'], dtype=object)" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "pub_df[pub_df.shape_id==\"26342\"].shape_array_key.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "9aa0f04d-24a0-4786-892d-ec6723838bfc", + "metadata": {}, + "outputs": [], + "source": [ + "test_shape1 = \"de70089f186a809de6685c056377f892\"\n", + "test_shape2 = \"080f585295228f8c8f52cb373b1685cc\"\n", + "test_shape3 = \"5d34851ee46adb62216152f8a16fe7d0\"" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "5eb43efb-53ac-41cc-a7cd-21cdcd6f11d5", + "metadata": {}, + "outputs": [], + "source": [ + "avg_speeds = import_avg_speeds(\n", + " analysis_date, \n", + " filters = [[(\"gtfs_dataset_key\", \"==\", test_operator_key)]],\n", + " columns = [\"shape_array_key\", \"stop_sequence\",\n", + " \"p20_mph\", \"p50_mph\", \"p80_mph\", \n", + " \"n_trips\", \"time_of_day\",\n", + " \"geometry\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "7a6feb4c-c273-43f5-9d3d-361d83745f7c", + "metadata": {}, + "outputs": [], + "source": [ + "trip_speeds = import_trip_speeds(\n", + " analysis_date,\n", + " filters = [[(\"gtfs_dataset_key\", \"==\", test_operator_key)]],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "8f3dd9fb-93bd-4d8b-bc22-5cfb6ccd53b8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
shape_array_keystop_sequencep20_mphp50_mphp80_mphn_tripstime_of_daygeometry
201857de70089f186a809de6685c056377f892291.683.779.893all_dayLINESTRING (-118.48871 34.02165, -118.48951 34...
\n", + "
" + ], + "text/plain": [ + " shape_array_key stop_sequence p20_mph p50_mph \\\n", + "201857 de70089f186a809de6685c056377f892 29 1.68 3.77 \n", + "\n", + " p80_mph n_trips time_of_day \\\n", + "201857 9.89 3 all_day \n", + "\n", + " geometry \n", + "201857 LINESTRING (-118.48871 34.02165, -118.48951 34... " + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_speeds[(avg_speeds.shape_array_key==test_shape1) & \n", + " (avg_speeds.stop_sequence==29)]" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "78826c8e-865e-4e8c-9e23-ee9c599d3867", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ nan, 3.76987431, 13.96246222, 0.29230316])" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trip_speeds[(trip_speeds.shape_array_key==test_shape1) & \n", + " (trip_speeds.stop_sequence==29)].speed_mph.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "b7afc829-0833-426b-9b63-c47f3de70ef8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_speeds[avg_speeds.shape_array_key==test_shape1].explore(\n", + " \"p50_mph\", \n", + " tiles = \"CartoDB Positron\",\n", + " cmap = rt_utils.ZERO_THIRTY_COLORSCALE\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "560c3590-c919-4f79-8bbe-8bc87a5b3103", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
Make this Notebook Trusted to load map: File -> Trust Notebook
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "avg_speeds[avg_speeds.shape_array_key==test_shape3].explore(\n", + " \"p50_mph\", \n", + " tiles = \"CartoDB Positron\",\n", + " cmap = rt_utils.ZERO_THIRTY_COLORSCALE\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "2609f100-8eee-400b-a63d-5de271355905", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ nan, 11.85504701, 8.30953561, 8.11102264, 3.30187334,\n", + " 13.55831359, 3.12661394, 0. , 12.39887132, 9.04728661,\n", + " 12.95855883, 11.64274665, 13.80783397, 8.7487871 , 8.11962361,\n", + " 1.13056783, 8.76647662, 1.91638667, 12.15852066, 9.74048341,\n", + " 6.32444451, 10.52354078, 12.24562967, 6.69858223, 4.280623 ,\n", + " 13.21839885])" + ] + }, + "execution_count": 16, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trip_speeds[(trip_speeds.shape_array_key==test_shape3) & \n", + " (trip_speeds.stop_sequence==6)].speed_mph.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "f8a5fe95-979b-4ba7-9cbe-d1c22ccdedf0", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 0. , 1.13056783, 1.91638667, 3.12661394, 3.30187334,\n", + " 4.280623 , 6.32444451, 6.69858223, 8.11102264, 8.11962361,\n", + " 8.30953561, 8.7487871 , 8.76647662, 9.04728661, 9.74048341,\n", + " 10.52354078, 11.64274665, 11.85504701, 12.15852066, 12.24562967,\n", + " 12.39887132, 12.95855883, 13.21839885, 13.55831359, 13.80783397,\n", + " nan])" + ] + }, + "execution_count": 17, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_segment = trip_speeds[\n", + " (trip_speeds.shape_array_key==test_shape3) & \n", + " (trip_speeds.stop_sequence==6)]\n", + "\n", + "np.sort(one_segment.speed_mph.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "0897e604-344c-4700-8edf-4221953f1c12", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array([ 1.13056783, 1.91638667, 3.12661394, 3.30187334, 4.280623 ,\n", + " 6.32444451, 6.69858223, 8.11102264, 8.11962361, 8.30953561,\n", + " 8.7487871 , 8.76647662, 9.04728661, 9.74048341, 10.52354078,\n", + " 11.64274665, 11.85504701, 12.15852066, 12.24562967, 12.39887132,\n", + " 12.95855883, 13.21839885, 13.55831359, 13.80783397])" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_segment_filtered = one_segment[(one_segment.sec_elapsed > 0) & \n", + " (one_segment.meters_elapsed > 0) & \n", + " (one_segment.speed_mph.notna())\n", + " ]\n", + "\n", + "np.sort(one_segment_filtered.speed_mph.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6989271c-05b7-47bf-bf94-ef262c7ddc4f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 53.000000\n", + "mean 102.879312\n", + "std 121.899773\n", + "min 0.000000\n", + "25% 0.000000\n", + "50% 0.000000\n", + "75% 254.725516\n", + "max 271.589045\n", + "Name: meters_elapsed, dtype: float64" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_segment.meters_elapsed.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "601277ea-f897-42e4-824b-15aed1a74cc2", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/tmp/ipykernel_1462/2851869297.py:4: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", + "/tmp/ipykernel_1462/2851869297.py:7: SettingWithCopyWarning: \n", + "A value is trying to be set on a copy of a slice from a DataFrame.\n", + "Try using .loc[row_indexer,col_indexer] = value instead\n", + "\n", + "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n" + ] + } + ], + "source": [ + "bins = [0, 5, 10, 15, 20, 25, 30, 35, 40]\n", + "meter_bins = [0, 50, 100, 150, 200, 250, 300]\n", + "\n", + "one_segment['speed_binned'] = pd.cut(\n", + " one_segment.speed_mph, bins).apply(lambda x: x.left)\n", + "\n", + "one_segment[\"meters_binned\"] = pd.cut(\n", + " one_segment.meters_elapsed, meter_bins).apply(lambda x: x.left)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "01238681-f672-41d2-bd4b-ec161cf74bd8", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "
\n", + "" + ], + "text/plain": [ + "alt.Chart(...)" + ] + }, + "execution_count": 23, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "(alt.Chart(one_segment)\n", + " .mark_tick()\n", + " .encode(\n", + " x=\"meters_binned:O\",\n", + " y=\"speed_mph:Q\"\n", + " ).interactive()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "ca2a05aa-f398-4ec9-af86-29e3acd4777d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
trip_idmeters_elapsedsec_elapsedspeed_mph
595390300722.23736544.01.130568
5955902944268.139932313.01.916387
5936902985271.150248194.03.126614
5932902987271.589045184.03.301873
597190300286.10998445.04.280623
5963902978271.41111996.06.324445
5968902943266.50595489.06.698582
5930902971177.66656649.08.111023
5952902997192.37373853.08.119624
5928902960167.15650545.08.309536
5947902974195.54732050.08.748787
5954903005254.72551665.08.766477
5940902966181.99727245.09.047287
5959902968235.13013154.09.740483
5965902956216.39824646.010.523541
5945902959260.23126250.011.642747
5927902941264.97646450.011.855047
5956902980244.58356345.012.158521
5967902961268.23238949.012.245630
5938902995271.58904549.012.398871
5942902950260.67731245.012.958559
5972902965265.90431345.013.218399
5935902979266.68117944.013.558314
5946902972271.58904544.013.807834
\n", + "
" + ], + "text/plain": [ + " trip_id meters_elapsed sec_elapsed speed_mph\n", + "5953 903007 22.237365 44.0 1.130568\n", + "5955 902944 268.139932 313.0 1.916387\n", + "5936 902985 271.150248 194.0 3.126614\n", + "5932 902987 271.589045 184.0 3.301873\n", + "5971 903002 86.109984 45.0 4.280623\n", + "5963 902978 271.411119 96.0 6.324445\n", + "5968 902943 266.505954 89.0 6.698582\n", + "5930 902971 177.666566 49.0 8.111023\n", + "5952 902997 192.373738 53.0 8.119624\n", + "5928 902960 167.156505 45.0 8.309536\n", + "5947 902974 195.547320 50.0 8.748787\n", + "5954 903005 254.725516 65.0 8.766477\n", + "5940 902966 181.997272 45.0 9.047287\n", + "5959 902968 235.130131 54.0 9.740483\n", + "5965 902956 216.398246 46.0 10.523541\n", + "5945 902959 260.231262 50.0 11.642747\n", + "5927 902941 264.976464 50.0 11.855047\n", + "5956 902980 244.583563 45.0 12.158521\n", + "5967 902961 268.232389 49.0 12.245630\n", + "5938 902995 271.589045 49.0 12.398871\n", + "5942 902950 260.677312 45.0 12.958559\n", + "5972 902965 265.904313 45.0 13.218399\n", + "5935 902979 266.681179 44.0 13.558314\n", + "5946 902972 271.589045 44.0 13.807834" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "one_segment_filtered.sort_values(\"speed_mph\")[\n", + " [\"trip_id\", \"meters_elapsed\", \"sec_elapsed\", \"speed_mph\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "502c330e-52bc-409c-8a92-9f179f3d6852", + "metadata": {}, + "outputs": [], + "source": [ + "speed_distribution = np.sort(one_segment_filtered.speed_mph.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "1274bfc2-0c42-4ef3-939f-d5f93afaed13", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "8.906881612224176" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.quantile(speed_distribution, 0.5)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "3b031a5d-2813-44dc-80d6-11548142d61c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "5.506915906639923" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "np.quantile(speed_distribution, 0.2)" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "4ff51893-12cd-450b-810e-a34c3a16ecda", + "metadata": {}, + "outputs": [], + "source": [ + "operator_segments = gpd.read_parquet(\n", + " f\"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet\",\n", + " filters = [[(\"gtfs_dataset_key\", \"==\", test_operator_key)]]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "d981a2fc-53f3-4989-a8f0-cc3e4729609b", + "metadata": {}, + "outputs": [], + "source": [ + "operator_segments = operator_segments.assign(\n", + " segment_meters = operator_segments.geometry.length\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "54dac8b1-c5ac-47e6-9910-2c864d3e46f9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 2211.000000\n", + "mean 431.703997\n", + "std 635.401191\n", + "min 0.000000\n", + "25% 265.627183\n", + "50% 343.305085\n", + "75% 442.045878\n", + "max 17762.088740\n", + "Name: segment_meters, dtype: float64" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "operator_segments.segment_meters.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a6c8c131-31c5-4f17-b82a-2978a941f9d2", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/rt_segment_speeds/ca_segment_speeds.ipynb b/rt_segment_speeds/ca_segment_speeds.ipynb deleted file mode 100644 index b2256516c..000000000 --- a/rt_segment_speeds/ca_segment_speeds.ipynb +++ /dev/null @@ -1,141 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "9f04613d-8fa7-44d1-8827-3c55e2351ab0", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture\n", - "import warnings\n", - "warnings.filterwarnings(\"ignore\")\n", - "\n", - "import branca\n", - "import calitp.magics\n", - "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "from shared_utils import portfolio_utils\n", - "from update_vars import SEGMENT_GCS, analysis_date\n", - "\n", - "ZERO_FIFTY_COLORSCALE = branca.colormap.step.RdYlGn_10.scale(\n", - " vmin=0, \n", - " vmax=50\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "4e37258d-b752-467f-948c-6a2d5391e8b0", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'district' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[2], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Read in data\u001b[39;00m\n\u001b[1;32m 2\u001b[0m gdf \u001b[38;5;241m=\u001b[39m gpd\u001b[38;5;241m.\u001b[39mread_parquet(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mSEGMENT_GCS\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124mavg_speeds_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00manalysis_date\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.parquet\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m gdf \u001b[38;5;241m=\u001b[39m gdf[gdf\u001b[38;5;241m.\u001b[39mdistrict\u001b[38;5;241m==\u001b[39m\u001b[43mdistrict\u001b[49m]\u001b[38;5;241m.\u001b[39mreset_index(drop\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# Use dict to key into district name because D9 is missing data\u001b[39;00m\n\u001b[1;32m 6\u001b[0m district_name \u001b[38;5;241m=\u001b[39m portfolio_utils\u001b[38;5;241m.\u001b[39mdistrict_name_dict[district]\n", - "\u001b[0;31mNameError\u001b[0m: name 'district' is not defined" - ] - } - ], - "source": [ - "# Read in data\n", - "gdf = gpd.read_parquet(f\"{SEGMENT_GCS}avg_speeds_{analysis_date}.parquet\")\n", - "gdf = gdf[gdf.district==district].reset_index(drop=True)\n", - "\n", - "# Use dict to key into district name because D9 is missing data\n", - "district_name = portfolio_utils.district_name_dict[district]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7a222bbd-4fb0-43f4-b189-63edc35ce1f8", - "metadata": {}, - "outputs": [], - "source": [ - "%%capture_parameters\n", - "district, district_name" - ] - }, - { - "cell_type": "markdown", - "id": "0dda7c36-29f0-457f-9932-f2277d2c8ea1", - "metadata": {}, - "source": [ - "# {district_name}\n", - "\n", - "* Goal: statewide map for daily segment speeds\n", - "* v2 warehouse (01/18/23)\n", - "* calculate time-of-day averages" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0ae01433-865d-4820-94fe-a62a6c4c6376", - "metadata": {}, - "outputs": [], - "source": [ - "#for s in range(30, 75, 5):\n", - "# print(f\"# rows with over {s} mph: {len(gdf[gdf.speed_mph > s])}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "113b8300-6992-497f-977e-5bdac9650d15", - "metadata": {}, - "outputs": [], - "source": [ - "def make_map(gdf: gpd.GeoDataFrame, district: int):\n", - " if len(gdf) > 0:\n", - " m = gdf.explore(\n", - " \"speed_mph\",\n", - " tiles = \"CartoDB Positron\",\n", - " cmap = ZERO_FIFTY_COLORSCALE\n", - " )\n", - "\n", - " display(m)\n", - " else:\n", - " print(f\"No RT trip info available in district {district}.\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e92ff10b-ff10-4900-b503-e64848ee8f54", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(gdf, district)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/rt_segment_speeds/logs/cut_stop_segments.log b/rt_segment_speeds/logs/cut_stop_segments.log index df7e8ea74..2e3075997 100644 --- a/rt_segment_speeds/logs/cut_stop_segments.log +++ b/rt_segment_speeds/logs/cut_stop_segments.log @@ -37,3 +37,27 @@ 2023-07-25 15:31:25.425 | INFO | __main__::313 - Cut special stop segments: 0:28:46.570691 2023-07-25 15:31:26.518 | INFO | __main__::332 - export results: 0:00:01.093220 2023-07-25 15:31:26.520 | INFO | __main__::333 - execution time: 0:28:47.663911 +2023-07-25 16:07:13.609 | INFO | __main__::198 - Analysis date: 2023-07-12 +2023-07-25 16:08:07.369 | INFO | __main__::240 - Cut normal stop segments: 0:00:53.759088 +2023-07-25 16:19:44.770 | INFO | __main__::252 - Export results: 0:11:37.401262 +2023-07-25 16:19:44.770 | INFO | __main__::255 - execution time: 0:12:31.160955 +2023-07-25 16:20:26.164 | INFO | __main__::293 - Analysis date: 2023-07-12 +2023-07-25 16:48:16.067 | INFO | __main__::313 - Cut special stop segments: 0:27:49.863811 +2023-07-25 16:48:17.367 | INFO | __main__::332 - export results: 0:00:01.299823 +2023-07-25 16:48:17.369 | INFO | __main__::333 - execution time: 0:27:51.163634 +2023-07-26 12:20:36.330 | INFO | __main__::198 - Analysis date: 2023-04-12 +2023-07-26 12:21:45.345 | INFO | __main__::240 - Cut normal stop segments: 0:01:08.999538 +2023-07-26 12:38:38.885 | INFO | __main__::252 - Export results: 0:16:53.539426 +2023-07-26 12:38:38.887 | INFO | __main__::255 - execution time: 0:18:02.541602 +2023-07-26 12:39:04.242 | INFO | __main__::293 - Analysis date: 2023-04-12 +2023-07-26 13:24:15.235 | INFO | __main__::313 - Cut special stop segments: 0:45:10.983686 +2023-07-26 13:24:16.470 | INFO | __main__::332 - export results: 0:00:01.234829 +2023-07-26 13:24:16.474 | INFO | __main__::333 - execution time: 0:45:12.218515 +2023-07-26 13:47:14.875 | INFO | __main__::198 - Analysis date: 2023-03-15 +2023-07-26 13:48:21.112 | INFO | __main__::240 - Cut normal stop segments: 0:01:06.234095 +2023-07-26 14:04:28.944 | INFO | __main__::252 - Export results: 0:16:07.832295 +2023-07-26 14:04:28.946 | INFO | __main__::255 - execution time: 0:17:14.068003 +2023-07-26 14:04:55.833 | INFO | __main__::293 - Analysis date: 2023-03-15 +2023-07-26 14:45:50.780 | INFO | __main__::313 - Cut special stop segments: 0:40:54.937581 +2023-07-26 14:45:52.168 | INFO | __main__::332 - export results: 0:00:01.388009 +2023-07-26 14:45:52.170 | INFO | __main__::333 - execution time: 0:40:56.325590 diff --git a/rt_segment_speeds/logs/prep_stop_segments.log b/rt_segment_speeds/logs/prep_stop_segments.log index 45c4d8cee..aaf61620b 100644 --- a/rt_segment_speeds/logs/prep_stop_segments.log +++ b/rt_segment_speeds/logs/prep_stop_segments.log @@ -18,3 +18,10 @@ 2023-07-25 14:39:19.717 | INFO | __main__::290 - Analysis date: 2023-06-14 2023-07-25 14:49:25.861 | INFO | __main__::297 - Prep stop segment df: 0:10:06.141854 2023-07-25 14:50:12.947 | INFO | __main__::307 - execution time: 0:10:53.228185 +2023-07-25 15:55:48.582 | INFO | __main__::290 - Analysis date: 2023-07-12 +2023-07-26 12:01:47.353 | INFO | __main__::290 - Analysis date: 2023-04-12 +2023-07-26 12:19:11.448 | INFO | __main__::297 - Prep stop segment df: 0:17:24.080713 +2023-07-26 12:20:13.208 | INFO | __main__::307 - execution time: 0:18:25.839838 +2023-07-26 13:31:19.892 | INFO | __main__::290 - Analysis date: 2023-03-15 +2023-07-26 13:45:57.782 | INFO | __main__::297 - Prep stop segment df: 0:14:37.886934 +2023-07-26 13:46:51.502 | INFO | __main__::307 - execution time: 0:15:31.607489 diff --git a/rt_segment_speeds/logs/sjoin_vp_segments.log b/rt_segment_speeds/logs/sjoin_vp_segments.log index 16eb2d1ac..b87c6e79a 100644 --- a/rt_segment_speeds/logs/sjoin_vp_segments.log +++ b/rt_segment_speeds/logs/sjoin_vp_segments.log @@ -75,19 +75,27 @@ 2023-06-30 13:41:14.364 | INFO | __main__::298 - attach vp to stop-to-stop segments: 0:46:03.991197 2023-06-30 13:42:25.427 | INFO | __main__::308 - compiled parquets: 0:01:11.062989 2023-06-30 13:42:25.430 | INFO | __main__::309 - execution time: 0:47:15.054186 -2023-07-21 15:14:18.352 | INFO | __main__::286 - Analysis date: 2023-07-12 -2023-07-21 16:00:08.511 | INFO | __main__::298 - attach vp to stop-to-stop segments: 0:45:50.129886 -2023-07-21 16:01:13.055 | INFO | __main__::308 - compiled parquets: 0:01:04.543964 -2023-07-21 16:01:13.056 | INFO | __main__::309 - execution time: 0:46:54.673850 -2023-07-24 15:17:47.583 | INFO | __main__::286 - Analysis date: 2023-05-17 -2023-07-24 15:59:43.505 | INFO | __main__::298 - attach vp to stop-to-stop segments: 0:41:55.910214 -2023-07-24 16:00:43.071 | INFO | __main__::308 - compiled parquets: 0:00:59.566345 -2023-07-24 16:00:43.072 | INFO | __main__::309 - execution time: 0:42:55.476559 -2023-07-24 20:46:21.403 | INFO | __main__::286 - Analysis date: 2023-06-14 -2023-07-24 21:26:37.726 | INFO | __main__::298 - attach vp to stop-to-stop segments: 0:40:16.322554 -2023-07-24 21:27:37.057 | INFO | __main__::308 - compiled parquets: 0:00:59.331107 -2023-07-24 21:27:37.058 | INFO | __main__::309 - execution time: 0:41:15.653661 -2023-07-25 10:03:02.972 | INFO | __main__::286 - Analysis date: 2023-07-12 -2023-07-25 10:47:55.030 | INFO | __main__::298 - attach vp to stop-to-stop segments: 0:44:52.057560 -2023-07-25 10:48:51.630 | INFO | __main__::308 - compiled parquets: 0:00:56.599756 -2023-07-25 10:48:51.630 | INFO | __main__::309 - execution time: 0:45:48.657316 +2023-07-25 16:52:12.138 | INFO | __main__::286 - Analysis date: 2023-05-17 +2023-07-25 17:34:09.962 | INFO | __main__::298 - attach vp to stop-to-stop segments: 0:41:57.812287 +2023-07-25 17:35:06.440 | INFO | __main__::308 - compiled parquets: 0:00:56.477889 +2023-07-25 17:35:06.441 | INFO | __main__::309 - execution time: 0:42:54.290176 +2023-07-25 19:29:22.771 | INFO | __main__::286 - Analysis date: 2023-06-14 +2023-07-25 20:10:03.035 | INFO | __main__::298 - attach vp to stop-to-stop segments: 0:40:40.263436 +2023-07-25 20:11:00.673 | INFO | __main__::308 - compiled parquets: 0:00:57.637347 +2023-07-25 20:11:00.675 | INFO | __main__::309 - execution time: 0:41:37.900783 +2023-07-25 21:27:52.866 | INFO | __main__::286 - Analysis date: 2023-07-12 +2023-07-25 22:11:55.296 | INFO | __main__::298 - attach vp to stop-to-stop segments: 0:44:02.429077 +2023-07-25 22:13:11.709 | INFO | __main__::308 - compiled parquets: 0:01:16.412710 +2023-07-25 22:13:11.713 | INFO | __main__::309 - execution time: 0:45:18.841787 +2023-07-26 15:09:54.854 | INFO | __main__::286 - Analysis date: 2023-03-15 +2023-07-26 16:14:01.117 | INFO | __main__::298 - attach vp to stop-to-stop segments: 1:04:06.237100 +2023-07-26 16:15:04.057 | INFO | __main__::308 - compiled parquets: 0:01:02.939853 +2023-07-26 16:15:04.058 | INFO | __main__::309 - execution time: 1:05:09.176953 +2023-07-26 18:23:50.016 | INFO | __main__::286 - Analysis date: 2023-03-15 +2023-07-26 19:31:45.629 | INFO | __main__::298 - attach vp to stop-to-stop segments: 1:07:55.611782 +2023-07-26 19:32:43.933 | INFO | __main__::308 - compiled parquets: 0:00:58.304490 +2023-07-26 19:32:43.944 | INFO | __main__::309 - execution time: 1:08:53.916272 +2023-07-26 21:21:05.389 | INFO | __main__::286 - Analysis date: 2023-04-12 +2023-07-26 22:22:19.205 | INFO | __main__::298 - attach vp to stop-to-stop segments: 1:01:13.776816 +2023-07-26 22:23:14.796 | INFO | __main__::308 - compiled parquets: 0:00:55.591754 +2023-07-26 22:23:14.798 | INFO | __main__::309 - execution time: 1:02:09.368570 diff --git a/rt_segment_speeds/logs/speeds_by_segment_trip.log b/rt_segment_speeds/logs/speeds_by_segment_trip.log index dacbb21f9..e72c8681c 100644 --- a/rt_segment_speeds/logs/speeds_by_segment_trip.log +++ b/rt_segment_speeds/logs/speeds_by_segment_trip.log @@ -31,18 +31,28 @@ 2023-05-20 10:56:10.880 | INFO | __main__:linear_referencing_and_speed_by_segment:78 - calculate speeds: 0:00:00.000678 2023-05-20 11:50:39.921 | INFO | __main__::115 - speeds for stop segments: 0:54:29.046501 2023-05-20 11:50:39.924 | INFO | __main__::116 - execution time: 0:54:29.049154 -2023-07-24 16:22:19.702 | INFO | __main__::116 - Analysis date: 2023-05-17 -2023-07-24 16:52:00.420 | INFO | __main__:linear_referencing_and_speed_by_segment:84 - linear referencing: 0:29:40.696934 -2023-07-24 16:52:00.426 | INFO | __main__:linear_referencing_and_speed_by_segment:96 - calculate speeds: 0:00:00.006454 -2023-07-24 17:19:08.400 | INFO | __main__::127 - speeds for stop segments: 0:56:48.681858 -2023-07-24 17:19:08.401 | INFO | __main__::128 - execution time: 0:56:48.682781 -2023-07-24 21:48:17.827 | INFO | __main__::116 - Analysis date: 2023-06-14 -2023-07-24 22:17:10.701 | INFO | __main__:linear_referencing_and_speed_by_segment:84 - linear referencing: 0:28:52.868856 -2023-07-24 22:17:10.707 | INFO | __main__:linear_referencing_and_speed_by_segment:96 - calculate speeds: 0:00:00.005970 -2023-07-24 22:43:17.406 | INFO | __main__::127 - speeds for stop segments: 0:54:59.578336 -2023-07-24 22:43:17.407 | INFO | __main__::128 - execution time: 0:54:59.579301 -2023-07-25 11:12:28.456 | INFO | __main__::116 - Analysis date: 2023-07-12 -2023-07-25 11:49:17.338 | INFO | __main__:linear_referencing_and_speed_by_segment:84 - linear referencing: 0:36:48.876648 -2023-07-25 11:49:17.344 | INFO | __main__:linear_referencing_and_speed_by_segment:96 - calculate speeds: 0:00:00.006385 -2023-07-25 12:16:28.698 | INFO | __main__::127 - speeds for stop segments: 1:04:00.240843 -2023-07-25 12:16:28.699 | INFO | __main__::128 - execution time: 1:04:00.241659 +2023-07-25 17:55:40.998 | INFO | __main__::116 - Analysis date: 2023-05-17 +2023-07-25 18:25:17.498 | INFO | __main__:linear_referencing_and_speed_by_segment:84 - linear referencing: 0:29:36.467955 +2023-07-25 18:25:17.508 | INFO | __main__:linear_referencing_and_speed_by_segment:96 - calculate speeds: 0:00:00.009634 +2023-07-25 18:51:59.936 | INFO | __main__::127 - speeds for stop segments: 0:56:18.910904 +2023-07-25 18:51:59.937 | INFO | __main__::128 - execution time: 0:56:18.911558 +2023-07-25 20:31:00.338 | INFO | __main__::116 - Analysis date: 2023-06-14 +2023-07-25 20:59:50.907 | INFO | __main__:linear_referencing_and_speed_by_segment:84 - linear referencing: 0:28:50.563683 +2023-07-25 20:59:50.913 | INFO | __main__:linear_referencing_and_speed_by_segment:96 - calculate speeds: 0:00:00.006050 +2023-07-25 21:26:12.883 | INFO | __main__::127 - speeds for stop segments: 0:55:12.544803 +2023-07-25 21:26:12.884 | INFO | __main__::128 - execution time: 0:55:12.545554 +2023-07-25 22:48:46.313 | INFO | __main__::116 - Analysis date: 2023-07-12 +2023-07-25 23:31:55.464 | INFO | __main__:linear_referencing_and_speed_by_segment:84 - linear referencing: 0:43:09.123043 +2023-07-25 23:31:55.493 | INFO | __main__:linear_referencing_and_speed_by_segment:96 - calculate speeds: 0:00:00.028630 +2023-07-26 00:00:04.530 | INFO | __main__::127 - speeds for stop segments: 1:11:18.196214 +2023-07-26 00:00:04.531 | INFO | __main__::128 - execution time: 1:11:18.197170 +2023-07-26 20:02:52.083 | INFO | __main__::116 - Analysis date: 2023-03-15 +2023-07-26 20:34:36.201 | INFO | __main__:linear_referencing_and_speed_by_segment:84 - linear referencing: 0:31:44.113095 +2023-07-26 20:34:36.209 | INFO | __main__:linear_referencing_and_speed_by_segment:96 - calculate speeds: 0:00:00.007907 +2023-07-26 21:12:55.949 | INFO | __main__::127 - speeds for stop segments: 1:10:03.865319 +2023-07-26 21:12:55.950 | INFO | __main__::128 - execution time: 1:10:03.866321 +2023-07-26 22:48:56.041 | INFO | __main__::116 - Analysis date: 2023-04-12 +2023-07-26 23:21:16.317 | INFO | __main__:linear_referencing_and_speed_by_segment:84 - linear referencing: 0:32:20.261839 +2023-07-26 23:21:16.327 | INFO | __main__:linear_referencing_and_speed_by_segment:96 - calculate speeds: 0:00:00.009498 +2023-07-26 23:50:59.023 | INFO | __main__::127 - speeds for stop segments: 1:02:02.972239 +2023-07-26 23:50:59.024 | INFO | __main__::128 - execution time: 1:02:02.973670 diff --git a/rt_segment_speeds/logs/usable_rt_vp.log b/rt_segment_speeds/logs/usable_rt_vp.log index 7fc9bbad3..70907d754 100644 --- a/rt_segment_speeds/logs/usable_rt_vp.log +++ b/rt_segment_speeds/logs/usable_rt_vp.log @@ -16,3 +16,9 @@ 2023-07-14 13:40:47.502 | INFO | __main__::151 - Analysis date: 2023-07-12 2023-07-14 13:42:34.741 | INFO | __main__::164 - pare down vp 2023-07-14 13:42:34.742 | INFO | __main__::167 - execution time: 0:01:47.239089 +2023-07-26 18:18:52.727 | INFO | __main__::151 - Analysis date: 2023-03-15 +2023-07-26 18:23:25.887 | INFO | __main__::164 - pare down vp +2023-07-26 18:23:25.901 | INFO | __main__::167 - execution time: 0:04:33.151084 +2023-07-26 21:16:48.752 | INFO | __main__::151 - Analysis date: 2023-04-12 +2023-07-26 21:20:41.866 | INFO | __main__::164 - pare down vp +2023-07-26 21:20:41.895 | INFO | __main__::167 - execution time: 0:03:53.140859 diff --git a/rt_segment_speeds/logs/valid_vehicle_positions.log b/rt_segment_speeds/logs/valid_vehicle_positions.log index 93b10b576..7abaeca64 100644 --- a/rt_segment_speeds/logs/valid_vehicle_positions.log +++ b/rt_segment_speeds/logs/valid_vehicle_positions.log @@ -159,3 +159,57 @@ 2023-07-25 10:57:31.262 | INFO | __main__::344 - Analysis date: 2023-07-12 2023-07-25 11:10:59.353 | INFO | __main__::358 - pare down vp by stop segments special cases 0:13:28.085738 2023-07-25 11:10:59.355 | INFO | __main__::361 - execution time: 0:13:28.091919 +2023-07-25 17:35:24.578 | INFO | __main__::157 - Analysis date: 2023-05-17 +2023-07-25 17:38:17.011 | INFO | __main__:pare_down_vp_by_segment:130 - merge usable vp with sjoin results: 0:02:52.414746 +2023-07-25 17:38:17.141 | INFO | __main__:pare_down_vp_by_segment:139 - keep enter/exit points: 0:00:00.130051 +2023-07-25 17:42:44.665 | INFO | __main__:pare_down_vp_by_segment:145 - exported: 0:04:27.523972 +2023-07-25 17:42:44.667 | INFO | __main__::171 - pare down vp by stop segments normal cases 0:07:20.071086 +2023-07-25 17:42:44.667 | INFO | __main__::174 - execution time: 0:07:20.075709 +2023-07-25 17:43:02.687 | INFO | __main__::344 - Analysis date: 2023-05-17 +2023-07-25 17:54:28.505 | INFO | __main__::358 - pare down vp by stop segments special cases 0:11:25.812402 +2023-07-25 17:54:28.506 | INFO | __main__::361 - execution time: 0:11:25.817451 +2023-07-25 20:11:18.015 | INFO | __main__::157 - Analysis date: 2023-06-14 +2023-07-25 20:13:59.584 | INFO | __main__:pare_down_vp_by_segment:130 - merge usable vp with sjoin results: 0:02:41.562999 +2023-07-25 20:13:59.739 | INFO | __main__:pare_down_vp_by_segment:139 - keep enter/exit points: 0:00:00.154597 +2023-07-25 20:18:02.221 | INFO | __main__:pare_down_vp_by_segment:145 - exported: 0:04:02.482467 +2023-07-25 20:18:02.223 | INFO | __main__::171 - pare down vp by stop segments normal cases 0:06:44.202068 +2023-07-25 20:18:02.224 | INFO | __main__::174 - execution time: 0:06:44.207623 +2023-07-25 20:18:18.862 | INFO | __main__::344 - Analysis date: 2023-06-14 +2023-07-25 20:29:41.529 | INFO | __main__::358 - pare down vp by stop segments special cases 0:11:22.662479 +2023-07-25 20:29:41.530 | INFO | __main__::361 - execution time: 0:11:22.667589 +2023-07-25 22:13:36.796 | INFO | __main__::157 - Analysis date: 2023-07-12 +2023-07-25 22:16:54.225 | INFO | __main__:pare_down_vp_by_segment:130 - merge usable vp with sjoin results: 0:03:17.415267 +2023-07-25 22:16:54.345 | INFO | __main__:pare_down_vp_by_segment:139 - keep enter/exit points: 0:00:00.119414 +2023-07-25 22:22:49.255 | INFO | __main__:pare_down_vp_by_segment:145 - exported: 0:05:54.910487 +2023-07-25 22:22:49.260 | INFO | __main__::171 - pare down vp by stop segments normal cases 0:09:12.450290 +2023-07-25 22:22:49.263 | INFO | __main__::174 - execution time: 0:09:12.463640 +2023-07-25 22:23:28.707 | INFO | __main__::344 - Analysis date: 2023-07-12 +2023-07-25 22:46:48.934 | INFO | __main__::358 - pare down vp by stop segments special cases 0:23:20.216783 +2023-07-25 22:46:48.962 | INFO | __main__::361 - execution time: 0:23:20.251985 +2023-07-26 16:15:26.883 | INFO | __main__::157 - Analysis date: 2023-03-15 +2023-07-26 16:19:15.751 | INFO | __main__:pare_down_vp_by_segment:130 - merge usable vp with sjoin results: 0:03:48.836153 +2023-07-26 16:19:15.994 | INFO | __main__:pare_down_vp_by_segment:139 - keep enter/exit points: 0:00:00.242565 +2023-07-26 16:25:56.532 | INFO | __main__:pare_down_vp_by_segment:145 - exported: 0:06:40.538840 +2023-07-26 16:25:56.535 | INFO | __main__::171 - pare down vp by stop segments normal cases 0:10:29.620559 +2023-07-26 16:25:56.536 | INFO | __main__::174 - execution time: 0:10:29.629418 +2023-07-26 16:26:18.870 | INFO | __main__::344 - Analysis date: 2023-03-15 +2023-07-26 16:44:55.208 | INFO | __main__::358 - pare down vp by stop segments special cases 0:18:36.327270 +2023-07-26 16:44:55.211 | INFO | __main__::361 - execution time: 0:18:36.338508 +2023-07-26 19:33:08.404 | INFO | __main__::157 - Analysis date: 2023-03-15 +2023-07-26 19:36:57.092 | INFO | __main__:pare_down_vp_by_segment:130 - merge usable vp with sjoin results: 0:03:48.678126 +2023-07-26 19:36:57.276 | INFO | __main__:pare_down_vp_by_segment:139 - keep enter/exit points: 0:00:00.183628 +2023-07-26 19:43:45.644 | INFO | __main__:pare_down_vp_by_segment:145 - exported: 0:06:48.367899 +2023-07-26 19:43:45.651 | INFO | __main__::171 - pare down vp by stop segments normal cases 0:10:37.237054 +2023-07-26 19:43:45.654 | INFO | __main__::174 - execution time: 0:10:37.246745 +2023-07-26 19:44:10.257 | INFO | __main__::344 - Analysis date: 2023-03-15 +2023-07-26 20:01:26.984 | INFO | __main__::358 - pare down vp by stop segments special cases 0:17:16.715529 +2023-07-26 20:01:26.986 | INFO | __main__::361 - execution time: 0:17:16.724971 +2023-07-26 22:23:33.140 | INFO | __main__::157 - Analysis date: 2023-04-12 +2023-07-26 22:26:50.268 | INFO | __main__:pare_down_vp_by_segment:130 - merge usable vp with sjoin results: 0:03:17.096219 +2023-07-26 22:26:50.417 | INFO | __main__:pare_down_vp_by_segment:139 - keep enter/exit points: 0:00:00.148960 +2023-07-26 22:32:23.388 | INFO | __main__:pare_down_vp_by_segment:145 - exported: 0:05:32.971007 +2023-07-26 22:32:23.391 | INFO | __main__::171 - pare down vp by stop segments normal cases 0:08:50.218961 +2023-07-26 22:32:23.392 | INFO | __main__::174 - execution time: 0:08:50.225194 +2023-07-26 22:32:42.683 | INFO | __main__::344 - Analysis date: 2023-04-12 +2023-07-26 22:47:30.729 | INFO | __main__::358 - pare down vp by stop segments special cases 0:14:48.039668 +2023-07-26 22:47:30.730 | INFO | __main__::361 - execution time: 0:14:48.045353 diff --git a/rt_segment_speeds/scripts/calculate-trip-avg.ipynb b/rt_segment_speeds/scripts/calculate-trip-avg.ipynb deleted file mode 100644 index fecd4ac20..000000000 --- a/rt_segment_speeds/scripts/calculate-trip-avg.ipynb +++ /dev/null @@ -1,917 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "8f0f779b-50a4-4fa6-9fe9-9632c161316a", - "metadata": {}, - "source": [ - "# Average speeds across entire trip" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "52c8d239-4897-42e0-80e0-2352d4b0a79b", - "metadata": {}, - "outputs": [], - "source": [ - "#import os\n", - "#os.environ['USE_PYGEOS'] = '0'\n", - "# turning this off makes to_crs really slow\n", - "\n", - "import dask.dataframe as dd\n", - "import dask_geopandas as dg\n", - "import folium\n", - "import geopandas as gpd\n", - "import numpy as np\n", - "import pandas as pd\n", - "import shapely\n", - "\n", - "from segment_speed_utils import helpers, sched_rt_utils, wrangle_shapes\n", - "from segment_speed_utils.project_vars import (SEGMENT_GCS,\n", - " CONFIG_PATH, PROJECT_CRS\n", - " )\n", - "analysis_date = \"2023-05-17\"" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "id": "5e5d3ceb-f859-4583-b361-e1b51c668a68", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_parquet(f\"{SEGMENT_GCS}trip_summary/trip_speed_{analysis_date}.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 43, - "id": "07fb6ffa-6fdd-4a14-be21-c31d6afe653f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(31, 14)" - ] - }, - "execution_count": 43, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.speed_mph >= 60].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "id": "85dca4cc-9b3a-4b3a-806e-51a8d1539833", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(4686, 14)" - ] - }, - "execution_count": 42, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df[df.speed_mph <= 3].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "id": "fe0a82c9-214f-4c8f-9928-7da9b9a8adf3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(68556, 14)" - ] - }, - "execution_count": 41, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "7e3f17f1-1883-41c0-aee2-6f560812442a", - "metadata": {}, - "outputs": [], - "source": [ - "# in case there are fewer shapes to grab\n", - "shapes_list = df.shape_array_key.unique().tolist()\n", - "\n", - "shapes = helpers.import_scheduled_shapes(\n", - " analysis_date,\n", - " columns = [\"shape_array_key\",\"geometry\"],\n", - " filters = [[(\"shape_array_key\", \"in\", shapes_list)]],\n", - " get_pandas = True,\n", - " crs = PROJECT_CRS\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "0122c274-e71c-407e-84c3-743899e9b525", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.9/site-packages/pygeos/linear.py:87: RuntimeWarning: invalid value encountered in line_locate_point\n" - ] - } - ], - "source": [ - "linear_ref = wrangle_shapes.linear_reference_vp_against_segment(\n", - " df,\n", - " shapes,\n", - " segment_identifier_cols = [\"shape_array_key\"]\n", - ").compute()\n", - "\n", - "linear_ref.to_parquet(\"test.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0c66c64a-a4b9-454a-9e7e-f6253085c160", - "metadata": {}, - "outputs": [], - "source": [ - "linear_ref = pd.read_parquet(\"test.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "f4b557a8-019a-4ea5-b544-2a810515c5ab", - "metadata": {}, - "outputs": [], - "source": [ - "def distance_and_seconds_elapsed(\n", - " df: pd.DataFrame, \n", - " group_cols: list\n", - ") -> pd.DataFrame:\n", - " \"\"\"\n", - " If every trip has 3 vp, we want the change in time and distance\n", - " between 1st and 2nd, 2nd and 3rd.\n", - " Then, sum up the change in time and change by trip.\n", - " \"\"\"\n", - " dist_col = \"shape_meters\"\n", - " time_col = \"location_timestamp_local\"\n", - " sort_cols = group_cols + [\"vp_idx\"]\n", - " \n", - "\n", - " df = df.assign(\n", - " prior_dist = (df.sort_values(sort_cols)\n", - " .groupby(group_cols, \n", - " observed=True, group_keys=False)\n", - " [dist_col]\n", - " .apply(lambda x: x.shift(1))\n", - " ),\n", - " prior_time = (df.sort_values(sort_cols)\n", - " .groupby(group_cols, \n", - " observed=True, group_keys=False)\n", - " [time_col]\n", - " .apply(lambda x: x.shift(1))\n", - " ) \n", - " )\n", - " \n", - " df = df.assign(\n", - " change_meters = df[dist_col] - df.prior_dist,\n", - " change_sec = (df[time_col] - df.prior_time).divide(\n", - " np.timedelta64(1, 's'))\n", - " )\n", - " \n", - " df2 = (df.groupby(group_cols, \n", - " observed=True, group_keys=False)\n", - " .agg({\"change_meters\": \"sum\", \n", - " \"change_sec\": \"sum\"})\n", - " .reset_index()\n", - " )\n", - " \n", - " df2 = df2.assign(\n", - " speed_mph = (df2.change_meters.divide(df2.change_sec) * \n", - " rt_utils.MPH_PER_MPS)\n", - " )\n", - " \n", - " return df2" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "192d6fd1-c8c3-461f-a3fe-1c350fb6096c", - "metadata": {}, - "outputs": [], - "source": [ - "from shared_utils import rt_utils\n", - "\n", - "speed = distance_and_seconds_elapsed(\n", - " linear_ref, \n", - " group_cols = [\"gtfs_dataset_key\", \"trip_id\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "9cdf3484-20d8-499a-a3e4-41f7afae23a3", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(68556, 5)" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "speed.shape" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "f28f722f-b481-4543-ae5c-f678e69b30a2", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(20, 5)" - ] - }, - "execution_count": 10, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "speed[speed.speed_mph>=70].shape" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "1346a085-dc35-4792-b487-71dd0b559d80", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(4378, 5)" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "speed[speed.speed_mph<=2].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4420a0f0-c63d-4d24-9e80-919a7fe32744", - "metadata": {}, - "outputs": [], - "source": [ - "def aggregate_by_operator_route_time_of_day():" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ca60770-45c1-4aab-b0e9-8b1830d9250e", - "metadata": {}, - "outputs": [], - "source": [ - "#test_key = \"00accf770009aafd5dc103ff2eeddb37\"\n", - "#test_trip = \"t_1995375_b_33395_tn_0\"\n", - "test_shape = \"70f010e0dba18191937ed4b5bea42e8a\"" - ] - }, - { - "cell_type": "markdown", - "id": "dd00a9e9-f60a-4cff-9870-b8d93b763a7d", - "metadata": {}, - "source": [ - "This trip has a lot of vp that end up not being joined to any segment.\n", - "Including those vp far away from the shape mean that the interpolation results show the same thing, because essentially, all those points fall closest to the one end of the shape, and when taking the difference in `shape_meters`, the difference is zero.\n", - "\n", - "This is a compelling reason to add the % of segments touched in the sjoin results. Before, we used time cutoff, because it's easier to implement. '\n", - "\n", - "At least for calculating trip average speeds, we do need to touch at least 50% of the segments, or even 70% of segments as recommended in notebook, to only calculate entire trip averages on trips that have enough vp.\n", - "\n", - "The con of using % of segments is that it becomes even more crucial that segments are cut correctly. If we miss a segment (which we might, currently), there are vp that are not being joined, and we may throw out too many trips because it fails the % segments threshold.\n", - "\n", - "For now, let's take the sjoin results and use a couple points to triangulate the distance. Make an array, and pick points either every 10 min or at least 3 points to calculate distance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "25017d03-f86a-4cc3-a5f6-584e6b647952", - "metadata": {}, - "outputs": [], - "source": [ - "ddf = A2.merge_usable_vp_with_sjoin_vpidx(\n", - " [test_shape],\n", - " USABLE_FILE,\n", - " SJOIN_FILE,\n", - " SEGMENT_IDENTIFIER_COLS,\n", - " GROUPING_COL\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c9ecb54b-6a8c-4b43-abcd-40d652ac92cd", - "metadata": {}, - "outputs": [], - "source": [ - "ddf = ddf.compute()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f5bc03d7-5ebf-4100-bdc0-250876f1c04e", - "metadata": {}, - "outputs": [], - "source": [ - "from shared_utils import geography_utils\n", - "\n", - "ddf = geography_utils.create_point_geometry(ddf, \"x\", \"y\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f940224a-57e1-4d67-a0d7-8706733aa9fd", - "metadata": {}, - "outputs": [], - "source": [ - "crosswalk = sched_rt_utils.crosswalk_scheduled_trip_grouping_with_rt_key(\n", - " analysis_date, \n", - " [\"feed_key\", \"trip_id\", GROUPING_COL, \"shape_id\"] \n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "50cc7daf-b044-4add-9f4e-85424e7b514c", - "metadata": {}, - "outputs": [], - "source": [ - "shapes = helpers.import_scheduled_shapes(\n", - " analysis_date,\n", - " columns = [\"shape_array_key\", \"geometry\"],\n", - " filters = [[(\"shape_array_key\", \"in\", [test_shape])]],\n", - " get_pandas = True,\n", - " crs = PROJECT_CRS\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c9ac9777-0fd1-4cb7-90bf-1718a7e75e93", - "metadata": {}, - "outputs": [], - "source": [ - "shapes2 = pd.merge(\n", - " shapes,\n", - " crosswalk,\n", - " on = \"shape_array_key\",\n", - " how = \"inner\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "785628db-70b1-4152-a507-d35cb610f29c", - "metadata": {}, - "outputs": [], - "source": [ - "ddf2 = ddf.to_crs(PROJECT_CRS).drop(\n", - " columns = [\"location_timestamp\", \"location_timestamp_local\", \n", - " \"activity_date\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "956f499e-475f-4427-9338-c9c6062d97af", - "metadata": {}, - "outputs": [], - "source": [ - "m = ddf2.explore(\"trip_id\", tiles = \"CartoDB Positron\")\n", - "m = shapes2.explore(m=m, color=\"yellow\", name=\"shape\")\n", - "folium.LayerControl().add_to(m)\n", - "m" - ] - }, - { - "cell_type": "markdown", - "id": "1450a48c-47f8-429a-b92b-b4b73f6893a9", - "metadata": {}, - "source": [ - "## Triangulate vp based on sjoin results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a377e7e-e6c6-489f-b035-f4937622b3a8", - "metadata": {}, - "outputs": [], - "source": [ - "def list_of_vp_by_trip(\n", - " df: pd.DataFrame, \n", - " group_cols: list = [\"gtfs_dataset_key\", \"trip_id\"]\n", - ") -> pd.DataFrame:\n", - "\n", - " df2 = (df.groupby(trip_cols, observed=True)\n", - " .agg({\"vp_idx\": list})\n", - " .reset_index()\n", - " )\n", - " \n", - " return df2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2f294723-e7c9-46aa-bea1-7ea8397b781b", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "20d94c32-2b9d-448b-bfa4-d4834f923dd4", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4f475c8d-caae-4a13-a3ef-a1d7a8943752", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e2e2ab41-148b-44e4-976f-1a5c836405ec", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2cd529e8-4dca-446b-a540-236e99d6f78d", - "metadata": {}, - "outputs": [], - "source": [ - "by_trip_ddfs = [list_of_vp_by_trip(df, trip_cols) for df in subset_vp_ddfs]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6c59093a-5f29-408b-a460-fcea81d44c0c", - "metadata": {}, - "outputs": [], - "source": [ - "one = by_trip_ddfs[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c70056d1-7a0b-4b59-a5c3-d84bc328b4e6", - "metadata": {}, - "outputs": [], - "source": [ - "trip_df = compute(one)[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c4df9dfb-5e12-4e16-8078-ed8e8be1c95d", - "metadata": {}, - "outputs": [], - "source": [ - "def count_vp_and_get_every_10_min(my_list: list):\n", - " vp_idx_arr = np.asarray(my_list)\n", - " subset_arr = vp_idx_arr[::30]\n", - " \n", - " if len(subset_arr) < 3:\n", - " subset_arr = vp_idx_arr[:15]\n", - " \n", - " return list(subset_arr)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d674a955-b843-4045-9f85-47293aadecaa", - "metadata": {}, - "outputs": [], - "source": [ - "trip_df = trip_df.assign(\n", - " vp_idx2 = trip_df.apply(\n", - " lambda x: \n", - " count_vp_and_get_every_10_min(x.vp_idx), \n", - " axis=1, meta=('vp_idx2', 'object'))\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6beb7014-d275-4a7e-92ff-ad94787602c3", - "metadata": {}, - "outputs": [], - "source": [ - "keep_subset_vp = trip_df.vp_idx2.explode()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8bf189ec-f3c5-4e99-8879-531fecca531d", - "metadata": {}, - "outputs": [], - "source": [ - "ddf_subset = ddf[ddf.vp_idx.isin(keep_subset_vp)][\n", - " [\"gtfs_dataset_key\", \"trip_id\",\n", - " \"location_timestamp_local\",\n", - " \"x\", \"y\", \"vp_idx\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "747033ce-549a-4dc7-9877-b3c758a6d692", - "metadata": {}, - "outputs": [], - "source": [ - "crosswalk = sched_rt_utils.crosswalk_scheduled_trip_grouping_with_rt_key(\n", - " analysis_date, \n", - " [\"feed_key\", \"trip_id\", GROUPING_COL]\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e4ca3a07-1246-41a9-baa9-666e01f6c8dd", - "metadata": {}, - "outputs": [], - "source": [ - "subset_vp_shape = delayed(dd.merge)(\n", - " ddf_subset,\n", - " crosswalk,\n", - " on = [\"gtfs_dataset_key\", \"trip_id\"],\n", - " how = \"inner\"\n", - ").drop_duplicates()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "95b33d2c-7991-44af-b087-7720ad90762a", - "metadata": {}, - "outputs": [], - "source": [ - "subset_shapes = subset_vp_shape.shape_array_key.unique().persist()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3034219b-99d2-4f64-bcb4-f6082e32760e", - "metadata": {}, - "outputs": [], - "source": [ - "subset_shapes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2a5214d5-6523-4eeb-800f-dc4088aa7a13", - "metadata": {}, - "outputs": [], - "source": [ - "shapes = helpers.import_scheduled_shapes(\n", - " analysis_date,\n", - " columns = [\"shape_array_key\", \"geometry\"],\n", - " filters = [[(\"shape_array_key\", \"in\", subset_shapes)]],\n", - " get_pandas = True,\n", - " crs = PROJECT_CRS\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0c78935-f7ab-4d78-9261-fdacf96e8abe", - "metadata": {}, - "outputs": [], - "source": [ - "RT_OPERATORS = subset_vp_shape.gtfs_dataset_key.unique().compute()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9155289c-35bc-4bdc-bfe6-a67b670309dc", - "metadata": {}, - "outputs": [], - "source": [ - "test_operator = RT_OPERATORS[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18b2632f-c9da-4161-9efc-dc7d773cb3c1", - "metadata": {}, - "outputs": [], - "source": [ - "subset_vp_operator = subset_vp_shape[\n", - " subset_vp_shape.gtfs_dataset_key==test_operator]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d2c209eb-b7d5-40ac-b208-6be674e59308", - "metadata": {}, - "outputs": [], - "source": [ - "linear_ref_operator = delayed(\n", - " wrangle_shapes.linear_reference_vp_against_segment)(\n", - " subset_vp_operator,\n", - " shapes,\n", - " segment_identifier_cols = [GROUPING_COL]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "28593d93-f10e-4b55-8113-2a4b96a0664a", - "metadata": {}, - "outputs": [], - "source": [ - "linear_ref = delayed(wrangle_shapes.linear_reference_vp_against_segment)(\n", - " subset_vp_shape,\n", - " shapes,\n", - " segment_identifier_cols = [GROUPING_COL]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "71530c79-c2cf-4d54-8b14-fdd9d0810c26", - "metadata": {}, - "outputs": [], - "source": [ - "linear_ref" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "2b1a5e65-92ea-4687-af86-306efc54cd27", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "9135590c-d7e2-4fc7-876b-88b9e99cf5a5", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e3defe7e-2f37-4b9e-b149-200433924255", - "metadata": {}, - "outputs": [], - "source": [ - "operators = dd.read_parquet(\n", - " f\"{SEGMENT_GCS}{INPUT_FILE}\", \n", - " columns = [\"gtfs_dataset_key\"]\n", - ").gtfs_dataset_key.unique().compute().tolist()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "010db624-33bc-4fe7-8f9f-f957ff183f8a", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "87843b73-9963-4ad6-8239-af991b2fdb47", - "metadata": {}, - "outputs": [], - "source": [ - "subset_operators = operators[:2]\n", - "subset_operators" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "02d6ccf0-5f72-4502-a0a0-43da58b088e1", - "metadata": {}, - "outputs": [], - "source": [ - "ddf = dd.read_parquet(\n", - " f\"{SEGMENT_GCS}{INPUT_FILE}\", \n", - " filters = [[(\"gtfs_dataset_key\", \"in\", subset_operators)]],\n", - " columns = [\"vp_idx\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bc95f22a-549e-4176-9ff5-4d2f0dbaac0d", - "metadata": {}, - "outputs": [], - "source": [ - "trip_cols = [\"gtfs_dataset_key\", \"trip_id\"]\n", - "hour_min_cols = [\"hour\", \"minute\"]" - ] - }, - { - "cell_type": "markdown", - "id": "5cb85263-ca3c-49ac-acc8-2b03bb173a9b", - "metadata": {}, - "source": [ - "## Pings per minute for service hours" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "22f194bd-5713-478e-b7ab-5634a6c86a53", - "metadata": {}, - "outputs": [], - "source": [ - "ddf = ddf.repartition(npartitions=5)\n", - "\n", - "ddf = ddf.assign(\n", - " minute = ddf.location_timestamp_local.dt.minute\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b26e222c-8c20-43f3-b706-4bf311a7fda8", - "metadata": {}, - "outputs": [], - "source": [ - "ddf.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "de3ab88f-97b3-45d3-95ef-28a907a25d1f", - "metadata": {}, - "outputs": [], - "source": [ - "num_vp_pings = (ddf.groupby(trip_cols + hour_min_cols, observed=True)\n", - " [\"location_timestamp_local\"]\n", - " .count()\n", - " .dropna()\n", - " .reset_index()\n", - " .rename(columns = {\"location_timestamp_local\": \"num_pings\"})\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4938392d-a2ee-473e-91b5-9805b0aceb14", - "metadata": {}, - "outputs": [], - "source": [ - "num_vp_pings = num_vp_pings.assign(\n", - " atleast2 = num_vp_pings.apply(\n", - " lambda x: 1 if x.num_pings >= 2\n", - " else 0, axis=1, meta=('atleast2', 'int8'))\n", - " ) " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0fcb4ff1-04f6-4870-9e67-4391be2508a3", - "metadata": {}, - "outputs": [], - "source": [ - "vp_pings = (num_vp_pings.groupby(trip_cols)\n", - " .agg({\n", - " \"hour\": \"size\",\n", - " \"atleast2\": \"sum\"})\n", - " .dropna()\n", - " .reset_index()\n", - " ).rename(columns = {\n", - " \"hour\": \"trip_min_elapsed\"})" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "813ec2d3-8cf8-4e44-96bd-405ad65c19a9", - "metadata": {}, - "outputs": [], - "source": [ - "vp_pings = vp_pings.persist()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b427d9db-0c0e-43b9-9b82-464dd923d3e0", - "metadata": {}, - "outputs": [], - "source": [ - "vp_pings.compute()" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}