diff --git a/rt_segment_speeds/35_bbb_segment_backtrack.ipynb b/rt_segment_speeds/35_bbb_segment_backtrack.ipynb new file mode 100644 index 000000000..f6b289ccd --- /dev/null +++ b/rt_segment_speeds/35_bbb_segment_backtrack.ipynb @@ -0,0 +1,359 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "f9f1baa5-2de0-4152-89ec-e43880ea043d", + "metadata": {}, + "source": [ + "# Speedmap segments \n", + "* The 20th, 50th, 80th percentiles look extremely tight, why?\n", + "* Is this happening in the trip files?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b18ab7df-9592-4897-8193-ca0ccc015930", + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "\n", + "from segment_speed_utils import helpers\n", + "from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT\n", + "from shared_utils import rt_dates\n", + "\n", + "analysis_date = rt_dates.DATES[\"apr2024\"]\n", + "nov_date = rt_dates.DATES[\"nov2023\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "48f4c06f-be94-440f-90ee-df365b4f06b4", + "metadata": {}, + "outputs": [], + "source": [ + "TRIP_FILE = GTFS_DATA_DICT.speedmap_segments.stage4\n", + "SHAPE_FILE = GTFS_DATA_DICT.speedmap_segments.shape_stop_single_segment" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "ab776438-86bc-4b86-a276-9b52aad3f454", + "metadata": {}, + "outputs": [], + "source": [ + "operator_name = \"Big Blue Bus Schedule\"\n", + "\n", + "operator_route_df = helpers.import_scheduled_trips(\n", + " analysis_date,\n", + " columns = [\"gtfs_dataset_key\", \"name\", \n", + " \"route_short_name\", \"route_long_name\", \"route_id\"],\n", + " filters = [[(\"name\", \"==\", operator_name)]],\n", + " get_pandas = True,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "61f76f27-8c20-45dd-8910-ed60fafbf2d1", + "metadata": {}, + "outputs": [], + "source": [ + "nov_trips = 
helpers.import_scheduled_trips(\n", + " nov_date,\n", + " columns = [\"gtfs_dataset_key\", \"name\", \"shape_id\", \"route_id\", \n", + " \"route_long_name\", \"route_short_name\"],\n", + " filters = [[(\"name\", \"==\", operator_name)]],\n", + " get_pandas = True\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5a6a7ff-8466-403a-abf5-3db4a69f42b6", + "metadata": {}, + "outputs": [], + "source": [ + "if nov_trips.schedule_gtfs_dataset_key.iloc[0] == operator_route_df.schedule_gtfs_dataset_key.iloc[0]:\n", + " bbb_key = nov_trips.schedule_gtfs_dataset_key.iloc[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d448d865-2546-487b-83e8-eabeaceccdcd", + "metadata": {}, + "outputs": [], + "source": [ + "def nov_shape_to_apr_route(\n", + " nov_trips: pd.DataFrame,\n", + " apr_route_df: pd.DataFrame,\n", + " operator_key: str = bbb_key,\n", + " one_shape: str = \"\"\n", + "):\n", + "\n", + " nov_route_name = nov_trips[\n", + " #(nov_trips.schedule.str.contains(operator_substring)) & \n", + " (nov_trips.shape_id==one_shape)\n", + " ].route_short_name.iloc[0]\n", + " \n", + " return apr_route_df[\n", + " #(apr_route_df.name.str.contains(operator_substring)) & \n", + " (apr_route_df.route_short_name==nov_route_name)\n", + " ].route_id.iloc[0]" + ] + }, + { + "cell_type": "markdown", + "id": "ccee2dd5-e335-4bd3-9d83-7ebeec4bc422", + "metadata": {}, + "source": [ + "## Trip" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e04594e-18ff-4283-aa9f-bd811d577882", + "metadata": {}, + "outputs": [], + "source": [ + "trip_df = pd.read_parquet(\n", + " f\"{SEGMENT_GCS}{TRIP_FILE}_{analysis_date}.parquet\",\n", + " filters = [[(\"schedule_gtfs_dataset_key\", \"==\", bbb_key)]]\n", + ")\n", + "\n", + "trip_df = trip_df.assign(\n", + " speed_mph = trip_df.speed_mph.round(2)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c2216fd7-92cf-4dce-82bb-125e2ecce384", + 
"metadata": {}, + "outputs": [], + "source": [ + "olympic_shape1 = \"26450\"\n", + "olympic_route1 = nov_shape_to_apr_route(\n", + " nov_trips,\n", + " operator_route_df,\n", + " bbb_key,\n", + " olympic_shape1\n", + ")\n", + "\n", + "santa_monica_shape1 = \"26437\"\n", + "santa_monica_route1 = nov_shape_to_apr_route(\n", + " nov_trips,\n", + " operator_route_df,\n", + " bbb_key,\n", + " santa_monica_shape1\n", + ")\n", + "\n", + "santa_monica_shape2 = \"26509\"\n", + "santa_monica_route2 = nov_shape_to_apr_route(\n", + " nov_trips,\n", + " operator_route_df,\n", + " bbb_key,\n", + " santa_monica_shape2\n", + ")\n", + "\n", + "fourth_shape1 = \"26464\"\n", + "fourth_route1 = nov_shape_to_apr_route(\n", + " nov_trips,\n", + " operator_route_df,\n", + " bbb_key,\n", + " fourth_shape1\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a45b3e7d-ad9e-4f3c-8d43-2821ca2e9aed", + "metadata": {}, + "outputs": [], + "source": [ + "def filter_to_route(trip_df, operator_key, one_route, one_stop):\n", + " return trip_df[\n", + " (trip_df.schedule_gtfs_dataset_key==operator_key) & \n", + " (trip_df.route_id==one_route) & \n", + " (trip_df.stop_id==one_stop)\n", + " ][[\"stop_pair_name\", \"time_of_day\", \"arrival_time\", \"speed_mph\", \n", + " \"meters_elapsed\", \"sec_elapsed\"]].sort_values(\"arrival_time\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85641403-384d-409c-8de4-ea3d525b5c1c", + "metadata": {}, + "outputs": [], + "source": [ + "green_olympic_speeds = {\n", + " \"721\": \"Olympic & Prosser\",\n", + " \"688\": \"Olympic & Veteran\",\n", + " \"716\": \"Olympic & Colby\",\n", + " \"800\": \"Olympic & Purdue\",\n", + " \"801\": \"Olympic & Colby\",\n", + " \"700\": \"Olympic & 3030\"\n", + "}\n", + "\n", + "green_santa_monica_blvd_speeds = {\n", + " \"370\": \"Santa Monica & 14th\",\n", + " \"117\": \"Santa Monica & 14th, under\",\n", + " \"1234\": \"Santa Monica & 17th\"\n", + "}\n", + "\n", + 
"green_fourth_speeds = {\n", + " \"668\": \"4th & San Vicente\",\n", + " \"666\": \"4th & Marguerita\",\n", + " \"665\": \"4th & Alta\",\n", + " \"664\": \"4th & Montana\",\n", + " \"505\": \"4th & Washington\",\n", + " \"504\": \"4th & California\",\n", + " \"502\": \"4th & Washington\",\n", + " \"503\": None, # TODO: stop name was left blank, fill in\n", + "}" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3c8ba1d6-388b-498c-bfa5-49ea1b8cb0cb", + "metadata": {}, + "outputs": [], + "source": [ + "# There are several exact speeds across trips, take a look at\n", + "# interpolated stop arrivals, what are the chances this happens?\n", + "# is it actually interpolating between different vp_idx values?" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22d67801-c7c4-4726-be0c-9702a20c9c1c", + "metadata": {}, + "outputs": [], + "source": [ + "filter_to_route(trip_df, bbb_key, fourth_route1, \"502\").query('time_of_day==\"AM Peak\"')\n", + "#.groupby(\"time_of_day\").agg(\n", + "#{\"speed_mph\": lambda x: sorted(list(x))})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5f6e6bff-bdfc-42b7-a549-6d3c62c3af20", + "metadata": {}, + "outputs": [], + "source": [ + "# Why are there the same speeds there?\n", + "trip_df[\n", + " (trip_df.schedule_gtfs_dataset_key==bbb_key) & \n", + " (trip_df.route_id==fourth_route1) & \n", + " (trip_df.stop_id==\"502\") & \n", + " (trip_df.time_of_day==\"AM Peak\") & \n", + " (trip_df.speed_mph > 15) & (trip_df.speed_mph < 16)\n", + "].trip_instance_key.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "93087f88-18f1-4517-a9f3-0f35a695ef6f", + "metadata": {}, + "outputs": [], + "source": [ + "INTERP_FILE = GTFS_DATA_DICT.speedmap_segments.stage3b\n", + "NEAREST_VP_FILE = GTFS_DATA_DICT.rt_stop_times.stage2\n", + "subset_trips = [\n", + " '0d448c743a91bc96271d36ba4450ebc9',\n", + " '1fbea8d720efd0dd513e98eef5383dbf',\n", + " '3a2e5c9e7304d091406cb5bbdfcc27e4',\n", + " 
'a0f65344cb59c750934aff210b325f7e',\n", + " 'b6fc33a3b002b0bc63b07b6f39d80cb0'\n", + "]\n", + "\n", + "stop_arrivals = pd.read_parquet(\n", + " f\"{SEGMENT_GCS}{INTERP_FILE}_{analysis_date}.parquet\",\n", + " filters = [[(\"trip_instance_key\", \"in\", subset_trips), \n", + " (\"stop_id\", \"==\", \"502\")]]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7f85adf9-bc36-411f-a890-1aa42b655d5d", + "metadata": {}, + "outputs": [], + "source": [ + "nearest = gpd.read_parquet(\n", + " f\"{SEGMENT_GCS}{NEAREST_VP_FILE}_{analysis_date}.parquet\",\n", + " filters = [[(\"trip_instance_key\", \"in\", subset_trips), \n", + " (\"stop_id\", \"==\", \"502\")]]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a231783c-0eb6-47d6-879a-6840ecfaaaff", + "metadata": {}, + "outputs": [], + "source": [ + "for i in nearest.index:\n", + " print(i)\n", + " print(nearest.loc[i][\"location_timestamp_local_trio\"])\n", + " #print(nearest.loc[i][\"vp_coords_trio\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "a5a3c27f-cff5-4368-99d3-00c556239b8d", + "metadata": {}, + "outputs": [], + "source": [ + "nearest.columns" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "36341936-6747-4ce6-aee2-aa47b6d3c283", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/rt_segment_speeds/36_bbb_nn_with_unknowns.ipynb b/rt_segment_speeds/36_bbb_nn_with_unknowns.ipynb new file mode 100644 index 000000000..d6b0ac9e2 --- /dev/null +++ 
b/rt_segment_speeds/36_bbb_nn_with_unknowns.ipynb @@ -0,0 +1,2645 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "fe5aafc6-e153-4727-a26c-cfbf6ea892c4", + "metadata": {}, + "source": [ + "# Big Blue Bus nearest neighbor comparison\n", + "\n", + "* What would happen if we allowed the full set of vp to be used to find the nearest neighbor, instead of first removing the vp in the opposite direction?\n", + "* The monotonically increasing condition is enforced later; could it catch the errors that way?" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "667c4b7f-f402-4a03-9ea6-77bfbdf84ca1", + "metadata": {}, + "outputs": [], + "source": [ + "import geopandas as gpd\n", + "import pandas as pd\n", + "\n", + "from segment_speed_utils import helpers, neighbor\n", + "from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT\n", + "from shared_utils import rt_dates\n", + "\n", + "analysis_date = rt_dates.DATES[\"apr2024\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "d4f175b4-6125-4e4d-932d-6d8458e844ec", + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import sys\n", + "sys.path.append(\"scripts/\")\n", + "import nearest_vp_to_stop\n", + "\n", + "WGS84 = \"EPSG:4326\"" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "a8d3dfbe-e380-48c8-b88b-7601f87fab9a", + "metadata": {}, + "outputs": [], + "source": [ + "bbb_trips = helpers.import_scheduled_trips(\n", + " analysis_date,\n", + " filters = [(\"name\", \"==\", \"Big Blue Bus Schedule\")],\n", + " columns = [\"gtfs_dataset_key\", \"trip_instance_key\"],\n", + " get_pandas = True\n", + ")\n", + "\n", + "bbb_key = bbb_trips.schedule_gtfs_dataset_key.iloc[0]\n", + "subset_trips = bbb_trips.trip_instance_key.unique()" + ] + }, + { + "cell_type": "markdown", + "id": "31ea87f2-08ba-485b-a35c-46cb50d20730", + "metadata": {}, + "source": [ + "## Construct proxy stop times" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": 
"683892d5-0139-4e41-9407-6f0aa8a5b2b0", + "metadata": {}, + "outputs": [], + "source": [ + "def construct_stop_times(\n", + " analysis_date: str, \n", + " subset_trips: list\n", + ") -> gpd.GeoDataFrame:\n", + "\n", + " # Grab the relevant stop times rows\n", + " # will need to concatenate RT stop times (all trips) \n", + " # with additional segments for speedmaps\n", + " rt_stop_times = (\n", + " nearest_vp_to_stop.stop_times_for_all_trips(analysis_date)\n", + " .query('trip_instance_key in @subset_trips')\n", + " )\n", + "\n", + " proxy_stop_times = (\n", + " nearest_vp_to_stop.stop_times_for_speedmaps(analysis_date)\n", + " .query('trip_instance_key in @subset_trips')\n", + " )\n", + "\n", + " bbb_stop_times = pd.concat(\n", + " [rt_stop_times, proxy_stop_times], \n", + " axis=0, ignore_index=True\n", + " )\n", + " \n", + " return bbb_stop_times\n", + "\n", + "bbb_stop_times = construct_stop_times(analysis_date, subset_trips)" + ] + }, + { + "cell_type": "markdown", + "id": "9d7075cf-c826-4f49-9867-06bcb321df4e", + "metadata": {}, + "source": [ + "## Merge stop and get nearest snap (`neighbor`)" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "a48bf073-2ccd-49b9-898b-0813b09f77b5", + "metadata": {}, + "outputs": [], + "source": [ + "# This is with opposite direction removed\n", + "gdf = neighbor.merge_stop_vp_for_nearest_neighbor(\n", + " bbb_stop_times,\n", + " analysis_date,\n", + " filters = [[(\"trip_instance_key\", \"in\", subset_trips)]]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4b2c2b8c-0bb7-4066-ae12-73e92043e2f7", + "metadata": {}, + "outputs": [], + "source": [ + "# Try a version without removing vp points\n", + "# and allow nearest neighbor to select from any direction\n", + "vp_full = gpd.read_parquet(\n", + " f\"{SEGMENT_GCS}condensed/vp_condensed_{analysis_date}.parquet\",\n", + " columns = [\"trip_instance_key\", \"vp_idx\", \n", + " \"location_timestamp_local\", \n", + " \"geometry\"],\n", 
+ " filters = [[(\"trip_instance_key\", \"in\", subset_trips)]]\n", + ").rename(columns = {\n", + " \"vp_idx\": \"trip_vp_idx\",\n", + " \"geometry\": \"trip_geometry\"\n", + "}).set_geometry(\"trip_geometry\").to_crs(WGS84)\n", + "\n", + "gdf2 = pd.merge(\n", + " bbb_stop_times.rename(\n", + " columns = {\n", + " \"geometry\": \"stop_geometry\"}\n", + " ).set_geometry(\"stop_geometry\").to_crs(WGS84),\n", + " vp_full.rename(\n", + " columns = {\n", + " \"geometry\": \"vp_geometry\"\n", + " }),\n", + " on = [\"trip_instance_key\"],\n", + " how = \"inner\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "00a7aa0c-7303-461a-a6a2-f62464fba9e3", + "metadata": {}, + "outputs": [], + "source": [ + "# this list comes from an adapted version of \n", + "# GTFS_DATA_DICT stop_pair_cols\n", + "merge_cols = [\"stop_id\", \"stop_pair\", \"stop_sequence\", \"stop_sequence1\", \n", + " \"stop_geometry\", \"stop_primary_direction\", \n", + " \"shape_array_key\", \"trip_instance_key\"]\n", + "\n", + "gdf_results = pd.merge(\n", + " gdf,\n", + " gdf2,\n", + " on = merge_cols,\n", + " how = \"inner\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "cb28d31b-3dc2-4bfa-9d81-a574db9fe270", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trip_instance_key object\n", + "shape_array_key object\n", + "stop_sequence int64\n", + "stop_id object\n", + "stop_pair object\n", + "stop_primary_direction object\n", + "stop_geometry geometry\n", + "stop_sequence1 float64\n", + "vp_geometry geometry\n", + "vp_idx object\n", + "trip_vp_idx object\n", + "location_timestamp_local object\n", + "trip_geometry geometry\n", + "dtype: object" + ] + }, + "execution_count": 8, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf_results.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "c13e674f-4d22-4b15-b053-ab0f5d9e07bd", + "metadata": {}, + "outputs": [], + "source": [ + 
"nearest_vp_idx = np.vectorize(neighbor.add_nearest_vp_idx)( \n", + " gdf_results.vp_geometry, gdf_results.stop_geometry, gdf_results.vp_idx\n", + ")\n", + "\n", + "nearest_vp_idx2 = np.vectorize(neighbor.add_nearest_vp_idx)( \n", + " gdf_results.trip_geometry, gdf_results.stop_geometry, gdf_results.trip_vp_idx\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "86f44322-c920-4efa-ba42-5bf491c5696d", + "metadata": {}, + "outputs": [], + "source": [ + "gdf_results = gdf_results.assign(\n", + " nearest_vp_idx = nearest_vp_idx,\n", + " nearest_vp_idx2 = nearest_vp_idx2\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "bf3efe13-c896-4a25-94fc-faa2c18c3ac4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((36719, 15), (1503, 15))" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf_results.shape, gdf_results[\n", + " gdf_results.nearest_vp_idx != gdf_results.nearest_vp_idx2\n", + "].shape" + ] + }, + { + "cell_type": "markdown", + "id": "dceeb45c-8e6c-4870-a823-8557d6f40dcf", + "metadata": {}, + "source": [ + "If we allow vp from the opposite direction to be candidates for the nearest neighbor, about 4.1% of the rows would have a different `vp_idx` selected.\n", + "\n", + "About 2/3 of these differ by -1 or +1, which probably wouldn't change the result that much. The other 1/3 differ by more than 1, which could change the result, since the trio of points used to interpolate allows for a difference of 1, but not more." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "b75e6636-d44a-4860-b13c-abec6bd471a9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((36719, 15), (1503, 15))" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf_results.shape, gdf_results[\n", + " gdf_results.nearest_vp_idx != gdf_results.nearest_vp_idx2].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "07fa38b2-c87e-4e14-b875-d4604d768bd2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((1503,), (571, 16))" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diff_df = gdf_results[\n", + " gdf_results.nearest_vp_idx != gdf_results.nearest_vp_idx2]\n", + "\n", + "diff_df = diff_df.assign(\n", + " vp_idx_diff = diff_df.nearest_vp_idx - diff_df.nearest_vp_idx2\n", + ")\n", + "\n", + "diff_df[\"vp_idx_diff\"].shape, diff_df[\n", + " abs(diff_df.vp_idx_diff) > 1].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "a6a1fc5f-8cea-4f4f-af6f-9d3aefb0518d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "diff_df.vp_idx_diff.hist(bins = range(\n", + " diff_df.vp_idx_diff.min(), \n", + " diff_df.vp_idx_diff.max(), \n", + " 1)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "5b45be2e-eb93-454c-83ed-87e4269ce293", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 1503.000000\n", + "mean -1.375915\n", + "std 10.011931\n", + "min -74.000000\n", + "25% -1.000000\n", + "50% -1.000000\n", + "75% 1.000000\n", + "max 95.000000\n", + "Name: vp_idx_diff, dtype: float64" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "diff_df.vp_idx_diff.describe()" + ] + }, + { + "cell_type": "markdown", + "id": "cb4c82dc-fa6a-45dd-9b89-f3938a1c27cc", + "metadata": {}, + "source": [ + "## Add trio (`neighbor`)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "id": "cfdcfc00-b343-42ab-8a9c-0f8b76e79768", + "metadata": {}, + "outputs": [], + "source": [ + "import shapely\n", + "def add_trio_cols(\n", + " gdf2: gpd.GeoDataFrame, \n", + " nearest_vp_col: str,\n", + "):\n", + "\n", + " nearest_vp_idx_series = [] \n", + " vp_trio_series = []\n", + " time_trio_series = []\n", + " coords_trio_series = []\n", + " \n", + " for row in gdf2.itertuples():\n", + " vp_trio, time_trio, coords_trio = neighbor.add_trio(\n", + " getattr(row, nearest_vp_col), \n", + " np.asarray(getattr(row, \"trip_vp_idx\")),\n", + " np.asarray(getattr(row, \"location_timestamp_local\")),\n", + " np.asarray(getattr(row, \"trip_geometry\").coords),\n", + " )\n", + " \n", + " vp_trio_series.append(vp_trio)\n", + " time_trio_series.append(time_trio)\n", + " coords_trio_series.append(shapely.LineString(coords_trio))\n", + " \n", + " drop_cols = [\n", + " \"location_timestamp_local\",\n", + " \"trip_vp_idx\", \"trip_geometry\"\n", + " ]\n", + " \n", + " gdf2 = gdf2.assign(\n", + " vp_idx_trio = vp_trio_series,\n", 
+ " location_timestamp_local_trio = time_trio_series,\n", + " vp_coords_trio = gpd.GeoSeries(coords_trio_series, crs = WGS84)\n", + " ).drop(columns = drop_cols)\n", + " \n", + " return gdf2" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "id": "c40c45bc-8639-4154-adaa-114d0d96e758", + "metadata": {}, + "outputs": [], + "source": [ + "gdf_results1 = add_trio_cols(gdf_results, \"nearest_vp_idx\")\n", + "gdf_results2 = add_trio_cols(gdf_results, \"nearest_vp_idx2\")" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "cb963cd1-3275-49aa-bb01-04a70dcc46fa", + "metadata": {}, + "outputs": [], + "source": [ + "trio_results = pd.merge(\n", + " gdf_results1,\n", + " gdf_results2.rename(columns = {\n", + " \"vp_idx\": \"vp_idx2\",\n", + " \n", + " \"vp_idx_trio\": \"vp_idx_trio2\",\n", + " \"location_timestamp_local_trio\": \"location_timestamp_local_trio2\",\n", + " \"vp_coords_trio\": \"vp_coords_trio2\"\n", + " }),\n", + " on = merge_cols + [\"vp_geometry\", \"nearest_vp_idx\", \"nearest_vp_idx2\"],\n", + " how = \"inner\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7392a95e-de47-47a9-8990-128cf1b8b7f5", + "metadata": {}, + "source": [ + "## Interpolate arrival" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "6cee18a4-09fd-42cf-a6c9-51d46941e4b6", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trip_instance_key object\n", + "shape_array_key object\n", + "stop_sequence int64\n", + "stop_id object\n", + "stop_pair object\n", + "stop_primary_direction object\n", + "stop_geometry geometry\n", + "stop_sequence1 float64\n", + "vp_geometry geometry\n", + "vp_idx object\n", + "nearest_vp_idx int64\n", + "nearest_vp_idx2 int64\n", + "vp_idx_trio object\n", + "location_timestamp_local_trio object\n", + "vp_coords_trio geometry\n", + "vp_idx2 object\n", + "vp_idx_trio2 object\n", + "location_timestamp_local_trio2 object\n", + "vp_coords_trio2 geometry\n", + "dtype: object" + ] + }, + 
"execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "trio_results.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 20, + "id": "b53a7be5-145e-438b-bbf9-c85cc335663b", + "metadata": {}, + "outputs": [], + "source": [ + "PROJECT_CRS = \"EPSG:3310\"\n", + "import interpolate_stop_arrival\n", + "\n", + "trip_stop_cols = [\"trip_instance_key\", \"stop_sequence\", \"stop_sequence1\"]\n", + "\n", + "def interpolate_me(\n", + " df: gpd.GeoDataFrame,\n", + " analysis_date: str,\n", + "):\n", + " df = df.assign(\n", + " stop_geometry = df.stop_geometry.to_crs(PROJECT_CRS),\n", + " vp_coords_trio = df.vp_coords_trio.to_crs(PROJECT_CRS)\n", + " )\n", + "\n", + " shapes = helpers.import_scheduled_shapes(\n", + " analysis_date,\n", + " columns = [\"shape_array_key\", \"geometry\"],\n", + " crs = PROJECT_CRS\n", + " ).dropna(subset=\"geometry\")\n", + "\n", + " gdf = pd.merge(\n", + " df,\n", + " shapes.rename(columns = {\"geometry\": \"shape_geometry\"}),\n", + " on = \"shape_array_key\",\n", + " how = \"inner\"\n", + " )\n", + "\n", + " del df, shapes\n", + "\n", + " stop_meters_series = []\n", + " stop_arrival_series = []\n", + " \n", + " for row in gdf.itertuples():\n", + " \n", + " stop_meters, interpolated_arrival = interpolate_stop_arrival.project_points_onto_shape(\n", + " getattr(row, \"stop_geometry\"),\n", + " getattr(row, \"vp_coords_trio\"),\n", + " getattr(row, \"shape_geometry\"),\n", + " getattr(row, \"location_timestamp_local_trio\")\n", + " )\n", + " \n", + " stop_meters_series.append(stop_meters)\n", + " stop_arrival_series.append(interpolated_arrival)\n", + "\n", + " results = gdf.assign(\n", + " stop_meters = stop_meters_series,\n", + " arrival_time = stop_arrival_series,\n", + " )[trip_stop_cols + [\"shape_array_key\", \"stop_id\", \n", + " \"stop_meters\", \"arrival_time\"]\n", + " ].sort_values(\n", + " trip_stop_cols\n", + " ).reset_index(drop=True)\n", + " \n", + " \n", + " results = 
interpolate_stop_arrival.enforce_monotonicity_and_interpolate_across_stops(\n", + " results, trip_stop_cols)\n", + " \n", + " return results" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "f139dbd5-4a8a-4bfb-b82a-63e3fd09cc86", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n" + ] + } + ], + "source": [ + "gdf_interp1 = interpolate_me(gdf_results1, analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "a19363bc-a2be-4771-90a8-b1ca928bac53", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n" + ] + } + ], + "source": [ + "gdf_interp2 = interpolate_me(gdf_results2, analysis_date)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "be982784-9afc-4785-9b76-fe84d5fd9254", + "metadata": {}, + "outputs": [], + "source": [ + "interp_results = pd.merge(\n", + " gdf_interp1,\n", + " gdf_interp2.rename(columns = {\n", + " \"stop_meters\": \"stop_meters2\",\n", + " \"arrival_time\": \"arrival_time2\"\n", + " }),\n", + " on = trip_stop_cols + [\"shape_array_key\", \"stop_id\"],\n", + " how = \"inner\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "8c9a52ef-fd66-4380-bfa7-c9ef3aaab535", + "metadata": {}, + "source": [ + "About 4.5% have different interpolated arrival time results. Before, about 4.1% have different nearest vp selected, and about 1/3 of those had differences greater than 1. 
\n", + "\n", + "Overall, about 4.5% of the interpolated arrival times differ.\n", + "\n", + "**Conclusion**:\n", + "* A small share of rows is affected, but interestingly, the resulting speeds differ by a lot. Speeds can swing wildly, so the result is sensitive to which point is selected. If it weren't sensitive, speeds would only differ by 1 or 2 mph, not by the range that we see.\n", + "* If we think about where buses dwell, it would be near stops, so we can expect that sensitive results are concentrated where we care about measuring arrival.\n", + "* We should definitely include `dwell_time` in vp; once we account for that, we would narrow down the candidate points we select from, which is a good thing." + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "164b72a0-c254-4813-bb22-ff9b006b916c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((36483, 9), (1624, 9))" + ] + }, + "execution_count": 24, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "interp_results.shape, interp_results[\n", + " interp_results.arrival_time != interp_results.arrival_time2\n", + "].shape" + ] + }, + { + "cell_type": "markdown", + "id": "a781f93c-cbbf-4947-a756-36db28685de4", + "metadata": {}, + "source": [ + "## Speeds" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "52216905-ae5a-47e2-8876-e28f6028b624", + "metadata": {}, + "outputs": [], + "source": [ + "from segment_speed_utils import segment_calcs\n", + "\n", + "def speed_calculation(df: pd.DataFrame):\n", + " trip_cols = [\"trip_instance_key\"]\n", + "\n", + " df = segment_calcs.convert_timestamp_to_seconds(\n", + " df, [\"arrival_time\"]\n", + " ).sort_values(trip_stop_cols).reset_index(drop=True)\n", + " \n", + " df = df.assign(\n", + " subseq_arrival_time_sec = (df.groupby(trip_cols, \n", + " observed=True, group_keys=False)\n", + " .arrival_time_sec\n", + " .shift(-1)\n", + " ),\n", + " subseq_stop_meters 
= (df.groupby(trip_cols, \n", + " observed=True, group_keys=False)\n", + " .stop_meters\n", + " .shift(-1)\n", + " )\n", + " )\n", + "\n", + " speed = df.assign(\n", + " meters_elapsed = df.subseq_stop_meters - df.stop_meters, \n", + " sec_elapsed = df.subseq_arrival_time_sec - df.arrival_time_sec,\n", + " ).pipe(\n", + " segment_calcs.derive_speed, \n", + " (\"stop_meters\", \"subseq_stop_meters\"), \n", + " (\"arrival_time_sec\", \"subseq_arrival_time_sec\")\n", + " )\n", + " \n", + " return speed" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "d896a200-3cbd-42c7-89ca-67cbf7211314", + "metadata": {}, + "outputs": [], + "source": [ + "drop_me = [\n", + " \"arrival_time_sec\", \"subseq_arrival_time_sec\",\n", + " \"subseq_stop_meters\"\n", + "]\n", + "speed1 = speed_calculation(gdf_interp1).drop(columns = drop_me)\n", + "speed2 = speed_calculation(gdf_interp2).drop(columns = drop_me)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "e7a5d228-6792-4625-bcad-d456c76cbf06", + "metadata": {}, + "outputs": [], + "source": [ + "speed_results = pd.merge(\n", + " speed1,\n", + " speed2.rename(columns = {\n", + " \"stop_meters\": \"stop_meters2\",\n", + " \"arrival_time\": \"arrival_time2\",\n", + " \"meters_elapsed\": \"meters_elapsed2\",\n", + " \"sec_elapsed\": \"sec_elapsed2\",\n", + " \"speed_mph\": \"speed_mph2\"\n", + " }),\n", + " on = trip_stop_cols + [\"shape_array_key\", \"stop_id\"],\n", + " how = \"inner\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "7723cbac-ab8f-4786-9a0e-e8572e434adc", + "metadata": {}, + "source": [ + "Once converted to speeds, the share of rows that differ grows to about 5.5%." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "b3ced60d-c2b1-431e-ada4-d32a5a0c0f3b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "((36483, 15), (1997, 15))" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "speed_results.shape, 
speed_results[\n", + " (speed_results.speed_mph != speed_results.speed_mph2) & \n", + " (speed_results.speed_mph.notna()) & \n", + " (speed_results.speed_mph < 100_000) # remove infinity\n", + "].shape" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "d0357ffd-1500-4027-b2d0-a033e97eee26", + "metadata": {}, + "outputs": [], + "source": [ + "speed_results = speed_results.assign(\n", + " speed_diff = speed_results.speed_mph - speed_results.speed_mph2\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "415ba0a0-2695-4667-b1d1-1cc61a2c89a4", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "count 33593.0\n", + "mean NaN\n", + "std NaN\n", + "min -inf\n", + "25% 0.0\n", + "50% 0.0\n", + "75% 0.0\n", + "max inf\n", + "Name: speed_diff, dtype: float64" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "speed_results.speed_diff.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "6b3aed83-2ab2-42cb-ba53-c3001d30b85c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 31, + "metadata": {}, + "output_type": "execute_result" + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "speed_results[\n", + " speed_results.speed_diff != 0\n", + "].speed_diff.hist(bins=range(-70, 70, 1))" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "8abc7a49-2e4f-4a4d-91cb-510607b37ceb", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
arrival_timearrival_time2speed_mphspeed_mph2speed_diff
12024-04-17 08:16:282024-04-17 08:16:2810.0765776.8830773.193500
22024-04-17 08:20:102024-04-17 08:21:536.30140812.091892-5.790483
32024-04-17 08:26:052024-04-17 08:24:5879.89285723.54736856.345489
1472024-04-17 12:16:412024-04-17 12:16:4115.85125718.648538-2.797281
1482024-04-17 12:18:012024-04-17 12:17:4918.07236518.695550-0.623185
..................
364422024-04-17 13:00:482024-04-17 13:03:559.64356786.792099-77.148533
364432024-04-17 13:01:512024-04-17 13:04:029.55945474.269606-64.710152
364442024-04-17 13:03:322024-04-17 13:04:159.54701677.967296-68.420280
364712024-04-17 13:48:132024-04-17 13:48:136.3232385.0253101.297928
364722024-04-17 13:50:442024-04-17 13:51:234.0484406.636787-2.588347
\n", + "

1997 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " arrival_time arrival_time2 speed_mph speed_mph2 \\\n", + "1 2024-04-17 08:16:28 2024-04-17 08:16:28 10.076577 6.883077 \n", + "2 2024-04-17 08:20:10 2024-04-17 08:21:53 6.301408 12.091892 \n", + "3 2024-04-17 08:26:05 2024-04-17 08:24:58 79.892857 23.547368 \n", + "147 2024-04-17 12:16:41 2024-04-17 12:16:41 15.851257 18.648538 \n", + "148 2024-04-17 12:18:01 2024-04-17 12:17:49 18.072365 18.695550 \n", + "... ... ... ... ... \n", + "36442 2024-04-17 13:00:48 2024-04-17 13:03:55 9.643567 86.792099 \n", + "36443 2024-04-17 13:01:51 2024-04-17 13:04:02 9.559454 74.269606 \n", + "36444 2024-04-17 13:03:32 2024-04-17 13:04:15 9.547016 77.967296 \n", + "36471 2024-04-17 13:48:13 2024-04-17 13:48:13 6.323238 5.025310 \n", + "36472 2024-04-17 13:50:44 2024-04-17 13:51:23 4.048440 6.636787 \n", + "\n", + " speed_diff \n", + "1 3.193500 \n", + "2 -5.790483 \n", + "3 56.345489 \n", + "147 -2.797281 \n", + "148 -0.623185 \n", + "... ... \n", + "36442 -77.148533 \n", + "36443 -64.710152 \n", + "36444 -68.420280 \n", + "36471 1.297928 \n", + "36472 -2.588347 \n", + "\n", + "[1997 rows x 5 columns]" + ] + }, + "execution_count": 32, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "speed_results[\n", + " (speed_results.speed_diff != 0) & \n", + " (speed_results.speed_mph.notna()) & \n", + " (speed_results.speed_mph < 100_000)\n", + "].sort_values([\"trip_instance_key\", \"arrival_time\"])[\n", + " [\"arrival_time\", \"arrival_time2\", \n", + " \"speed_mph\", \"speed_mph2\", \"speed_diff\"]]" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "48404074-2da8-4f14-88d2-040090b3fcb6", + "metadata": {}, + "outputs": [], + "source": [ + "SEGMENT_FILE = GTFS_DATA_DICT.speedmap_segments.segments_file\n", + "\n", + "segment_gdf = gpd.read_parquet(\n", + " f\"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet\", \n", + " filters = [[(\"trip_instance_key\", \"in\", subset_trips)]]\n", + ")" + ] + }, + { + 
"cell_type": "code", + "execution_count": 34, + "id": "721bfb7a-82b3-47ce-a397-cb3e63a87fd9", + "metadata": {}, + "outputs": [], + "source": [ + "from segment_speed_utils import gtfs_schedule_wrangling\n", + "\n", + "speed_results2 = gtfs_schedule_wrangling.fill_missing_stop_sequence1(\n", + " speed_results)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "7c62d1b6-8038-4efc-92ac-ba5ef14336ed", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(35665, 12)" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "segment_gdf.shape" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "b076ca6c-f74c-4228-a063-8249d30b7cae", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "both 35583\n", + "left_only 1165\n", + "right_only 900\n", + "Name: _merge, dtype: int64" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# address how stop_sequence1 can be missing in speedmap segments\n", + "pd.merge(\n", + " segment_gdf,\n", + " speed_results2,\n", + " on = trip_stop_cols + [\"shape_array_key\"],\n", + " how = \"outer\",\n", + " indicator = True\n", + ")._merge.value_counts()" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "b333082e-aef8-4541-8d64-dc06a61dbce7", + "metadata": {}, + "outputs": [], + "source": [ + "speed_results_gdf = pd.merge(\n", + " segment_gdf,\n", + " speed_results2,\n", + " on = trip_stop_cols + [\"shape_array_key\"],\n", + " how = \"inner\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "b7289e05-267a-470b-a709-3628ff0b0334", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "trip_instance_key object\n", + "shape_array_key object\n", + "stop_id1 object\n", + "stop_sequence int64\n", + "geometry geometry\n", + "stop_id2 object\n", + "segment_id object\n", + "stop_pair object\n", + 
"schedule_gtfs_dataset_key object\n", + "route_id object\n", + "direction_id float64\n", + "stop_sequence1 float64\n", + "stop_id object\n", + "stop_meters float64\n", + "arrival_time datetime64[ns]\n", + "meters_elapsed float64\n", + "sec_elapsed float64\n", + "speed_mph float64\n", + "stop_meters2 float64\n", + "arrival_time2 datetime64[ns]\n", + "meters_elapsed2 float64\n", + "sec_elapsed2 float64\n", + "speed_mph2 float64\n", + "speed_diff float64\n", + "dtype: object" + ] + }, + "execution_count": 38, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "speed_results_gdf.dtypes" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "d979ebea-9201-47b6-921c-408d85bd0956", + "metadata": {}, + "outputs": [], + "source": [ + "from shared_utils import rt_utils\n", + "import folium\n", + "\n", + "def make_map(gdf):\n", + " drop = [\"arrival_time\", \"arrival_time2\"]\n", + " \n", + " print(f\"# rows: {len(gdf)}\")\n", + " \n", + " print(\"rows with differences\")\n", + " display_table = gdf.sort_values(\n", + " \"segment_id\"\n", + " ).query('speed_diff != 0')[\n", + " [\"segment_id\", \"speed_mph\", \"speed_mph2\", \"speed_diff\"]]\n", + " \n", + " display(display_table)\n", + " \n", + " m = gdf.drop(columns = drop).explore(\n", + " \"speed_mph\",\n", + " tiles = \"CartoDB Positron\",\n", + " cmap = rt_utils.ZERO_THIRTY_COLORSCALE\n", + " )\n", + " \n", + " m2 = gdf.drop(columns = drop).explore(\n", + " \"speed_mph2\",\n", + " m=m\n", + " )\n", + " \n", + " folium.LayerControl().add_to(m)\n", + " \n", + " return m" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "b9842095-44f3-43b6-8afb-fd1c2dba86eb", + "metadata": {}, + "outputs": [], + "source": [ + "bbb_trips_and_shape = helpers.import_scheduled_trips(\n", + " analysis_date,\n", + " filters = [(\"trip_instance_key\", \"in\", subset_trips)],\n", + " columns = [\"shape_id\", \"shape_array_key\", \"trip_instance_key\"],\n", + " get_pandas = True\n", + ")" 
+ ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "7f6b54b8-f4fb-40b1-be8a-1bc782de9497", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'3dfd9bae3724d3f62363a8328696cb4e', '523d9d30ace49b2cc966c2cbaa8e9071'}" + ] + }, + "execution_count": 41, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(bbb_trips_and_shape[\n", + " bbb_trips_and_shape.shape_id==\"26714\"\n", + "].trip_instance_key.unique()) & set(\n", + " speed_results_gdf.trip_instance_key.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "044a5122-0f9c-4540-a2bf-e540d871d9d7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 27\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [segment_id, speed_mph, speed_mph2, speed_diff]\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "one_trip = \"523d9d30ace49b2cc966c2cbaa8e9071\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "fdba60ae-032d-4746-8b90-f53b429159cd", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 27\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [segment_id, speed_mph, speed_mph2, speed_diff]\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "one_trip = \"3dfd9bae3724d3f62363a8328696cb4e\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "0664e24f-fba9-44c6-9295-ba979fb1e2c9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'103ffb4be00deb25a90c82f92d431cb2', 'b7e313991dda4231381e05c871aced54'}" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(bbb_trips_and_shape[\n", + " bbb_trips_and_shape.shape_id==\"26751\"\n", + "].trip_instance_key.unique()) & set(\n", + " speed_results_gdf.trip_instance_key.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "a97f9878-f040-4ab3-8d8e-b982f9cf5428", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 48\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
17701623-956-118.86518624.524742-5.659556
1769952-1623-17.1393246.6323890.506934
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "1770 1623-956-1 18.865186 24.524742 -5.659556\n", + "1769 952-1623-1 7.139324 6.632389 0.506934" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "one_trip = \"103ffb4be00deb25a90c82f92d431cb2\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "be923d34-c0c8-4bd0-bf8d-ca120a6c635e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 48\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
26202149-6-1153.6893079.692118143.997188
26201376-149-15.58180816.745425-11.163617
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "26202 149-6-1 153.689307 9.692118 143.997188\n", + "26201 376-149-1 5.581808 16.745425 -11.163617" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "one_trip = \"b7e313991dda4231381e05c871aced54\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "c7f1d190-9f2c-432a-8bdc-97ad10c9ff0d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'143c56083f8e23bd7ced3a78f81d0f93',\n", + " '167d16535237b35a22bf79d466d5d740',\n", + " '2ab8b3a8c97083f616fba5b08fb36d0c',\n", + " '5403c8f1427707c364f04d3f34be3b4b',\n", + " '604be0c6fe24d8d2e2387de6186f8814',\n", + " '66c7c7215da8fc97c6e620c694aa689c',\n", + " '8c1585f261c194f978e43d39834bca68',\n", + " '9829f32559f856275dfe082b8cac9719',\n", + " 'a725e1f77de271e916519186e26ba819',\n", + " 'ac0eec3f8bd18ba7c881c96ea7651eb8',\n", + " 'dd5bb7fdf34987d8ed372fb62b112443'}" + ] + }, + "execution_count": 47, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(bbb_trips_and_shape[\n", + " bbb_trips_and_shape.shape_id==\"26793\"\n", + "].trip_instance_key.unique()) & set(\n", + " speed_results_gdf.trip_instance_key.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 48, + "id": "a240a759-6cbe-44a0-afa0-4f0247a4b9b6", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 19\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
308691499-1497-1infinfNaN
308681554-1499-1infinfNaN
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "30869 1499-1497-1 inf inf NaN\n", + "30868 1554-1499-1 inf inf NaN" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/numpy/core/function_base.py:151: RuntimeWarning: invalid value encountered in multiply\n", + " y *= step\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:931: RuntimeWarning: invalid value encountered in subtract\n", + " css = yc - yc.mean()\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:963: RuntimeWarning: invalid value encountered in double_scalars\n", + " gadf = 1 - self.adcm / adam\n", + "/opt/conda/lib/python3.9/site-packages/branca/colormap.py:193: RuntimeWarning: invalid value encountered in double_scalars\n", + " self.index = [vmin + (vmax-vmin)*i*1./(n-1) for i in range(n)]\n" + ] + } + ], + "source": [ + "one_trip = \"dd5bb7fdf34987d8ed372fb62b112443\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "6a314c42-0b32-41c6-82f8-9f03057bd94f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 19\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
25451446-1545-112.0389368.2767693.762168
25571499-1497-1infinfNaN
25461545-1546-16.0937298.198836-2.105107
25551553-1554-1infinfNaN
25561554-1499-1infinfNaN
2544698-1446-18.2064798.286934-0.080456
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "2545 1446-1545-1 12.038936 8.276769 3.762168\n", + "2557 1499-1497-1 inf inf NaN\n", + "2546 1545-1546-1 6.093729 8.198836 -2.105107\n", + "2555 1553-1554-1 inf inf NaN\n", + "2556 1554-1499-1 inf inf NaN\n", + "2544 698-1446-1 8.206479 8.286934 -0.080456" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/numpy/core/function_base.py:151: RuntimeWarning: invalid value encountered in multiply\n", + " y *= step\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:931: RuntimeWarning: invalid value encountered in subtract\n", + " css = yc - yc.mean()\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:963: RuntimeWarning: invalid value encountered in double_scalars\n", + " gadf = 1 - self.adcm / adam\n", + "/opt/conda/lib/python3.9/site-packages/branca/colormap.py:193: RuntimeWarning: invalid value encountered in double_scalars\n", + " self.index = [vmin + (vmax-vmin)*i*1./(n-1) for i in range(n)]\n" + ] + } + ], + "source": [ + "one_trip = \"167d16535237b35a22bf79d466d5d740\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 50, + "id": "ecf664e1-7dbd-42cc-88f2-a6a99340357a", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "array(['26714', '26715', '26716', '26717', '26718', '26719', '26720',\n", + " '26721', '26722', '26723', '26724', '26725', '26726', '26727',\n", + " '26729', '26730', '26731', '26732', '26733', '26734', '26735',\n", + " '26736', '26737', '26739', '26740', '26741', '26742', '26743',\n", + " '26744', '26745', '26746', '26748', '26749', '26750', '26751',\n", + " '26752', '26754', '26755', '26756', '26757', '26758', '26759',\n", + " '26761', '26762', 
'26764', '26765', '26766', '26767', '26768',\n", + " '26770', '26771', '26772', '26773', '26774', '26775', '26776',\n", + " '26778', '26779', '26780', '26783', '26784', '26791', '26793',\n", + " '26794', '26807'], dtype=object)" + ] + }, + "execution_count": 50, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbb_trips_and_shape.shape_id.unique()" + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "b25888b6-47be-4525-b9f7-fdd63d51bac3", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'2bd2f54886e8fba388fbc240ab9fb6f6',\n", + " '76da1c769ae95a9f7beddf8c767ac445',\n", + " '95003d28b91c57daf4144f2bc2e24dde',\n", + " 'd993539eceeacd7253b4b15f057af313'}" + ] + }, + "execution_count": 51, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(bbb_trips_and_shape[\n", + " bbb_trips_and_shape.shape_id==\"26776\"\n", + "].trip_instance_key.unique()) & set(\n", + " speed_results_gdf.trip_instance_key.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "f2c676b2-32cf-4f61-a7cf-ab02f815e1fb", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 30\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
171651636-539-114.24840810.8592233.389184
171661636-539-114.19829610.8298223.368473
171671636-539-214.23007310.8486693.381403
1719021-4-1infinfNaN
171914-43-1infinfNaN
17162532-1636-110.85922314.158228-3.299005
17163532-1636-110.81341814.047013-3.233595
17164532-1636-210.82957014.113374-3.283805
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "17165 1636-539-1 14.248408 10.859223 3.389184\n", + "17166 1636-539-1 14.198296 10.829822 3.368473\n", + "17167 1636-539-2 14.230073 10.848669 3.381403\n", + "17190 21-4-1 inf inf NaN\n", + "17191 4-43-1 inf inf NaN\n", + "17162 532-1636-1 10.859223 14.158228 -3.299005\n", + "17163 532-1636-1 10.813418 14.047013 -3.233595\n", + "17164 532-1636-2 10.829570 14.113374 -3.283805" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/numpy/core/function_base.py:151: RuntimeWarning: invalid value encountered in multiply\n", + " y *= step\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:931: RuntimeWarning: invalid value encountered in subtract\n", + " css = yc - yc.mean()\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:963: RuntimeWarning: invalid value encountered in double_scalars\n", + " gadf = 1 - self.adcm / adam\n", + "/opt/conda/lib/python3.9/site-packages/branca/colormap.py:193: RuntimeWarning: invalid value encountered in double_scalars\n", + " self.index = [vmin + (vmax-vmin)*i*1./(n-1) for i in range(n)]\n" + ] + } + ], + "source": [ + "one_trip = \"76da1c769ae95a9f7beddf8c767ac445\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "dcdded1c-abef-4f52-ad57-4c9d59a02c0c", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 30\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
3046941-469-14.0043974.105137-0.100740
30470469-1200-111.42792310.9255960.502326
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "30469 41-469-1 4.004397 4.105137 -0.100740\n", + "30470 469-1200-1 11.427923 10.925596 0.502326" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "one_trip = \"d993539eceeacd7253b4b15f057af313\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "429fbd3f-2aef-41ea-afe9-51e8eabe0d37", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'019f9c6edb777d117f355db69a157293',\n", + " '2cae14be171e33a905bbf6e4491594c1',\n", + " '47e0f2bcdf735d7ed20155e10bbc469d',\n", + " '5f815e872bedb557ea592917806dcba3',\n", + " '93a7b7befc2c9adf4e3cbf3242504ac0',\n", + " 'a703ab9a7275b50e56b234b72b60a7ac',\n", + " 'e33bfee4abc73cae23830c0f66aee6d6',\n", + " 'f598cd73601dc63a43d2df6c1356e4bc',\n", + " 'f7f86e477bc93b582b24dd512973f356'}" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "set(bbb_trips_and_shape[\n", + " bbb_trips_and_shape.shape_id==\"26755\"\n", + "].trip_instance_key.unique()) & set(\n", + " speed_results_gdf.trip_instance_key.unique())" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "28c48e37-bbb8-4190-868a-4fe80d6f25bf", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 16\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
344971148-1538-125.67803220.0755535.602480
344871150-303-113.4169902.09897711.318013
344991539-1566-116.1126652.48433213.628334
345011541-1543-1infinfNaN
345021543-1409-1infinfNaN
345001566-1541-12.1309123.123959-0.993047
34494236-837-117.21177620.080405-2.868629
34493689-236-112.93980220.453235-7.513433
34495837-840-126.47535320.1212686.354085
34496840-1148-126.10228619.9845626.117723
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "34497 1148-1538-1 25.678032 20.075553 5.602480\n", + "34487 1150-303-1 13.416990 2.098977 11.318013\n", + "34499 1539-1566-1 16.112665 2.484332 13.628334\n", + "34501 1541-1543-1 inf inf NaN\n", + "34502 1543-1409-1 inf inf NaN\n", + "34500 1566-1541-1 2.130912 3.123959 -0.993047\n", + "34494 236-837-1 17.211776 20.080405 -2.868629\n", + "34493 689-236-1 12.939802 20.453235 -7.513433\n", + "34495 837-840-1 26.475353 20.121268 6.354085\n", + "34496 840-1148-1 26.102286 19.984562 6.117723" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/numpy/core/function_base.py:151: RuntimeWarning: invalid value encountered in multiply\n", + " y *= step\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:931: RuntimeWarning: invalid value encountered in subtract\n", + " css = yc - yc.mean()\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:963: RuntimeWarning: invalid value encountered in double_scalars\n", + " gadf = 1 - self.adcm / adam\n", + "/opt/conda/lib/python3.9/site-packages/branca/colormap.py:193: RuntimeWarning: invalid value encountered in double_scalars\n", + " self.index = [vmin + (vmax-vmin)*i*1./(n-1) for i in range(n)]\n" + ] + } + ], + "source": [ + "one_trip = \"f7f86e477bc93b582b24dd512973f356\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "aec055a7-5e34-4a84-9a37-ac28918915e3", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 16\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
57661148-1538-113.80194213.3030770.498865
57671538-1539-113.43070113.1508950.279806
57701541-1543-1infinfNaN
57711543-1409-1infinfNaN
5763236-837-113.72584713.2236810.502165
5762689-236-111.63395013.348427-1.714477
5764837-840-113.71904613.2376760.481370
5765840-1148-113.60651113.1856910.420820
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "5766 1148-1538-1 13.801942 13.303077 0.498865\n", + "5767 1538-1539-1 13.430701 13.150895 0.279806\n", + "5770 1541-1543-1 inf inf NaN\n", + "5771 1543-1409-1 inf inf NaN\n", + "5763 236-837-1 13.725847 13.223681 0.502165\n", + "5762 689-236-1 11.633950 13.348427 -1.714477\n", + "5764 837-840-1 13.719046 13.237676 0.481370\n", + "5765 840-1148-1 13.606511 13.185691 0.420820" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/numpy/core/function_base.py:151: RuntimeWarning: invalid value encountered in multiply\n", + " y *= step\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:931: RuntimeWarning: invalid value encountered in subtract\n", + " css = yc - yc.mean()\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:963: RuntimeWarning: invalid value encountered in double_scalars\n", + " gadf = 1 - self.adcm / adam\n", + "/opt/conda/lib/python3.9/site-packages/branca/colormap.py:193: RuntimeWarning: invalid value encountered in double_scalars\n", + " self.index = [vmin + (vmax-vmin)*i*1./(n-1) for i in range(n)]\n" + ] + } + ], + "source": [ + "one_trip = \"2cae14be171e33a905bbf6e4491594c1\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "ce7ad51a-838e-45ea-ad0f-c590579186ae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "# rows: 16\n", + "rows with differences\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
segment_idspeed_mphspeed_mph2speed_diff
137461148-1538-116.479931inf-inf
137471538-1539-116.185717inf-inf
137481539-1566-112.816893inf-inf
137501541-1543-1infinfNaN
137511543-1409-15.959934inf-inf
137491566-1541-13.028403inf-inf
13743236-837-115.490598inf-inf
13742689-236-115.464641inf-inf
13744837-840-115.398930inf-inf
13745840-1148-116.610545inf-inf
\n", + "
" + ], + "text/plain": [ + " segment_id speed_mph speed_mph2 speed_diff\n", + "13746 1148-1538-1 16.479931 inf -inf\n", + "13747 1538-1539-1 16.185717 inf -inf\n", + "13748 1539-1566-1 12.816893 inf -inf\n", + "13750 1541-1543-1 inf inf NaN\n", + "13751 1543-1409-1 5.959934 inf -inf\n", + "13749 1566-1541-1 3.028403 inf -inf\n", + "13743 236-837-1 15.490598 inf -inf\n", + "13742 689-236-1 15.464641 inf -inf\n", + "13744 837-840-1 15.398930 inf -inf\n", + "13745 840-1148-1 16.610545 inf -inf" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/numpy/core/function_base.py:151: RuntimeWarning: invalid value encountered in multiply\n", + " y *= step\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:931: RuntimeWarning: invalid value encountered in subtract\n", + " css = yc - yc.mean()\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:953: RuntimeWarning: invalid value encountered in subtract\n", + " ycd = np.abs(yc - yc_med)\n", + "/opt/conda/lib/python3.9/site-packages/mapclassify/classifiers.py:959: RuntimeWarning: invalid value encountered in subtract\n", + " adam = (np.abs(self.y - np.median(self.y))).sum()\n", + "/opt/conda/lib/python3.9/site-packages/branca/colormap.py:193: RuntimeWarning: invalid value encountered in double_scalars\n", + " self.index = [vmin + (vmax-vmin)*i*1./(n-1) for i in range(n)]\n" + ] + } + ], + "source": [ + "# This seems to indicate that unknowns should not be left in our vp_usable\n", + "# we should definitely use dwell time so that these get avoided\n", + "one_trip = \"5f815e872bedb557ea592917806dcba3\" \n", + "\n", + "m = make_map(\n", + " speed_results_gdf[\n", + " speed_results_gdf.trip_instance_key == \n", + " one_trip], \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "bccbd424-1334-4f43-9716-f87eeb70bb0a", + "metadata": {}, + "outputs": 
[], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/rt_segment_speeds/logs/avg_speeds.log b/rt_segment_speeds/logs/avg_speeds.log index f29516951..37de3048e 100644 --- a/rt_segment_speeds/logs/avg_speeds.log +++ b/rt_segment_speeds/logs/avg_speeds.log @@ -221,6 +221,7 @@ 2024-06-10 10:07:51.019 | INFO | average_segment_speeds:single_day_segment_averages:180 - route dir seg avg 0:02:27.119942 2024-06-10 10:07:51.021 | INFO | average_segment_speeds:single_day_segment_averages:181 - single day segment execution time: 0:06:02.302191 2024-06-10 10:07:51.236 | INFO | __main__::32 - average rollups for 2024-05-26: 0:06:02.523599 +<<<<<<< HEAD 2024-06-13 15:29:53.097 | INFO | __main__:single_day_segment_averages:164 - shape seg avg 0:06:49.646517 2024-06-13 15:34:53.205 | INFO | __main__:single_day_segment_averages:180 - route dir seg avg 0:05:00.107094 2024-06-13 15:34:53.207 | INFO | __main__:single_day_segment_averages:181 - single day segment execution time: 0:11:49.753611 @@ -245,3 +246,7 @@ 2024-06-13 17:56:43.666 | INFO | average_segment_speeds:single_day_segment_averages:180 - route dir seg avg 0:03:21.679683 2024-06-13 17:56:43.667 | INFO | average_segment_speeds:single_day_segment_averages:181 - single day segment execution time: 0:07:53.089031 2024-06-13 17:56:44.001 | INFO | __main__::32 - average rollups for 2024-06-12: 0:07:53.425862 +2024-06-13 08:58:02.364 | INFO | average_segment_speeds:single_day_segment_averages:164 - shape seg avg 0:05:38.316470 +2024-06-13 09:02:21.542 | INFO | average_segment_speeds:single_day_segment_averages:180 - route dir 
seg avg 0:04:19.178266 +2024-06-13 09:02:21.543 | INFO | average_segment_speeds:single_day_segment_averages:181 - single day segment execution time: 0:09:57.494736 +2024-06-13 09:02:22.066 | INFO | __main__::32 - average rollups for 2023-11-15: 0:09:58.021704 diff --git a/rt_segment_speeds/logs/cut_stop_segments.log b/rt_segment_speeds/logs/cut_stop_segments.log index a3539e6cd..58aaf3164 100644 --- a/rt_segment_speeds/logs/cut_stop_segments.log +++ b/rt_segment_speeds/logs/cut_stop_segments.log @@ -26,3 +26,4 @@ 2024-06-13 14:08:18.330 | INFO | __main__::155 - cut segments 2024-06-12: 0:20:26.740837 2024-06-13 14:17:53.404 | INFO | __main__::244 - speedmap segments and proxy_stop_times: 0:06:49.991721 2024-06-13 14:23:45.919 | INFO | __main__::244 - speedmap segments and proxy_stop_times: 0:05:40.339263 +2024-06-13 08:41:30.803 | INFO | __main__::244 - speedmap segments and proxy_stop_times: 0:04:33.902220 diff --git a/rt_segment_speeds/logs/interpolate_stop_arrival.log b/rt_segment_speeds/logs/interpolate_stop_arrival.log index afcf7f494..64f04979c 100644 --- a/rt_segment_speeds/logs/interpolate_stop_arrival.log +++ b/rt_segment_speeds/logs/interpolate_stop_arrival.log @@ -46,6 +46,7 @@ 2024-06-10 09:59:27.350 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:203 - get stop arrivals 2024-05-26: 0:00:59.531471 2024-06-10 10:00:03.629 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for speedmap_segments 2024-05-26: 2024-05-26: 0:01:35.810567 2024-06-10 10:01:27.191 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-05-26: 0:01:15.588158 +<<<<<<< HEAD 2024-06-13 14:46:25.431 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:203 - get stop arrivals 2024-05-22: 0:08:49.812904 2024-06-13 14:51:21.130 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for stop_segments 2024-05-22: 2024-05-22: 0:13:45.512628 
2024-06-13 14:53:28.047 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for stop_segments 2024-05-22: 0:02:06.654773 @@ -76,3 +77,6 @@ 2024-06-13 17:36:06.737 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for speedmap_segments 2024-06-12: 2024-06-12: 0:02:47.680208 2024-06-13 17:38:19.617 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-06-12: 0:02:03.262084 2024-06-13 17:38:19.617 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-06-12: 0:02:03.262084 +2024-06-13 08:46:59.731 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:203 - get stop arrivals 2023-11-15: 0:02:04.434805 +2024-06-13 08:48:59.278 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for speedmap_segments 2023-11-15: 2023-11-15: 0:04:03.982445 +2024-06-13 08:51:59.429 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2023-11-15: 0:02:44.265009 diff --git a/rt_segment_speeds/logs/nearest_vp.log b/rt_segment_speeds/logs/nearest_vp.log index 6c12c797b..4ac2f41c2 100644 --- a/rt_segment_speeds/logs/nearest_vp.log +++ b/rt_segment_speeds/logs/nearest_vp.log @@ -54,6 +54,7 @@ 2024-06-10 09:59:27.350 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:203 - get stop arrivals 2024-05-26: 0:00:59.531471 2024-06-10 10:00:03.629 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for speedmap_segments 2024-05-26: 2024-05-26: 0:01:35.810567 2024-06-10 10:01:27.191 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-05-26: 0:01:15.588158 +<<<<<<< HEAD 2024-06-13 14:37:31.710 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:168 - nearest neighbor for stop_segments 2024-05-22: 
0:12:45.733268 2024-06-13 14:46:25.431 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:203 - get stop arrivals 2024-05-22: 0:08:49.812904 2024-06-13 14:51:21.130 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for stop_segments 2024-05-22: 2024-05-22: 0:13:45.512628 @@ -90,3 +91,7 @@ 2024-06-13 17:36:06.737 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for speedmap_segments 2024-06-12: 2024-06-12: 0:02:47.680208 2024-06-13 17:38:19.617 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-06-12: 0:02:03.262084 2024-06-13 17:38:19.617 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-06-12: 0:02:03.262084 +2024-06-13 08:44:54.443 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:168 - nearest neighbor for speedmap_segments 2023-11-15: 0:02:58.415526 +2024-06-13 08:46:59.731 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:203 - get stop arrivals 2023-11-15: 0:02:04.434805 +2024-06-13 08:48:59.278 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for speedmap_segments 2023-11-15: 2023-11-15: 0:04:03.982445 +2024-06-13 08:51:59.429 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2023-11-15: 0:02:44.265009 diff --git a/rt_segment_speeds/logs/speeds_by_segment_trip.log b/rt_segment_speeds/logs/speeds_by_segment_trip.log index 705268d63..b971a87fd 100644 --- a/rt_segment_speeds/logs/speeds_by_segment_trip.log +++ b/rt_segment_speeds/logs/speeds_by_segment_trip.log @@ -24,6 +24,7 @@ 2024-06-10 09:27:20.868 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for stop_segments 2024-05-26: 0:01:13.681538 2024-06-10 09:55:58.905 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - 
speeds by segment for rt_stop_times 2024-05-26: 0:01:08.614559 2024-06-10 10:01:27.191 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-05-26: 0:01:15.588158 +<<<<<<< HEAD 2024-06-13 14:53:28.047 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for stop_segments 2024-05-22: 0:02:06.654773 2024-06-13 15:03:53.225 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:168 - nearest neighbor for stop_segments 2024-06-12: 0:10:24.992237 2024-06-13 15:13:45.999 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:203 - get stop arrivals 2024-06-12: 0:09:49.334825 @@ -42,3 +43,4 @@ 2024-06-13 17:36:06.737 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:215 - interpolate arrivals for speedmap_segments 2024-06-12: 2024-06-12: 0:02:47.680208 2024-06-13 17:38:19.617 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-06-12: 0:02:03.262084 2024-06-13 17:38:19.617 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2024-06-12: 0:02:03.262084 +2024-06-13 08:51:59.429 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:174 - speeds by segment for speedmap_segments 2023-11-15: 0:02:44.265009