# %% [markdown]
# # Big Blue Bus nearest neighbor comparison
#
# * What happens if every vehicle position (vp) of a trip may be picked as
#   the nearest neighbor, instead of first removing the opposite direction?
# * The monotonically increasing condition is enforced later; could it
#   catch the errors on its own?

# %%
import sys

import geopandas as gpd
import numpy as np
import pandas as pd

from segment_speed_utils import helpers, neighbor
from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT
from shared_utils import rt_dates

# `nearest_vp_to_stop` is imported from the local scripts/ folder,
# so that folder has to be on the path before the import runs.
sys.path.append("scripts/")
import nearest_vp_to_stop

WGS84 = "EPSG:4326"
analysis_date = rt_dates.DATES["apr2024"]

# %%
# Scheduled trips for Big Blue Bus only; everything downstream is
# restricted to these trip_instance_keys.
bbb_trips = helpers.import_scheduled_trips(
    analysis_date,
    filters = [("name", "==", "Big Blue Bus Schedule")],
    columns = ["gtfs_dataset_key", "trip_instance_key"],
    get_pandas = True,
)

# NOTE(review): `gtfs_dataset_key` is requested but
# `schedule_gtfs_dataset_key` is read back -- presumably the helper renames
# the column; confirm against `helpers.import_scheduled_trips`.
bbb_key = bbb_trips["schedule_gtfs_dataset_key"].iloc[0]
subset_trips = bbb_trips["trip_instance_key"].unique()
# %% [markdown]
# ## Construct proxy stop times

# %%
def construct_stop_times(
    analysis_date: str,
    subset_trips: list
) -> gpd.GeoDataFrame:
    """
    Stack the stop times rows needed for the selected trips.

    Concatenates the RT stop times (all trips) with the additional
    proxy rows used for speedmap segments, keeping only rows whose
    `trip_instance_key` is in `subset_trips`.

    Parameters
    ----------
    analysis_date : date string passed through to the
        `nearest_vp_to_stop` loaders.
    subset_trips : list-like of trip_instance_key values to keep.

    Returns
    -------
    One frame with both sets of rows and a fresh 0..n-1 index.
    """
    # `@subset_trips` is resolved by `query` from this function's locals,
    # so the query calls must stay directly in this scope.
    rt_stop_times = (
        nearest_vp_to_stop.stop_times_for_all_trips(analysis_date)
        .query('trip_instance_key in @subset_trips')
    )

    proxy_stop_times = (
        nearest_vp_to_stop.stop_times_for_speedmaps(analysis_date)
        .query('trip_instance_key in @subset_trips')
    )

    return pd.concat(
        [rt_stop_times, proxy_stop_times],
        axis=0, ignore_index=True
    )

# %%
bbb_stop_times = construct_stop_times(analysis_date, subset_trips)

# %% [markdown]
# ## Merge stop and get nearest snap (`neighbor`)

# %%
# This is with opposite direction removed
gdf = neighbor.merge_stop_vp_for_nearest_neighbor(
    bbb_stop_times,
    analysis_date,
    filters = [[("trip_instance_key", "in", subset_trips)]]
)

# %%
# Try a version without removing vp points
# and allow nearest neighbor to select from any direction
vp_full = gpd.read_parquet(
    f"{SEGMENT_GCS}condensed/vp_condensed_{analysis_date}.parquet",
    columns = ["trip_instance_key", "vp_idx",
               "location_timestamp_local",
               "geometry"],
    filters = [[("trip_instance_key", "in", subset_trips)]]
).rename(columns = {
    "vp_idx": "trip_vp_idx",
    "geometry": "trip_geometry"
}).set_geometry("trip_geometry").to_crs(WGS84)

# `vp_full` is merged as-is: its geometry column is already named
# `trip_geometry`, so there is no `geometry` column left to rename here.
gdf2 = pd.merge(
    bbb_stop_times.rename(
        columns = {"geometry": "stop_geometry"}
    ).set_geometry("stop_geometry").to_crs(WGS84),
    vp_full,
    on = ["trip_instance_key"],
    how = "inner"
)

# %%
# this list comes from an adapted version of
# GTFS_DATA_DICT stop_pair_cols
merge_cols = ["stop_id", "stop_pair", "stop_sequence", "stop_sequence1",
              "stop_geometry", "stop_primary_direction",
              "shape_array_key", "trip_instance_key"]

# Side by side: direction-filtered vp (`vp_idx`, `vp_geometry`) from `gdf`
# and the full trip vp (`trip_vp_idx`, `trip_geometry`) from `gdf2`.
gdf_results = pd.merge(
    gdf,
    gdf2,
    on = merge_cols,
    how = "inner"
)

# %%
gdf_results.dtypes

# %%
# Nearest vp to the stop when the opposite direction is removed ...
nearest_vp_idx = np.vectorize(neighbor.add_nearest_vp_idx)(
    gdf_results.vp_geometry, gdf_results.stop_geometry, gdf_results.vp_idx
)

# ... and when every vp of the trip is a candidate.
nearest_vp_idx2 = np.vectorize(neighbor.add_nearest_vp_idx)(
    gdf_results.trip_geometry, gdf_results.stop_geometry, gdf_results.trip_vp_idx
)

# %%
gdf_results = gdf_results.assign(
    nearest_vp_idx = nearest_vp_idx,
    nearest_vp_idx2 = nearest_vp_idx2
)

# %%
# (all rows, rows where the two approaches pick a different vp)
gdf_results.shape, gdf_results[
    gdf_results.nearest_vp_idx != gdf_results.nearest_vp_idx2
].shape

# %% [markdown]
# If we allow even opposite-direction vp to be candidates for the nearest
# neighbor, about 4.1% of the rows select a different `vp_idx`.
#
# About 2/3 of these differ by exactly -1 or +1, which probably wouldn't
# change the result much. The other 1/3 differ by more than 1, which could
# change the result: the trio of points used to interpolate tolerates a
# shift of 1, but not more.
# %%
# Rows where the two nearest-vp approaches disagree, with the size of
# the disagreement.
diff_df = gdf_results[
    gdf_results.nearest_vp_idx != gdf_results.nearest_vp_idx2
].assign(
    vp_idx_diff = lambda d: d.nearest_vp_idx - d.nearest_vp_idx2
)

# (rows that differ, rows that differ by more than 1)
diff_df["vp_idx_diff"].shape, diff_df[
    diff_df.vp_idx_diff.abs() > 1].shape

# %%
# One unit-wide bin per integer difference. The bin edges must run one
# past the maximum, otherwise the largest difference is left out.
diff_df.vp_idx_diff.hist(bins = range(
    int(diff_df.vp_idx_diff.min()),
    int(diff_df.vp_idx_diff.max()) + 2,
    1)
)

# %%
diff_df.vp_idx_diff.describe()

# %% [markdown]
# ## Add trio (`neighbor`)

# %%
import shapely

def add_trio_cols(
    gdf2: gpd.GeoDataFrame,
    nearest_vp_col: str,
) -> gpd.GeoDataFrame:
    """
    Attach the vp trio around the nearest vp for every row.

    For each row, `neighbor.add_trio` is called with the value in
    `nearest_vp_col` and the row's full-trip arrays (`trip_vp_idx`,
    `location_timestamp_local`, coordinates of `trip_geometry`).

    Parameters
    ----------
    gdf2 : frame holding `nearest_vp_col`, `trip_vp_idx`,
        `location_timestamp_local` and `trip_geometry`.
    nearest_vp_col : name of the column with the nearest vp index to
        build the trio around.

    Returns
    -------
    `gdf2` with `vp_idx_trio`, `location_timestamp_local_trio` and
    `vp_coords_trio` (LineString, WGS84) added, and the three full-trip
    columns dropped.
    """
    vp_trio_series = []
    time_trio_series = []
    coords_trio_series = []

    for row in gdf2.itertuples():
        vp_trio, time_trio, coords_trio = neighbor.add_trio(
            getattr(row, nearest_vp_col),
            np.asarray(row.trip_vp_idx),
            np.asarray(row.location_timestamp_local),
            np.asarray(row.trip_geometry.coords),
        )

        vp_trio_series.append(vp_trio)
        time_trio_series.append(time_trio)
        coords_trio_series.append(shapely.LineString(coords_trio))

    drop_cols = [
        "location_timestamp_local",
        "trip_vp_idx", "trip_geometry"
    ]

    # `assign` aligns a Series on index, so the GeoSeries must carry the
    # frame's own index; a default RangeIndex would only line up when the
    # frame happens to be indexed 0..n-1.
    trio_lines = gpd.GeoSeries(
        coords_trio_series, index = gdf2.index, crs = WGS84
    )

    return gdf2.assign(
        vp_idx_trio = vp_trio_series,
        location_timestamp_local_trio = time_trio_series,
        vp_coords_trio = trio_lines,
    ).drop(columns = drop_cols)

# %%
gdf_results1 = add_trio_cols(gdf_results, "nearest_vp_idx")
gdf_results2 = add_trio_cols(gdf_results, "nearest_vp_idx2")

# %%
# Same rows, trio columns from both approaches side by side
# (columns suffixed with 2 come from the all-directions version).
trio_results = pd.merge(
    gdf_results1,
    gdf_results2.rename(columns = {
        "vp_idx": "vp_idx2",
        "vp_idx_trio": "vp_idx_trio2",
        "location_timestamp_local_trio": "location_timestamp_local_trio2",
        "vp_coords_trio": "vp_coords_trio2"
    }),
    on = merge_cols + ["vp_geometry", "nearest_vp_idx", "nearest_vp_idx2"],
    how = "inner",
)

# %% [markdown]
# ## Interpolate arrival

# %%
trio_results.dtypes
# %%
PROJECT_CRS = "EPSG:3310"
import interpolate_stop_arrival

trip_stop_cols = ["trip_instance_key", "stop_sequence", "stop_sequence1"]

def interpolate_me(
    df: gpd.GeoDataFrame,
    analysis_date: str,
):
    """
    Interpolate a stop arrival time for every row of `df`.

    Steps:
    1. reproject `stop_geometry` and `vp_coords_trio` to PROJECT_CRS
    2. attach the scheduled shape geometry on `shape_array_key`
       (shapes without geometry are dropped; inner merge)
    3. per row, call `interpolate_stop_arrival.project_points_onto_shape`
       to get `stop_meters` and the interpolated arrival time
    4. sort by trip / stop sequence and pass the result through
       `enforce_monotonicity_and_interpolate_across_stops`

    Returns the frame produced by step 4.
    """
    projected = df.assign(
        stop_geometry = df.stop_geometry.to_crs(PROJECT_CRS),
        vp_coords_trio = df.vp_coords_trio.to_crs(PROJECT_CRS)
    )

    shape_lines = (
        helpers.import_scheduled_shapes(
            analysis_date,
            columns = ["shape_array_key", "geometry"],
            crs = PROJECT_CRS
        )
        .dropna(subset="geometry")
        .rename(columns = {"geometry": "shape_geometry"})
    )

    with_shape = pd.merge(
        projected,
        shape_lines,
        on = "shape_array_key",
        how = "inner"
    )

    # release the inputs that are no longer needed
    del projected, shape_lines

    stop_meters_series = []
    stop_arrival_series = []

    row_inputs = zip(
        with_shape["stop_geometry"],
        with_shape["vp_coords_trio"],
        with_shape["shape_geometry"],
        with_shape["location_timestamp_local_trio"],
    )

    for stop_point, trio_line, shape_line, trio_times in row_inputs:
        stop_meters, interpolated_arrival = (
            interpolate_stop_arrival.project_points_onto_shape(
                stop_point, trio_line, shape_line, trio_times
            )
        )
        stop_meters_series.append(stop_meters)
        stop_arrival_series.append(interpolated_arrival)

    keep_cols = trip_stop_cols + [
        "shape_array_key", "stop_id",
        "stop_meters", "arrival_time"
    ]

    arrivals = with_shape.assign(
        stop_meters = stop_meters_series,
        arrival_time = stop_arrival_series,
    )
    arrivals = (
        arrivals[keep_cols]
        .sort_values(trip_stop_cols)
        .reset_index(drop=True)
    )

    return interpolate_stop_arrival.enforce_monotonicity_and_interpolate_across_stops(
        arrivals, trip_stop_cols)
# %%
# Opposite direction removed before nearest neighbor
gdf_interp1 = interpolate_me(gdf_results1, analysis_date)

# %%
# All vp of the trip allowed as nearest neighbor
gdf_interp2 = interpolate_me(gdf_results2, analysis_date)

# %%
# Both frames must exist before they are inspected, otherwise this cell
# raises a NameError on a fresh kernel (Restart & Run All).
gdf_interp1.dtypes, gdf_interp2.dtypes

# %%
# Columns suffixed with 2 come from the all-directions version.
interp_results = pd.merge(
    gdf_interp1,
    gdf_interp2.rename(columns = {
        "stop_meters": "stop_meters2",
        "arrival_time": "arrival_time2"
    }),
    on = trip_stop_cols + ["shape_array_key", "stop_id"],
    how = "inner",
)

# %% [markdown]
# About 4.5% of rows have a different interpolated arrival time.
#
# Upstream, about 4.1% of rows had a different nearest vp selected, and
# only about 1/3 of those differed by more than 1. Even so, the overall
# share of differing interpolated arrival times comes out at about 4.5%.
# %%
# (all rows, rows whose interpolated arrival time differs)
arrival_differs = interp_results.arrival_time != interp_results.arrival_time2
interp_results.shape, interp_results[arrival_differs].shape

# %% [markdown]
# ## Speeds

# %%
from segment_speed_utils import segment_calcs

def speed_calculation(df: pd.DataFrame):
    """
    Derive a speed between each stop and the next stop of the same trip.

    `arrival_time` is converted to seconds, rows are ordered by
    `trip_stop_cols`, and each row is paired with the following row of
    its trip (shift of -1 within `trip_instance_key`). Elapsed meters
    and seconds are added and the frame is handed to
    `segment_calcs.derive_speed`.

    The last stop of every trip has no following row, so its
    `subseq_*` values are missing.
    """
    ordered = (
        segment_calcs.convert_timestamp_to_seconds(df, ["arrival_time"])
        .sort_values(trip_stop_cols)
        .reset_index(drop=True)
    )

    # values at the next stop of the same trip
    next_stop = (
        ordered
        .groupby(["trip_instance_key"], observed=True, group_keys=False)
        [["arrival_time_sec", "stop_meters"]]
        .shift(-1)
    )

    ordered = ordered.assign(
        subseq_arrival_time_sec = next_stop["arrival_time_sec"],
        subseq_stop_meters = next_stop["stop_meters"],
    )

    with_elapsed = ordered.assign(
        meters_elapsed = ordered.subseq_stop_meters - ordered.stop_meters,
        sec_elapsed = ordered.subseq_arrival_time_sec - ordered.arrival_time_sec,
    )

    return segment_calcs.derive_speed(
        with_elapsed,
        ("stop_meters", "subseq_stop_meters"),
        ("arrival_time_sec", "subseq_arrival_time_sec")
    )

# %%
# helper columns that are not needed for the comparison
drop_me = [
    "arrival_time_sec", "subseq_arrival_time_sec",
    "subseq_stop_meters"
]
speed1 = speed_calculation(gdf_interp1).drop(columns = drop_me)
speed2 = speed_calculation(gdf_interp2).drop(columns = drop_me)

# %%
# Columns suffixed with 2 come from the all-directions version.
renamed_for_merge = {
    "stop_meters": "stop_meters2",
    "arrival_time": "arrival_time2",
    "meters_elapsed": "meters_elapsed2",
    "sec_elapsed": "sec_elapsed2",
    "speed_mph": "speed_mph2"
}

speed_results = pd.merge(
    speed1,
    speed2.rename(columns = renamed_for_merge),
    on = trip_stop_cols + ["shape_array_key", "stop_id"],
    how = "inner",
)

# %% [markdown]
# This magnifies to about 5.5% different speeds

# %%
# (all rows, rows with a usable speed that differs between versions)
speed_differs = (
    (speed_results.speed_mph != speed_results.speed_mph2) &
    (speed_results.speed_mph.notna()) &
    (speed_results.speed_mph < 100_000) # remove infinity
)
speed_results.shape, speed_results[speed_differs].shape

# %%
speed_results = speed_results.assign(
    speed_diff = speed_results.speed_mph - speed_results.speed_mph2
)

# %%
speed_results.speed_diff.describe()

# %%
nonzero_diff = speed_results[speed_results.speed_diff != 0]
nonzero_diff.speed_diff.hist(bins=range(-70, 70, 1))
# %%
# Rows with a usable speed where the two versions disagree
speed_results[
    (speed_results.speed_diff != 0) &
    (speed_results.speed_mph.notna()) &
    (speed_results.speed_mph < 100_000)
].sort_values(["trip_instance_key", "arrival_time"])[
    ["arrival_time", "arrival_time2",
     "speed_mph", "speed_mph2", "speed_diff"]]

# %%
SEGMENT_FILE = GTFS_DATA_DICT.speedmap_segments.segments_file
segment_gdf = gpd.read_parquet(
    f"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet",
    filters = [[("trip_instance_key", "in", subset_trips)]]
)

# %%
from segment_speed_utils import gtfs_schedule_wrangling
speed_results2 = gtfs_schedule_wrangling.fill_missing_stop_sequence1(
    speed_results)

# %%
segment_gdf.shape

# %%
# Check how many segments / speed rows find a match before merging
pd.merge(
    segment_gdf,
    speed_results2,
    on = trip_stop_cols + ["shape_array_key"],
    how = "outer",
    indicator = True
)._merge.value_counts()

# %%
speed_results_gdf = pd.merge(
    segment_gdf,
    speed_results2,
    on = trip_stop_cols + ["shape_array_key"],
    how = "inner",
)

# %%
speed_results_gdf.dtypes

# %%
from shared_utils import rt_utils

def make_map(gdf, speed_col):
    """
    Interactive map of `gdf` colored by `speed_col`.

    The `arrival_time` and `arrival_time2` columns are dropped before
    calling `explore` -- presumably because timestamp columns cannot be
    serialized for the map; TODO confirm. Both columns must be present.

    Returns the map object created by `explore`.
    """
    drop = ["arrival_time", "arrival_time2"]
    m = gdf.drop(columns = drop).explore(
        speed_col,
        tiles = "CartoDB Positron",
        cmap = rt_utils.ZERO_THIRTY_COLORSCALE
    )

    return m

# %%
bbb_trips_and_shape = helpers.import_scheduled_trips(
    analysis_date,
    filters = [("trip_instance_key", "in", subset_trips)],
    columns = ["shape_id", "shape_array_key", "trip_instance_key"],
    get_pandas = True
)

# %%
bbb_trips_and_shape[
    bbb_trips_and_shape.shape_id=="26714"
].trip_instance_key.unique()

# %%
# Each trip below is mapped twice: `speed_mph` (opposite direction
# removed) and then `speed_mph2` (all vp allowed as nearest neighbor).
make_map(
    speed_results_gdf[
        speed_results_gdf.trip_instance_key ==
        "523d9d30ace49b2cc966c2cbaa8e9071"],
    "speed_mph"
)

# %%
make_map(
    speed_results_gdf[
        speed_results_gdf.trip_instance_key ==
        "523d9d30ace49b2cc966c2cbaa8e9071"],
    "speed_mph2"
)

# %%
# This cell previously mapped `speed_mph2`, identical to the next cell;
# `speed_mph` gives the same side-by-side pair as every other trip.
make_map(
    speed_results_gdf[
        speed_results_gdf.trip_instance_key ==
        "3dfd9bae3724d3f62363a8328696cb4e"],
    "speed_mph"
)

# %%
make_map(
    speed_results_gdf[
        speed_results_gdf.trip_instance_key ==
        "3dfd9bae3724d3f62363a8328696cb4e"],
    "speed_mph2"
)

# %%
bbb_trips_and_shape[
    bbb_trips_and_shape.shape_id=="26751"].trip_instance_key.unique()

# %%
make_map(
    speed_results_gdf[
        speed_results_gdf.trip_instance_key ==
        "103ffb4be00deb25a90c82f92d431cb2"],
    "speed_mph"
)

# %%
make_map(
    speed_results_gdf[
        speed_results_gdf.trip_instance_key ==
        "103ffb4be00deb25a90c82f92d431cb2"],
    "speed_mph2"
)

# %%
bbb_trips_and_shape[
    bbb_trips_and_shape.shape_id=="26793"
].trip_instance_key.unique()

# %%
make_map(
    speed_results_gdf[
        speed_results_gdf.trip_instance_key ==
        "66c7c7215da8fc97c6e620c694aa689c"],
    "speed_mph"
)

# %%
make_map(
    speed_results_gdf[
        speed_results_gdf.trip_instance_key ==
        "66c7c7215da8fc97c6e620c694aa689c"],
    "speed_mph2"
)