diff --git a/rt_segment_speeds/37_bbb_altered_nn.ipynb b/rt_segment_speeds/37_bbb_altered_nn.ipynb deleted file mode 100644 index e1d065f6c..000000000 --- a/rt_segment_speeds/37_bbb_altered_nn.ipynb +++ /dev/null @@ -1,992 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "fe5aafc6-e153-4727-a26c-cfbf6ea892c4", - "metadata": {}, - "source": [ - "# Big Blue Bus nearest neighbor comparison\n", - "\n", - "* What would happen if we allowed all the full vp to be used to find nearest neighbor, instead of removing the opposite direction?\n", - "* The monotonically increasing condition is set later, could it catch the errors that way?" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "667c4b7f-f402-4a03-9ea6-77bfbdf84ca1", - "metadata": {}, - "outputs": [], - "source": [ - "import geopandas as gpd\n", - "import pandas as pd\n", - "\n", - "from segment_speed_utils import helpers, neighbor\n", - "from segment_speed_utils.project_vars import SEGMENT_GCS, GTFS_DATA_DICT\n", - "from shared_utils import rt_dates\n", - "\n", - "analysis_date = rt_dates.DATES[\"apr2024\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4f175b4-6125-4e4d-932d-6d8458e844ec", - "metadata": {}, - "outputs": [], - "source": [ - "import numpy as np\n", - "import sys\n", - "sys.path.append(\"scripts/\")\n", - "import nearest_vp_to_stop\n", - "\n", - "WGS84 = \"EPSG:4326\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a8d3dfbe-e380-48c8-b88b-7601f87fab9a", - "metadata": {}, - "outputs": [], - "source": [ - "bbb_trips = helpers.import_scheduled_trips(\n", - " analysis_date,\n", - " filters = [(\"name\", \"==\", \"Big Blue Bus Schedule\")],\n", - " columns = [\"gtfs_dataset_key\", \"trip_instance_key\"],\n", - " get_pandas = True\n", - ")\n", - "\n", - "bbb_key = bbb_trips.schedule_gtfs_dataset_key.iloc[0]\n", - "subset_trips = bbb_trips.trip_instance_key.unique()" - ] - }, - { - "cell_type": "markdown", - "id": "31ea87f2-08ba-485b-a35c-46cb50d20730", - "metadata": {}, - "source": [ - "## Construct proxy stop times" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "683892d5-0139-4e41-9407-6f0aa8a5b2b0", - "metadata": {}, - "outputs": [], - "source": [ - "def construct_stop_times(\n", - " analysis_date: str, \n", - " subset_trips: list\n", - ") -> gpd.GeoDataFrame:\n", - "\n", - " # Grab the relevant stop times rows\n", - " # will need to concatenate RT stop times (all trips) \n", - " # with additional segments for speedmaps\n", - " rt_stop_times = (\n", - " nearest_vp_to_stop.stop_times_for_all_trips(analysis_date)\n", - " .query('trip_instance_key in @subset_trips')\n", - " )\n", - "\n", - " proxy_stop_times = (\n", - " nearest_vp_to_stop.stop_times_for_speedmaps(analysis_date)\n", - " .query('trip_instance_key in @subset_trips')\n", - " )\n", - "\n", - " bbb_stop_times = pd.concat(\n", - " [rt_stop_times, proxy_stop_times], \n", - " axis=0, ignore_index=True\n", - " )\n", - " \n", - " return bbb_stop_times\n", - "\n", - "bbb_stop_times = construct_stop_times(analysis_date, subset_trips)" - ] - }, - { - "cell_type": "markdown", - "id": "9d7075cf-c826-4f49-9867-06bcb321df4e", - "metadata": {}, - "source": [ - "## Merge stop and get nearest snap (`neighbor`)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a48bf073-2ccd-49b9-898b-0813b09f77b5", - "metadata": {}, - "outputs": [], - "source": [ - "# This is with opposite direction removed\n", - "gdf = neighbor.merge_stop_vp_for_nearest_neighbor(\n", - " bbb_stop_times,\n", - " analysis_date,\n", - " filters = [[(\"trip_instance_key\", \"in\", subset_trips)]]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4b2c2b8c-0bb7-4066-ae12-73e92043e2f7", - "metadata": {}, - "outputs": [], - "source": [ - "# Try a version without removing vp points\n", - "# and allow nearest neighbor to select from any direction\n", - "vp_full = gpd.read_parquet(\n", - " f\"{SEGMENT_GCS}condensed/vp_condensed_{analysis_date}.parquet\",\n", - " columns = [\"trip_instance_key\", \"vp_idx\", \n", - " \"location_timestamp_local\", \n", - " \"geometry\"],\n", - " filters = [[(\"trip_instance_key\", \"in\", subset_trips)]]\n", - ").rename(columns = {\n", - " \"vp_idx\": \"trip_vp_idx\",\n", - " \"geometry\": \"trip_geometry\"\n", - "}).set_geometry(\"trip_geometry\").to_crs(WGS84)\n", - "\n", - "gdf2 = pd.merge(\n", - " bbb_stop_times.rename(\n", - " columns = {\n", - " \"geometry\": \"stop_geometry\"}\n", - " ).set_geometry(\"stop_geometry\").to_crs(WGS84),\n", - " vp_full.rename(\n", - " columns = {\n", - " \"geometry\": \"vp_geometry\"\n", - " }),\n", - " on = [\"trip_instance_key\"],\n", - " how = \"inner\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "00a7aa0c-7303-461a-a6a2-f62464fba9e3", - "metadata": {}, - "outputs": [], - "source": [ - "# this list comes from an adapted version of \n", - "# GTFS_DATA_DICT stop_pair_cols\n", - "merge_cols = [\"stop_id\", \"stop_pair\", \"stop_sequence\", \"stop_sequence1\", \n", - " \"stop_geometry\", \"stop_primary_direction\", \n", - " \"shape_array_key\", \"trip_instance_key\"]\n", - "\n", - "gdf_results = pd.merge(\n", - " gdf,\n", - " gdf2,\n", - " on = merge_cols,\n", - " how = \"inner\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cb28d31b-3dc2-4bfa-9d81-a574db9fe270", - "metadata": {}, - "outputs": [], - "source": [ - "gdf_results.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c13e674f-4d22-4b15-b053-ab0f5d9e07bd", - "metadata": {}, - "outputs": [], - "source": [ - "nearest_vp_idx = np.vectorize(neighbor.add_nearest_vp_idx)( \n", - " gdf_results.vp_geometry, gdf_results.stop_geometry, gdf_results.vp_idx\n", - ")\n", - "\n", - "nearest_vp_idx2 = np.vectorize(neighbor.add_nearest_vp_idx)( \n", - " gdf_results.trip_geometry, gdf_results.stop_geometry, gdf_results.trip_vp_idx\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "86f44322-c920-4efa-ba42-5bf491c5696d", - "metadata": {}, - "outputs": [], - "source": [ - "gdf_results = gdf_results.assign(\n", - " nearest_vp_idx = nearest_vp_idx,\n", - " nearest_vp_idx2 = nearest_vp_idx2\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf3efe13-c896-4a25-94fc-faa2c18c3ac4", - "metadata": {}, - "outputs": [], - "source": [ - "gdf_results.shape, gdf_results[\n", - " gdf_results.nearest_vp_idx != gdf_results.nearest_vp_idx2\n", - "].shape" - ] - }, - { - "cell_type": "markdown", - "id": "dceeb45c-8e6c-4870-a823-8557d6f40dcf", - "metadata": {}, - "source": [ - "If we allow even opposite directions to show up in the nearest neighbor, about 4.1% of the rows would have different results for which `vp_idx` is selected.\n", - "\n", - "About 2/3 of these are -1 or +1, which probably wouldn't change the result that much. The other 1/3 have differences more than 1, which could change the result, since the trio of points allows for a difference of 1, but not more to interpolate." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b75e6636-d44a-4860-b13c-abec6bd471a9", - "metadata": {}, - "outputs": [], - "source": [ - "gdf_results.shape, gdf_results[\n", - " gdf_results.nearest_vp_idx != gdf_results.nearest_vp_idx2].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "07fa38b2-c87e-4e14-b875-d4604d768bd2", - "metadata": {}, - "outputs": [], - "source": [ - "diff_df = gdf_results[\n", - " gdf_results.nearest_vp_idx != gdf_results.nearest_vp_idx2]\n", - "\n", - "diff_df = diff_df.assign(\n", - " vp_idx_diff = diff_df.nearest_vp_idx - diff_df.nearest_vp_idx2\n", - ")\n", - "\n", - "diff_df[\"vp_idx_diff\"].shape, diff_df[\n", - " abs(diff_df.vp_idx_diff) > 1].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a6a1fc5f-8cea-4f4f-af6f-9d3aefb0518d", - "metadata": {}, - "outputs": [], - "source": [ - "diff_df.vp_idx_diff.hist(bins = range(\n", - " diff_df.vp_idx_diff.min(), \n", - " diff_df.vp_idx_diff.max(), \n", - " 1)\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5b45be2e-eb93-454c-83ed-87e4269ce293", - "metadata": {}, - "outputs": [], - "source": [ - "diff_df.vp_idx_diff.describe()" - ] - }, - { - "cell_type": "markdown", - "id": "cb4c82dc-fa6a-45dd-9b89-f3938a1c27cc", - "metadata": {}, - "source": [ - "## Add trio (`neighbor`)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cfdcfc00-b343-42ab-8a9c-0f8b76e79768", - "metadata": {}, - "outputs": [], - "source": [ - "import shapely\n", - "def add_trio_cols(\n", - " gdf2: gpd.GeoDataFrame, \n", - " nearest_vp_col: str,\n", - "):\n", - "\n", - " nearest_vp_idx_series = [] \n", - " vp_trio_series = []\n", - " time_trio_series = []\n", - " coords_trio_series = []\n", - " \n", - " for row in gdf2.itertuples():\n", - " vp_trio, time_trio, coords_trio = neighbor.add_trio(\n", - " getattr(row, nearest_vp_col), \n", - " np.asarray(getattr(row, \"trip_vp_idx\")),\n", - " np.asarray(getattr(row, \"location_timestamp_local\")),\n", - " np.asarray(getattr(row, \"trip_geometry\").coords),\n", - " )\n", - " \n", - " vp_trio_series.append(vp_trio)\n", - " time_trio_series.append(time_trio)\n", - " coords_trio_series.append(shapely.LineString(coords_trio))\n", - " \n", - " drop_cols = [\n", - " \"location_timestamp_local\",\n", - " \"trip_vp_idx\", \"trip_geometry\"\n", - " ]\n", - " \n", - " gdf2 = gdf2.assign(\n", - " vp_idx_trio = vp_trio_series,\n", - " location_timestamp_local_trio = time_trio_series,\n", - " vp_coords_trio = gpd.GeoSeries(coords_trio_series, crs = WGS84)\n", - " ).drop(columns = drop_cols)\n", - " \n", - " return gdf2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c40c45bc-8639-4154-adaa-114d0d96e758", - "metadata": {}, - "outputs": [], - "source": [ - "gdf_results1 = add_trio_cols(gdf_results, \"nearest_vp_idx\")\n", - "gdf_results2 = add_trio_cols(gdf_results, \"nearest_vp_idx2\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cb963cd1-3275-49aa-bb01-04a70dcc46fa", - "metadata": {}, - "outputs": [], - "source": [ - "trio_results = pd.merge(\n", - " gdf_results1,\n", - " gdf_results2.rename(columns = {\n", - " \"vp_idx\": \"vp_idx2\",\n", - " \n", - " \"vp_idx_trio\": \"vp_idx_trio2\",\n", - " \"location_timestamp_local_trio\": \"location_timestamp_local_trio2\",\n", - " \"vp_coords_trio\": \"vp_coords_trio2\"\n", - " }),\n", - " on = merge_cols + [\"vp_geometry\", \"nearest_vp_idx\", \"nearest_vp_idx2\"],\n", - " how = \"inner\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "7392a95e-de47-47a9-8990-128cf1b8b7f5", - "metadata": {}, - "source": [ - "## Interpolate arrival" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6cee18a4-09fd-42cf-a6c9-51d46941e4b6", - "metadata": {}, - "outputs": [], - "source": [ - "trio_results.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b53a7be5-145e-438b-bbf9-c85cc335663b", - "metadata": {}, - "outputs": [], - "source": [ - "PROJECT_CRS = \"EPSG:3310\"\n", - "import interpolate_stop_arrival\n", - "\n", - "trip_stop_cols = [\"trip_instance_key\", \"stop_sequence\", \"stop_sequence1\"]\n", - "\n", - "def interpolate_me(\n", - " df: gpd.GeoDataFrame,\n", - " analysis_date: str,\n", - "):\n", - " df = df.assign(\n", - " stop_geometry = df.stop_geometry.to_crs(PROJECT_CRS),\n", - " vp_coords_trio = df.vp_coords_trio.to_crs(PROJECT_CRS)\n", - " )\n", - "\n", - " shapes = helpers.import_scheduled_shapes(\n", - " analysis_date,\n", - " columns = [\"shape_array_key\", \"geometry\"],\n", - " crs = PROJECT_CRS\n", - " ).dropna(subset=\"geometry\")\n", - "\n", - " gdf = pd.merge(\n", - " df,\n", - " shapes.rename(columns = {\"geometry\": \"shape_geometry\"}),\n", - " on = \"shape_array_key\",\n", - " how = \"inner\"\n", - " )\n", - "\n", - " del df, shapes\n", - "\n", - " stop_meters_series = []\n", - " stop_arrival_series = []\n", - " \n", - " for row in gdf.itertuples():\n", - " \n", - " stop_meters, interpolated_arrival = interpolate_stop_arrival.project_points_onto_shape(\n", - " getattr(row, \"stop_geometry\"),\n", - " getattr(row, \"vp_coords_trio\"),\n", - " getattr(row, \"shape_geometry\"),\n", - " getattr(row, \"location_timestamp_local_trio\")\n", - " )\n", - " \n", - " stop_meters_series.append(stop_meters)\n", - " stop_arrival_series.append(interpolated_arrival)\n", - "\n", - " results = gdf.assign(\n", - " stop_meters = stop_meters_series,\n", - " arrival_time = stop_arrival_series,\n", - " )[trip_stop_cols + [\"shape_array_key\", \"stop_id\", \n", - " \"stop_meters\", \"arrival_time\"]\n", - " ].sort_values(\n", - " trip_stop_cols\n", - " ).reset_index(drop=True)\n", - " \n", - " \n", - " results = interpolate_stop_arrival.enforce_monotonicity_and_interpolate_across_stops(\n", - " results, trip_stop_cols)\n", - " \n", - " return results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f139dbd5-4a8a-4bfb-b82a-63e3fd09cc86", - "metadata": {}, - "outputs": [], - "source": [ - "gdf_interp1 = interpolate_me(gdf_results1, analysis_date)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f0a23f59-96f5-4cc4-87e1-8b81a63f51ab", - "metadata": {}, - "outputs": [], - "source": [ - "gdf_interp1.dtypes, gdf_interp2.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a19363bc-a2be-4771-90a8-b1ca928bac53", - "metadata": {}, - "outputs": [], - "source": [ - "gdf_interp2 = interpolate_me(gdf_results2, analysis_date)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "be982784-9afc-4785-9b76-fe84d5fd9254", - "metadata": {}, - "outputs": [], - "source": [ - "interp_results = pd.merge(\n", - " gdf_interp1,\n", - " gdf_interp2.rename(columns = {\n", - " \"stop_meters\": \"stop_meters2\",\n", - " \"arrival_time\": \"arrival_time2\"\n", - " }),\n", - " on = trip_stop_cols + [\"shape_array_key\", \"stop_id\"],\n", - " how = \"inner\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "8c9a52ef-fd66-4380-bfa7-c9ef3aaab535", - "metadata": {}, - "source": [ - "About 4.5% have different interpolated arrival time results. Before, about 4.1% have different nearest vp selected, and about 1/3 of those had differences greater than 1. \n", - "\n", - "But overall, it results in about 4.5% of different interpolated arrival times." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "164b72a0-c254-4813-bb22-ff9b006b916c", - "metadata": {}, - "outputs": [], - "source": [ - "interp_results.shape, interp_results[\n", - " interp_results.arrival_time != interp_results.arrival_time2\n", - "].shape" - ] - }, - { - "cell_type": "markdown", - "id": "a781f93c-cbbf-4947-a756-36db28685de4", - "metadata": {}, - "source": [ - "## Speeds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "52216905-ae5a-47e2-8876-e28f6028b624", - "metadata": {}, - "outputs": [], - "source": [ - "from segment_speed_utils import segment_calcs\n", - "\n", - "def speed_calculation(df: pd.DataFrame):\n", - " trip_cols = [\"trip_instance_key\"]\n", - "\n", - " df = segment_calcs.convert_timestamp_to_seconds(\n", - " df, [\"arrival_time\"]\n", - " ).sort_values(trip_stop_cols).reset_index(drop=True)\n", - " \n", - " df = df.assign(\n", - " subseq_arrival_time_sec = (df.groupby(trip_cols, \n", - " observed=True, group_keys=False)\n", - " .arrival_time_sec\n", - " .shift(-1)\n", - " ),\n", - " subseq_stop_meters = (df.groupby(trip_cols, \n", - " observed=True, group_keys=False)\n", - " .stop_meters\n", - " .shift(-1)\n", - " )\n", - " )\n", - "\n", - " speed = df.assign(\n", - " meters_elapsed = df.subseq_stop_meters - df.stop_meters, \n", - " sec_elapsed = df.subseq_arrival_time_sec - df.arrival_time_sec,\n", - " ).pipe(\n", - " segment_calcs.derive_speed, \n", - " (\"stop_meters\", \"subseq_stop_meters\"), \n", - " (\"arrival_time_sec\", \"subseq_arrival_time_sec\")\n", - " )\n", - " \n", - " return speed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d896a200-3cbd-42c7-89ca-67cbf7211314", - "metadata": {}, - "outputs": [], - "source": [ - "drop_me = [\n", - " \"arrival_time_sec\", \"subseq_arrival_time_sec\",\n", - " \"subseq_stop_meters\"\n", - "]\n", - "speed1 = speed_calculation(gdf_interp1).drop(columns = drop_me)\n", - "speed2 = speed_calculation(gdf_interp2).drop(columns = drop_me)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7a5d228-6792-4625-bcad-d456c76cbf06", - "metadata": {}, - "outputs": [], - "source": [ - "speed_results = pd.merge(\n", - " speed1,\n", - " speed2.rename(columns = {\n", - " \"stop_meters\": \"stop_meters2\",\n", - " \"arrival_time\": \"arrival_time2\",\n", - " \"meters_elapsed\": \"meters_elapsed2\",\n", - " \"sec_elapsed\": \"sec_elapsed2\",\n", - " \"speed_mph\": \"speed_mph2\"\n", - " }),\n", - " on = trip_stop_cols + [\"shape_array_key\", \"stop_id\"],\n", - " how = \"inner\",\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "7723cbac-ab8f-4786-9a0e-e8572e434adc", - "metadata": {}, - "source": [ - "This magnifies to about 5.5% different speeds" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b3ced60d-c2b1-431e-ada4-d32a5a0c0f3b", - "metadata": {}, - "outputs": [], - "source": [ - "speed_results.shape, speed_results[\n", - " (speed_results.speed_mph != speed_results.speed_mph2) & \n", - " (speed_results.speed_mph.notna()) & \n", - " (speed_results.speed_mph < 100_000) # remove infinity\n", - "].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d0357ffd-1500-4027-b2d0-a033e97eee26", - "metadata": {}, - "outputs": [], - "source": [ - "speed_results = speed_results.assign(\n", - " speed_diff = speed_results.speed_mph - speed_results.speed_mph2\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "415ba0a0-2695-4667-b1d1-1cc61a2c89a4", - "metadata": {}, - "outputs": [], - "source": [ - "speed_results.speed_diff.describe()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6b3aed83-2ab2-42cb-ba53-c3001d30b85c", - "metadata": {}, - "outputs": [], - "source": [ - "speed_results[\n", - " speed_results.speed_diff != 0\n", - "].speed_diff.hist(bins=range(-70, 70, 1))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8abc7a49-2e4f-4a4d-91cb-510607b37ceb", - "metadata": {}, - "outputs": [], - "source": [ - "speed_results[\n", - " (speed_results.speed_diff != 0) & \n", - " (speed_results.speed_mph.notna()) & \n", - " (speed_results.speed_mph < 100_000)\n", - "].sort_values([\"trip_instance_key\", \"arrival_time\"])[\n", - " [\"arrival_time\", \"arrival_time2\", \n", - " \"speed_mph\", \"speed_mph2\", \"speed_diff\"]]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "48404074-2da8-4f14-88d2-040090b3fcb6", - "metadata": {}, - "outputs": [], - "source": [ - "SEGMENT_FILE = GTFS_DATA_DICT.speedmap_segments.segments_file\n", - "segment_gdf = gpd.read_parquet(\n", - " f\"{SEGMENT_GCS}{SEGMENT_FILE}_{analysis_date}.parquet\", \n", - " filters = [[(\"trip_instance_key\", \"in\", subset_trips)]]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "721bfb7a-82b3-47ce-a397-cb3e63a87fd9", - "metadata": {}, - "outputs": [], - "source": [ - "from segment_speed_utils import gtfs_schedule_wrangling\n", - "speed_results2 = gtfs_schedule_wrangling.fill_missing_stop_sequence1(\n", - " speed_results)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7c62d1b6-8038-4efc-92ac-ba5ef14336ed", - "metadata": {}, - "outputs": [], - "source": [ - "segment_gdf.shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b076ca6c-f74c-4228-a063-8249d30b7cae", - "metadata": {}, - "outputs": [], - "source": [ - "pd.merge(\n", - " segment_gdf,\n", - " speed_results2,\n", - " on = trip_stop_cols + [\"shape_array_key\"],\n", - " how = \"outer\",\n", - " indicator = True\n", - ")._merge.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b333082e-aef8-4541-8d64-dc06a61dbce7", - "metadata": {}, - "outputs": [], - "source": [ - "speed_results_gdf = pd.merge(\n", - " segment_gdf,\n", - " speed_results2,\n", - " on = trip_stop_cols + [\"shape_array_key\"],\n", - " how = \"inner\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b7289e05-267a-470b-a709-3628ff0b0334", - "metadata": {}, - "outputs": [], - "source": [ - "speed_results_gdf.dtypes" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d979ebea-9201-47b6-921c-408d85bd0956", - "metadata": {}, - "outputs": [], - "source": [ - "from shared_utils import rt_utils\n", - "\n", - "def make_map(gdf, speed_col):\n", - " drop = [\"arrival_time\", \"arrival_time2\"]\n", - " m = gdf.drop(columns = drop).explore(\n", - " speed_col,\n", - " tiles = \"CartoDB Positron\",\n", - " cmap = rt_utils.ZERO_THIRTY_COLORSCALE\n", - " )\n", - " \n", - " return m" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b9842095-44f3-43b6-8afb-fd1c2dba86eb", - "metadata": {}, - "outputs": [], - "source": [ - "bbb_trips_and_shape = helpers.import_scheduled_trips(\n", - " analysis_date,\n", - " filters = [(\"trip_instance_key\", \"in\", subset_trips)],\n", - " columns = [\"shape_id\", \"shape_array_key\", \"trip_instance_key\"],\n", - " get_pandas = True\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7f6b54b8-f4fb-40b1-be8a-1bc782de9497", - "metadata": {}, - "outputs": [], - "source": [ - "bbb_trips_and_shape[\n", - " bbb_trips_and_shape.shape_id==\"26714\"\n", - "].trip_instance_key.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "044a5122-0f9c-4540-a2bf-e540d871d9d7", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(\n", - " speed_results_gdf[\n", - " speed_results_gdf.trip_instance_key == \n", - " \"523d9d30ace49b2cc966c2cbaa8e9071\"], \n", - " \"speed_mph\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "93236a86-a819-4987-ae67-72e392a19e96", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(\n", - " speed_results_gdf[\n", - " speed_results_gdf.trip_instance_key == \n", - " \"523d9d30ace49b2cc966c2cbaa8e9071\"], \n", - " \"speed_mph2\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "18ed483f-30e4-4e12-b2aa-d2eb34b022f4", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(\n", - " speed_results_gdf[\n", - " speed_results_gdf.trip_instance_key == \n", - " \"3dfd9bae3724d3f62363a8328696cb4e\"], \n", - " \"speed_mph2\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fdba60ae-032d-4746-8b90-f53b429159cd", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(\n", - " speed_results_gdf[\n", - " speed_results_gdf.trip_instance_key == \n", - " \"3dfd9bae3724d3f62363a8328696cb4e\"], \n", - " \"speed_mph2\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0664e24f-fba9-44c6-9295-ba979fb1e2c9", - "metadata": {}, - "outputs": [], - "source": [ - "bbb_trips_and_shape[\n", - " bbb_trips_and_shape.shape_id==\"26751\"].trip_instance_key.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a97f9878-f040-4ab3-8d8e-b982f9cf5428", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(\n", - " speed_results_gdf[\n", - " speed_results_gdf.trip_instance_key == \n", - " \"103ffb4be00deb25a90c82f92d431cb2\"], \n", - " \"speed_mph\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3353c3ca-fa51-41e5-a382-8171c34fa8a2", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(\n", - " speed_results_gdf[\n", - " speed_results_gdf.trip_instance_key == \n", - " \"103ffb4be00deb25a90c82f92d431cb2\"], \n", - " \"speed_mph2\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c7f1d190-9f2c-432a-8bdc-97ad10c9ff0d", - "metadata": {}, - "outputs": [], - "source": [ - "bbb_trips_and_shape[\n", - " bbb_trips_and_shape.shape_id==\"26793\"\n", - "].trip_instance_key.unique()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "a240a759-6cbe-44a0-afa0-4f0247a4b9b6", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(\n", - " speed_results_gdf[\n", - " speed_results_gdf.trip_instance_key == \n", - " \"66c7c7215da8fc97c6e620c694aa689c\"], \n", - " \"speed_mph\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ddc641b-a005-40c1-b8ab-98c5629547cb", - "metadata": {}, - "outputs": [], - "source": [ - "make_map(\n", - " speed_results_gdf[\n", - " speed_results_gdf.trip_instance_key == \n", - " \"66c7c7215da8fc97c6e620c694aa689c\"], \n", - " \"speed_mph2\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a314c42-0b32-41c6-82f8-9f03057bd94f", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -}