From b50d16532d97800f1d9efea69c354cc9a179e669 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 15 Sep 2023 19:59:23 +0000 Subject: [PATCH 01/13] add test notebook to check distances --- rt_segment_speeds/scripts/fill_in_1_vp.ipynb | 1107 ++++++++++++++++++ 1 file changed, 1107 insertions(+) create mode 100644 rt_segment_speeds/scripts/fill_in_1_vp.ipynb diff --git a/rt_segment_speeds/scripts/fill_in_1_vp.ipynb b/rt_segment_speeds/scripts/fill_in_1_vp.ipynb new file mode 100644 index 000000000..fd94e5089 --- /dev/null +++ b/rt_segment_speeds/scripts/fill_in_1_vp.ipynb @@ -0,0 +1,1107 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "id": "2c5b9cfe-eb62-4823-8008-1e9ecac2d930", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "import dask.dataframe as dd\n", + "import dask_geopandas as dg\n", + "import geopandas as gpd\n", + "import pandas as pd\n", + "\n", + "from segment_speed_utils.project_vars import (SEGMENT_GCS, \n", + " CONFIG_PATH, \n", + " PROJECT_CRS\n", + " )\n", + "from segment_speed_utils import (helpers, wrangle_shapes, \n", + " segment_calcs)\n", + "from shared_utils import rt_dates\n", + "import test_split\n", + "\n", + "analysis_date = rt_dates.DATES[\"jul2023\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "ed5ebb6b-934d-4451-a108-82895f5b0661", + "metadata": {}, + "outputs": [], + "source": [ + "from importlib import reload\n", + "\n", + "STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, \"stop_segments\")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "58a6b7db-f81f-4d20-8ee1-4205dba51523", + "metadata": {}, + "outputs": [], + "source": [ + "gdf = test_split.put_all_together(analysis_date, STOP_SEG_DICT)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "7103374c-a9e7-4eb9-ba33-52f3c8a4fd9a", + "metadata": {}, + "outputs": [], + "source": [ + "GROUPING_COL = STOP_SEG_DICT[\"grouping_col\"]\n", + "SEGMENT_IDENTIFIER_COLS = STOP_SEG_DICT[\"segment_identifier_cols\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "dc40464c-7523-4dcc-a174-7dfd5725e15b", + "metadata": {}, + "outputs": [], + "source": [ + "part1 = gdf[gdf.n_vp_seg==1].reset_index(drop=True)\n", + "part2 = gdf[gdf.n_vp_seg==2].reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "9e0d29de-13bc-4825-905e-5b7792afc6c0", + "metadata": {}, + "outputs": [], + "source": [ + "part2_keep = (part2.groupby([\"trip_instance_key\"] + SEGMENT_IDENTIFIER_COLS)\n", + " .vp_idx\n", + " .max()\n", + " .reset_index()\n", + " )\n", + "\n", + "part2_pared = pd.merge(\n", + " part2,\n", + " part2_keep,\n", + " on = [\"trip_instance_key\", \"vp_idx\"] + SEGMENT_IDENTIFIER_COLS, \n", + " how = \"inner\"\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "c1e9d0aa-e27d-44e2-868b-a90df9dc63b0", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "fcf447d5-1e4d-496d-a96b-b471da84d13a", + "metadata": {}, + "outputs": [], + "source": [ + "part1_gdf = merge_in_segments(\n", + " part1,\n", + " SEGMENT_IDENTIFIER_COLS,\n", + " GROUPING_COL\n", + ")\n", + "\n", + "part2_gdf = merge_in_segments(\n", + " part2_pared,\n", + " SEGMENT_IDENTIFIER_COLS,\n", + " GROUPING_COL\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "63f6ac9c-0a9c-43d6-ba9d-4ba3bab949ea", + "metadata": {}, + "outputs": [], + "source": [ + "gdf2 = pd.concat(\n", + " [part1_gdf, part2_gdf], \n", + " axis=0\n", + ").sort_values(\n", + " SEGMENT_IDENTIFIER_COLS + [\"trip_instance_key\"]\n", + ").reset_index(drop=True)" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "99e00a23-7731-420f-b068-ce40d1ce78a5", + "metadata": {}, + "outputs": [], + "source": [ + "gddf = dg.from_geopandas(gdf2, npartitions=50)" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "92d6151d-2959-4a61-b0cb-e1a2068fb22b", + "metadata": {}, + "outputs": [], + "source": [ + "shape_meters_series = gddf.map_partitions(\n", + " wrangle_shapes.project_point_geom_onto_linestring,\n", + " \"geometry\",\n", + " \"vp_geometry\",\n", + " meta = (\"shape_meters\", \"float\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "2789939c-1952-4093-9a53-d9d0a002bb9b", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'vp_idx', 'location_timestamp_local',\n", + " 'shape_array_key', 'stop_sequence', 'n_vp_seg', 'prior_vp_idx',\n", + " 'prior_location_timestamp_local', 'vp_geometry', 'prior_geometry',\n", + " 'geometry', 'shape_meters', 'location_timestamp_local_sec'],\n", + " dtype='object')" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gddf.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "292ba063-2b29-4f54-80a0-f05affbdefcd", + "metadata": {}, + "outputs": [], + "source": [ + "shape_meters_series2 = gddf.map_partitions(\n", + " wrangle_shapes.project_point_geom_onto_linestring,\n", + " \"geometry\",\n", + " \"prior_geometry\",\n", + " meta = (\"prior_shape_meters\", \"float\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "44767130-5c70-40b9-a48f-45f289a5bf67", + "metadata": {}, + "outputs": [], + "source": [ + "TIMESTAMP_COL = \"location_timestamp_local\"\n", + "\n", + "gddf[\"shape_meters\"] = shape_meters_series\n", + "gddf[\"prior_shape_meters\"] = shape_meters_series2\n", + "\n", + "gddf = segment_calcs.convert_timestamp_to_seconds(\n", + " gddf, [TIMESTAMP_COL, f\"prior_{TIMESTAMP_COL}\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "58bf7360-d133-4229-bf0d-b7554120e887", + "metadata": {}, + "outputs": [], + "source": [ + "gddf[\"distance\"] = gddf.vp_geometry.distance(gddf.prior_geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "b6b854d5-e6e2-49e3-bc36-27379c11f98e", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n" + ] + } + ], + "source": [ + "gdf3 = gddf.compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "786771f4-93d3-43df-ad8e-aeb0b9e5be59", + "metadata": {}, + "outputs": [], + "source": [ + "gdf3 = gdf3.assign(\n", + " meters_elapsed = abs(gdf3.shape_meters - gdf3.prior_shape_meters),\n", + " sec_elapsed = abs(gdf3.location_timestamp_local_sec - gdf3.prior_location_timestamp_local_sec)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "ecbe0afa-7abd-48cd-b844-b886125608a5", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "Index(['trip_instance_key', 'vp_idx', 'location_timestamp_local',\n", + " 'shape_array_key', 'stop_sequence', 'n_vp_seg', 'prior_vp_idx',\n", + " 'prior_location_timestamp_local', 'vp_geometry', 'prior_geometry',\n", + " 'geometry', 'shape_meters', 'location_timestamp_local_sec',\n", + " 'prior_shape_meters', 'prior_location_timestamp_local_sec', 'distance',\n", + " 'meters_elapsed', 'sec_elapsed'],\n", + " dtype='object')" + ] + }, + "execution_count": 44, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf3.columns" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "cfe580f6-290e-4440-a633-4925de424e3c", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(51808, 5)" + ] + }, + "execution_count": 54, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf3[gdf3.meters_elapsed > gdf3[\"distance\"]*1.5][\n", + " [\"shape_meters\", \"prior_shape_meters\",\n", + " \"meters_elapsed\", \"distance\",\n", + " \"sec_elapsed\"\n", + " ]].shape" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0e6448aa-0797-46eb-a92d-7a5a9919e1f5", + "metadata": {}, + "outputs": [], + "source": [ + "SCALE = 1.5\n", + "gdf3 = gdf3.assign(\n", + " meters_elapsed = gdf3.apply(\n", + " lambda x: \n", + " x[\"distance\"] if (\n", + " x.meters_elapsed == 0 or \n", + " x.meters_elapsed >= x[\"distance\"]*SCALE\n", + " ) else x.meters_elapsed, \n", + " axis=1)\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "55483f61-0b8b-4fc8-8597-10993d0bde22", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
shape_metersprior_shape_metersmeters_elapseddistancesec_elapsed
0340.492168294.21612546.27604336.79289831
10.0000000.0000000.00000055.583304212
91828.30321611.9900351816.3131821786.873286121
101501.4946670.0000001501.4946671494.377947137
111828.3032160.0000001828.3032161813.254069152
..................
2647856267.60982496.331438171.278386153.554969411
2647857247.93251864.090787183.841731177.632313409
2647858300.47941549.640185250.839230207.49616132
264785932782.69920432523.942558258.756646206.60557927
2647860267.60982320.183224247.426600223.24626930
\n", + "

2596053 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " shape_meters prior_shape_meters meters_elapsed distance \\\n", + "0 340.492168 294.216125 46.276043 36.792898 \n", + "1 0.000000 0.000000 0.000000 55.583304 \n", + "9 1828.303216 11.990035 1816.313182 1786.873286 \n", + "10 1501.494667 0.000000 1501.494667 1494.377947 \n", + "11 1828.303216 0.000000 1828.303216 1813.254069 \n", + "... ... ... ... ... \n", + "2647856 267.609824 96.331438 171.278386 153.554969 \n", + "2647857 247.932518 64.090787 183.841731 177.632313 \n", + "2647858 300.479415 49.640185 250.839230 207.496161 \n", + "2647859 32782.699204 32523.942558 258.756646 206.605579 \n", + "2647860 267.609823 20.183224 247.426600 223.246269 \n", + "\n", + " sec_elapsed \n", + "0 31 \n", + "1 212 \n", + "9 121 \n", + "10 137 \n", + "11 152 \n", + "... ... \n", + "2647856 411 \n", + "2647857 409 \n", + "2647858 32 \n", + "2647859 27 \n", + "2647860 30 \n", + "\n", + "[2596053 rows x 5 columns]" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "gdf3[gdf3.meters_elapsed <= gdf3[\"distance\"]*1.5][\n", + " [\"shape_meters\", \"prior_shape_meters\",\n", + " \"meters_elapsed\", \"distance\",\n", + " \"sec_elapsed\"\n", + " ]]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "85fd6513-e216-41c8-b302-2f613aaccc8f", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.13" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 323fd903715ba41ac0e60be4b10030a1c83b3d3d Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 15 Sep 2023 20:00:11 +0000 Subject: [PATCH 02/13] remove old functions --- rt_segment_speeds/scripts/test_split.py | 154 +++++++++++++----------- 1 file changed, 86 insertions(+), 68 deletions(-) diff --git a/rt_segment_speeds/scripts/test_split.py b/rt_segment_speeds/scripts/test_split.py index e6631a17e..6fae21dfb 100644 --- a/rt_segment_speeds/scripts/test_split.py +++ b/rt_segment_speeds/scripts/test_split.py @@ -17,6 +17,8 @@ import geopandas as gpd import pandas as pd +from typing import Literal + from segment_speed_utils import helpers, segment_calcs, wrangle_shapes from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, CONFIG_PATH, PROJECT_CRS) @@ -95,7 +97,55 @@ def get_usable_vp_bounds_by_trip(df: dd.DataFrame) -> pd.DataFrame: ).reset_index(drop=True).compute() return df2 + + +def merge_in_segments( + gdf: gpd.GeoDataFrame, + segment_identifier_cols: list, + grouping_col: str +) -> gpd.GeoDataFrame: + + shapes_needed = gdf[grouping_col].unique().tolist() + + # If segment has 1 point, then we have to use the shape, + # since the prior point can come from multiple segments away + if (gdf.n_vp_seg==1).all(): + + shapes = helpers.import_scheduled_shapes( + analysis_date, + filters = [[(grouping_col, "in", shapes_needed)]], + columns = [grouping_col, "geometry"], + get_pandas = True, + crs = PROJECT_CRS + ) + + m1 = pd.merge( + gdf, + shapes, + on = grouping_col, + how = "inner" + ).rename(columns = { + "geometry_x": "vp_geometry", + "geometry_y": "geometry"}) + + # If segment has 2 points, then we can use segment geometry + elif (gdf.n_vp_seg==2).all(): + + segments = gpd.read_parquet( + f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet", + columns = segment_identifier_cols + ["geometry"] + ) + + m1 = pd.merge( + gdf, + segments, + on = segment_identifier_cols, + how = "inner" + ).rename(columns = { + "geometry_x": "vp_geometry", + "geometry_y": "geometry"}) + return m1 def put_all_together( analysis_date: str, @@ -188,83 +238,51 @@ def put_all_together( ).to_crs(PROJECT_CRS) ).drop(columns = ["prior_x", "prior_y"]).set_geometry("geometry") - return gdf2 + part1 = gdf2[gdf.n_vp_seg==1].reset_index(drop=True) + part2 = gdf2[gdf.n_vp_seg==2].reset_index(drop=True) + part2_keep = (part2.groupby(["trip_instance_key"] + SEGMENT_IDENTIFIER_COLS) + .vp_idx + .max() + .reset_index() + ) + part2_pared = pd.merge( + part2, + part2_keep, + on = ["trip_instance_key", "vp_idx"] + SEGMENT_IDENTIFIER_COLS, + how = "inner" + ) -if __name__ == "__main__": - - - - gdf = gdf.assign( - prior_time = gdf.prior_time.fillna( - gdf.groupby("trip_instance_key", - observed=True, group_keys=False) - [time_col] - .shift(1) - ), - prior_coord = gdf.geometry.fillna( - gdf.groupby("trip_instance_key", - observed=True, group_keys=False) - .geometry - .shift(1) - ), - ).rename(columns = {"geometry": "vp_geometry"}) + part1_gdf = merge_in_segments( + part1, + SEGMENT_IDENTIFIER_COLS, + GROUPING_COL + ) + part2_gdf = merge_in_segments( + part2_pared, + SEGMENT_IDENTIFIER_COLS, + GROUPING_COL + ) - segments = gpd.read_parquet( - f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet", - columns = SEGMENT_IDENTIFIER_COLS + ["geometry"] - ).rename(columns = {"geometry": "segment_geometry"}) + gdf3 = pd.concat( + [part1_gdf, part2_gdf], + axis=0 + ).sort_values( + SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"] + ).reset_index(drop=True) - two_obs_in_seg_gdf = pd.merge( - two_obs_in_seg, - segments, - on = SEGMENT_IDENTIFIER_COLS, - how = "inner" - ) - two_obs_in_seg_gdf = dg.from_geopandas(two_obs_in_seg_gdf, npartitions=10 - ).set_geometry("vp_geometry") - - shapes_to_keep = one_obs_in_seg_one_outside.shape_array_key.unique().tolist() - - shapes = helpers.import_scheduled_shapes( - analysis_date, - columns = ["shape_array_key", "geometry"], - filters = [[("shape_array_key", "in", shapes_to_keep)]], - get_pandas = True, - crs = "EPSG:3310" - ).rename(columns = {"geometry": "shape_geometry"}) - - one_obs_in_seg_one_outside_gdf = pd.merge( - one_obs_in_seg_one_outside, - shapes, - on = "shape_array_key", - how = "inner" - ) + return gdf3 - shape_meters_series = two_obs_in_seg_gdf.map_partitions( - wrangle_shapes.project_point_geom_onto_linestring( - "segment_geometry", - "vp_geometry", - meta = ("shape_meters", "float") - )) - - two_obs_in_seg_gdf["shape_meters"] = shape_meters_series + - two_obs_in_seg_gdf = two_obs_in_seg_gdf.repartition(npartitions=5) - two_obs_in_seg_gdf.to_parquet("two_seg_test") - shape_meters_series2 =wrangle_shapes.project_point_geom_onto_linestring( - one_obs_in_seg_one_outside_gdf, - "shape_geometry", - "vp_geometry", - #meta = ("shape_meters", "float") - ) +if __name__ == "__main__": - one_obs_in_seg_one_outside_gdf["shape_meters"] = shape_meters_series2 - one_obs_in_seg_one_outside_gdf.to_parquet("one_seg_test.parquet") - - \ No newline at end of file + + + + gddf = dg.from_geopandas(gdf3, npartitions=50) From d3a8af2b9c746e9439c28f8079eb722e5e8ce759 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 15 Sep 2023 23:35:19 +0000 Subject: [PATCH 03/13] use delayed --- rt_segment_speeds/scripts/test_split.py | 184 +++++++++++++++++------- 1 file changed, 136 insertions(+), 48 deletions(-) diff --git a/rt_segment_speeds/scripts/test_split.py b/rt_segment_speeds/scripts/test_split.py index 6fae21dfb..5550f5947 100644 --- a/rt_segment_speeds/scripts/test_split.py +++ b/rt_segment_speeds/scripts/test_split.py @@ -14,9 +14,11 @@ """ import dask.dataframe as dd import dask_geopandas as dg +import datetime import geopandas as gpd import pandas as pd +from dask import delayed, compute from typing import Literal from segment_speed_utils import helpers, segment_calcs, wrangle_shapes @@ -147,6 +149,113 @@ def merge_in_segments( return m1 + +def attach_vp_timestamp_location( + df: pd.DataFrame, + usable_vp: dd.DataFrame, + timestamp_col: str +) ->gpd.GeoDataFrame: + """ + """ + # Merge in the timestamp and x, y coords + df_with_xy = dd.merge( + usable_vp, + df, + on = "vp_idx", + how = "inner" + ) + + # Merge again to get timestamp and x, y coords of previous point + usable_vp2 = usable_vp.rename( + columns = { + "vp_idx": "prior_vp_idx", + timestamp_col: f"prior_{timestamp_col}", + "x": "prior_x", + "y": "prior_y", + } + ).drop(columns = "trip_instance_key") + + df_with_prior_xy = dd.merge( + df_with_xy, + usable_vp2, + on = "prior_vp_idx", + how = "inner" + ).compute() + + gdf = gpd.GeoDataFrame( + df_with_prior_xy, + geometry = gpd.points_from_xy(df_with_prior_xy.x, df_with_prior_xy.y), + crs = WGS84 + ).to_crs(PROJECT_CRS).drop(columns = ["x", "y"]) + + gdf2 = gdf.assign( + prior_vp_geometry = gpd.points_from_xy( + gdf.prior_x, gdf.prior_y, crs = WGS84 + ).to_crs(PROJECT_CRS) + ).drop(columns = ["prior_x", "prior_y"]).set_geometry("geometry") + + return gdf2 + + +def linear_referencing_for_segment( + gdf: dg.GeoDataFrame, + timestamp_col: str, + scaling_factor: float = 1.75 +) -> dg.GeoDataFrame: + + #gddf = dg.from_geopandas(gdf, npartitions=50) + gddf = gdf.copy() + + + shape_meters_series = (#gddf.map_partitions( + wrangle_shapes.project_point_geom_onto_linestring( + gddf, + "geometry", + "vp_geometry", + #meta = ("shape_meters", "float") + )) + + prior_shape_meters_series = (#gddf.map_partitions( + wrangle_shapes.project_point_geom_onto_linestring( + gddf, + "geometry", + "prior_vp_geometry", + #meta = ("prior_shape_meters", "float") + )) + + #gddf["current_shape_meters"] = shape_meters_series + #gddf["prior_shape_meters"] = prior_shape_meters_series + gddf["difference_shape_meters"] = abs( + shape_meters_series - prior_shape_meters_series) + gddf["straight_distance"] = gddf.vp_geometry.distance(gddf.prior_vp_geometry) + + # Decide what distance to keep + # If difference between current_shape_meters and prior_shape_meters is 0, + # use the straight line distance. + # If difference between current_shape_meters and prior_shape_meters is way + # too high, don't use it, it could be from projecting against the full shape + gddf = gddf.assign( + meters_elapsed = gddf.apply( + lambda x: x.straight_distance if ( + x.difference_shape_meters == 0 or + x.difference_shape_meters >= x.straight_distance*scaling_factor + ) else x.difference_shape_meters, + axis=1, + #meta = ("meters_elapsed", "float") + ), + ) + + gddf = segment_calcs.convert_timestamp_to_seconds( + gddf, [timestamp_col, f"prior_{timestamp_col}"]) + + drop_cols = ["difference_shape_meters", "straight_distance", + "vp_geometry", "prior_vp_geometry", "geometry" + ] + gddf2 = gddf.drop(columns = drop_cols) + + return gddf + + def put_all_together( analysis_date: str, dict_inputs: dict = {} @@ -166,21 +275,21 @@ def put_all_together( vp_idx_bounds = get_usable_vp_bounds_by_trip(usable_vp) # Start from pared down vp - df = pd.read_parquet( + df = delayed(pd.read_parquet)( f"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_all_{analysis_date}", columns = SEGMENT_IDENTIFIER_COLS + ["trip_instance_key", "vp_idx"] ) # Make sure all segments have 2 points # If it doesn't, fill it in with the previous vp_idx - df2 = get_prior_position_on_segment( + df2 = delayed(get_prior_position_on_segment)( df, SEGMENT_IDENTIFIER_COLS, TIMESTAMP_COL ) # Check that the previous vp_idx actually occurs on the same trip - df3 = pd.merge( + df3 = delayed(pd.merge)( df2, vp_idx_bounds, on = "trip_instance_key", @@ -201,45 +310,14 @@ def put_all_together( axis=1) ).drop(columns = ["trip_instance_key", "min_vp_idx", "max_vp_idx"]) - # Merge in the timestamp and x, y coords - df_with_xy = dd.merge( - usable_vp, + gdf = delayed(attach_vp_timestamp_location)( df3, - on = "vp_idx", - how = "inner" + usable_vp, + TIMESTAMP_COL ) - # Merge again to get timestamp and x, y coords of previous point - usable_vp2 = usable_vp.rename( - columns = { - "vp_idx": "prior_vp_idx", - TIMESTAMP_COL: f"prior_{TIMESTAMP_COL}", - "x": "prior_x", - "y": "prior_y", - } - ).drop(columns = "trip_instance_key") - - df_with_prior_xy = dd.merge( - df_with_xy, - usable_vp2, - on = "prior_vp_idx", - how = "inner" - ).compute() - - gdf = gpd.GeoDataFrame( - df_with_prior_xy, - geometry = gpd.points_from_xy(df_with_prior_xy.x, df_with_prior_xy.y), - crs = WGS84 - ).to_crs(PROJECT_CRS).drop(columns = ["x", "y"]) - - gdf2 = gdf.assign( - prior_geometry = gpd.points_from_xy( - gdf.prior_x, gdf.prior_y, crs = WGS84 - ).to_crs(PROJECT_CRS) - ).drop(columns = ["prior_x", "prior_y"]).set_geometry("geometry") - - part1 = gdf2[gdf.n_vp_seg==1].reset_index(drop=True) - part2 = gdf2[gdf.n_vp_seg==2].reset_index(drop=True) + part1 = gdf[gdf.n_vp_seg==1].reset_index(drop=True) + part2 = gdf[gdf.n_vp_seg==2].reset_index(drop=True) part2_keep = (part2.groupby(["trip_instance_key"] + SEGMENT_IDENTIFIER_COLS) .vp_idx @@ -247,7 +325,7 @@ def put_all_together( .reset_index() ) - part2_pared = pd.merge( + part2_pared = delayed(pd.merge)( part2, part2_keep, on = ["trip_instance_key", "vp_idx"] + SEGMENT_IDENTIFIER_COLS, @@ -255,34 +333,44 @@ def put_all_together( ) - part1_gdf = merge_in_segments( + part1_gdf = delayed(merge_in_segments)( part1, SEGMENT_IDENTIFIER_COLS, GROUPING_COL ) - part2_gdf = merge_in_segments( + part2_gdf = delayed(merge_in_segments)( part2_pared, SEGMENT_IDENTIFIER_COLS, GROUPING_COL ) - gdf3 = pd.concat( + gdf3 = delayed(pd.concat)( [part1_gdf, part2_gdf], axis=0 ).sort_values( SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"] ).reset_index(drop=True) + gdf4 = delayed(linear_referencing_for_segment)( + gdf3, + TIMESTAMP_COL, + scaling_factor = 1.75 + ) + + return gdf4 - return gdf3 +if __name__ == "__main__": - + start = datetime.datetime.now() + STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments") + gddf = put_all_together(analysis_date, STOP_SEG_DICT) + gdf = compute(gddf)[0] -if __name__ == "__main__": + gdf.to_parquet(f"linear_ref.parquet") + print(f"execution time: {datetime.datetime.now() - start}") - - gddf = dg.from_geopandas(gdf3, npartitions=50) + \ No newline at end of file From 26c4452ed9992f884747232ffa1cddb74432a7a6 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Mon, 18 Sep 2023 16:53:40 +0000 Subject: [PATCH 04/13] tag results that seem to low --- rt_segment_speeds/scripts/fill_in_1_vp.ipynb | 920 ++++++++++++++----- rt_segment_speeds/scripts/test_split.py | 159 ++-- 2 files changed, 775 insertions(+), 304 deletions(-) diff --git a/rt_segment_speeds/scripts/fill_in_1_vp.ipynb b/rt_segment_speeds/scripts/fill_in_1_vp.ipynb index fd94e5089..fe884d133 100644 --- a/rt_segment_speeds/scripts/fill_in_1_vp.ipynb +++ b/rt_segment_speeds/scripts/fill_in_1_vp.ipynb @@ -19,6 +19,7 @@ "import dask.dataframe as dd\n", "import dask_geopandas as dg\n", "import geopandas as gpd\n", + "import numpy as np\n", "import pandas as pd\n", "\n", "from segment_speed_utils.project_vars import (SEGMENT_GCS, \n", @@ -42,119 +43,242 @@ "source": [ "from importlib import reload\n", "\n", - "STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, \"stop_segments\")" + "dict_inputs = helpers.get_parameters(CONFIG_PATH, \"stop_segments\")" ] }, { "cell_type": "code", "execution_count": 3, - "id": "58a6b7db-f81f-4d20-8ee1-4205dba51523", + "id": "e7ab12a8-9fef-4e5a-8be3-ad18992d685b", "metadata": {}, "outputs": [], "source": [ - "gdf = test_split.put_all_together(analysis_date, STOP_SEG_DICT)" + "USABLE_VP = dict_inputs[\"stage1\"]\n", + "INPUT_FILE = dict_inputs[\"stage3\"]\n", + "SEGMENT_FILE = dict_inputs[\"segments_file\"]\n", + "SEGMENT_IDENTIFIER_COLS = dict_inputs[\"segment_identifier_cols\"]\n", + "GROUPING_COL = dict_inputs[\"grouping_col\"]\n", + "TIMESTAMP_COL = dict_inputs[\"timestamp_col\"]" ] }, { "cell_type": "code", "execution_count": 4, - "id": "7103374c-a9e7-4eb9-ba33-52f3c8a4fd9a", + "id": "29ef4e7b-82d2-427a-9122-f4ee1c34baa3", "metadata": {}, "outputs": [], "source": [ - "GROUPING_COL = STOP_SEG_DICT[\"grouping_col\"]\n", - "SEGMENT_IDENTIFIER_COLS = STOP_SEG_DICT[\"segment_identifier_cols\"]" + "usable_vp = dd.read_parquet(\n", + " f\"{SEGMENT_GCS}{USABLE_VP}_{analysis_date}\",\n", + " columns = [\"trip_instance_key\", \"vp_idx\", TIMESTAMP_COL, \"x\", \"y\"]\n", + ")\n", + "vp_idx_bounds = test_split.get_usable_vp_bounds_by_trip(usable_vp)" ] }, { "cell_type": "code", "execution_count": 5, - "id": "dc40464c-7523-4dcc-a174-7dfd5725e15b", + "id": "86983844-a321-4d86-863a-3297bd3f7dd5", + "metadata": {}, + "outputs": [], + "source": [ + "from dask import delayed, compute\n", + "# Start from pared down vp\n", + "df = delayed(pd.read_parquet)(\n", + " f\"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_all_{analysis_date}\",\n", + " columns = SEGMENT_IDENTIFIER_COLS + [\"trip_instance_key\", \"vp_idx\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "45664688-422d-4336-b7ac-dcaa399b803d", "metadata": {}, "outputs": [], "source": [ - "part1 = gdf[gdf.n_vp_seg==1].reset_index(drop=True)\n", - "part2 = gdf[gdf.n_vp_seg==2].reset_index(drop=True)" + "df2 = delayed(test_split.get_prior_position_on_segment)(\n", + " df, \n", + " SEGMENT_IDENTIFIER_COLS,\n", + " TIMESTAMP_COL\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "da871931-d2fa-42dd-8d66-b5bd6c735728", + "metadata": {}, + "outputs": [], + "source": [ + "df3 = delayed(dd.merge)(\n", + " df2,\n", + " vp_idx_bounds,\n", + " on = \"trip_instance_key\",\n", + " how = \"inner\"\n", + ")\n", + "\n", + "df3 = df3.assign(\n", + " prior_vp_idx = df3.apply(\n", + " lambda x: \n", + " x.vp_idx + 1 if (x.prior_vp_idx < x.min_vp_idx) and \n", + " (x.vp_idx + 1 <= x.max_vp_idx)\n", + " else x.prior_vp_idx, \n", + " axis=1)\n", + ").drop(columns = [\"trip_instance_key\", \"min_vp_idx\", \"max_vp_idx\"])\n", + " " ] }, { "cell_type": "code", "execution_count": 8, - "id": "9e0d29de-13bc-4825-905e-5b7792afc6c0", + "id": "6aedd4e2-c3a4-4b18-8ca5-9234fc10f992", + "metadata": {}, + "outputs": [], + "source": [ + "df3 = compute(df3)[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "22f8c086-4592-4c8f-9e08-cbe43c90373f", + "metadata": {}, + "outputs": [], + "source": [ + "def attach_vp_timestamp_location(\n", + " df: pd.DataFrame,\n", + " usable_vp: dd.DataFrame,\n", + " timestamp_col: str\n", + ") -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " \"\"\"\n", + " \n", + " # Turn the vp_idx we need into gdf\n", + " vp_to_keep = np.union1d(df.vp_idx, df.prior_vp_idx).tolist()\n", + " usable_vp2 = usable_vp[usable_vp.vp_idx.isin(vp_to_keep)]\n", + " \n", + " # Merge in the timestamp and x, y coords \n", + " usable_gdf = gpd.GeoDataFrame(\n", + " usable_vp2,\n", + " geometry = gpd.points_from_xy(usable_vp2.x, usable_vp2.y),\n", + " crs = WGS84\n", + " ).to_crs(PROJECT_CRS).drop(columns = [\"x\", \"y\"])\n", + " \n", + " \n", + " df_with_xy = pd.merge(\n", + " usable_gdf,\n", + " df,\n", + " on = \"vp_idx\",\n", + " how = \"inner\"\n", + " ).rename(columns = {\"geometry\": \"vp_geometry\"})\n", + " \n", + " # Merge again to get timestamp and x, y coords of previous point\n", + " usable_gdf2 = usable_gdf.rename(\n", + " columns = {\n", + " \"vp_idx\": \"prior_vp_idx\",\n", + " timestamp_col: f\"prior_{timestamp_col}\",\n", + " \"geometry\": \"prior_vp_geometry\"\n", + " }\n", + " ).drop(columns = \"trip_instance_key\")\n", + " \n", + " df_with_prior_xy = dd.merge(\n", + " df_with_xy,\n", + " usable_gdf2,\n", + " on = \"prior_vp_idx\",\n", + " how = \"inner\"\n", + " )\n", + " \n", + " return df_with_prior_xy" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "502f4715-a298-4520-b571-fae4d196cf2f", + "metadata": {}, + "outputs": [], + "source": [ + "from shared_utils.geography_utils import WGS84\n", + "\n", + "gdf = delayed(attach_vp_timestamp_location)(\n", + " df3,\n", + " usable_vp,\n", + " TIMESTAMP_COL\n", + ").persist()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "eeecaba2-9e58-4954-9660-5337ef02569c", "metadata": {}, "outputs": [], "source": [ - "part2_keep = (part2.groupby([\"trip_instance_key\"] + SEGMENT_IDENTIFIER_COLS)\n", + "part2 = gdf[gdf.n_vp_seg==2]\n", + "\n", + "part2_keep = (part2.groupby([\"trip_instance_key\"] + SEGMENT_IDENTIFIER_COLS,\n", + " observed=True, group_keys=False)\n", " .vp_idx\n", " .max()\n", " .reset_index()\n", - " )\n", - "\n", - "part2_pared = pd.merge(\n", - " part2,\n", - " part2_keep,\n", - " on = [\"trip_instance_key\", \"vp_idx\"] + SEGMENT_IDENTIFIER_COLS, \n", - " how = \"inner\"\n", - ")" + " )" ] }, { "cell_type": "code", "execution_count": 13, - "id": "c1e9d0aa-e27d-44e2-868b-a90df9dc63b0", + "id": "150a9219-94c7-44cc-84ac-e9badd88c3e5", "metadata": {}, "outputs": [], - "source": [] + "source": [ + "part2_pared = delayed(dd.merge)(\n", + " part2,\n", + " part2_keep,\n", + " on = [\"trip_instance_key\", \"vp_idx\"] + SEGMENT_IDENTIFIER_COLS, \n", + " how = \"inner\"\n", + ")\n" + ] }, { "cell_type": "code", - "execution_count": 10, - "id": "fcf447d5-1e4d-496d-a96b-b471da84d13a", + "execution_count": 14, + "id": "14b7f596-1676-40e8-a185-c0f02ad7c7d2", "metadata": {}, "outputs": [], "source": [ - "part1_gdf = merge_in_segments(\n", - " part1,\n", - " SEGMENT_IDENTIFIER_COLS,\n", - " GROUPING_COL\n", - ")\n", - "\n", - "part2_gdf = merge_in_segments(\n", + "part2_gdf = delayed(test_split.merge_in_segments)(\n", " part2_pared,\n", " SEGMENT_IDENTIFIER_COLS,\n", - " GROUPING_COL\n", + " GROUPING_COL,\n", + " n_vp_seg_value=2\n", ")" ] }, { "cell_type": "code", - "execution_count": 21, - "id": "63f6ac9c-0a9c-43d6-ba9d-4ba3bab949ea", + "execution_count": 15, + "id": "cefe0484-6a54-4b11-a562-5e77ff824b85", "metadata": {}, "outputs": [], "source": [ - "gdf2 = pd.concat(\n", - " [part1_gdf, part2_gdf], \n", - " axis=0\n", - ").sort_values(\n", - " SEGMENT_IDENTIFIER_COLS + [\"trip_instance_key\"]\n", - ").reset_index(drop=True)" + "p2_gdf = compute(part2_gdf)[0]" ] }, { "cell_type": "code", - "execution_count": 23, - "id": "99e00a23-7731-420f-b068-ce40d1ce78a5", + "execution_count": 16, + "id": "88525881-e849-4a42-b0a3-aa14d9d38f38", "metadata": {}, "outputs": [], "source": [ - "gddf = dg.from_geopandas(gdf2, npartitions=50)" + "gddf = dg.from_geopandas(p2_gdf, npartitions=100)" ] }, { "cell_type": "code", - "execution_count": 27, - "id": "92d6151d-2959-4a61-b0cb-e1a2068fb22b", + "execution_count": 17, + "id": "678c1e78-9fe3-4f11-a2f6-4fa7ced6c69c", "metadata": {}, "outputs": [], "source": [ @@ -163,79 +287,85 @@ " \"geometry\",\n", " \"vp_geometry\",\n", " meta = (\"shape_meters\", \"float\")\n", + ")\n", + "\n", + "prior_shape_meters_series = gddf.map_partitions(\n", + " wrangle_shapes.project_point_geom_onto_linestring,\n", + " \"geometry\",\n", + " \"prior_vp_geometry\",\n", + " meta = (\"prior_shape_meters\", \"float\")\n", ")" ] }, { "cell_type": "code", - "execution_count": 30, - "id": "2789939c-1952-4093-9a53-d9d0a002bb9b", + "execution_count": 18, + "id": "351cc3d5-ade0-47a3-ba30-aa8e24b4c9e7", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Index(['trip_instance_key', 'vp_idx', 'location_timestamp_local',\n", - " 'shape_array_key', 'stop_sequence', 'n_vp_seg', 'prior_vp_idx',\n", - " 'prior_location_timestamp_local', 'vp_geometry', 'prior_geometry',\n", - " 'geometry', 'shape_meters', 'location_timestamp_local_sec'],\n", - " dtype='object')" - ] - }, - "execution_count": 30, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "gddf.columns" + "gddf[\"difference_shape_meters\"] = abs(\n", + " shape_meters_series - prior_shape_meters_series)\n", + "gddf[\"straight_distance\"] = gddf.vp_geometry.distance(gddf.prior_vp_geometry)" ] }, { "cell_type": "code", - "execution_count": 32, - "id": "292ba063-2b29-4f54-80a0-f05affbdefcd", + "execution_count": 19, + "id": "8f943943-e0bc-4526-b9ab-e8963eeec7ae", "metadata": {}, "outputs": [], "source": [ - "shape_meters_series2 = gddf.map_partitions(\n", - " wrangle_shapes.project_point_geom_onto_linestring,\n", - " \"geometry\",\n", - " \"prior_geometry\",\n", - " meta = (\"prior_shape_meters\", \"float\")\n", + "scaling_factor = 1.75\n", + "min_meters_elapsed = 100\n", + "\n", + "# It's not only that difference_shape_meters==0 is wrong,\n", + "# Separate out these and try again with full shape \n", + "gddf = gddf.assign(\n", + " meters_elapsed = gddf.apply(\n", + " lambda x: x.straight_distance if (\n", + " x.difference_shape_meters <= min_meters_elapsed or \n", + " x.difference_shape_meters >= x.straight_distance*scaling_factor\n", + " ) else x.difference_shape_meters, \n", + " axis=1, \n", + " meta = (\"meters_elapsed\", \"float\")\n", + " ),\n", + ")\n", + "\n", + "gddf = segment_calcs.convert_timestamp_to_seconds(\n", + " gddf, [TIMESTAMP_COL, f\"prior_{TIMESTAMP_COL}\"])\n", + "\n", + "gddf = gddf.assign(\n", + " sec_elapsed = (gddf[f\"{TIMESTAMP_COL}_sec\"] - \n", + " gddf[f\"prior_{TIMESTAMP_COL}_sec\"]).abs()\n", ")" ] }, { "cell_type": "code", - "execution_count": 28, - "id": "44767130-5c70-40b9-a48f-45f289a5bf67", + "execution_count": 20, + "id": "b10cbeb9-558e-459b-93d3-becb118ba7b1", "metadata": {}, "outputs": [], "source": [ - "TIMESTAMP_COL = \"location_timestamp_local\"\n", - "\n", - "gddf[\"shape_meters\"] = shape_meters_series\n", - "gddf[\"prior_shape_meters\"] = shape_meters_series2\n", - "\n", - "gddf = segment_calcs.convert_timestamp_to_seconds(\n", - " gddf, [TIMESTAMP_COL, f\"prior_{TIMESTAMP_COL}\"])" + "ddf = gddf.drop(columns = [\"prior_vp_geometry\", \n", + " \"vp_geometry\", \"geometry\"])" ] }, { "cell_type": "code", - "execution_count": 37, - "id": "58bf7360-d133-4229-bf0d-b7554120e887", + "execution_count": 21, + "id": "d2d54fb3-d5dc-4a98-a304-9b5004e022b8", "metadata": {}, "outputs": [], "source": [ - "gddf[\"distance\"] = gddf.vp_geometry.distance(gddf.prior_geometry)" + "ddf = ddf.repartition(npartitions=2)" ] }, { "cell_type": "code", - "execution_count": 39, - "id": "b6b854d5-e6e2-49e3-bc36-27379c11f98e", + "execution_count": 22, + "id": "df1d5370-3021-45fb-a6ef-7eb408185d5f", "metadata": {}, "outputs": [ { @@ -811,102 +941,207 @@ "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", " return lib.line_locate_point(line, other)\n", "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", + " return lib.line_locate_point(line, other)\n", + "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", " return lib.line_locate_point(line, other)\n" ] } ], "source": [ - "gdf3 = gddf.compute()" + "results = ddf.compute()" ] }, { "cell_type": "code", - "execution_count": 41, - "id": "786771f4-93d3-43df-ad8e-aeb0b9e5be59", + "execution_count": 23, + "id": "d4c9fdf6-55b8-479a-94ae-ab36a4343171", "metadata": {}, "outputs": [], "source": [ - "gdf3 = gdf3.assign(\n", - " meters_elapsed = abs(gdf3.shape_meters - gdf3.prior_shape_meters),\n", - " sec_elapsed = abs(gdf3.location_timestamp_local_sec - gdf3.prior_location_timestamp_local_sec)\n", + "from shared_utils.rt_utils import MPH_PER_MPS\n", + "\n", + "results = results.assign(\n", + " speed_mph = (results.meters_elapsed.divide(results.sec_elapsed) * \n", + " MPH_PER_MPS)\n", ")" ] }, { "cell_type": "code", - "execution_count": 44, - "id": "ecbe0afa-7abd-48cd-b844-b886125608a5", + "execution_count": 24, + "id": "433e0382-461e-4970-9d88-37e07a40efaf", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "Index(['trip_instance_key', 'vp_idx', 'location_timestamp_local',\n", - " 'shape_array_key', 'stop_sequence', 'n_vp_seg', 'prior_vp_idx',\n", - " 'prior_location_timestamp_local', 'vp_geometry', 'prior_geometry',\n", - " 'geometry', 'shape_meters', 'location_timestamp_local_sec',\n", - " 'prior_shape_meters', 'prior_location_timestamp_local_sec', 'distance',\n", - " 'meters_elapsed', 'sec_elapsed'],\n", - " dtype='object')" + "(17751, 15)" ] }, - "execution_count": 44, + "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "gdf3.columns" + "results[results.speed_mph < 5].shape" ] }, { "cell_type": "code", - "execution_count": 54, - "id": "cfe580f6-290e-4440-a633-4925de424e3c", + "execution_count": 25, + "id": "be27c87b-0de2-472e-9ea0-cc42da1d4256", + "metadata": {}, + "outputs": [], + "source": [ + "ok_results = results[results.speed_mph >= 5]" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "e177393a-86c0-4c74-824c-fe9548b461b2", + "metadata": {}, + "outputs": [], + "source": [ + "bad_results = results[(results.speed_mph >= 0) & (results.speed_mph <5)]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4ab46336-10a3-4a25-86d5-6f9343f3175f", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "(51808, 5)" + "(39221, 17751, 0)" ] }, - "execution_count": 54, + "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "gdf3[gdf3.meters_elapsed > gdf3[\"distance\"]*1.5][\n", - " [\"shape_meters\", \"prior_shape_meters\",\n", - " \"meters_elapsed\", \"distance\",\n", - " \"sec_elapsed\"\n", - " ]].shape" + "len(ok_results), len(bad_results)" ] }, { "cell_type": "code", - "execution_count": null, - "id": "0e6448aa-0797-46eb-a92d-7a5a9919e1f5", - "metadata": {}, - "outputs": [], - "source": [ - "SCALE = 1.5\n", - "gdf3 = gdf3.assign(\n", - " meters_elapsed = gdf3.apply(\n", - " lambda x: \n", - " x[\"distance\"] if (\n", - " x.meters_elapsed == 0 or \n", - " x.meters_elapsed >= x[\"distance\"]*SCALE\n", - " ) else x.meters_elapsed, \n", - " axis=1)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "55483f61-0b8b-4fc8-8597-10993d0bde22", + "execution_count": 29, + "id": "6390361b-bfdd-44f3-833b-12b291343bd3", "metadata": {}, "outputs": [ { @@ -930,53 +1165,113 @@ " \n", " \n", " \n", - " shape_meters\n", - " prior_shape_meters\n", + " trip_instance_key\n", + " vp_idx\n", + " location_timestamp_local\n", + " shape_array_key\n", + " stop_sequence\n", + " n_vp_seg\n", + " prior_vp_idx\n", + " prior_location_timestamp_local\n", + " difference_shape_meters\n", + " straight_distance\n", " meters_elapsed\n", - " distance\n", + " location_timestamp_local_sec\n", + " prior_location_timestamp_local_sec\n", " sec_elapsed\n", + " speed_mph\n", " \n", " \n", " \n", " \n", " 0\n", - " 340.492168\n", - " 294.216125\n", - " 46.276043\n", - " 36.792898\n", - " 31\n", + " d15b0bea563cd87d86c86aebe5092eec\n", + " 103533\n", + " 2023-07-12 07:02:19\n", + " bbcffbd3d0f15bb6aa401323d9b4cc16\n", + " 21\n", + " 2\n", + " 103414\n", + " 2023-07-12 06:09:18\n", + " 130.776168\n", + " 149.466617\n", + " 130.776168\n", + " 25339\n", + " 22158\n", + " 3181\n", + " 0.091967\n", " \n", " \n", " 1\n", - " 0.000000\n", - " 0.000000\n", - " 0.000000\n", - " 55.583304\n", - " 212\n", + " 3be657dfc73c03e07bf64c8d0d1ba5b9\n", + " 103662\n", + " 2023-07-12 07:47:59\n", + " bbcffbd3d0f15bb6aa401323d9b4cc16\n", + " 21\n", + " 2\n", + " 103587\n", + " 2023-07-12 07:22:58\n", + " 30.767878\n", + " 35.712476\n", + " 35.712476\n", + " 28079\n", + " 26578\n", + " 1501\n", + " 0.053224\n", " \n", " \n", - " 9\n", - " 1828.303216\n", - " 11.990035\n", - " 1816.313182\n", - " 1786.873286\n", - " 121\n", + " 2\n", + " 96b1cdcb34a5e140783ecdd704b941a8\n", + " 103820\n", + " 2023-07-12 08:37:49\n", + " bbcffbd3d0f15bb6aa401323d9b4cc16\n", + " 21\n", + " 2\n", + " 103735\n", + " 2023-07-12 08:09:32\n", + " 47.987494\n", + " 59.847917\n", + " 59.847917\n", + " 31069\n", + " 29372\n", + " 1697\n", + " 0.078892\n", " \n", " \n", - " 10\n", - " 1501.494667\n", - " 0.000000\n", - " 1501.494667\n", - " 1494.377947\n", - " 137\n", + " 3\n", + " 2de5b5e96e174df9876577aa32574a37\n", + " 103951\n", + " 2023-07-12 09:21:27\n", + " bbcffbd3d0f15bb6aa401323d9b4cc16\n", + " 21\n", + " 2\n", + " 103867\n", + " 2023-07-12 08:53:28\n", + " 66.836692\n", + " 82.544468\n", + " 82.544468\n", + " 33687\n", + " 32008\n", + " 1679\n", + " 0.109977\n", " \n", " \n", - " 11\n", - " 1828.303216\n", - " 0.000000\n", - " 1828.303216\n", - " 1813.254069\n", - " 152\n", + " 4\n", + " 7272eba6ea9e1a31c821e8baab6ed1f4\n", + " 104085\n", + " 2023-07-12 10:06:12\n", + " bbcffbd3d0f15bb6aa401323d9b4cc16\n", + " 21\n", + " 2\n", + " 104002\n", + " 2023-07-12 09:38:34\n", + " 42.590357\n", + " 67.569399\n", + " 67.569399\n", + " 36372\n", + " 34714\n", + " 1658\n", + " 0.091166\n", " \n", " \n", " ...\n", @@ -985,99 +1280,274 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 2647856\n", - " 267.609824\n", - " 96.331438\n", - " 171.278386\n", - " 153.554969\n", - " 411\n", + " 56967\n", + " 4230577777b1a6eefac68b504efd4107\n", + " 14525378\n", + " 2023-07-12 07:44:24\n", + " 5c07969e700a525860a02fcee6389fbf\n", + " 2\n", + " 2\n", + " 14525377\n", + " 2023-07-12 07:43:24\n", + " 0.000000\n", + " 8.874775\n", + " 8.874775\n", + " 27864\n", + " 27804\n", + " 60\n", + " 0.330881\n", " \n", " \n", - " 2647857\n", - " 247.932518\n", - " 64.090787\n", - " 183.841731\n", - " 177.632313\n", - " 409\n", + " 56968\n", + " 81c7590efa1520143b181577898b3572\n", + " 14525418\n", + " 2023-07-12 07:50:51\n", + " 5c07969e700a525860a02fcee6389fbf\n", + " 2\n", + " 2\n", + " 14525416\n", + " 2023-07-12 07:48:51\n", + " 0.769686\n", + " 9.364080\n", + " 9.364080\n", + " 28251\n", + " 28131\n", + " 120\n", + " 0.174562\n", " \n", " \n", - " 2647858\n", - " 300.479415\n", - " 49.640185\n", - " 250.839230\n", - " 207.496161\n", - " 32\n", + " 56969\n", + " 1189d5633f78463a0ddac2448f7a30b0\n", + " 14525455\n", + " 2023-07-12 08:06:39\n", + " 5c07969e700a525860a02fcee6389fbf\n", + " 2\n", + " 2\n", + " 14525454\n", + " 2023-07-12 08:06:17\n", + " 4.003774\n", + " 14.340286\n", + " 14.340286\n", + " 29199\n", + " 29177\n", + " 22\n", + " 1.458146\n", " \n", " \n", - " 2647859\n", - " 32782.699204\n", - " 32523.942558\n", - " 258.756646\n", - " 206.605579\n", - " 27\n", + " 56970\n", + " 68785ab8159f0daa7545085b46d5b6d2\n", + " 14525498\n", + " 2023-07-12 08:34:15\n", + " 5c07969e700a525860a02fcee6389fbf\n", + " 2\n", + " 2\n", + " 14525495\n", + " 2023-07-12 08:31:58\n", + " 0.000000\n", + " 10.401350\n", + " 10.401350\n", + " 30855\n", + " 30718\n", + " 137\n", + " 0.169838\n", " \n", " \n", - " 2647860\n", - " 267.609823\n", - " 20.183224\n", - " 247.426600\n", - " 223.246269\n", - " 30\n", + " 56971\n", + " 929ee8ae9296906d4f716ea8f5e28a2f\n", + " 14525956\n", + " 2023-07-12 18:32:30\n", + " 5c07969e700a525860a02fcee6389fbf\n", + " 2\n", + " 2\n", + " 14525955\n", + " 2023-07-12 18:32:19\n", + " 0.000000\n", + " 4.472623\n", + " 4.472623\n", + " 66750\n", + " 66739\n", + " 11\n", + " 0.909569\n", " \n", " \n", "\n", - "

2596053 rows × 5 columns

\n", + "

17751 rows × 15 columns

\n", "" ], "text/plain": [ - " shape_meters prior_shape_meters meters_elapsed distance \\\n", - "0 340.492168 294.216125 46.276043 36.792898 \n", - "1 0.000000 0.000000 0.000000 55.583304 \n", - "9 1828.303216 11.990035 1816.313182 1786.873286 \n", - "10 1501.494667 0.000000 1501.494667 1494.377947 \n", - "11 1828.303216 0.000000 1828.303216 1813.254069 \n", - "... ... ... ... ... \n", - "2647856 267.609824 96.331438 171.278386 153.554969 \n", - "2647857 247.932518 64.090787 183.841731 177.632313 \n", - "2647858 300.479415 49.640185 250.839230 207.496161 \n", - "2647859 32782.699204 32523.942558 258.756646 206.605579 \n", - "2647860 267.609823 20.183224 247.426600 223.246269 \n", + " trip_instance_key vp_idx location_timestamp_local \\\n", + "0 d15b0bea563cd87d86c86aebe5092eec 103533 2023-07-12 07:02:19 \n", + "1 3be657dfc73c03e07bf64c8d0d1ba5b9 103662 2023-07-12 07:47:59 \n", + "2 96b1cdcb34a5e140783ecdd704b941a8 103820 2023-07-12 08:37:49 \n", + "3 2de5b5e96e174df9876577aa32574a37 103951 2023-07-12 09:21:27 \n", + "4 7272eba6ea9e1a31c821e8baab6ed1f4 104085 2023-07-12 10:06:12 \n", + "... ... ... ... \n", + "56967 4230577777b1a6eefac68b504efd4107 14525378 2023-07-12 07:44:24 \n", + "56968 81c7590efa1520143b181577898b3572 14525418 2023-07-12 07:50:51 \n", + "56969 1189d5633f78463a0ddac2448f7a30b0 14525455 2023-07-12 08:06:39 \n", + "56970 68785ab8159f0daa7545085b46d5b6d2 14525498 2023-07-12 08:34:15 \n", + "56971 929ee8ae9296906d4f716ea8f5e28a2f 14525956 2023-07-12 18:32:30 \n", + "\n", + " shape_array_key stop_sequence n_vp_seg \\\n", + "0 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", + "1 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", + "2 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", + "3 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", + "4 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", + "... ... ... ... \n", + "56967 5c07969e700a525860a02fcee6389fbf 2 2 \n", + "56968 5c07969e700a525860a02fcee6389fbf 2 2 \n", + "56969 5c07969e700a525860a02fcee6389fbf 2 2 \n", + "56970 5c07969e700a525860a02fcee6389fbf 2 2 \n", + "56971 5c07969e700a525860a02fcee6389fbf 2 2 \n", + "\n", + " prior_vp_idx prior_location_timestamp_local difference_shape_meters \\\n", + "0 103414 2023-07-12 06:09:18 130.776168 \n", + "1 103587 2023-07-12 07:22:58 30.767878 \n", + "2 103735 2023-07-12 08:09:32 47.987494 \n", + "3 103867 2023-07-12 08:53:28 66.836692 \n", + "4 104002 2023-07-12 09:38:34 42.590357 \n", + "... ... ... ... \n", + "56967 14525377 2023-07-12 07:43:24 0.000000 \n", + "56968 14525416 2023-07-12 07:48:51 0.769686 \n", + "56969 14525454 2023-07-12 08:06:17 4.003774 \n", + "56970 14525495 2023-07-12 08:31:58 0.000000 \n", + "56971 14525955 2023-07-12 18:32:19 0.000000 \n", "\n", - " sec_elapsed \n", - "0 31 \n", - "1 212 \n", - "9 121 \n", - "10 137 \n", - "11 152 \n", - "... ... \n", - "2647856 411 \n", - "2647857 409 \n", - "2647858 32 \n", - "2647859 27 \n", - "2647860 30 \n", + " straight_distance meters_elapsed location_timestamp_local_sec \\\n", + "0 149.466617 130.776168 25339 \n", + "1 35.712476 35.712476 28079 \n", + "2 59.847917 59.847917 31069 \n", + "3 82.544468 82.544468 33687 \n", + "4 67.569399 67.569399 36372 \n", + "... ... ... ... \n", + "56967 8.874775 8.874775 27864 \n", + "56968 9.364080 9.364080 28251 \n", + "56969 14.340286 14.340286 29199 \n", + "56970 10.401350 10.401350 30855 \n", + "56971 4.472623 4.472623 66750 \n", "\n", - "[2596053 rows x 5 columns]" + " prior_location_timestamp_local_sec sec_elapsed speed_mph \n", + "0 22158 3181 0.091967 \n", + "1 26578 1501 0.053224 \n", + "2 29372 1697 0.078892 \n", + "3 32008 1679 0.109977 \n", + "4 34714 1658 0.091166 \n", + "... ... ... ... \n", + "56967 27804 60 0.330881 \n", + "56968 28131 120 0.174562 \n", + "56969 29177 22 1.458146 \n", + "56970 30718 137 0.169838 \n", + "56971 66739 11 0.909569 \n", + "\n", + "[17751 rows x 15 columns]" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bad_results" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "989bdfe8-8dde-4457-b4d9-ca110ecfabf7", + "metadata": {}, + "outputs": [], + "source": [ + "part1 = gdf[gdf.n_vp_seg==1]\n", + "\n", + "p1 = compute(part1)[0]\n", + "\n", + "p1.to_parquet(\"one_vp_in_seg.parquet\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "1415ef3d-7a88-48ca-ad9c-a20a00618d61", + "metadata": {}, + "outputs": [], + "source": [ + "bad_shapes = bad_results.shape_array_key.unique().tolist()" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "ce58f30c-54b7-4ba1-854e-fcd9d0314012", + "metadata": {}, + "outputs": [], + "source": [ + "ok_results.to_parquet(\"ok_results.parquet\")\n", + "bad_results.to_parquet(\"bad_results.parquet\")" + ] + }, + { + "cell_type": "markdown", + "id": "77a87e87-77cf-4d90-bdba-a9f6eaaf6433", + "metadata": {}, + "source": [ + "For speeds that are unusually low, it is now confirmed it's not just `loop_or_inlining`. It happens on `loop_or_inlining==0` too, so it's good to have moved the sjoin postprocessing to all shapes." + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "3ae778cb-00df-4c31-9161-c969fa27565a", + "metadata": {}, + "outputs": [], + "source": [ + "segments = gpd.read_parquet(\n", + " f\"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet\",\n", + " filters = [[(GROUPING_COL, \"in\", bad_shapes)]],\n", + " columns = SEGMENT_IDENTIFIER_COLS + [\"loop_or_inlining\", \"geometry\"]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "53aab44a-b49f-4658-a79f-cf4cdf1e1383", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 5040\n", + "1 1074\n", + "Name: loop_or_inlining, dtype: int64" ] }, - "execution_count": 55, + "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "gdf3[gdf3.meters_elapsed <= gdf3[\"distance\"]*1.5][\n", - " [\"shape_meters\", \"prior_shape_meters\",\n", - " \"meters_elapsed\", \"distance\",\n", - " \"sec_elapsed\"\n", - " ]]" + "segments.loop_or_inlining.value_counts()" ] }, { "cell_type": "code", "execution_count": null, - "id": "85fd6513-e216-41c8-b302-2f613aaccc8f", + "id": "bda0d4da-96f9-4833-95f2-c02f992a63c6", "metadata": {}, "outputs": [], "source": [] diff --git a/rt_segment_speeds/scripts/test_split.py b/rt_segment_speeds/scripts/test_split.py index 5550f5947..b3765671e 100644 --- a/rt_segment_speeds/scripts/test_split.py +++ b/rt_segment_speeds/scripts/test_split.py @@ -18,7 +18,6 @@ import geopandas as gpd import pandas as pd -from dask import delayed, compute from typing import Literal from segment_speed_utils import helpers, segment_calcs, wrangle_shapes @@ -56,6 +55,7 @@ def get_prior_position_on_segment( segment_trip_cols + ["vp_idx"] ).reset_index(drop=True) + df2 = df2.assign( prior_vp_idx = (df2.groupby(segment_trip_cols, observed=True, group_keys=False) @@ -104,48 +104,42 @@ def get_usable_vp_bounds_by_trip(df: dd.DataFrame) -> pd.DataFrame: def merge_in_segments( gdf: gpd.GeoDataFrame, segment_identifier_cols: list, - grouping_col: str + grouping_col: str, + n_vp_seg_value: Literal[1,2] ) -> gpd.GeoDataFrame: - - shapes_needed = gdf[grouping_col].unique().tolist() - + # If segment has 1 point, then we have to use the shape, # since the prior point can come from multiple segments away - if (gdf.n_vp_seg==1).all(): + if n_vp_seg_value==1: shapes = helpers.import_scheduled_shapes( analysis_date, - filters = [[(grouping_col, "in", shapes_needed)]], columns = [grouping_col, "geometry"], get_pandas = True, crs = PROJECT_CRS ) - m1 = pd.merge( + m1 = dd.merge( gdf, shapes, on = grouping_col, how = "inner" - ).rename(columns = { - "geometry_x": "vp_geometry", - "geometry_y": "geometry"}) + ) # If segment has 2 points, then we can use segment geometry - elif (gdf.n_vp_seg==2).all(): + elif n_vp_seg_value==2: segments = gpd.read_parquet( f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet", columns = segment_identifier_cols + ["geometry"] ) - m1 = pd.merge( + m1 = dd.merge( gdf, segments, on = segment_identifier_cols, how = "inner" - ).rename(columns = { - "geometry_x": "vp_geometry", - "geometry_y": "geometry"}) + ) return m1 @@ -154,47 +148,40 @@ def attach_vp_timestamp_location( df: pd.DataFrame, usable_vp: dd.DataFrame, timestamp_col: str -) ->gpd.GeoDataFrame: +) -> gpd.GeoDataFrame: """ """ # Merge in the timestamp and x, y coords - df_with_xy = dd.merge( + usable_gdf = dg.from_dask_dataframe( usable_vp, + geometry = dg.points_from_xy(usable_vp, x = "x", y = "y") + ).drop(columns = ["x", "y"]).set_crs(WGS84) + + usable_gdf = usable_gdf.to_crs(PROJECT_CRS) + + df_with_xy = dd.merge( + usable_gdf, df, on = "vp_idx", how = "inner" ) # Merge again to get timestamp and x, y coords of previous point - usable_vp2 = usable_vp.rename( + usable_gdf2 = usable_gdf.rename( columns = { "vp_idx": "prior_vp_idx", timestamp_col: f"prior_{timestamp_col}", - "x": "prior_x", - "y": "prior_y", } ).drop(columns = "trip_instance_key") df_with_prior_xy = dd.merge( df_with_xy, - usable_vp2, + usable_gdf2, on = "prior_vp_idx", how = "inner" - ).compute() - - gdf = gpd.GeoDataFrame( - df_with_prior_xy, - geometry = gpd.points_from_xy(df_with_prior_xy.x, df_with_prior_xy.y), - crs = WGS84 - ).to_crs(PROJECT_CRS).drop(columns = ["x", "y"]) - - gdf2 = gdf.assign( - prior_vp_geometry = gpd.points_from_xy( - gdf.prior_x, gdf.prior_y, crs = WGS84 - ).to_crs(PROJECT_CRS) - ).drop(columns = ["prior_x", "prior_y"]).set_geometry("geometry") + ) - return gdf2 + return df_with_prior_xy def linear_referencing_for_segment( @@ -203,25 +190,21 @@ def linear_referencing_for_segment( scaling_factor: float = 1.75 ) -> dg.GeoDataFrame: - #gddf = dg.from_geopandas(gdf, npartitions=50) - gddf = gdf.copy() - + gddf = gdf.repartition(npartitions=50) - shape_meters_series = (#gddf.map_partitions( - wrangle_shapes.project_point_geom_onto_linestring( - gddf, + shape_meters_series = gddf.map_partitions( + wrangle_shapes.project_point_geom_onto_linestring, "geometry", "vp_geometry", - #meta = ("shape_meters", "float") - )) + meta = ("shape_meters", "float") + ) - prior_shape_meters_series = (#gddf.map_partitions( - wrangle_shapes.project_point_geom_onto_linestring( - gddf, + prior_shape_meters_series = gddf.map_partitions( + wrangle_shapes.project_point_geom_onto_linestring, "geometry", "prior_vp_geometry", - #meta = ("prior_shape_meters", "float") - )) + meta = ("prior_shape_meters", "float") + ) #gddf["current_shape_meters"] = shape_meters_series #gddf["prior_shape_meters"] = prior_shape_meters_series @@ -241,7 +224,7 @@ def linear_referencing_for_segment( x.difference_shape_meters >= x.straight_distance*scaling_factor ) else x.difference_shape_meters, axis=1, - #meta = ("meters_elapsed", "float") + meta = ("meters_elapsed", "float") ), ) @@ -267,6 +250,8 @@ def put_all_together( GROUPING_COL = dict_inputs["grouping_col"] TIMESTAMP_COL = dict_inputs["timestamp_col"] + time0 = datetime.datetime.now() + # Import usable vp, which we'll use later for the x, y and timestamp usable_vp = dd.read_parquet( f"{SEGMENT_GCS}{USABLE_VP}_{analysis_date}", @@ -275,21 +260,24 @@ def put_all_together( vp_idx_bounds = get_usable_vp_bounds_by_trip(usable_vp) # Start from pared down vp - df = delayed(pd.read_parquet)( + df = pd.read_parquet( f"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_all_{analysis_date}", columns = SEGMENT_IDENTIFIER_COLS + ["trip_instance_key", "vp_idx"] ) # Make sure all segments have 2 points # If it doesn't, fill it in with the previous vp_idx - df2 = delayed(get_prior_position_on_segment)( + df2 = get_prior_position_on_segment( df, SEGMENT_IDENTIFIER_COLS, TIMESTAMP_COL ) + time1 = datetime.datetime.now() + print(f"get prior position: {time1 - time0}") + # Check that the previous vp_idx actually occurs on the same trip - df3 = delayed(pd.merge)( + df3 = dd.merge( df2, vp_idx_bounds, on = "trip_instance_key", @@ -310,53 +298,66 @@ def put_all_together( axis=1) ).drop(columns = ["trip_instance_key", "min_vp_idx", "max_vp_idx"]) - gdf = delayed(attach_vp_timestamp_location)( + gdf = attach_vp_timestamp_location( df3, usable_vp, TIMESTAMP_COL ) - part1 = gdf[gdf.n_vp_seg==1].reset_index(drop=True) - part2 = gdf[gdf.n_vp_seg==2].reset_index(drop=True) + time2 = datetime.datetime.now() + print(f"attach vp timestamp: {time2 - time1}") - part2_keep = (part2.groupby(["trip_instance_key"] + SEGMENT_IDENTIFIER_COLS) - .vp_idx - .max() - .reset_index() - ) + #part1 = gdf[gdf.n_vp_seg==1] + part2 = gdf[gdf.n_vp_seg==2] + + part2_keep = (part2.groupby(["trip_instance_key"] + SEGMENT_IDENTIFIER_COLS, + observed=True, group_keys=False) + .vp_idx + .max() + .reset_index() + ) - part2_pared = delayed(pd.merge)( + part2_pared = dd.merge( part2, part2_keep, on = ["trip_instance_key", "vp_idx"] + SEGMENT_IDENTIFIER_COLS, how = "inner" ) - - part1_gdf = delayed(merge_in_segments)( - part1, - SEGMENT_IDENTIFIER_COLS, - GROUPING_COL - ) + #part1_gdf = merge_in_segments( + # part1, + # SEGMENT_IDENTIFIER_COLS, + # GROUPING_COL, + # n_vp_seg_value=1 + #) - part2_gdf = delayed(merge_in_segments)( + part2_gdf = merge_in_segments( part2_pared, SEGMENT_IDENTIFIER_COLS, - GROUPING_COL + GROUPING_COL, + n_vp_seg_value=2 ) - gdf3 = delayed(pd.concat)( - [part1_gdf, part2_gdf], + gdf3 = dd.multi.concat( + [ + #part1_gdf, + part2_gdf], axis=0 - ).sort_values( - SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"] - ).reset_index(drop=True) + ).reset_index(drop=True)#.sort_values( + # SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"] + #) + + time3 = datetime.datetime.now() + print(f"merge in segments: {time3 - time2}") - gdf4 = delayed(linear_referencing_for_segment)( + gdf4 = linear_referencing_for_segment( gdf3, TIMESTAMP_COL, scaling_factor = 1.75 ) + + time4 = datetime.datetime.now() + print(f"linear ref: {time4 - time2}") return gdf4 @@ -365,10 +366,10 @@ def put_all_together( start = datetime.datetime.now() STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments") gddf = put_all_together(analysis_date, STOP_SEG_DICT) + + gddf = gddf.repartition(npartitions=2) - gdf = compute(gddf)[0] - - gdf.to_parquet(f"linear_ref.parquet") + gddf.to_parquet("linear_ref") print(f"execution time: {datetime.datetime.now() - start}") From b71e5c9bd8a633a2a537a1a0a18cdd3e7e9e8ca1 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Mon, 18 Sep 2023 16:53:58 +0000 Subject: [PATCH 05/13] (remove): remove concat since we pare down for all shapes --- .../scripts/A4_concatenate_vp_pared.py | 52 ------------------- 1 file changed, 52 deletions(-) delete mode 100644 rt_segment_speeds/scripts/A4_concatenate_vp_pared.py diff --git a/rt_segment_speeds/scripts/A4_concatenate_vp_pared.py b/rt_segment_speeds/scripts/A4_concatenate_vp_pared.py deleted file mode 100644 index fd6db812b..000000000 --- a/rt_segment_speeds/scripts/A4_concatenate_vp_pared.py +++ /dev/null @@ -1,52 +0,0 @@ -""" -Concatenate pared down vp -from normal and special cases -and save as one file to use. -""" -import dask.dataframe as dd -import datetime - -from segment_speed_utils import helpers -from segment_speed_utils.project_vars import (analysis_date, SEGMENT_GCS, - CONFIG_PATH) - -if __name__ == "__main__": - - start = datetime.datetime.now() - - STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments") - - VP_FULL_INFO = STOP_SEG_DICT["stage1"] - INPUT_FILE = STOP_SEG_DICT["stage3"] - SEGMENT_IDENTIFIER_COLS = STOP_SEG_DICT["segment_identifier_cols"] - TIMESTAMP_COL = STOP_SEG_DICT["timestamp_col"] - cases = ["normal", "special"] - - dfs = [ - dd.read_parquet( - f"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_{c}_{analysis_date}", - columns = ["vp_idx"] + SEGMENT_IDENTIFIER_COLS - ) for c in cases - ] - - pared_down_vp = dd.multi.concat(dfs, axis=0).reset_index( - drop=True).set_index("vp_idx", sorted=False) - - vp_full_info = dd.read_parquet( - f"{SEGMENT_GCS}{VP_FULL_INFO}_{analysis_date}" - ).set_index("vp_idx", sorted=False) - - df = dd.merge( - vp_full_info, - pared_down_vp, - left_index = True, - right_index = True, - how = "inner" - ).reset_index() - - df = df.repartition(npartitions = 2) - df.to_parquet(f"{SEGMENT_GCS}{INPUT_FILE}_{analysis_date}", - overwrite=True) - - end = datetime.datetime.now() - print(f"execution time: {end-start}") \ No newline at end of file From 40268982811dc85e4962b9e6fb5ba24445aa46a1 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Tue, 19 Sep 2023 15:35:19 +0000 Subject: [PATCH 06/13] adapt B1 --- rt_segment_speeds/scripts/fill_in_1_vp.ipynb | 1536 +++++++----------- 1 file changed, 560 insertions(+), 976 deletions(-) diff --git a/rt_segment_speeds/scripts/fill_in_1_vp.ipynb b/rt_segment_speeds/scripts/fill_in_1_vp.ipynb index fe884d133..8427f7519 100644 --- a/rt_segment_speeds/scripts/fill_in_1_vp.ipynb +++ b/rt_segment_speeds/scripts/fill_in_1_vp.ipynb @@ -29,6 +29,8 @@ "from segment_speed_utils import (helpers, wrangle_shapes, \n", " segment_calcs)\n", "from shared_utils import rt_dates\n", + "from shared_utils.geography_utils import WGS84\n", + "\n", "import test_split\n", "\n", "analysis_date = rt_dates.DATES[\"jul2023\"]" @@ -63,7 +65,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "id": "29ef4e7b-82d2-427a-9122-f4ee1c34baa3", "metadata": {}, "outputs": [], @@ -77,12 +79,13 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "id": "86983844-a321-4d86-863a-3297bd3f7dd5", "metadata": {}, "outputs": [], "source": [ "from dask import delayed, compute\n", + "\n", "# Start from pared down vp\n", "df = delayed(pd.read_parquet)(\n", " f\"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_all_{analysis_date}\",\n", @@ -92,280 +95,97 @@ }, { "cell_type": "code", - "execution_count": 6, - "id": "45664688-422d-4336-b7ac-dcaa399b803d", + "execution_count": 5, + "id": "784086ef-5e92-4c06-ada0-1abe7819de2d", "metadata": {}, "outputs": [], "source": [ - "df2 = delayed(test_split.get_prior_position_on_segment)(\n", - " df, \n", - " SEGMENT_IDENTIFIER_COLS,\n", - " TIMESTAMP_COL\n", + "df = pd.read_parquet(\n", + " f\"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_all_{analysis_date}\",\n", + " #columns = SEGMENT_IDENTIFIER_COLS + [\"trip_instance_key\", \"vp_idx\"]\n", ")" ] }, - { - "cell_type": "code", - "execution_count": 7, - "id": "da871931-d2fa-42dd-8d66-b5bd6c735728", - "metadata": {}, - "outputs": [], - "source": [ - "df3 = delayed(dd.merge)(\n", - " df2,\n", - " vp_idx_bounds,\n", - " on = \"trip_instance_key\",\n", - " how = \"inner\"\n", - ")\n", - "\n", - "df3 = df3.assign(\n", - " prior_vp_idx = df3.apply(\n", - " lambda x: \n", - " x.vp_idx + 1 if (x.prior_vp_idx < x.min_vp_idx) and \n", - " (x.vp_idx + 1 <= x.max_vp_idx)\n", - " else x.prior_vp_idx, \n", - " axis=1)\n", - ").drop(columns = [\"trip_instance_key\", \"min_vp_idx\", \"max_vp_idx\"])\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "6aedd4e2-c3a4-4b18-8ca5-9234fc10f992", - "metadata": {}, - "outputs": [], - "source": [ - "df3 = compute(df3)[0]" - ] - }, { "cell_type": "code", "execution_count": 10, - "id": "22f8c086-4592-4c8f-9e08-cbe43c90373f", + "id": "fd0b0494-8ca7-4a7f-8ff9-3f9b4ac4cced", "metadata": {}, "outputs": [], "source": [ - "def attach_vp_timestamp_location(\n", - " df: pd.DataFrame,\n", - " usable_vp: dd.DataFrame,\n", + "def make_wide(\n", + " df: pd.DataFrame, \n", + " segment_identifier_cols: list,\n", " timestamp_col: str\n", - ") -> gpd.GeoDataFrame:\n", - " \"\"\"\n", - " \"\"\"\n", + ") -> pd.DataFrame:\n", " \n", - " # Turn the vp_idx we need into gdf\n", - " vp_to_keep = np.union1d(df.vp_idx, df.prior_vp_idx).tolist()\n", - " usable_vp2 = usable_vp[usable_vp.vp_idx.isin(vp_to_keep)]\n", + " group_cols = [\"trip_instance_key\"] + segment_identifier_cols\n", " \n", - " # Merge in the timestamp and x, y coords \n", - " usable_gdf = gpd.GeoDataFrame(\n", - " usable_vp2,\n", - " geometry = gpd.points_from_xy(usable_vp2.x, usable_vp2.y),\n", - " crs = WGS84\n", - " ).to_crs(PROJECT_CRS).drop(columns = [\"x\", \"y\"])\n", + " # Sort and make sure we get a pairing of point 1 and 2 within segment\n", + " df = df.sort_values(group_cols + [\"vp_idx\"]).reset_index(drop=True)\n", " \n", + " df = dd.from_pandas(df, npartitions=50, sort=True)\n", + "\n", + " df = df.assign(\n", + " obs = df.groupby(group_cols, \n", + " observed=True, group_keys=False\n", + " ).cumcount() + 1\n", + " )\n", " \n", - " df_with_xy = pd.merge(\n", - " usable_gdf,\n", - " df,\n", - " on = \"vp_idx\",\n", - " how = \"inner\"\n", - " ).rename(columns = {\"geometry\": \"vp_geometry\"})\n", + " gdf = dg.from_dask_dataframe(\n", + " df, \n", + " geometry=dg.points_from_xy(df, \"x\", \"y\")\n", + " ).set_crs(WGS84).to_crs(PROJECT_CRS).drop(columns = [\"x\", \"y\"])\n", " \n", - " # Merge again to get timestamp and x, y coords of previous point\n", - " usable_gdf2 = usable_gdf.rename(\n", - " columns = {\n", - " \"vp_idx\": \"prior_vp_idx\",\n", - " timestamp_col: f\"prior_{timestamp_col}\",\n", - " \"geometry\": \"prior_vp_geometry\"\n", - " }\n", - " ).drop(columns = \"trip_instance_key\")\n", + " segments = helpers.import_segments(\n", + " SEGMENT_GCS,\n", + " f\"{SEGMENT_FILE}_{analysis_date}\", \n", + " columns = segment_identifier_cols + [\"geometry\"]\n", + " ).dropna(subset=\"geometry\").reset_index(drop=True)\n", " \n", - " df_with_prior_xy = dd.merge(\n", - " df_with_xy,\n", - " usable_gdf2,\n", - " on = \"prior_vp_idx\",\n", + " gddf = dd.merge(\n", + " gdf,\n", + " segments,\n", + " on = segment_identifier_cols,\n", " how = \"inner\"\n", + " ).rename(columns = {\n", + " \"geometry_x\": \"vp_geometry\",\n", + " \"geometry_y\": \"segment_geometry\"\n", + " }).set_geometry(\"vp_geometry\")\n", + " \n", + " shape_meters_series = gddf.map_partitions(\n", + " wrangle_shapes.project_point_geom_onto_linestring,\n", + " \"segment_geometry\",\n", + " \"vp_geometry\",\n", + " meta = (\"shape_meters\", \"float\")\n", " )\n", " \n", - " return df_with_prior_xy" + " gddf[\"shape_meters\"] = shape_meters_series\n", + " \n", + " ddf = gddf.drop(columns = [\"vp_geometry\", \"segment_geometry\"]).reset_index(drop=True)\n", + " '''\n", + " ddf = segment_calcs.convert_timestamp_to_seconds(\n", + " ddf, [timestamp_col])\n", + " \n", + " # Must do point 1 against point 2\n", + " # Some segments only have 1 point, have no second point to find\n", + " point1 = ddf[ddf.obs==1].reset_index(drop=True)\n", + " point2 = ddf[ddf.obs==2].reset_index(drop=True)\n", + " \n", + " ddf_wide = dd.merge(\n", + " point1,\n", + " point2,\n", + " on = group_cols,\n", + " how = \"left\"\n", + " ).reset_index(drop=True)\n", + " '''\n", + " return ddf" ] }, { "cell_type": "code", "execution_count": 11, - "id": "502f4715-a298-4520-b571-fae4d196cf2f", - "metadata": {}, - "outputs": [], - "source": [ - "from shared_utils.geography_utils import WGS84\n", - "\n", - "gdf = delayed(attach_vp_timestamp_location)(\n", - " df3,\n", - " usable_vp,\n", - " TIMESTAMP_COL\n", - ").persist()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "eeecaba2-9e58-4954-9660-5337ef02569c", - "metadata": {}, - "outputs": [], - "source": [ - "part2 = gdf[gdf.n_vp_seg==2]\n", - "\n", - "part2_keep = (part2.groupby([\"trip_instance_key\"] + SEGMENT_IDENTIFIER_COLS,\n", - " observed=True, group_keys=False)\n", - " .vp_idx\n", - " .max()\n", - " .reset_index()\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "150a9219-94c7-44cc-84ac-e9badd88c3e5", - "metadata": {}, - "outputs": [], - "source": [ - "part2_pared = delayed(dd.merge)(\n", - " part2,\n", - " part2_keep,\n", - " on = [\"trip_instance_key\", \"vp_idx\"] + SEGMENT_IDENTIFIER_COLS, \n", - " how = \"inner\"\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "14b7f596-1676-40e8-a185-c0f02ad7c7d2", - "metadata": {}, - "outputs": [], - "source": [ - "part2_gdf = delayed(test_split.merge_in_segments)(\n", - " part2_pared,\n", - " SEGMENT_IDENTIFIER_COLS,\n", - " GROUPING_COL,\n", - " n_vp_seg_value=2\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "cefe0484-6a54-4b11-a562-5e77ff824b85", - "metadata": {}, - "outputs": [], - "source": [ - "p2_gdf = compute(part2_gdf)[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "88525881-e849-4a42-b0a3-aa14d9d38f38", - "metadata": {}, - "outputs": [], - "source": [ - "gddf = dg.from_geopandas(p2_gdf, npartitions=100)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "678c1e78-9fe3-4f11-a2f6-4fa7ced6c69c", - "metadata": {}, - "outputs": [], - "source": [ - "shape_meters_series = gddf.map_partitions(\n", - " wrangle_shapes.project_point_geom_onto_linestring,\n", - " \"geometry\",\n", - " \"vp_geometry\",\n", - " meta = (\"shape_meters\", \"float\")\n", - ")\n", - "\n", - "prior_shape_meters_series = gddf.map_partitions(\n", - " wrangle_shapes.project_point_geom_onto_linestring,\n", - " \"geometry\",\n", - " \"prior_vp_geometry\",\n", - " meta = (\"prior_shape_meters\", \"float\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "351cc3d5-ade0-47a3-ba30-aa8e24b4c9e7", - "metadata": {}, - "outputs": [], - "source": [ - "gddf[\"difference_shape_meters\"] = abs(\n", - " shape_meters_series - prior_shape_meters_series)\n", - "gddf[\"straight_distance\"] = gddf.vp_geometry.distance(gddf.prior_vp_geometry)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "8f943943-e0bc-4526-b9ab-e8963eeec7ae", - "metadata": {}, - "outputs": [], - "source": [ - "scaling_factor = 1.75\n", - "min_meters_elapsed = 100\n", - "\n", - "# It's not only that difference_shape_meters==0 is wrong,\n", - "# Separate out these and try again with full shape \n", - "gddf = gddf.assign(\n", - " meters_elapsed = gddf.apply(\n", - " lambda x: x.straight_distance if (\n", - " x.difference_shape_meters <= min_meters_elapsed or \n", - " x.difference_shape_meters >= x.straight_distance*scaling_factor\n", - " ) else x.difference_shape_meters, \n", - " axis=1, \n", - " meta = (\"meters_elapsed\", \"float\")\n", - " ),\n", - ")\n", - "\n", - "gddf = segment_calcs.convert_timestamp_to_seconds(\n", - " gddf, [TIMESTAMP_COL, f\"prior_{TIMESTAMP_COL}\"])\n", - "\n", - "gddf = gddf.assign(\n", - " sec_elapsed = (gddf[f\"{TIMESTAMP_COL}_sec\"] - \n", - " gddf[f\"prior_{TIMESTAMP_COL}_sec\"]).abs()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "b10cbeb9-558e-459b-93d3-becb118ba7b1", - "metadata": {}, - "outputs": [], - "source": [ - "ddf = gddf.drop(columns = [\"prior_vp_geometry\", \n", - " \"vp_geometry\", \"geometry\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "d2d54fb3-d5dc-4a98-a304-9b5004e022b8", - "metadata": {}, - "outputs": [], - "source": [ - "ddf = ddf.repartition(npartitions=2)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "df1d5370-3021-45fb-a6ef-7eb408185d5f", + "id": "540db34a-ae25-4fde-916e-0ed14aa3e0f0", "metadata": {}, "outputs": [ { @@ -691,379 +511,494 @@ "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", " return lib.line_locate_point(line, other)\n", "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", " return lib.line_locate_point(line, other)\n" ] } ], + "source": [ + "results = make_wide(\n", + " df, \n", + " SEGMENT_IDENTIFIER_COLS, \n", + " TIMESTAMP_COL\n", + ").compute()" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a222b1f4-f54a-4b18-acab-93282af02d1f", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
vp_idxtrip_instance_keylocation_timestamp_localshape_array_keystop_sequenceobsshape_meters
031407110000703b8b86da99dbf637e746c452362023-07-12 18:14:5815b81a6fa853534940f7f1c8f7c3a3ba110.000000
131407120000703b8b86da99dbf637e746c452362023-07-12 18:15:1315b81a6fa853534940f7f1c8f7c3a3ba120.000000
231407110000703b8b86da99dbf637e746c452362023-07-12 18:14:5815b81a6fa853534940f7f1c8f7c3a3ba2113.352249
331407190000703b8b86da99dbf637e746c452362023-07-12 18:17:3115b81a6fa853534940f7f1c8f7c3a3ba221114.127066
431407170000703b8b86da99dbf637e746c452362023-07-12 18:17:0015b81a6fa853534940f7f1c8f7c3a3ba317.565720
........................
991946808154ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:06:37f8348bfb848a94699bf0a8d3c2be02c92021042.154977
991956808154ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:06:37f8348bfb848a94699bf0a8d3c2be02c921115.118653
991966808156ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:07:17f8348bfb848a94699bf0a8d3c2be02c9212393.326336
991976808156ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:07:17f8348bfb848a94699bf0a8d3c2be02c92210.000000
991986808160ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:08:38f8348bfb848a94699bf0a8d3c2be02c9222208.493146
\n", + "

4959972 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " vp_idx trip_instance_key location_timestamp_local \\\n", + "0 3140711 0000703b8b86da99dbf637e746c45236 2023-07-12 18:14:58 \n", + "1 3140712 0000703b8b86da99dbf637e746c45236 2023-07-12 18:15:13 \n", + "2 3140711 0000703b8b86da99dbf637e746c45236 2023-07-12 18:14:58 \n", + "3 3140719 0000703b8b86da99dbf637e746c45236 2023-07-12 18:17:31 \n", + "4 3140717 0000703b8b86da99dbf637e746c45236 2023-07-12 18:17:00 \n", + "... ... ... ... \n", + "99194 6808154 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:06:37 \n", + "99195 6808154 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:06:37 \n", + "99196 6808156 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:07:17 \n", + "99197 6808156 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:07:17 \n", + "99198 6808160 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:08:38 \n", + "\n", + " shape_array_key stop_sequence obs shape_meters \n", + "0 15b81a6fa853534940f7f1c8f7c3a3ba 1 1 0.000000 \n", + "1 15b81a6fa853534940f7f1c8f7c3a3ba 1 2 0.000000 \n", + "2 15b81a6fa853534940f7f1c8f7c3a3ba 2 1 13.352249 \n", + "3 15b81a6fa853534940f7f1c8f7c3a3ba 2 2 1114.127066 \n", + "4 15b81a6fa853534940f7f1c8f7c3a3ba 3 1 7.565720 \n", + "... ... ... ... ... \n", + "99194 f8348bfb848a94699bf0a8d3c2be02c9 20 2 1042.154977 \n", + "99195 f8348bfb848a94699bf0a8d3c2be02c9 21 1 15.118653 \n", + "99196 f8348bfb848a94699bf0a8d3c2be02c9 21 2 393.326336 \n", + "99197 f8348bfb848a94699bf0a8d3c2be02c9 22 1 0.000000 \n", + "99198 f8348bfb848a94699bf0a8d3c2be02c9 22 2 208.493146 \n", + "\n", + "[4959972 rows x 7 columns]" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "results" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "45664688-422d-4336-b7ac-dcaa399b803d", + "metadata": {}, + "outputs": [], + "source": [ + "df2 = delayed(test_split.get_prior_position_on_segment)(\n", + " df, \n", + " SEGMENT_IDENTIFIER_COLS,\n", + " TIMESTAMP_COL\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "da871931-d2fa-42dd-8d66-b5bd6c735728", + "metadata": {}, + "outputs": [], + "source": [ + "df3 = delayed(dd.merge)(\n", + " df2,\n", + " vp_idx_bounds,\n", + " on = \"trip_instance_key\",\n", + " how = \"inner\"\n", + ")\n", + "\n", + "df3 = df3.assign(\n", + " prior_vp_idx = df3.apply(\n", + " lambda x: \n", + " x.vp_idx + 1 if (x.prior_vp_idx < x.min_vp_idx) and \n", + " (x.vp_idx + 1 <= x.max_vp_idx)\n", + " else x.prior_vp_idx, \n", + " axis=1)\n", + ").drop(columns = [\"trip_instance_key\", \"min_vp_idx\", \"max_vp_idx\"])\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6aedd4e2-c3a4-4b18-8ca5-9234fc10f992", + "metadata": {}, + "outputs": [], + "source": [ + "df3 = compute(df3)[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "22f8c086-4592-4c8f-9e08-cbe43c90373f", + "metadata": {}, + "outputs": [], + "source": [ + "def attach_vp_timestamp_location(\n", + " df: pd.DataFrame,\n", + " usable_vp: dd.DataFrame,\n", + " timestamp_col: str\n", + ") -> gpd.GeoDataFrame:\n", + " \"\"\"\n", + " \"\"\"\n", + " \n", + " # Turn the vp_idx we need into gdf\n", + " vp_to_keep = np.union1d(df.vp_idx, df.prior_vp_idx).tolist()\n", + " usable_vp2 = usable_vp[usable_vp.vp_idx.isin(vp_to_keep)]\n", + " \n", + " # Merge in the timestamp and x, y coords \n", + " usable_gdf = gpd.GeoDataFrame(\n", + " usable_vp2,\n", + " geometry = gpd.points_from_xy(usable_vp2.x, usable_vp2.y),\n", + " crs = WGS84\n", + " ).to_crs(PROJECT_CRS).drop(columns = [\"x\", \"y\"])\n", + " \n", + " \n", + " df_with_xy = pd.merge(\n", + " usable_gdf,\n", + " df,\n", + " on = \"vp_idx\",\n", + " how = \"inner\"\n", + " ).rename(columns = {\"geometry\": \"vp_geometry\"})\n", + " \n", + " # Merge again to get timestamp and x, y coords of previous point\n", + " usable_gdf2 = usable_gdf.rename(\n", + " columns = {\n", + " \"vp_idx\": \"prior_vp_idx\",\n", + " timestamp_col: f\"prior_{timestamp_col}\",\n", + " \"geometry\": \"prior_vp_geometry\"\n", + " }\n", + " ).drop(columns = \"trip_instance_key\")\n", + " \n", + " df_with_prior_xy = dd.merge(\n", + " df_with_xy,\n", + " usable_gdf2,\n", + " on = \"prior_vp_idx\",\n", + " how = \"inner\"\n", + " )\n", + " \n", + " return df_with_prior_xy" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "502f4715-a298-4520-b571-fae4d196cf2f", + "metadata": {}, + "outputs": [], + "source": [ + "from shared_utils.geography_utils import WGS84\n", + "\n", + "gdf = delayed(attach_vp_timestamp_location)(\n", + " df3,\n", + " usable_vp,\n", + " TIMESTAMP_COL\n", + ").persist()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "eeecaba2-9e58-4954-9660-5337ef02569c", + "metadata": {}, + "outputs": [], + "source": [ + "part2 = gdf[gdf.n_vp_seg==2]\n", + "\n", + "part2_keep = (part2.groupby([\"trip_instance_key\"] + SEGMENT_IDENTIFIER_COLS,\n", + " observed=True, group_keys=False)\n", + " .vp_idx\n", + " .max()\n", + " .reset_index()\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "150a9219-94c7-44cc-84ac-e9badd88c3e5", + "metadata": {}, + "outputs": [], + "source": [ + "part2_pared = delayed(dd.merge)(\n", + " part2,\n", + " part2_keep,\n", + " on = [\"trip_instance_key\", \"vp_idx\"] + SEGMENT_IDENTIFIER_COLS, \n", + " how = \"inner\"\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "14b7f596-1676-40e8-a185-c0f02ad7c7d2", + "metadata": {}, + "outputs": [], + "source": [ + "part2_gdf = delayed(test_split.merge_in_segments)(\n", + " part2_pared,\n", + " SEGMENT_IDENTIFIER_COLS,\n", + " GROUPING_COL,\n", + " n_vp_seg_value=2\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "cefe0484-6a54-4b11-a562-5e77ff824b85", + "metadata": {}, + "outputs": [], + "source": [ + "p2_gdf = compute(part2_gdf)[0]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "88525881-e849-4a42-b0a3-aa14d9d38f38", + "metadata": {}, + "outputs": [], + "source": [ + "gddf = dg.from_geopandas(p2_gdf, npartitions=100)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "678c1e78-9fe3-4f11-a2f6-4fa7ced6c69c", + "metadata": {}, + "outputs": [], + "source": [ + "shape_meters_series = gddf.map_partitions(\n", + " wrangle_shapes.project_point_geom_onto_linestring,\n", + " \"geometry\",\n", + " \"vp_geometry\",\n", + " meta = (\"shape_meters\", \"float\")\n", + ")\n", + "\n", + "prior_shape_meters_series = gddf.map_partitions(\n", + " wrangle_shapes.project_point_geom_onto_linestring,\n", + " \"geometry\",\n", + " \"prior_vp_geometry\",\n", + " meta = (\"prior_shape_meters\", \"float\")\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "351cc3d5-ade0-47a3-ba30-aa8e24b4c9e7", + "metadata": {}, + "outputs": [], + "source": [ + "gddf[\"difference_shape_meters\"] = abs(\n", + " shape_meters_series - prior_shape_meters_series)\n", + "gddf[\"straight_distance\"] = gddf.vp_geometry.distance(gddf.prior_vp_geometry)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "8f943943-e0bc-4526-b9ab-e8963eeec7ae", + "metadata": {}, + "outputs": [], + "source": [ + "scaling_factor = 1.75\n", + "min_meters_elapsed = 100\n", + "\n", + "# It's not only that difference_shape_meters==0 is wrong,\n", + "# Separate out these and try again with full shape \n", + "gddf = gddf.assign(\n", + " meters_elapsed = gddf.apply(\n", + " lambda x: x.straight_distance if (\n", + " x.difference_shape_meters <= min_meters_elapsed or \n", + " x.difference_shape_meters >= x.straight_distance*scaling_factor\n", + " ) else x.difference_shape_meters, \n", + " axis=1, \n", + " meta = (\"meters_elapsed\", \"float\")\n", + " ),\n", + ")\n", + "\n", + "gddf = segment_calcs.convert_timestamp_to_seconds(\n", + " gddf, [TIMESTAMP_COL, f\"prior_{TIMESTAMP_COL}\"])\n", + "\n", + "gddf = gddf.assign(\n", + " sec_elapsed = (gddf[f\"{TIMESTAMP_COL}_sec\"] - \n", + " gddf[f\"prior_{TIMESTAMP_COL}_sec\"]).abs()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b10cbeb9-558e-459b-93d3-becb118ba7b1", + "metadata": {}, + "outputs": [], + "source": [ + "ddf = gddf.drop(columns = [\"prior_vp_geometry\", \n", + " \"vp_geometry\", \"geometry\"])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "d2d54fb3-d5dc-4a98-a304-9b5004e022b8", + "metadata": {}, + "outputs": [], + "source": [ + "ddf = ddf.repartition(npartitions=2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "df1d5370-3021-45fb-a6ef-7eb408185d5f", + "metadata": {}, + "outputs": [], "source": [ "results = ddf.compute()" ] }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "id": "d4c9fdf6-55b8-479a-94ae-ab36a4343171", "metadata": {}, "outputs": [], @@ -1078,28 +1013,17 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "id": "433e0382-461e-4970-9d88-37e07a40efaf", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(17751, 15)" - ] - }, - "execution_count": 24, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "results[results.speed_mph < 5].shape" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "id": "be27c87b-0de2-472e-9ea0-cc42da1d4256", "metadata": {}, "outputs": [], @@ -1109,7 +1033,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "id": "e177393a-86c0-4c74-824c-fe9548b461b2", "metadata": {}, "outputs": [], @@ -1122,351 +1046,24 @@ "execution_count": null, "id": "4ab46336-10a3-4a25-86d5-6f9343f3175f", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(39221, 17751, 0)" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "len(ok_results), len(bad_results)" ] }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "id": "6390361b-bfdd-44f3-833b-12b291343bd3", "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
trip_instance_keyvp_idxlocation_timestamp_localshape_array_keystop_sequencen_vp_segprior_vp_idxprior_location_timestamp_localdifference_shape_metersstraight_distancemeters_elapsedlocation_timestamp_local_secprior_location_timestamp_local_secsec_elapsedspeed_mph
0d15b0bea563cd87d86c86aebe5092eec1035332023-07-12 07:02:19bbcffbd3d0f15bb6aa401323d9b4cc162121034142023-07-12 06:09:18130.776168149.466617130.776168253392215831810.091967
13be657dfc73c03e07bf64c8d0d1ba5b91036622023-07-12 07:47:59bbcffbd3d0f15bb6aa401323d9b4cc162121035872023-07-12 07:22:5830.76787835.71247635.712476280792657815010.053224
296b1cdcb34a5e140783ecdd704b941a81038202023-07-12 08:37:49bbcffbd3d0f15bb6aa401323d9b4cc162121037352023-07-12 08:09:3247.98749459.84791759.847917310692937216970.078892
32de5b5e96e174df9876577aa32574a371039512023-07-12 09:21:27bbcffbd3d0f15bb6aa401323d9b4cc162121038672023-07-12 08:53:2866.83669282.54446882.544468336873200816790.109977
47272eba6ea9e1a31c821e8baab6ed1f41040852023-07-12 10:06:12bbcffbd3d0f15bb6aa401323d9b4cc162121040022023-07-12 09:38:3442.59035767.56939967.569399363723471416580.091166
................................................
569674230577777b1a6eefac68b504efd4107145253782023-07-12 07:44:245c07969e700a525860a02fcee6389fbf22145253772023-07-12 07:43:240.0000008.8747758.8747752786427804600.330881
5696881c7590efa1520143b181577898b3572145254182023-07-12 07:50:515c07969e700a525860a02fcee6389fbf22145254162023-07-12 07:48:510.7696869.3640809.36408028251281311200.174562
569691189d5633f78463a0ddac2448f7a30b0145254552023-07-12 08:06:395c07969e700a525860a02fcee6389fbf22145254542023-07-12 08:06:174.00377414.34028614.3402862919929177221.458146
5697068785ab8159f0daa7545085b46d5b6d2145254982023-07-12 08:34:155c07969e700a525860a02fcee6389fbf22145254952023-07-12 08:31:580.00000010.40135010.40135030855307181370.169838
56971929ee8ae9296906d4f716ea8f5e28a2f145259562023-07-12 18:32:305c07969e700a525860a02fcee6389fbf22145259552023-07-12 18:32:190.0000004.4726234.4726236675066739110.909569
\n", - "

17751 rows × 15 columns

\n", - "
" - ], - "text/plain": [ - " trip_instance_key vp_idx location_timestamp_local \\\n", - "0 d15b0bea563cd87d86c86aebe5092eec 103533 2023-07-12 07:02:19 \n", - "1 3be657dfc73c03e07bf64c8d0d1ba5b9 103662 2023-07-12 07:47:59 \n", - "2 96b1cdcb34a5e140783ecdd704b941a8 103820 2023-07-12 08:37:49 \n", - "3 2de5b5e96e174df9876577aa32574a37 103951 2023-07-12 09:21:27 \n", - "4 7272eba6ea9e1a31c821e8baab6ed1f4 104085 2023-07-12 10:06:12 \n", - "... ... ... ... \n", - "56967 4230577777b1a6eefac68b504efd4107 14525378 2023-07-12 07:44:24 \n", - "56968 81c7590efa1520143b181577898b3572 14525418 2023-07-12 07:50:51 \n", - "56969 1189d5633f78463a0ddac2448f7a30b0 14525455 2023-07-12 08:06:39 \n", - "56970 68785ab8159f0daa7545085b46d5b6d2 14525498 2023-07-12 08:34:15 \n", - "56971 929ee8ae9296906d4f716ea8f5e28a2f 14525956 2023-07-12 18:32:30 \n", - "\n", - " shape_array_key stop_sequence n_vp_seg \\\n", - "0 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", - "1 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", - "2 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", - "3 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", - "4 bbcffbd3d0f15bb6aa401323d9b4cc16 21 2 \n", - "... ... ... ... \n", - "56967 5c07969e700a525860a02fcee6389fbf 2 2 \n", - "56968 5c07969e700a525860a02fcee6389fbf 2 2 \n", - "56969 5c07969e700a525860a02fcee6389fbf 2 2 \n", - "56970 5c07969e700a525860a02fcee6389fbf 2 2 \n", - "56971 5c07969e700a525860a02fcee6389fbf 2 2 \n", - "\n", - " prior_vp_idx prior_location_timestamp_local difference_shape_meters \\\n", - "0 103414 2023-07-12 06:09:18 130.776168 \n", - "1 103587 2023-07-12 07:22:58 30.767878 \n", - "2 103735 2023-07-12 08:09:32 47.987494 \n", - "3 103867 2023-07-12 08:53:28 66.836692 \n", - "4 104002 2023-07-12 09:38:34 42.590357 \n", - "... ... ... ... \n", - "56967 14525377 2023-07-12 07:43:24 0.000000 \n", - "56968 14525416 2023-07-12 07:48:51 0.769686 \n", - "56969 14525454 2023-07-12 08:06:17 4.003774 \n", - "56970 14525495 2023-07-12 08:31:58 0.000000 \n", - "56971 14525955 2023-07-12 18:32:19 0.000000 \n", - "\n", - " straight_distance meters_elapsed location_timestamp_local_sec \\\n", - "0 149.466617 130.776168 25339 \n", - "1 35.712476 35.712476 28079 \n", - "2 59.847917 59.847917 31069 \n", - "3 82.544468 82.544468 33687 \n", - "4 67.569399 67.569399 36372 \n", - "... ... ... ... \n", - "56967 8.874775 8.874775 27864 \n", - "56968 9.364080 9.364080 28251 \n", - "56969 14.340286 14.340286 29199 \n", - "56970 10.401350 10.401350 30855 \n", - "56971 4.472623 4.472623 66750 \n", - "\n", - " prior_location_timestamp_local_sec sec_elapsed speed_mph \n", - "0 22158 3181 0.091967 \n", - "1 26578 1501 0.053224 \n", - "2 29372 1697 0.078892 \n", - "3 32008 1679 0.109977 \n", - "4 34714 1658 0.091166 \n", - "... ... ... ... \n", - "56967 27804 60 0.330881 \n", - "56968 28131 120 0.174562 \n", - "56969 29177 22 1.458146 \n", - "56970 30718 137 0.169838 \n", - "56971 66739 11 0.909569 \n", - "\n", - "[17751 rows x 15 columns]" - ] - }, - "execution_count": 29, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "bad_results" ] }, { "cell_type": "code", - "execution_count": 42, + "execution_count": null, "id": "989bdfe8-8dde-4457-b4d9-ca110ecfabf7", "metadata": {}, "outputs": [], @@ -1480,7 +1077,7 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "id": "1415ef3d-7a88-48ca-ad9c-a20a00618d61", "metadata": {}, "outputs": [], @@ -1490,7 +1087,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "id": "ce58f30c-54b7-4ba1-854e-fcd9d0314012", "metadata": {}, "outputs": [], @@ -1509,7 +1106,7 @@ }, { "cell_type": "code", - "execution_count": 36, + "execution_count": null, "id": "3ae778cb-00df-4c31-9161-c969fa27565a", "metadata": {}, "outputs": [], @@ -1523,23 +1120,10 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": null, "id": "53aab44a-b49f-4658-a79f-cf4cdf1e1383", "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0 5040\n", - "1 1074\n", - "Name: loop_or_inlining, dtype: int64" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "segments.loop_or_inlining.value_counts()" ] From 46ee81b25fe48bb651569e2aeacdef15a263457a Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 20 Sep 2023 23:22:42 +0000 Subject: [PATCH 07/13] add a step to fix low speeds --- rt_segment_speeds/scripts/test_speeds.py | 372 +++++++++++++++++++++++ 1 file changed, 372 insertions(+) create mode 100644 rt_segment_speeds/scripts/test_speeds.py diff --git a/rt_segment_speeds/scripts/test_speeds.py b/rt_segment_speeds/scripts/test_speeds.py new file mode 100644 index 000000000..1f25818b8 --- /dev/null +++ b/rt_segment_speeds/scripts/test_speeds.py @@ -0,0 +1,372 @@ +import os +os.environ['USE_PYGEOS'] = '0' + +import dask.dataframe as dd +import dask_geopandas as dg +import datetime +import geopandas as gpd +import numpy as np +import pandas as pd +import sys + +from loguru import logger + +from shared_utils import geography_utils +from shared_utils.rt_utils import MPH_PER_MPS +from segment_speed_utils import helpers, segment_calcs, wrangle_shapes +from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, + PROJECT_CRS, CONFIG_PATH) +import test_split + +def linear_referencing_vp_against_line( + vp: dd.DataFrame, + segments: gpd.GeoDataFrame, + segment_identifier_cols: list, + timestamp_col: str +) -> dd.DataFrame: + time0 = datetime.datetime.now() + + # https://stackoverflow.com/questions/71685387/faster-methods-to-create-geodataframe-from-a-dask-or-pandas-dataframe + # https://github.com/geopandas/dask-geopandas/issues/197 + vp_gddf = dg.from_dask_dataframe( + vp, + geometry=dg.points_from_xy(vp, "x", "y") + ).set_crs(geography_utils.WGS84).to_crs(PROJECT_CRS).drop(columns = ["x", "y"]) + + vp_with_seg_geom = dd.merge( + vp_gddf, + segments, + on = segment_identifier_cols, + how = "inner" + ).rename(columns = { + "geometry_x": "vp_geometry", + "geometry_y": "segment_geometry"} + ).set_geometry("vp_geometry") + + vp_with_seg_geom = vp_with_seg_geom.repartition(npartitions=50) + + time1 = datetime.datetime.now() + logger.info(f"set up merged vp with segments: {time1 - time0}") + + shape_meters_series = vp_with_seg_geom.map_partitions( + wrangle_shapes.project_point_geom_onto_linestring, + "segment_geometry", + "vp_geometry", + meta = ("shape_meters", "float") + ) + + vp_with_seg_geom = segment_calcs.convert_timestamp_to_seconds( + vp_with_seg_geom, [timestamp_col]) + + vp_with_seg_geom = vp_with_seg_geom.assign( + shape_meters = shape_meters_series, + segment_meters = vp_with_seg_geom.segment_geometry.length + ) + + time2 = datetime.datetime.now() + logger.info(f"linear referencing: {time2 - time1}") + + drop_cols = [f"{timestamp_col}", "vp_geometry", "segment_geometry"] + vp_with_seg_geom2 = vp_with_seg_geom.drop(columns = drop_cols) + + return vp_with_seg_geom2 + + +def make_wide( + df: dd.DataFrame, + group_cols: list, + timestamp_col: str +) -> dd.DataFrame: + """ + Get df wide and set up current vp_idx and get meters/sec_elapsed + against prior. + """ + vp2 = ( + df.groupby(group_cols, + observed=True, group_keys=False) + .agg({"vp_idx": "max"}) + .reset_index() + .merge( + df, + on = group_cols + ["vp_idx"], + how = "inner" + ) + ) + + vp1 = ( + df.groupby(group_cols, + observed=True, group_keys=False) + .agg({"vp_idx": "min"}) + .reset_index() + .merge( + df, + on = group_cols + ["vp_idx"], + how = "inner" + ).rename(columns = { + "vp_idx": "prior_vp_idx", + f"{timestamp_col}_sec": f"prior_{timestamp_col}_sec", + "shape_meters": "prior_shape_meters", + }) + ) + + df_wide = dd.merge( + vp2, + vp1, + on = group_cols, + how = "left" + ) + + df_wide = df_wide.assign( + meters_elapsed = (df_wide.shape_meters - + df_wide.prior_shape_meters).abs(), + sec_elapsed = (df_wide[f"{timestamp_col}_sec"]- + df_wide[f"prior_{timestamp_col}_sec"]).abs(), + ) + + df_wide = df_wide.assign( + pct_segment = df_wide.meters_elapsed.divide(df_wide.segment_meters) + ) + + return df_wide + + +def calculate_speed( + df: dd.DataFrame, + distance_cols: tuple = ("prior_shape_meters", "shape_meters"), + time_cols: tuple = ("prior_location_timestamp_local_sec", "location_timestamp_local_sec") +) -> dd.DataFrame: + + min_dist, max_dist = distance_cols + min_time, max_time = time_cols + + df = df.assign( + meters_elapsed = (df[max_dist] - df[min_dist]).abs(), + sec_elapsed = (df[max_time] - df[min_time]).abs(), + ) + + df = df.assign( + speed_mph = (df.meters_elapsed.divide(df.sec_elapsed) * + MPH_PER_MPS) + ) + + return df + + +def filter_for_unstable_speeds( + df: pd.DataFrame, + pct_segment_threshold: float +) -> tuple[pd.DataFrame]: + ok_speeds = df[df.pct_segment > pct_segment_threshold] + low_speeds = df[df.pct_segment <= pct_segment_threshold] + + return ok_speeds, low_speeds + +def low_speed_segments_select_different_prior_vp( + low_speeds_df: pd.DataFrame, + group_cols: list, + timestamp_col: str +): + + keep_cols = group_cols + [ + "vp_idx", "location_timestamp_local_sec", + ] + + df1 = low_speeds_df[keep_cols] + + df1 = df1.assign( + prior_vp_idx = df1.vp_idx -1 + ) + + usable_vp = dd.read_parquet( + f"{SEGMENT_GCS}vp_usable_{analysis_date}", + columns = ["trip_instance_key", "vp_idx", timestamp_col, "x", "y"] + ) + + vp_idx_bounds = test_split.get_usable_vp_bounds_by_trip(usable_vp) + + df2 = pd.merge( + df1, + vp_idx_bounds, + on = "trip_instance_key", + how = "inner" + ) + + df2 = df2.assign( + prior_vp_idx = df2.apply( + lambda x: + x.vp_idx + 1 if (x.prior_vp_idx < x.min_vp_idx) and + (x.vp_idx + 1 <= x.max_vp_idx) + else x.prior_vp_idx, + axis=1) + ).drop(columns = ["trip_instance_key", "min_vp_idx", "max_vp_idx"]) + + + subset_vp_idx = np.union1d( + df2.vp_idx.unique(), + df2.prior_vp_idx.unique() + ).tolist() + + usable_vp2 = usable_vp[usable_vp.vp_idx.isin(subset_vp_idx)].compute() + + usable_gdf = geography_utils.create_point_geometry( + usable_vp2, + longitude_col = "x", + latitude_col = "y", + crs = PROJECT_CRS + ).drop(columns = ["x", "y"]).reset_index(drop=True) + + usable_gdf2 = segment_calcs.convert_timestamp_to_seconds( + usable_gdf, [timestamp_col]).drop(columns = timestamp_col) + + # Merge in coord for current_vp_idx + # we already have a timestamp_sec for current vp_idx + gdf = pd.merge( + usable_gdf2.drop(columns = f"{timestamp_col}_sec"), + df2, + on = "vp_idx", + how = "inner" + ) + + # Merge in coord for prior_vp_idx + gdf2 = pd.merge( + gdf, + usable_gdf2[["vp_idx", f"{timestamp_col}_sec", "geometry"]].add_prefix("prior_"), + on = "prior_vp_idx", + how = "inner" + ) + + # should we do straight distance or interpolate against full shape? + # what if full shape is problematic? + # do we want to do a check against the scale? that's not very robust either though + + gdf2 = gdf2.assign( + straight_distance = gdf2.geometry.distance(gdf2.prior_geometry) + ) + + gdf2 = gdf2.assign( + sec_elapsed = (gdf2[f"{timestamp_col}_sec"] - + gdf2[f"prior_{timestamp_col}_sec"]).abs() + ) + + gdf2 = gdf2.assign( + speed_mph = gdf2.straight_distance.divide(gdf2.sec_elapsed) * MPH_PER_MPS + ) + + drop_cols = ["geometry", "prior_geometry"] + results = gdf2.drop(columns = drop_cols) + + return results + + +def linear_referencing_and_speed_by_segment( + analysis_date: str, + dict_inputs: dict = {} +): + """ + With just enter / exit points on segments, + do the linear referencing to get shape_meters, and then derive speed. + """ + time0 = datetime.datetime.now() + + VP_FILE = dict_inputs["stage3"] + SEGMENT_FILE = dict_inputs["segments_file"] + SEGMENT_IDENTIFIER_COLS = dict_inputs["segment_identifier_cols"] + TIMESTAMP_COL = dict_inputs["timestamp_col"] + EXPORT_FILE = dict_inputs["stage4"] + + # Keep subset of columns - don't need it all. we can get the + # columns dropped through segments file + vp_keep_cols = [ + 'trip_instance_key', + TIMESTAMP_COL, + 'x', 'y', 'vp_idx' + ] + SEGMENT_IDENTIFIER_COLS + + vp = dd.read_parquet( + f"{SEGMENT_GCS}vp_pare_down/{VP_FILE}_all_{analysis_date}", + columns = vp_keep_cols + ) + + segments = helpers.import_segments( + SEGMENT_GCS, + f"{SEGMENT_FILE}_{analysis_date}", + columns = SEGMENT_IDENTIFIER_COLS + ["geometry"] + ).dropna(subset="geometry").reset_index(drop=True) + + vp_with_seg_geom = linear_referencing_vp_against_line( + vp, + segments, + SEGMENT_IDENTIFIER_COLS, + TIMESTAMP_COL + ).persist() + + time1 = datetime.datetime.now() + logger.info(f"linear referencing: {time1 - time0}") + + SEGMENT_TRIP_COLS = ["trip_instance_key", + "segment_meters"] + SEGMENT_IDENTIFIER_COLS + + vp_with_seg_wide = make_wide( + vp_with_seg_geom, SEGMENT_TRIP_COLS, TIMESTAMP_COL + ) + + initial_speeds = calculate_speed( + vp_with_seg_wide, + distance_cols = ("prior_shape_meters", "shape_meters"), + time_cols = (f"prior_{TIMESTAMP_COL}_sec", f"{TIMESTAMP_COL}_sec") + ).compute() + + time2 = datetime.datetime.now() + logger.info(f"make wide and get initial speeds: {time2 - time1}") + + ok_speeds, low_speeds = filter_for_unstable_speeds( + initial_speeds, + pct_segment_threshold = 0.3 + ) + + low_speeds_recalculated = low_speed_segments_select_different_prior_vp( + low_speeds, + SEGMENT_TRIP_COLS, + TIMESTAMP_COL + ) + + low_speeds_recalculated = low_speeds_recalculated.assign( + flag_recalculated = 1, + meters_elapsed = low_speeds_recalculated.straight_distance + ) + + keep_cols = SEGMENT_TRIP_COLS + [ + "vp_idx", "prior_vp_idx", + f"{TIMESTAMP_COL}_sec", f"prior_{TIMESTAMP_COL}_sec", + "meters_elapsed", + "sec_elapsed", + "pct_segment", + "speed_mph", + "flag_recalculated", + ] + + speeds = pd.concat([ + ok_speeds, + low_speeds_recalculated + ], axis=0).sort_values(SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"] + ).reset_index(drop=True) + + speeds = speeds.assign( + flag_recalculated = speeds.flag_recalculated.fillna(0).astype("int8") + )[keep_cols] + + time3 = datetime.datetime.now() + logger.info(f"recalculate speeds and get final: {time3 - time2}") + + speeds.to_parquet( + f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}_df.parquet", + ) + + time4 = datetime.datetime.now() + logger.info(f"execution time: {time4 - time0}") + +if __name__ == "__main__": + + STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments") + + linear_referencing_and_speed_by_segment(analysis_date, STOP_SEG_DICT) \ No newline at end of file From f8cb153b8f975c193986155b5d3e71932a67fd4a Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 20 Sep 2023 23:32:44 +0000 Subject: [PATCH 08/13] move function over --- rt_segment_speeds/scripts/test_speeds.py | 32 ++++++++++++++++++++++-- 1 file changed, 30 insertions(+), 2 deletions(-) diff --git a/rt_segment_speeds/scripts/test_speeds.py b/rt_segment_speeds/scripts/test_speeds.py index 1f25818b8..916b98129 100644 --- a/rt_segment_speeds/scripts/test_speeds.py +++ b/rt_segment_speeds/scripts/test_speeds.py @@ -16,7 +16,35 @@ from segment_speed_utils import helpers, segment_calcs, wrangle_shapes from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, PROJECT_CRS, CONFIG_PATH) -import test_split + +def get_usable_vp_bounds_by_trip(df: dd.DataFrame) -> pd.DataFrame: + """ + Of all the usable vp, for each trip, find the min(vp_idx) + and max(vp_idx). + For the first stop, there will never be a previous vp to find, + because the previous vp_idx will belong to a different operator/trip. + But for segments in the middle of the shape, the previous vp can be anywhere, + maybe several segments away. + """ + + grouped_df = df.groupby("trip_instance_key", + observed=True, group_keys=False) + + start_vp = (grouped_df.vp_idx.min().reset_index() + .rename(columns = {"vp_idx": "min_vp_idx"}) + ) + end_vp = (grouped_df.vp_idx.max().reset_index() + .rename(columns = {"vp_idx": "max_vp_idx"}) + ) + + df2 = dd.merge( + start_vp, + end_vp, + on = "trip_instance_key", + how = "left" + ).reset_index(drop=True).compute() + + return df2 def linear_referencing_vp_against_line( vp: dd.DataFrame, @@ -182,7 +210,7 @@ def low_speed_segments_select_different_prior_vp( columns = ["trip_instance_key", "vp_idx", timestamp_col, "x", "y"] ) - vp_idx_bounds = test_split.get_usable_vp_bounds_by_trip(usable_vp) + vp_idx_bounds = get_usable_vp_bounds_by_trip(usable_vp) df2 = pd.merge( df1, From 882ad65211d547262a945f2ea103cc7cbf8651ce Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Wed, 20 Sep 2023 23:36:33 +0000 Subject: [PATCH 09/13] (remove): test split and notebook to test filling in 1 vp --- rt_segment_speeds/scripts/fill_in_1_vp.ipynb | 1161 ------------------ rt_segment_speeds/scripts/test_split.py | 377 ------ 2 files changed, 1538 deletions(-) delete mode 100644 rt_segment_speeds/scripts/fill_in_1_vp.ipynb delete mode 100644 rt_segment_speeds/scripts/test_split.py diff --git a/rt_segment_speeds/scripts/fill_in_1_vp.ipynb b/rt_segment_speeds/scripts/fill_in_1_vp.ipynb deleted file mode 100644 index 8427f7519..000000000 --- a/rt_segment_speeds/scripts/fill_in_1_vp.ipynb +++ /dev/null @@ -1,1161 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "id": "2c5b9cfe-eb62-4823-8008-1e9ecac2d930", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.9/site-packages/geopandas/_compat.py:124: UserWarning: The Shapely GEOS version (3.11.1-CAPI-1.17.1) is incompatible with the GEOS version PyGEOS was compiled with (3.10.1-CAPI-1.16.0). Conversions between both will be slow.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "import dask.dataframe as dd\n", - "import dask_geopandas as dg\n", - "import geopandas as gpd\n", - "import numpy as np\n", - "import pandas as pd\n", - "\n", - "from segment_speed_utils.project_vars import (SEGMENT_GCS, \n", - " CONFIG_PATH, \n", - " PROJECT_CRS\n", - " )\n", - "from segment_speed_utils import (helpers, wrangle_shapes, \n", - " segment_calcs)\n", - "from shared_utils import rt_dates\n", - "from shared_utils.geography_utils import WGS84\n", - "\n", - "import test_split\n", - "\n", - "analysis_date = rt_dates.DATES[\"jul2023\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "ed5ebb6b-934d-4451-a108-82895f5b0661", - "metadata": {}, - "outputs": [], - "source": [ - "from importlib import reload\n", - "\n", - "dict_inputs = helpers.get_parameters(CONFIG_PATH, \"stop_segments\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "e7ab12a8-9fef-4e5a-8be3-ad18992d685b", - "metadata": {}, - "outputs": [], - "source": [ - "USABLE_VP = dict_inputs[\"stage1\"]\n", - "INPUT_FILE = dict_inputs[\"stage3\"]\n", - "SEGMENT_FILE = dict_inputs[\"segments_file\"]\n", - "SEGMENT_IDENTIFIER_COLS = dict_inputs[\"segment_identifier_cols\"]\n", - "GROUPING_COL = dict_inputs[\"grouping_col\"]\n", - "TIMESTAMP_COL = dict_inputs[\"timestamp_col\"]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "29ef4e7b-82d2-427a-9122-f4ee1c34baa3", - "metadata": {}, - "outputs": [], - "source": [ - "usable_vp = dd.read_parquet(\n", - " f\"{SEGMENT_GCS}{USABLE_VP}_{analysis_date}\",\n", - " columns = [\"trip_instance_key\", \"vp_idx\", TIMESTAMP_COL, \"x\", \"y\"]\n", - ")\n", - "vp_idx_bounds = test_split.get_usable_vp_bounds_by_trip(usable_vp)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "86983844-a321-4d86-863a-3297bd3f7dd5", - "metadata": {}, - "outputs": [], - "source": [ - "from dask import delayed, compute\n", - "\n", - "# Start from pared down vp\n", - "df = delayed(pd.read_parquet)(\n", - " f\"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_all_{analysis_date}\",\n", - " columns = SEGMENT_IDENTIFIER_COLS + [\"trip_instance_key\", \"vp_idx\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "784086ef-5e92-4c06-ada0-1abe7819de2d", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_parquet(\n", - " f\"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_all_{analysis_date}\",\n", - " #columns = SEGMENT_IDENTIFIER_COLS + [\"trip_instance_key\", \"vp_idx\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "fd0b0494-8ca7-4a7f-8ff9-3f9b4ac4cced", - "metadata": {}, - "outputs": [], - "source": [ - "def make_wide(\n", - " df: pd.DataFrame, \n", - " segment_identifier_cols: list,\n", - " timestamp_col: str\n", - ") -> pd.DataFrame:\n", - " \n", - " group_cols = [\"trip_instance_key\"] + segment_identifier_cols\n", - " \n", - " # Sort and make sure we get a pairing of point 1 and 2 within segment\n", - " df = df.sort_values(group_cols + [\"vp_idx\"]).reset_index(drop=True)\n", - " \n", - " df = dd.from_pandas(df, npartitions=50, sort=True)\n", - "\n", - " df = df.assign(\n", - " obs = df.groupby(group_cols, \n", - " observed=True, group_keys=False\n", - " ).cumcount() + 1\n", - " )\n", - " \n", - " gdf = dg.from_dask_dataframe(\n", - " df, \n", - " geometry=dg.points_from_xy(df, \"x\", \"y\")\n", - " ).set_crs(WGS84).to_crs(PROJECT_CRS).drop(columns = [\"x\", \"y\"])\n", - " \n", - " segments = helpers.import_segments(\n", - " SEGMENT_GCS,\n", - " f\"{SEGMENT_FILE}_{analysis_date}\", \n", - " columns = segment_identifier_cols + [\"geometry\"]\n", - " ).dropna(subset=\"geometry\").reset_index(drop=True)\n", - " \n", - " gddf = dd.merge(\n", - " gdf,\n", - " segments,\n", - " on = segment_identifier_cols,\n", - " how = \"inner\"\n", - " ).rename(columns = {\n", - " \"geometry_x\": \"vp_geometry\",\n", - " \"geometry_y\": \"segment_geometry\"\n", - " }).set_geometry(\"vp_geometry\")\n", - " \n", - " shape_meters_series = gddf.map_partitions(\n", - " wrangle_shapes.project_point_geom_onto_linestring,\n", - " \"segment_geometry\",\n", - " \"vp_geometry\",\n", - " meta = (\"shape_meters\", \"float\")\n", - " )\n", - " \n", - " gddf[\"shape_meters\"] = shape_meters_series\n", - " \n", - " ddf = gddf.drop(columns = [\"vp_geometry\", \"segment_geometry\"]).reset_index(drop=True)\n", - " '''\n", - " ddf = segment_calcs.convert_timestamp_to_seconds(\n", - " ddf, [timestamp_col])\n", - " \n", - " # Must do point 1 against point 2\n", - " # Some segments only have 1 point, have no second point to find\n", - " point1 = ddf[ddf.obs==1].reset_index(drop=True)\n", - " point2 = ddf[ddf.obs==2].reset_index(drop=True)\n", - " \n", - " ddf_wide = dd.merge(\n", - " point1,\n", - " point2,\n", - " on = group_cols,\n", - " how = \"left\"\n", - " ).reset_index(drop=True)\n", - " '''\n", - " return ddf" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "540db34a-ae25-4fde-916e-0ed14aa3e0f0", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n", - "/opt/conda/lib/python3.9/site-packages/shapely/linear.py:90: RuntimeWarning: invalid value encountered in line_locate_point\n", - " return lib.line_locate_point(line, other)\n" - ] - } - ], - "source": [ - "results = make_wide(\n", - " df, \n", - " SEGMENT_IDENTIFIER_COLS, \n", - " TIMESTAMP_COL\n", - ").compute()" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "a222b1f4-f54a-4b18-acab-93282af02d1f", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
vp_idxtrip_instance_keylocation_timestamp_localshape_array_keystop_sequenceobsshape_meters
031407110000703b8b86da99dbf637e746c452362023-07-12 18:14:5815b81a6fa853534940f7f1c8f7c3a3ba110.000000
131407120000703b8b86da99dbf637e746c452362023-07-12 18:15:1315b81a6fa853534940f7f1c8f7c3a3ba120.000000
231407110000703b8b86da99dbf637e746c452362023-07-12 18:14:5815b81a6fa853534940f7f1c8f7c3a3ba2113.352249
331407190000703b8b86da99dbf637e746c452362023-07-12 18:17:3115b81a6fa853534940f7f1c8f7c3a3ba221114.127066
431407170000703b8b86da99dbf637e746c452362023-07-12 18:17:0015b81a6fa853534940f7f1c8f7c3a3ba317.565720
........................
991946808154ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:06:37f8348bfb848a94699bf0a8d3c2be02c92021042.154977
991956808154ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:06:37f8348bfb848a94699bf0a8d3c2be02c921115.118653
991966808156ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:07:17f8348bfb848a94699bf0a8d3c2be02c9212393.326336
991976808156ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:07:17f8348bfb848a94699bf0a8d3c2be02c92210.000000
991986808160ffffe3ff18b68f1b90b97f583d4601ef2023-07-12 19:08:38f8348bfb848a94699bf0a8d3c2be02c9222208.493146
\n", - "

4959972 rows × 7 columns

\n", - "
" - ], - "text/plain": [ - " vp_idx trip_instance_key location_timestamp_local \\\n", - "0 3140711 0000703b8b86da99dbf637e746c45236 2023-07-12 18:14:58 \n", - "1 3140712 0000703b8b86da99dbf637e746c45236 2023-07-12 18:15:13 \n", - "2 3140711 0000703b8b86da99dbf637e746c45236 2023-07-12 18:14:58 \n", - "3 3140719 0000703b8b86da99dbf637e746c45236 2023-07-12 18:17:31 \n", - "4 3140717 0000703b8b86da99dbf637e746c45236 2023-07-12 18:17:00 \n", - "... ... ... ... \n", - "99194 6808154 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:06:37 \n", - "99195 6808154 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:06:37 \n", - "99196 6808156 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:07:17 \n", - "99197 6808156 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:07:17 \n", - "99198 6808160 ffffe3ff18b68f1b90b97f583d4601ef 2023-07-12 19:08:38 \n", - "\n", - " shape_array_key stop_sequence obs shape_meters \n", - "0 15b81a6fa853534940f7f1c8f7c3a3ba 1 1 0.000000 \n", - "1 15b81a6fa853534940f7f1c8f7c3a3ba 1 2 0.000000 \n", - "2 15b81a6fa853534940f7f1c8f7c3a3ba 2 1 13.352249 \n", - "3 15b81a6fa853534940f7f1c8f7c3a3ba 2 2 1114.127066 \n", - "4 15b81a6fa853534940f7f1c8f7c3a3ba 3 1 7.565720 \n", - "... ... ... ... ... \n", - "99194 f8348bfb848a94699bf0a8d3c2be02c9 20 2 1042.154977 \n", - "99195 f8348bfb848a94699bf0a8d3c2be02c9 21 1 15.118653 \n", - "99196 f8348bfb848a94699bf0a8d3c2be02c9 21 2 393.326336 \n", - "99197 f8348bfb848a94699bf0a8d3c2be02c9 22 1 0.000000 \n", - "99198 f8348bfb848a94699bf0a8d3c2be02c9 22 2 208.493146 \n", - "\n", - "[4959972 rows x 7 columns]" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "45664688-422d-4336-b7ac-dcaa399b803d", - "metadata": {}, - "outputs": [], - "source": [ - "df2 = delayed(test_split.get_prior_position_on_segment)(\n", - " df, \n", - " SEGMENT_IDENTIFIER_COLS,\n", - " TIMESTAMP_COL\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "da871931-d2fa-42dd-8d66-b5bd6c735728", - "metadata": {}, - "outputs": [], - "source": [ - "df3 = delayed(dd.merge)(\n", - " df2,\n", - " vp_idx_bounds,\n", - " on = \"trip_instance_key\",\n", - " how = \"inner\"\n", - ")\n", - "\n", - "df3 = df3.assign(\n", - " prior_vp_idx = df3.apply(\n", - " lambda x: \n", - " x.vp_idx + 1 if (x.prior_vp_idx < x.min_vp_idx) and \n", - " (x.vp_idx + 1 <= x.max_vp_idx)\n", - " else x.prior_vp_idx, \n", - " axis=1)\n", - ").drop(columns = [\"trip_instance_key\", \"min_vp_idx\", \"max_vp_idx\"])\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6aedd4e2-c3a4-4b18-8ca5-9234fc10f992", - "metadata": {}, - "outputs": [], - "source": [ - "df3 = compute(df3)[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "22f8c086-4592-4c8f-9e08-cbe43c90373f", - "metadata": {}, - "outputs": [], - "source": [ - "def attach_vp_timestamp_location(\n", - " df: pd.DataFrame,\n", - " usable_vp: dd.DataFrame,\n", - " timestamp_col: str\n", - ") -> gpd.GeoDataFrame:\n", - " \"\"\"\n", - " \"\"\"\n", - " \n", - " # Turn the vp_idx we need into gdf\n", - " vp_to_keep = np.union1d(df.vp_idx, df.prior_vp_idx).tolist()\n", - " usable_vp2 = usable_vp[usable_vp.vp_idx.isin(vp_to_keep)]\n", - " \n", - " # Merge in the timestamp and x, y coords \n", - " usable_gdf = gpd.GeoDataFrame(\n", - " usable_vp2,\n", - " geometry = gpd.points_from_xy(usable_vp2.x, usable_vp2.y),\n", - " crs = WGS84\n", - " ).to_crs(PROJECT_CRS).drop(columns = [\"x\", \"y\"])\n", - " \n", - " \n", - " df_with_xy = pd.merge(\n", - " usable_gdf,\n", - " df,\n", - " on = \"vp_idx\",\n", - " how = \"inner\"\n", - " ).rename(columns = {\"geometry\": \"vp_geometry\"})\n", - " \n", - " # Merge again to get timestamp and x, y coords of previous point\n", - " usable_gdf2 = usable_gdf.rename(\n", - " columns = {\n", - " \"vp_idx\": \"prior_vp_idx\",\n", - " timestamp_col: f\"prior_{timestamp_col}\",\n", - " \"geometry\": \"prior_vp_geometry\"\n", - " }\n", - " ).drop(columns = \"trip_instance_key\")\n", - " \n", - " df_with_prior_xy = dd.merge(\n", - " df_with_xy,\n", - " usable_gdf2,\n", - " on = \"prior_vp_idx\",\n", - " how = \"inner\"\n", - " )\n", - " \n", - " return df_with_prior_xy" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "502f4715-a298-4520-b571-fae4d196cf2f", - "metadata": {}, - "outputs": [], - "source": [ - "from shared_utils.geography_utils import WGS84\n", - "\n", - "gdf = delayed(attach_vp_timestamp_location)(\n", - " df3,\n", - " usable_vp,\n", - " TIMESTAMP_COL\n", - ").persist()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "eeecaba2-9e58-4954-9660-5337ef02569c", - "metadata": {}, - "outputs": [], - "source": [ - "part2 = gdf[gdf.n_vp_seg==2]\n", - "\n", - "part2_keep = (part2.groupby([\"trip_instance_key\"] + SEGMENT_IDENTIFIER_COLS,\n", - " observed=True, group_keys=False)\n", - " .vp_idx\n", - " .max()\n", - " .reset_index()\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "150a9219-94c7-44cc-84ac-e9badd88c3e5", - "metadata": {}, - "outputs": [], - "source": [ - "part2_pared = delayed(dd.merge)(\n", - " part2,\n", - " part2_keep,\n", - " on = [\"trip_instance_key\", \"vp_idx\"] + SEGMENT_IDENTIFIER_COLS, \n", - " how = \"inner\"\n", - ")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "14b7f596-1676-40e8-a185-c0f02ad7c7d2", - "metadata": {}, - "outputs": [], - "source": [ - "part2_gdf = delayed(test_split.merge_in_segments)(\n", - " part2_pared,\n", - " SEGMENT_IDENTIFIER_COLS,\n", - " GROUPING_COL,\n", - " n_vp_seg_value=2\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cefe0484-6a54-4b11-a562-5e77ff824b85", - "metadata": {}, - "outputs": [], - "source": [ - "p2_gdf = compute(part2_gdf)[0]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "88525881-e849-4a42-b0a3-aa14d9d38f38", - "metadata": {}, - "outputs": [], - "source": [ - "gddf = dg.from_geopandas(p2_gdf, npartitions=100)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "678c1e78-9fe3-4f11-a2f6-4fa7ced6c69c", - "metadata": {}, - "outputs": [], - "source": [ - "shape_meters_series = gddf.map_partitions(\n", - " wrangle_shapes.project_point_geom_onto_linestring,\n", - " \"geometry\",\n", - " \"vp_geometry\",\n", - " meta = (\"shape_meters\", \"float\")\n", - ")\n", - "\n", - "prior_shape_meters_series = gddf.map_partitions(\n", - " wrangle_shapes.project_point_geom_onto_linestring,\n", - " \"geometry\",\n", - " \"prior_vp_geometry\",\n", - " meta = (\"prior_shape_meters\", \"float\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "351cc3d5-ade0-47a3-ba30-aa8e24b4c9e7", - "metadata": {}, - "outputs": [], - "source": [ - "gddf[\"difference_shape_meters\"] = abs(\n", - " shape_meters_series - prior_shape_meters_series)\n", - "gddf[\"straight_distance\"] = gddf.vp_geometry.distance(gddf.prior_vp_geometry)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8f943943-e0bc-4526-b9ab-e8963eeec7ae", - "metadata": {}, - "outputs": [], - "source": [ - "scaling_factor = 1.75\n", - "min_meters_elapsed = 100\n", - "\n", - "# It's not only that difference_shape_meters==0 is wrong,\n", - "# Separate out these and try again with full shape \n", - "gddf = gddf.assign(\n", - " meters_elapsed = gddf.apply(\n", - " lambda x: x.straight_distance if (\n", - " x.difference_shape_meters <= min_meters_elapsed or \n", - " x.difference_shape_meters >= x.straight_distance*scaling_factor\n", - " ) else x.difference_shape_meters, \n", - " axis=1, \n", - " meta = (\"meters_elapsed\", \"float\")\n", - " ),\n", - ")\n", - "\n", - "gddf = segment_calcs.convert_timestamp_to_seconds(\n", - " gddf, [TIMESTAMP_COL, f\"prior_{TIMESTAMP_COL}\"])\n", - "\n", - "gddf = gddf.assign(\n", - " sec_elapsed = (gddf[f\"{TIMESTAMP_COL}_sec\"] - \n", - " gddf[f\"prior_{TIMESTAMP_COL}_sec\"]).abs()\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "b10cbeb9-558e-459b-93d3-becb118ba7b1", - "metadata": {}, - "outputs": [], - "source": [ - "ddf = gddf.drop(columns = [\"prior_vp_geometry\", \n", - " \"vp_geometry\", \"geometry\"])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d2d54fb3-d5dc-4a98-a304-9b5004e022b8", - "metadata": {}, - "outputs": [], - "source": [ - "ddf = ddf.repartition(npartitions=2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "df1d5370-3021-45fb-a6ef-7eb408185d5f", - "metadata": {}, - "outputs": [], - "source": [ - "results = ddf.compute()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d4c9fdf6-55b8-479a-94ae-ab36a4343171", - "metadata": {}, - "outputs": [], - "source": [ - "from shared_utils.rt_utils import MPH_PER_MPS\n", - "\n", - "results = results.assign(\n", - " speed_mph = (results.meters_elapsed.divide(results.sec_elapsed) * \n", - " MPH_PER_MPS)\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "433e0382-461e-4970-9d88-37e07a40efaf", - "metadata": {}, - "outputs": [], - "source": [ - "results[results.speed_mph < 5].shape" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "be27c87b-0de2-472e-9ea0-cc42da1d4256", - "metadata": {}, - "outputs": [], - "source": [ - "ok_results = results[results.speed_mph >= 5]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e177393a-86c0-4c74-824c-fe9548b461b2", - "metadata": {}, - "outputs": [], - "source": [ - "bad_results = results[(results.speed_mph >= 0) & (results.speed_mph <5)]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "4ab46336-10a3-4a25-86d5-6f9343f3175f", - "metadata": {}, - "outputs": [], - "source": [ - "len(ok_results), len(bad_results)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6390361b-bfdd-44f3-833b-12b291343bd3", - "metadata": {}, - "outputs": [], - "source": [ - "bad_results" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "989bdfe8-8dde-4457-b4d9-ca110ecfabf7", - "metadata": {}, - "outputs": [], - "source": [ - "part1 = gdf[gdf.n_vp_seg==1]\n", - "\n", - "p1 = compute(part1)[0]\n", - "\n", - "p1.to_parquet(\"one_vp_in_seg.parquet\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1415ef3d-7a88-48ca-ad9c-a20a00618d61", - "metadata": {}, - "outputs": [], - "source": [ - "bad_shapes = bad_results.shape_array_key.unique().tolist()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ce58f30c-54b7-4ba1-854e-fcd9d0314012", - "metadata": {}, - "outputs": [], - "source": [ - "ok_results.to_parquet(\"ok_results.parquet\")\n", - "bad_results.to_parquet(\"bad_results.parquet\")" - ] - }, - { - "cell_type": "markdown", - "id": "77a87e87-77cf-4d90-bdba-a9f6eaaf6433", - "metadata": {}, - "source": [ - "For speeds that are unusually low, it is now confirmed it's not just `loop_or_inlining`. It happens on `loop_or_inlining==0` too, so it's good to have moved the sjoin postprocessing to all shapes." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3ae778cb-00df-4c31-9161-c969fa27565a", - "metadata": {}, - "outputs": [], - "source": [ - "segments = gpd.read_parquet(\n", - " f\"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet\",\n", - " filters = [[(GROUPING_COL, \"in\", bad_shapes)]],\n", - " columns = SEGMENT_IDENTIFIER_COLS + [\"loop_or_inlining\", \"geometry\"]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "53aab44a-b49f-4658-a79f-cf4cdf1e1383", - "metadata": {}, - "outputs": [], - "source": [ - "segments.loop_or_inlining.value_counts()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bda0d4da-96f9-4833-95f2-c02f992a63c6", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.13" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/rt_segment_speeds/scripts/test_split.py b/rt_segment_speeds/scripts/test_split.py deleted file mode 100644 index b3765671e..000000000 --- a/rt_segment_speeds/scripts/test_split.py +++ /dev/null @@ -1,377 +0,0 @@ -""" -Transform df so that it is wide instead of long -prior to calculating speed. - -For segments with 2 vp, we can do this. -For segments with 1 vp...set a placeholder for how to -fill in the previous coord? - -Caveats to work into future function: -* pulling the prior vp can be from multiple segments ago -* we want to calculate distance between 2 points using shape and not segment -* the prior vp should just be vp_idx of current - 1 -* check that it falls between the bounds of a trip's min_vp_idx and max_vp_idx -""" -import dask.dataframe as dd -import dask_geopandas as dg -import datetime -import geopandas as gpd -import pandas as pd - -from typing import Literal - -from segment_speed_utils import helpers, segment_calcs, wrangle_shapes -from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, - CONFIG_PATH, PROJECT_CRS) -from shared_utils.geography_utils import WGS84 - -def get_prior_position_on_segment( - df: pd.DataFrame, - segment_identifier_cols: list, - time_col: str, -) -> gpd.GeoDataFrame: - """ - Get the prior vp on the segment. - If a segment has 2 points, this will fill it in with a value. - If it has 1 point, it returns NaN, so we will have to subset - to those rows and fix those separately. - """ - segment_trip_cols = ["trip_instance_key"] + segment_identifier_cols - - obs_per_segment_trip = ( - df.groupby(segment_trip_cols, - observed=True, group_keys=False) - .agg({"vp_idx": "count"}) - .reset_index() - .rename(columns = {"vp_idx": "n_vp_seg"}) - ) - - df2 = pd.merge( - df, - obs_per_segment_trip, - on = segment_trip_cols, - how = "inner" - ).sort_values( - segment_trip_cols + ["vp_idx"] - ).reset_index(drop=True) - - - df2 = df2.assign( - prior_vp_idx = (df2.groupby(segment_trip_cols, - observed=True, group_keys=False) - .vp_idx - .shift(1) - ) - ) - - df2 = df2.assign( - prior_vp_idx = df2.prior_vp_idx.fillna(df2.vp_idx - 1).astype(int) - ) - - return df2 - - -def get_usable_vp_bounds_by_trip(df: dd.DataFrame) -> pd.DataFrame: - """ - Of all the usable vp, for each trip, find the min(vp_idx) - and max(vp_idx). - For the first stop, there will never be a previous vp to find, - because the previous vp_idx will belong to a different operator/trip. - But for segments in the middle of the shape, the previous vp can be anywhere, - maybe several segments away. - """ - - grouped_df = df.groupby("trip_instance_key", - observed=True, group_keys=False) - - start_vp = (grouped_df.vp_idx.min().reset_index() - .rename(columns = {"vp_idx": "min_vp_idx"}) - ) - end_vp = (grouped_df.vp_idx.max().reset_index() - .rename(columns = {"vp_idx": "max_vp_idx"}) - ) - - df2 = dd.merge( - start_vp, - end_vp, - on = "trip_instance_key", - how = "left" - ).reset_index(drop=True).compute() - - return df2 - - -def merge_in_segments( - gdf: gpd.GeoDataFrame, - segment_identifier_cols: list, - grouping_col: str, - n_vp_seg_value: Literal[1,2] -) -> gpd.GeoDataFrame: - - # If segment has 1 point, then we have to use the shape, - # since the prior point can come from multiple segments away - if n_vp_seg_value==1: - - shapes = helpers.import_scheduled_shapes( - analysis_date, - columns = [grouping_col, "geometry"], - get_pandas = True, - crs = PROJECT_CRS - ) - - m1 = dd.merge( - gdf, - shapes, - on = grouping_col, - how = "inner" - ) - - # If segment has 2 points, then we can use segment geometry - elif n_vp_seg_value==2: - - segments = gpd.read_parquet( - f"{SEGMENT_GCS}stop_segments_{analysis_date}.parquet", - columns = segment_identifier_cols + ["geometry"] - ) - - m1 = dd.merge( - gdf, - segments, - on = segment_identifier_cols, - how = "inner" - ) - - return m1 - - -def attach_vp_timestamp_location( - df: pd.DataFrame, - usable_vp: dd.DataFrame, - timestamp_col: str -) -> gpd.GeoDataFrame: - """ - """ - # Merge in the timestamp and x, y coords - usable_gdf = dg.from_dask_dataframe( - usable_vp, - geometry = dg.points_from_xy(usable_vp, x = "x", y = "y") - ).drop(columns = ["x", "y"]).set_crs(WGS84) - - usable_gdf = usable_gdf.to_crs(PROJECT_CRS) - - df_with_xy = dd.merge( - usable_gdf, - df, - on = "vp_idx", - how = "inner" - ) - - # Merge again to get timestamp and x, y coords of previous point - usable_gdf2 = usable_gdf.rename( - columns = { - "vp_idx": "prior_vp_idx", - timestamp_col: f"prior_{timestamp_col}", - } - ).drop(columns = "trip_instance_key") - - df_with_prior_xy = dd.merge( - df_with_xy, - usable_gdf2, - on = "prior_vp_idx", - how = "inner" - ) - - return df_with_prior_xy - - -def linear_referencing_for_segment( - gdf: dg.GeoDataFrame, - timestamp_col: str, - scaling_factor: float = 1.75 -) -> dg.GeoDataFrame: - - gddf = gdf.repartition(npartitions=50) - - shape_meters_series = gddf.map_partitions( - wrangle_shapes.project_point_geom_onto_linestring, - "geometry", - "vp_geometry", - meta = ("shape_meters", "float") - ) - - prior_shape_meters_series = gddf.map_partitions( - wrangle_shapes.project_point_geom_onto_linestring, - "geometry", - "prior_vp_geometry", - meta = ("prior_shape_meters", "float") - ) - - #gddf["current_shape_meters"] = shape_meters_series - #gddf["prior_shape_meters"] = prior_shape_meters_series - gddf["difference_shape_meters"] = abs( - shape_meters_series - prior_shape_meters_series) - gddf["straight_distance"] = gddf.vp_geometry.distance(gddf.prior_vp_geometry) - - # Decide what distance to keep - # If difference between current_shape_meters and prior_shape_meters is 0, - # use the straight line distance. - # If difference between current_shape_meters and prior_shape_meters is way - # too high, don't use it, it could be from projecting against the full shape - gddf = gddf.assign( - meters_elapsed = gddf.apply( - lambda x: x.straight_distance if ( - x.difference_shape_meters == 0 or - x.difference_shape_meters >= x.straight_distance*scaling_factor - ) else x.difference_shape_meters, - axis=1, - meta = ("meters_elapsed", "float") - ), - ) - - gddf = segment_calcs.convert_timestamp_to_seconds( - gddf, [timestamp_col, f"prior_{timestamp_col}"]) - - drop_cols = ["difference_shape_meters", "straight_distance", - "vp_geometry", "prior_vp_geometry", "geometry" - ] - gddf2 = gddf.drop(columns = drop_cols) - - return gddf - - -def put_all_together( - analysis_date: str, - dict_inputs: dict = {} -): - USABLE_VP = dict_inputs["stage1"] - INPUT_FILE = dict_inputs["stage3"] - SEGMENT_FILE = dict_inputs["segments_file"] - SEGMENT_IDENTIFIER_COLS = dict_inputs["segment_identifier_cols"] - GROUPING_COL = dict_inputs["grouping_col"] - TIMESTAMP_COL = dict_inputs["timestamp_col"] - - time0 = datetime.datetime.now() - - # Import usable vp, which we'll use later for the x, y and timestamp - usable_vp = dd.read_parquet( - f"{SEGMENT_GCS}{USABLE_VP}_{analysis_date}", - columns = ["trip_instance_key", "vp_idx", TIMESTAMP_COL, "x", "y"] - ) - vp_idx_bounds = get_usable_vp_bounds_by_trip(usable_vp) - - # Start from pared down vp - df = pd.read_parquet( - f"{SEGMENT_GCS}vp_pare_down/{INPUT_FILE}_all_{analysis_date}", - columns = SEGMENT_IDENTIFIER_COLS + ["trip_instance_key", "vp_idx"] - ) - - # Make sure all segments have 2 points - # If it doesn't, fill it in with the previous vp_idx - df2 = get_prior_position_on_segment( - df, - SEGMENT_IDENTIFIER_COLS, - TIMESTAMP_COL - ) - - time1 = datetime.datetime.now() - print(f"get prior position: {time1 - time0}") - - # Check that the previous vp_idx actually occurs on the same trip - df3 = dd.merge( - df2, - vp_idx_bounds, - on = "trip_instance_key", - how = "inner" - ) - - # For the first segment, if we only have 1 vp, we can't find a previous point - # We'll use the next point then. - # but make sure that we never use a point outside of that trip - # later, we will have to use absolute value of difference in shape_meters - # since distance must be positive - df3 = df3.assign( - prior_vp_idx = df3.apply( - lambda x: - x.vp_idx + 1 if (x.prior_vp_idx < x.min_vp_idx) and - (x.vp_idx + 1 <= x.max_vp_idx) - else x.prior_vp_idx, - axis=1) - ).drop(columns = ["trip_instance_key", "min_vp_idx", "max_vp_idx"]) - - gdf = attach_vp_timestamp_location( - df3, - usable_vp, - TIMESTAMP_COL - ) - - time2 = datetime.datetime.now() - print(f"attach vp timestamp: {time2 - time1}") - - #part1 = gdf[gdf.n_vp_seg==1] - part2 = gdf[gdf.n_vp_seg==2] - - part2_keep = (part2.groupby(["trip_instance_key"] + SEGMENT_IDENTIFIER_COLS, - observed=True, group_keys=False) - .vp_idx - .max() - .reset_index() - ) - - part2_pared = dd.merge( - part2, - part2_keep, - on = ["trip_instance_key", "vp_idx"] + SEGMENT_IDENTIFIER_COLS, - how = "inner" - ) - - #part1_gdf = merge_in_segments( - # part1, - # SEGMENT_IDENTIFIER_COLS, - # GROUPING_COL, - # n_vp_seg_value=1 - #) - - part2_gdf = merge_in_segments( - part2_pared, - SEGMENT_IDENTIFIER_COLS, - GROUPING_COL, - n_vp_seg_value=2 - ) - - gdf3 = dd.multi.concat( - [ - #part1_gdf, - part2_gdf], - axis=0 - ).reset_index(drop=True)#.sort_values( - # SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"] - #) - - time3 = datetime.datetime.now() - print(f"merge in segments: {time3 - time2}") - - gdf4 = linear_referencing_for_segment( - gdf3, - TIMESTAMP_COL, - scaling_factor = 1.75 - ) - - time4 = datetime.datetime.now() - print(f"linear ref: {time4 - time2}") - - return gdf4 - -if __name__ == "__main__": - - start = datetime.datetime.now() - STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments") - gddf = put_all_together(analysis_date, STOP_SEG_DICT) - - gddf = gddf.repartition(npartitions=2) - - gddf.to_parquet("linear_ref") - - print(f"execution time: {datetime.datetime.now() - start}") - - - \ No newline at end of file From 93a908fcfca5d7226fe023d78c6a30e888b864c2 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 21 Sep 2023 17:52:51 +0000 Subject: [PATCH 10/13] (remove): script for loop_or_inlining check, now done for all shapes --- rt_segment_speeds/scripts/A3_loop_inlining.py | 168 ------------------ 1 file changed, 168 deletions(-) delete mode 100644 rt_segment_speeds/scripts/A3_loop_inlining.py diff --git a/rt_segment_speeds/scripts/A3_loop_inlining.py b/rt_segment_speeds/scripts/A3_loop_inlining.py deleted file mode 100644 index ad4e64268..000000000 --- a/rt_segment_speeds/scripts/A3_loop_inlining.py +++ /dev/null @@ -1,168 +0,0 @@ -""" -Handle complex shapes and pare down -vehicle position points by first checking the dot product -to make sure we are keeping vehicle positions -running in the same direction as the segment. -""" -import dask.dataframe as dd -import dask_geopandas as dg -import datetime -import geopandas as gpd -import numpy as np -import pandas as pd -import shapely -import sys - -from loguru import logger - -from shared_utils.geography_utils import WGS84 -from segment_speed_utils import helpers, segment_calcs, wrangle_shapes -from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, - CONFIG_PATH, PROJECT_CRS) -from A3_valid_vehicle_positions import (identify_stop_segment_cases, - merge_usable_vp_with_sjoin_vpidx) - - - - -def find_errors_in_segment_groups( - vp_sjoin: dd.DataFrame, - segments: gpd.GeoDataFrame, - segment_identifier_cols: list, -) -> dd.DataFrame: - """ - For each sjoin result for each segment-trip: - (1) find the direction the segment is running - (2) use the mean timestamp to divide sjoin results into 2 groups - (3) for each group, find the first/last vp - (4) find the direction of each group of vp for segment-trip - (5) as long as vp are running in same direction as segment (dot product > 0), - keep those observations. - """ - group_cols = segment_identifier_cols + ["trip_instance_key"] - - segments = get_stop_segments_direction_vector( - segments) - - vp_grouped = split_vp_into_groups( - vp_sjoin, - group_cols, - col_to_find_groups = "location_timestamp_local" - ) - - vp_pared_by_group = get_first_last_position_in_group( - vp_grouped, group_cols) - - vp_with_segment_vec = pd.merge( - segments, - vp_pared_by_group, - on = segment_identifier_cols, - ) - - vp_dot_prod = find_vp_direction_vector( - vp_with_segment_vec, group_cols) - - # Only keep if vehicle positions are running in the same - # direction as the segment - # TODO: should we keep NaNs? NaNs weren't able to have a vector calculated, - # which could mean it's kind of an outlier in the segment, - # maybe should have been attached elsewhere - vp_same_direction = (vp_dot_prod[~(vp_dot_prod.dot_product < 0)] - [group_cols + ["group"]] - .drop_duplicates() - .reset_index(drop=True) - ) - - vp_to_keep = dd.merge( - vp_grouped, - vp_same_direction, - on = group_cols + ["group"], - how = "inner", - ).drop(columns = ["location_timestamp_local_sec", "group"]) - - return vp_to_keep - - -def pare_down_vp_for_special_cases( - analysis_date: str, - dict_inputs: dict = {} -): - """ - For special shapes, include a direction check where each - batch of vp have direction generated, and compare that against - the direction the segment is running. - """ - USABLE_VP = dict_inputs["stage1"] - INPUT_FILE_PREFIX = dict_inputs["stage2"] - SEGMENT_FILE = dict_inputs["segments_file"] - SEGMENT_IDENTIFIER_COLS = dict_inputs["segment_identifier_cols"] - GROUPING_COL = dict_inputs["grouping_col"] - TIMESTAMP_COL = dict_inputs["timestamp_col"] - EXPORT_FILE = dict_inputs["stage3"] - - - special_shapes = identify_stop_segment_cases( - analysis_date, GROUPING_COL, 1) - - vp_joined_to_segments = merge_usable_vp_with_sjoin_vpidx( - special_shapes, - f"{USABLE_VP}_{analysis_date}", - f"{INPUT_FILE_PREFIX}_{analysis_date}", - sjoin_filtering = [[(GROUPING_COL, "in", special_shapes)]], - columns = ["vp_idx", "trip_instance_key", TIMESTAMP_COL, - "x", "y"] - ) - - segments = helpers.import_segments( - file_name = f"{SEGMENT_FILE}_{analysis_date}", - filters = [[(GROUPING_COL, "in", special_shapes)]], - columns = SEGMENT_IDENTIFIER_COLS + ["geometry"], - partitioned = False - ) - - vp_pared_special = find_errors_in_segment_groups( - vp_joined_to_segments, - segments, - SEGMENT_IDENTIFIER_COLS - ) - - special_vp_to_keep = segment_calcs.keep_min_max_timestamps_by_segment( - vp_pared_special, - SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"], - TIMESTAMP_COL - ) - - special_vp_to_keep = special_vp_to_keep.repartition(npartitions=1) - - special_vp_to_keep.to_parquet( - f"{SEGMENT_GCS}vp_pare_down/{EXPORT_FILE}_special_{analysis_date}", - overwrite = True) - - - -if __name__ == "__main__": - - LOG_FILE = "../logs/valid_vehicle_positions.log" - logger.add(LOG_FILE, retention="3 months") - logger.add(sys.stderr, - format="{time:YYYY-MM-DD at HH:mm:ss} | {level} | {message}", - level="INFO") - - logger.info(f"Analysis date: {analysis_date}") - - start = datetime.datetime.now() - - STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments") - - time1 = datetime.datetime.now() - - pare_down_vp_for_special_cases( - analysis_date, - dict_inputs = STOP_SEG_DICT - ) - - time2 = datetime.datetime.now() - logger.info(f"pare down vp by stop segments special cases {time2 - time1}") - - end = datetime.datetime.now() - logger.info(f"execution time: {end-start}") \ No newline at end of file From 009ed58c20fd9c3024c94d5d481f273156206f23 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 21 Sep 2023 20:32:37 +0000 Subject: [PATCH 11/13] test sjoin postprocessing through recalculating speeds --- rt_segment_speeds/logs/sjoin_vp_segments.log | 6 + .../logs/speeds_by_segment_trip.log | 24 ++ .../logs/valid_vehicle_positions.log | 18 + .../scripts/A2_sjoin_postprocessing.py | 1 - .../scripts/A3_valid_vehicle_positions.py | 2 +- .../scripts/B1_speeds_by_segment_trip.py | 371 ++++++++++++++---- rt_segment_speeds/scripts/Makefile | 32 +- .../segment_speed_utils/segment_calcs.py | 42 +- 8 files changed, 390 insertions(+), 106 deletions(-) diff --git a/rt_segment_speeds/logs/sjoin_vp_segments.log b/rt_segment_speeds/logs/sjoin_vp_segments.log index b1ea97d92..3ff93bd3b 100644 --- a/rt_segment_speeds/logs/sjoin_vp_segments.log +++ b/rt_segment_speeds/logs/sjoin_vp_segments.log @@ -42,3 +42,9 @@ 2023-09-14 13:06:15.758 | INFO | __main__::223 - execution time: 0:12:03.599629 2023-09-14 13:06:31.913 | INFO | __main__::457 - Analysis date: 2023-09-13 2023-09-14 13:14:08.656 | INFO | __main__::465 - remove erroneous sjoin results: 0:07:36.742381 +2023-09-21 10:58:58.711 | INFO | __main__::457 - Analysis date: 2023-09-13 +2023-09-21 11:05:58.377 | INFO | __main__::465 - remove erroneous sjoin results: 0:06:59.626650 +2023-09-21 12:11:55.875 | INFO | __main__::456 - Analysis date: 2023-08-15 +2023-09-21 12:19:13.216 | INFO | __main__::464 - remove erroneous sjoin results: 0:07:17.337816 +2023-09-21 12:58:44.489 | INFO | __main__::456 - Analysis date: 2023-07-12 +2023-09-21 13:07:33.610 | INFO | __main__::464 - remove erroneous sjoin results: 0:08:49.120980 diff --git a/rt_segment_speeds/logs/speeds_by_segment_trip.log b/rt_segment_speeds/logs/speeds_by_segment_trip.log index 0e9d05197..cabecc11e 100644 --- a/rt_segment_speeds/logs/speeds_by_segment_trip.log +++ b/rt_segment_speeds/logs/speeds_by_segment_trip.log @@ -40,3 +40,27 @@ 2023-08-24 14:51:47.828 | INFO | __main__:linear_referencing_and_speed_by_segment:123 - calculate speeds: 0:00:00.006706 2023-08-24 14:56:14.082 | INFO | __main__::150 - speeds for stop segments: 0:04:32.610802 2023-08-24 14:56:14.083 | INFO | __main__::151 - execution time: 0:04:32.611824 +2023-09-21 11:53:11.705 | INFO | __main__::367 - Analysis date: 2023-09-13 +2023-09-21 11:53:18.285 | INFO | __main__:linear_referencing_vp_against_line:58 - set up merged vp with segments: 0:00:00.152710 +2023-09-21 11:53:18.322 | INFO | __main__:linear_referencing_vp_against_line:76 - linear referencing: 0:00:00.037063 +2023-09-21 11:57:20.821 | INFO | __main__:linear_referencing_and_speed_by_segment:300 - linear referencing: 0:04:09.111393 +2023-09-21 12:03:20.231 | INFO | __main__:linear_referencing_and_speed_by_segment:311 - make wide and get initial speeds: 0:05:59.409953 +2023-09-21 12:05:33.476 | INFO | __main__:linear_referencing_and_speed_by_segment:352 - recalculate speeds and get final: 0:02:13.244950 +2023-09-21 12:05:46.785 | INFO | __main__::375 - speeds for stop segments: 0:12:35.079026 +2023-09-21 12:05:46.788 | INFO | __main__::376 - execution time: 0:12:35.081785 +2023-09-21 12:28:12.385 | INFO | __main__::367 - Analysis date: 2023-08-15 +2023-09-21 12:28:19.027 | INFO | __main__:linear_referencing_vp_against_line:58 - set up merged vp with segments: 0:00:00.162699 +2023-09-21 12:28:19.063 | INFO | __main__:linear_referencing_vp_against_line:76 - linear referencing: 0:00:00.036557 +2023-09-21 12:32:49.165 | INFO | __main__:linear_referencing_and_speed_by_segment:300 - linear referencing: 0:04:36.761005 +2023-09-21 12:39:56.447 | INFO | __main__:linear_referencing_and_speed_by_segment:311 - make wide and get initial speeds: 0:07:07.282236 +2023-09-21 12:42:14.396 | INFO | __main__:linear_referencing_and_speed_by_segment:352 - recalculate speeds and get final: 0:02:17.948959 +2023-09-21 12:42:27.893 | INFO | __main__::375 - speeds for stop segments: 0:14:15.493395 +2023-09-21 12:42:27.894 | INFO | __main__::376 - execution time: 0:14:15.494456 +2023-09-21 13:16:38.177 | INFO | __main__::367 - Analysis date: 2023-07-12 +2023-09-21 13:16:44.655 | INFO | __main__:linear_referencing_vp_against_line:58 - set up merged vp with segments: 0:00:00.156084 +2023-09-21 13:16:44.692 | INFO | __main__:linear_referencing_vp_against_line:76 - linear referencing: 0:00:00.037555 +2023-09-21 13:20:41.231 | INFO | __main__:linear_referencing_and_speed_by_segment:300 - linear referencing: 0:04:03.035504 +2023-09-21 13:26:57.794 | INFO | __main__:linear_referencing_and_speed_by_segment:311 - make wide and get initial speeds: 0:06:16.562615 +2023-09-21 13:29:08.771 | INFO | __main__:linear_referencing_and_speed_by_segment:352 - recalculate speeds and get final: 0:02:10.977540 +2023-09-21 13:29:21.791 | INFO | __main__::375 - speeds for stop segments: 0:12:43.599595 +2023-09-21 13:29:21.792 | INFO | __main__::376 - execution time: 0:12:43.600710 diff --git a/rt_segment_speeds/logs/valid_vehicle_positions.log b/rt_segment_speeds/logs/valid_vehicle_positions.log index 6027f701c..0c4f402f0 100644 --- a/rt_segment_speeds/logs/valid_vehicle_positions.log +++ b/rt_segment_speeds/logs/valid_vehicle_positions.log @@ -85,3 +85,21 @@ 2023-09-14 13:20:47.965 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:06:15.874961 2023-09-14 13:20:47.966 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:06:17.237883 2023-09-14 13:20:47.967 | INFO | __main__::151 - execution time: 0:06:17.243094 +2023-09-21 11:06:27.840 | INFO | __main__::134 - Analysis date: 2023-09-13 +2023-09-21 11:06:29.244 | INFO | __main__:pare_down_vp_by_segment:103 - merge usable vp with sjoin results: 0:00:01.386494 +2023-09-21 11:06:29.323 | INFO | __main__:pare_down_vp_by_segment:112 - keep enter/exit points: 0:00:00.079269 +2023-09-21 11:13:01.939 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:06:32.615887 +2023-09-21 11:13:01.940 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:06:34.083216 +2023-09-21 11:13:01.941 | INFO | __main__::151 - execution time: 0:06:34.088864 +2023-09-21 12:19:30.995 | INFO | __main__::134 - Analysis date: 2023-08-15 +2023-09-21 12:19:32.432 | INFO | __main__:pare_down_vp_by_segment:103 - merge usable vp with sjoin results: 0:00:01.410420 +2023-09-21 12:19:32.535 | INFO | __main__:pare_down_vp_by_segment:112 - keep enter/exit points: 0:00:00.103218 +2023-09-21 12:27:56.376 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:08:23.840718 +2023-09-21 12:27:56.378 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:08:25.356390 +2023-09-21 12:27:56.379 | INFO | __main__::151 - execution time: 0:08:25.360301 +2023-09-21 13:07:51.526 | INFO | __main__::134 - Analysis date: 2023-07-12 +2023-09-21 13:07:52.848 | INFO | __main__:pare_down_vp_by_segment:103 - merge usable vp with sjoin results: 0:00:01.297768 +2023-09-21 13:07:52.939 | INFO | __main__:pare_down_vp_by_segment:112 - keep enter/exit points: 0:00:00.091154 +2023-09-21 13:16:20.491 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:08:27.552189 +2023-09-21 13:16:20.493 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:08:28.942646 +2023-09-21 13:16:20.493 | INFO | __main__::151 - execution time: 0:08:28.946383 diff --git a/rt_segment_speeds/scripts/A2_sjoin_postprocessing.py b/rt_segment_speeds/scripts/A2_sjoin_postprocessing.py index 32327dbb5..886dc7208 100644 --- a/rt_segment_speeds/scripts/A2_sjoin_postprocessing.py +++ b/rt_segment_speeds/scripts/A2_sjoin_postprocessing.py @@ -380,7 +380,6 @@ def remove_erroneous_sjoin_results( """ USABLE_VP = dict_inputs["stage1"] INPUT_FILE_PREFIX = dict_inputs["stage2"] - SEGMENT_IDENTIFIER_COLS = dict_inputs["segment_identifier_cols"] SEGMENT_TRIP_COLS = ["trip_instance_key"] + SEGMENT_IDENTIFIER_COLS GROUPING_COL = dict_inputs["grouping_col"] diff --git a/rt_segment_speeds/scripts/A3_valid_vehicle_positions.py b/rt_segment_speeds/scripts/A3_valid_vehicle_positions.py index e77dc3dc5..880d4b6b0 100644 --- a/rt_segment_speeds/scripts/A3_valid_vehicle_positions.py +++ b/rt_segment_speeds/scripts/A3_valid_vehicle_positions.py @@ -116,7 +116,7 @@ def pare_down_vp_by_segment( .repartition(npartitions=3) ) vp_to_keep.to_parquet( - f"{SEGMENT_GCS}vp_pare_down/{EXPORT_FILE}_all_{analysis_date}", + f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}", overwrite=True ) diff --git a/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py b/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py index 07a4993bf..a7e51b834 100644 --- a/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py +++ b/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py @@ -8,16 +8,251 @@ import dask.dataframe as dd import dask_geopandas as dg import datetime +import geopandas as gpd +import numpy as np import pandas as pd import sys from loguru import logger -from shared_utils.geography_utils import WGS84 +from shared_utils import geography_utils from segment_speed_utils import helpers, segment_calcs, wrangle_shapes from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, PROJECT_CRS, CONFIG_PATH) +from shared_utils.rt_utils import MPH_PER_MPS + + +def linear_referencing_vp_against_line( + vp: dd.DataFrame, + segments: gpd.GeoDataFrame, + segment_identifier_cols: list, + timestamp_col: str +) -> dd.DataFrame: + """ + Take the vp x,y columns, make into gdf. + Merge in segment geometry and do linear referencing. + Return just the shape_meters result and timestamp converted to seconds. + """ + time0 = datetime.datetime.now() + + # https://stackoverflow.com/questions/71685387/faster-methods-to-create-geodataframe-from-a-dask-or-pandas-dataframe + # https://github.com/geopandas/dask-geopandas/issues/197 + vp_gddf = dg.from_dask_dataframe( + vp, + geometry=dg.points_from_xy(vp, "x", "y") + ).set_crs(geography_utils.WGS84).to_crs(PROJECT_CRS).drop(columns = ["x", "y"]) + + vp_with_seg_geom = dd.merge( + vp_gddf, + segments, + on = segment_identifier_cols, + how = "inner" + ).rename(columns = { + "geometry_x": "vp_geometry", + "geometry_y": "segment_geometry"} + ).set_geometry("vp_geometry") + + vp_with_seg_geom = vp_with_seg_geom.repartition(npartitions=50) + + time1 = datetime.datetime.now() + logger.info(f"set up merged vp with segments: {time1 - time0}") + + shape_meters_series = vp_with_seg_geom.map_partitions( + wrangle_shapes.project_point_geom_onto_linestring, + "segment_geometry", + "vp_geometry", + meta = ("shape_meters", "float") + ) + + vp_with_seg_geom = segment_calcs.convert_timestamp_to_seconds( + vp_with_seg_geom, [timestamp_col]) + + vp_with_seg_geom = vp_with_seg_geom.assign( + shape_meters = shape_meters_series, + segment_meters = vp_with_seg_geom.segment_geometry.length + ) + + time2 = datetime.datetime.now() + logger.info(f"linear referencing: {time2 - time1}") + + drop_cols = [f"{timestamp_col}", "vp_geometry", "segment_geometry"] + vp_with_seg_geom2 = vp_with_seg_geom.drop(columns = drop_cols) + + return vp_with_seg_geom2 + + +def make_wide_get_speed( + df: dd.DataFrame, + group_cols: list, + timestamp_col: str +) -> dd.DataFrame: + """ + Get df wide and set up current vp_idx and get meters/sec_elapsed + against prior and calculate speed. + """ + vp2 = ( + df.groupby(group_cols, + observed=True, group_keys=False) + .agg({"vp_idx": "max"}) + .reset_index() + .merge( + df, + on = group_cols + ["vp_idx"], + how = "inner" + ) + ) + + vp1 = ( + df.groupby(group_cols, + observed=True, group_keys=False) + .agg({"vp_idx": "min"}) + .reset_index() + .merge( + df, + on = group_cols + ["vp_idx"], + how = "inner" + ).rename(columns = { + "vp_idx": "prior_vp_idx", + f"{timestamp_col}_sec": f"prior_{timestamp_col}_sec", + "shape_meters": "prior_shape_meters", + }) + ) + + df_wide = dd.merge( + vp2, + vp1, + on = group_cols, + how = "left" + ) + + speed = segment_calcs.derive_speed( + df_wide, + distance_cols = ("prior_shape_meters", "shape_meters"), + time_cols = (f"prior_{timestamp_col}_sec", f"{timestamp_col}_sec") + ) + + speed = speed.assign( + pct_segment = speed.meters_elapsed.divide(speed.segment_meters) + ) + + return speed + + +def filter_for_unstable_speeds( + df: pd.DataFrame, + pct_segment_threshold: float +) -> tuple[pd.DataFrame]: + ok_speeds = df[df.pct_segment > pct_segment_threshold] + low_speeds = df[df.pct_segment <= pct_segment_threshold] + + return ok_speeds, low_speeds + + +def recalculate_low_speeds_with_straight_distance( + low_speeds_df: pd.DataFrame, + group_cols: list, + timestamp_col: str +): + """ + For low speed segments, select a different vp_idx. + Use the current vp_idx and subtract by 1. + This will fill in something where the segment only had 1 point previously. + """ + keep_cols = group_cols + [ + "vp_idx", "location_timestamp_local_sec", + ] + + df1 = low_speeds_df[keep_cols].drop_duplicates().reset_index(drop=True) + + df1 = df1.assign( + prior_vp_idx = df1.vp_idx - 1 + ) + + usable_vp = dd.read_parquet( + f"{SEGMENT_GCS}vp_usable_{analysis_date}", + columns = ["trip_instance_key", "vp_idx", timestamp_col, "x", "y"] + ) + + vp_idx_bounds = segment_calcs.get_usable_vp_bounds_by_trip(usable_vp) + + df2 = pd.merge( + df1, + vp_idx_bounds, + on = "trip_instance_key", + how = "inner" + ) + + # Check that the prior_vp_idx actually is on the same trip (must be within bounds) + # If not, select the next point + df2 = df2.assign( + prior_vp_idx = df2.apply( + lambda x: + x.vp_idx + 1 if (x.prior_vp_idx < x.min_vp_idx) and + (x.vp_idx + 1 <= x.max_vp_idx) + else x.prior_vp_idx, + axis=1) + ).drop(columns = ["trip_instance_key", "min_vp_idx", "max_vp_idx"]) + + # We will need point geom again, since we are using straight distance + subset_vp_idx = np.union1d( + df2.vp_idx.unique(), + df2.prior_vp_idx.unique() + ).tolist() + + usable_vp2 = usable_vp[usable_vp.vp_idx.isin(subset_vp_idx)].compute() + + usable_gdf = geography_utils.create_point_geometry( + usable_vp2, + longitude_col = "x", + latitude_col = "y", + crs = PROJECT_CRS + ).drop(columns = ["x", "y"]).reset_index(drop=True) + usable_gdf2 = segment_calcs.convert_timestamp_to_seconds( + usable_gdf, [timestamp_col]).drop(columns = timestamp_col) + + # Merge in coord for current_vp_idx + # we already have a timestamp_sec for current vp_idx + gdf = pd.merge( + usable_gdf2.drop(columns = f"{timestamp_col}_sec"), + df2, + on = "vp_idx", + how = "inner" + ) + + # Merge in coord for prior_vp_idx + gdf2 = pd.merge( + gdf, + usable_gdf2[ + ["vp_idx", f"{timestamp_col}_sec", "geometry"] + ].add_prefix("prior_"), + on = "prior_vp_idx", + how = "inner" + ) + + # should we do straight distance or interpolate against full shape? + # what if full shape is problematic? + # do we want to do a check against the scale? that's not very robust either though + + gdf2 = gdf2.assign( + straight_distance = gdf2.geometry.distance(gdf2.prior_geometry) + ) + + gdf2 = gdf2.assign( + sec_elapsed = (gdf2[f"{timestamp_col}_sec"] - + gdf2[f"prior_{timestamp_col}_sec"]).abs() + ) + + gdf2 = gdf2.assign( + speed_mph = gdf2.straight_distance.divide(gdf2.sec_elapsed) * MPH_PER_MPS + ) + + drop_cols = ["geometry", "prior_geometry"] + results = gdf2.drop(columns = drop_cols) + + return results + + def linear_referencing_and_speed_by_segment( analysis_date: str, dict_inputs: dict = {} @@ -25,6 +260,7 @@ def linear_referencing_and_speed_by_segment( """ With just enter / exit points on segments, do the linear referencing to get shape_meters, and then derive speed. + Do a second pass for low speed segments with straight distance. """ time0 = datetime.datetime.now() @@ -37,19 +273,14 @@ def linear_referencing_and_speed_by_segment( # Keep subset of columns - don't need it all. we can get the # columns dropped through segments file vp_keep_cols = [ - 'gtfs_dataset_key', 'gtfs_dataset_name', - 'trip_id', 'trip_instance_key', - 'schedule_gtfs_dataset_key', + 'trip_instance_key', TIMESTAMP_COL, - 'x', 'y' + 'x', 'y', 'vp_idx' ] + SEGMENT_IDENTIFIER_COLS - vp = helpers.import_vehicle_positions( - SEGMENT_GCS, - f"{VP_FILE}_{analysis_date}/", - file_type = "df", - columns = vp_keep_cols, - partitioned = True + vp = dd.read_parquet( + f"{SEGMENT_GCS}{VP_FILE}_{analysis_date}", + columns = vp_keep_cols ) segments = helpers.import_segments( @@ -58,75 +289,72 @@ def linear_referencing_and_speed_by_segment( columns = SEGMENT_IDENTIFIER_COLS + ["geometry"] ).dropna(subset="geometry").reset_index(drop=True) - # https://stackoverflow.com/questions/71685387/faster-methods-to-create-geodataframe-from-a-dask-or-pandas-dataframe - # https://github.com/geopandas/dask-geopandas/issues/197 - vp_gddf = dg.from_dask_dataframe( - vp, - geometry=dg.points_from_xy(vp, "x", "y") - ).set_crs(WGS84).to_crs(PROJECT_CRS).drop(columns = ["x", "y"]) - - vp_with_seg_geom = dd.merge( - vp_gddf, + vp_with_seg_geom = linear_referencing_vp_against_line( + vp, segments, - on = SEGMENT_IDENTIFIER_COLS, - how = "inner" - ).rename(columns = { - "geometry_x": "vp_geometry", - "geometry_y": "segment_geometry"} - ).set_geometry("vp_geometry") + SEGMENT_IDENTIFIER_COLS, + TIMESTAMP_COL + ).persist() - vp_with_seg_geom = vp_with_seg_geom.repartition(npartitions=50) - time1 = datetime.datetime.now() - logger.info(f"set up merged vp with segments: {time1 - time0}") + logger.info(f"linear referencing: {time1 - time0}") - shape_meters_series = vp_with_seg_geom.map_partitions( - wrangle_shapes.project_point_geom_onto_linestring, - "segment_geometry", - "vp_geometry", - meta = ("shape_meters", "float") - ) + SEGMENT_TRIP_COLS = ["trip_instance_key", + "segment_meters"] + SEGMENT_IDENTIFIER_COLS + + initial_speeds = make_wide_get_speed( + vp_with_seg_geom, SEGMENT_TRIP_COLS, TIMESTAMP_COL + ).compute() - vp_with_seg_geom["shape_meters"] = shape_meters_series - vp_with_seg_geom = segment_calcs.convert_timestamp_to_seconds( - vp_with_seg_geom, [TIMESTAMP_COL]) time2 = datetime.datetime.now() - logger.info(f"linear referencing: {time2 - time1}") - - # set up metadata for columns in exact order output appears for map_partitions - dtypes_dict = vp_with_seg_geom[ - ["gtfs_dataset_key", "gtfs_dataset_name", - "trip_id", "trip_instance_key", - "schedule_gtfs_dataset_key" - ] + SEGMENT_IDENTIFIER_COLS - ].dtypes.to_dict() - - speeds = vp_with_seg_geom.map_partitions( - segment_calcs.calculate_speed_by_segment_trip, - SEGMENT_IDENTIFIER_COLS, - f"{TIMESTAMP_COL}_sec", - meta = { - **dtypes_dict, - "min_time": "float", - "min_dist": "float", - "max_time": "float", - "max_dist": "float", - "meters_elapsed": "float", - "sec_elapsed": "float", - "speed_mph": "float", - }) - - speeds = speeds.repartition(npartitions=2) + logger.info(f"make wide and get initial speeds: {time2 - time1}") + + ok_speeds, low_speeds = filter_for_unstable_speeds( + initial_speeds, + pct_segment_threshold = 0.3 + ) + + low_speeds_recalculated = recalculate_low_speeds_with_straight_distance( + low_speeds, + SEGMENT_TRIP_COLS, + TIMESTAMP_COL + ) + + # Add a flag that tells us speed was recalculated + # Combine columns and rename straight distance as meters_elapsed + low_speeds_recalculated = low_speeds_recalculated.assign( + flag_recalculated = 1, + meters_elapsed = low_speeds_recalculated.straight_distance + ) + + keep_cols = SEGMENT_TRIP_COLS + [ + "vp_idx", "prior_vp_idx", + f"{TIMESTAMP_COL}_sec", f"prior_{TIMESTAMP_COL}_sec", + "meters_elapsed", + "sec_elapsed", + "pct_segment", + "speed_mph", + "flag_recalculated", + ] + + speeds = pd.concat([ + ok_speeds, + low_speeds_recalculated + ], axis=0).sort_values(SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"] + ).reset_index(drop=True) + + speeds = speeds.assign( + flag_recalculated = speeds.flag_recalculated.fillna(0).astype("int8") + )[keep_cols] time3 = datetime.datetime.now() - logger.info(f"calculate speeds: {time3 - time2}") + logger.info(f"recalculate speeds and get final: {time3 - time2}") speeds.to_parquet( - f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}", - overwrite = True + f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}.parquet", ) - + if __name__ == "__main__": @@ -141,11 +369,8 @@ def linear_referencing_and_speed_by_segment( start = datetime.datetime.now() STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments") - - linear_referencing_and_speed_by_segment( - analysis_date, - dict_inputs = STOP_SEG_DICT - ) + + linear_referencing_and_speed_by_segment(analysis_date, STOP_SEG_DICT) logger.info(f"speeds for stop segments: {datetime.datetime.now() - start}") logger.info(f"execution time: {datetime.datetime.now() - start}") diff --git a/rt_segment_speeds/scripts/Makefile b/rt_segment_speeds/scripts/Makefile index 239111bc5..d0ae2caa9 100644 --- a/rt_segment_speeds/scripts/Makefile +++ b/rt_segment_speeds/scripts/Makefile @@ -4,36 +4,18 @@ segmentize: python cut_normal_stop_segments.py python cut_special_stop_segments.py python concatenate_stop_segments.py - - -get_speeds_by_segment: - python A0_preprocessing.py - python A1_sjoin_vp_segments.py - python A2_valid_vehicle_positions.py - python A3_loop_inlining.py - python A4_concatenate_vp_pared.py - python B1_speeds_by_segment_trip.py - -export_data: - python B2_avg_speeds_by_segment.py - python B3_export.py - python C2_triangulate_vp.py - python C3_trip_route_speed.py - #python C1_rt_trip_diagnostics.py - speeds_pipeline: #python A0_preprocessing.py - python A1_sjoin_vp_segments.py - python A2_valid_vehicle_positions.py - python A3_loop_inlining.py - python A4_concatenate_vp_pared.py + #python A1_sjoin_vp_segments.py + python A2_sjoin_postprocessing.py + python A3_valid_vehicle_positions.py python B1_speeds_by_segment_trip.py - python B2_avg_speeds_by_segment.py - python B3_export.py - python C2_triangulate_vp.py - python C3_trip_route_speed.py + #python B2_avg_speeds_by_segment.py + #python B3_export.py + #python C2_triangulate_vp.py + #python C3_trip_route_speed.py download_roads: diff --git a/rt_segment_speeds/segment_speed_utils/segment_calcs.py b/rt_segment_speeds/segment_speed_utils/segment_calcs.py index dfe7597ca..0befce432 100644 --- a/rt_segment_speeds/segment_speed_utils/segment_calcs.py +++ b/rt_segment_speeds/segment_speed_utils/segment_calcs.py @@ -59,8 +59,8 @@ def keep_min_max_timestamps_by_segment( def derive_speed( df: pd.DataFrame, - distance_cols: tuple = ("min_dist", "max_dist"), - time_cols: tuple = ("min_time", "max_time") + distance_cols: tuple = ("prior_shape_meters", "shape_meters"), + time_cols: tuple = ("prior_location_timestamp_local_sec", "location_timestamp_local_sec") ) -> pd.DataFrame: """ Derive meters and sec elapsed to calculate speed_mph. @@ -69,19 +69,19 @@ def derive_speed( min_time, max_time = time_cols[0], time_cols[1] df = df.assign( - meters_elapsed = df[max_dist] - df[min_dist] + meters_elapsed = (df[max_dist] - df[min_dist]).abs() ) if df[min_time].dtype in ["float", "int"]: # If 2 time cols are already converted to seconds, just take difference df = df.assign( - sec_elapsed = (df[max_time] - df[min_time]) + sec_elapsed = (df[max_time] - df[min_time]).abs() ) else: # If 2 time cols are datetime, convert timedelta to seconds df = df.assign( sec_elapsed = (df[max_time] - df[min_time]).divide( - np.timedelta64(1, 's')), + np.timedelta64(1, 's')).abs(), ) df = df.assign( @@ -176,4 +176,34 @@ def derive_stop_delay( actual_minus_scheduled_sec = df[actual] - df[scheduled] ) - return df \ No newline at end of file + return df + + +def get_usable_vp_bounds_by_trip(df: dd.DataFrame) -> pd.DataFrame: + """ + Of all the usable vp, for each trip, find the min(vp_idx) + and max(vp_idx). + For the first stop, there will never be a previous vp to find, + because the previous vp_idx will belong to a different operator/trip. + But for segments in the middle of the shape, the previous vp can be anywhere, + maybe several segments away. + """ + + grouped_df = df.groupby("trip_instance_key", + observed=True, group_keys=False) + + start_vp = (grouped_df.vp_idx.min().reset_index() + .rename(columns = {"vp_idx": "min_vp_idx"}) + ) + end_vp = (grouped_df.vp_idx.max().reset_index() + .rename(columns = {"vp_idx": "max_vp_idx"}) + ) + + df2 = dd.merge( + start_vp, + end_vp, + on = "trip_instance_key", + how = "left" + ).reset_index(drop=True).compute() + + return df2 From 45bff9ad9baf889427f15326fdfd1b3d3fbfae76 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Thu, 21 Sep 2023 20:45:41 +0000 Subject: [PATCH 12/13] (remove): test speeds script, move to B1 --- rt_segment_speeds/scripts/test_speeds.py | 400 ----------------------- 1 file changed, 400 deletions(-) delete mode 100644 rt_segment_speeds/scripts/test_speeds.py diff --git a/rt_segment_speeds/scripts/test_speeds.py b/rt_segment_speeds/scripts/test_speeds.py deleted file mode 100644 index 916b98129..000000000 --- a/rt_segment_speeds/scripts/test_speeds.py +++ /dev/null @@ -1,400 +0,0 @@ -import os -os.environ['USE_PYGEOS'] = '0' - -import dask.dataframe as dd -import dask_geopandas as dg -import datetime -import geopandas as gpd -import numpy as np -import pandas as pd -import sys - -from loguru import logger - -from shared_utils import geography_utils -from shared_utils.rt_utils import MPH_PER_MPS -from segment_speed_utils import helpers, segment_calcs, wrangle_shapes -from segment_speed_utils.project_vars import (SEGMENT_GCS, analysis_date, - PROJECT_CRS, CONFIG_PATH) - -def get_usable_vp_bounds_by_trip(df: dd.DataFrame) -> pd.DataFrame: - """ - Of all the usable vp, for each trip, find the min(vp_idx) - and max(vp_idx). - For the first stop, there will never be a previous vp to find, - because the previous vp_idx will belong to a different operator/trip. - But for segments in the middle of the shape, the previous vp can be anywhere, - maybe several segments away. - """ - - grouped_df = df.groupby("trip_instance_key", - observed=True, group_keys=False) - - start_vp = (grouped_df.vp_idx.min().reset_index() - .rename(columns = {"vp_idx": "min_vp_idx"}) - ) - end_vp = (grouped_df.vp_idx.max().reset_index() - .rename(columns = {"vp_idx": "max_vp_idx"}) - ) - - df2 = dd.merge( - start_vp, - end_vp, - on = "trip_instance_key", - how = "left" - ).reset_index(drop=True).compute() - - return df2 - -def linear_referencing_vp_against_line( - vp: dd.DataFrame, - segments: gpd.GeoDataFrame, - segment_identifier_cols: list, - timestamp_col: str -) -> dd.DataFrame: - time0 = datetime.datetime.now() - - # https://stackoverflow.com/questions/71685387/faster-methods-to-create-geodataframe-from-a-dask-or-pandas-dataframe - # https://github.com/geopandas/dask-geopandas/issues/197 - vp_gddf = dg.from_dask_dataframe( - vp, - geometry=dg.points_from_xy(vp, "x", "y") - ).set_crs(geography_utils.WGS84).to_crs(PROJECT_CRS).drop(columns = ["x", "y"]) - - vp_with_seg_geom = dd.merge( - vp_gddf, - segments, - on = segment_identifier_cols, - how = "inner" - ).rename(columns = { - "geometry_x": "vp_geometry", - "geometry_y": "segment_geometry"} - ).set_geometry("vp_geometry") - - vp_with_seg_geom = vp_with_seg_geom.repartition(npartitions=50) - - time1 = datetime.datetime.now() - logger.info(f"set up merged vp with segments: {time1 - time0}") - - shape_meters_series = vp_with_seg_geom.map_partitions( - wrangle_shapes.project_point_geom_onto_linestring, - "segment_geometry", - "vp_geometry", - meta = ("shape_meters", "float") - ) - - vp_with_seg_geom = segment_calcs.convert_timestamp_to_seconds( - vp_with_seg_geom, [timestamp_col]) - - vp_with_seg_geom = vp_with_seg_geom.assign( - shape_meters = shape_meters_series, - segment_meters = vp_with_seg_geom.segment_geometry.length - ) - - time2 = datetime.datetime.now() - logger.info(f"linear referencing: {time2 - time1}") - - drop_cols = [f"{timestamp_col}", "vp_geometry", "segment_geometry"] - vp_with_seg_geom2 = vp_with_seg_geom.drop(columns = drop_cols) - - return vp_with_seg_geom2 - - -def make_wide( - df: dd.DataFrame, - group_cols: list, - timestamp_col: str -) -> dd.DataFrame: - """ - Get df wide and set up current vp_idx and get meters/sec_elapsed - against prior. - """ - vp2 = ( - df.groupby(group_cols, - observed=True, group_keys=False) - .agg({"vp_idx": "max"}) - .reset_index() - .merge( - df, - on = group_cols + ["vp_idx"], - how = "inner" - ) - ) - - vp1 = ( - df.groupby(group_cols, - observed=True, group_keys=False) - .agg({"vp_idx": "min"}) - .reset_index() - .merge( - df, - on = group_cols + ["vp_idx"], - how = "inner" - ).rename(columns = { - "vp_idx": "prior_vp_idx", - f"{timestamp_col}_sec": f"prior_{timestamp_col}_sec", - "shape_meters": "prior_shape_meters", - }) - ) - - df_wide = dd.merge( - vp2, - vp1, - on = group_cols, - how = "left" - ) - - df_wide = df_wide.assign( - meters_elapsed = (df_wide.shape_meters - - df_wide.prior_shape_meters).abs(), - sec_elapsed = (df_wide[f"{timestamp_col}_sec"]- - df_wide[f"prior_{timestamp_col}_sec"]).abs(), - ) - - df_wide = df_wide.assign( - pct_segment = df_wide.meters_elapsed.divide(df_wide.segment_meters) - ) - - return df_wide - - -def calculate_speed( - df: dd.DataFrame, - distance_cols: tuple = ("prior_shape_meters", "shape_meters"), - time_cols: tuple = ("prior_location_timestamp_local_sec", "location_timestamp_local_sec") -) -> dd.DataFrame: - - min_dist, max_dist = distance_cols - min_time, max_time = time_cols - - df = df.assign( - meters_elapsed = (df[max_dist] - df[min_dist]).abs(), - sec_elapsed = (df[max_time] - df[min_time]).abs(), - ) - - df = df.assign( - speed_mph = (df.meters_elapsed.divide(df.sec_elapsed) * - MPH_PER_MPS) - ) - - return df - - -def filter_for_unstable_speeds( - df: pd.DataFrame, - pct_segment_threshold: float -) -> tuple[pd.DataFrame]: - ok_speeds = df[df.pct_segment > pct_segment_threshold] - low_speeds = df[df.pct_segment <= pct_segment_threshold] - - return ok_speeds, low_speeds - -def low_speed_segments_select_different_prior_vp( - low_speeds_df: pd.DataFrame, - group_cols: list, - timestamp_col: str -): - - keep_cols = group_cols + [ - "vp_idx", "location_timestamp_local_sec", - ] - - df1 = low_speeds_df[keep_cols] - - df1 = df1.assign( - prior_vp_idx = df1.vp_idx -1 - ) - - usable_vp = dd.read_parquet( - f"{SEGMENT_GCS}vp_usable_{analysis_date}", - columns = ["trip_instance_key", "vp_idx", timestamp_col, "x", "y"] - ) - - vp_idx_bounds = get_usable_vp_bounds_by_trip(usable_vp) - - df2 = pd.merge( - df1, - vp_idx_bounds, - on = "trip_instance_key", - how = "inner" - ) - - df2 = df2.assign( - prior_vp_idx = df2.apply( - lambda x: - x.vp_idx + 1 if (x.prior_vp_idx < x.min_vp_idx) and - (x.vp_idx + 1 <= x.max_vp_idx) - else x.prior_vp_idx, - axis=1) - ).drop(columns = ["trip_instance_key", "min_vp_idx", "max_vp_idx"]) - - - subset_vp_idx = np.union1d( - df2.vp_idx.unique(), - df2.prior_vp_idx.unique() - ).tolist() - - usable_vp2 = usable_vp[usable_vp.vp_idx.isin(subset_vp_idx)].compute() - - usable_gdf = geography_utils.create_point_geometry( - usable_vp2, - longitude_col = "x", - latitude_col = "y", - crs = PROJECT_CRS - ).drop(columns = ["x", "y"]).reset_index(drop=True) - - usable_gdf2 = segment_calcs.convert_timestamp_to_seconds( - usable_gdf, [timestamp_col]).drop(columns = timestamp_col) - - # Merge in coord for current_vp_idx - # we already have a timestamp_sec for current vp_idx - gdf = pd.merge( - usable_gdf2.drop(columns = f"{timestamp_col}_sec"), - df2, - on = "vp_idx", - how = "inner" - ) - - # Merge in coord for prior_vp_idx - gdf2 = pd.merge( - gdf, - usable_gdf2[["vp_idx", f"{timestamp_col}_sec", "geometry"]].add_prefix("prior_"), - on = "prior_vp_idx", - how = "inner" - ) - - # should we do straight distance or interpolate against full shape? - # what if full shape is problematic? - # do we want to do a check against the scale? that's not very robust either though - - gdf2 = gdf2.assign( - straight_distance = gdf2.geometry.distance(gdf2.prior_geometry) - ) - - gdf2 = gdf2.assign( - sec_elapsed = (gdf2[f"{timestamp_col}_sec"] - - gdf2[f"prior_{timestamp_col}_sec"]).abs() - ) - - gdf2 = gdf2.assign( - speed_mph = gdf2.straight_distance.divide(gdf2.sec_elapsed) * MPH_PER_MPS - ) - - drop_cols = ["geometry", "prior_geometry"] - results = gdf2.drop(columns = drop_cols) - - return results - - -def linear_referencing_and_speed_by_segment( - analysis_date: str, - dict_inputs: dict = {} -): - """ - With just enter / exit points on segments, - do the linear referencing to get shape_meters, and then derive speed. - """ - time0 = datetime.datetime.now() - - VP_FILE = dict_inputs["stage3"] - SEGMENT_FILE = dict_inputs["segments_file"] - SEGMENT_IDENTIFIER_COLS = dict_inputs["segment_identifier_cols"] - TIMESTAMP_COL = dict_inputs["timestamp_col"] - EXPORT_FILE = dict_inputs["stage4"] - - # Keep subset of columns - don't need it all. we can get the - # columns dropped through segments file - vp_keep_cols = [ - 'trip_instance_key', - TIMESTAMP_COL, - 'x', 'y', 'vp_idx' - ] + SEGMENT_IDENTIFIER_COLS - - vp = dd.read_parquet( - f"{SEGMENT_GCS}vp_pare_down/{VP_FILE}_all_{analysis_date}", - columns = vp_keep_cols - ) - - segments = helpers.import_segments( - SEGMENT_GCS, - f"{SEGMENT_FILE}_{analysis_date}", - columns = SEGMENT_IDENTIFIER_COLS + ["geometry"] - ).dropna(subset="geometry").reset_index(drop=True) - - vp_with_seg_geom = linear_referencing_vp_against_line( - vp, - segments, - SEGMENT_IDENTIFIER_COLS, - TIMESTAMP_COL - ).persist() - - time1 = datetime.datetime.now() - logger.info(f"linear referencing: {time1 - time0}") - - SEGMENT_TRIP_COLS = ["trip_instance_key", - "segment_meters"] + SEGMENT_IDENTIFIER_COLS - - vp_with_seg_wide = make_wide( - vp_with_seg_geom, SEGMENT_TRIP_COLS, TIMESTAMP_COL - ) - - initial_speeds = calculate_speed( - vp_with_seg_wide, - distance_cols = ("prior_shape_meters", "shape_meters"), - time_cols = (f"prior_{TIMESTAMP_COL}_sec", f"{TIMESTAMP_COL}_sec") - ).compute() - - time2 = datetime.datetime.now() - logger.info(f"make wide and get initial speeds: {time2 - time1}") - - ok_speeds, low_speeds = filter_for_unstable_speeds( - initial_speeds, - pct_segment_threshold = 0.3 - ) - - low_speeds_recalculated = low_speed_segments_select_different_prior_vp( - low_speeds, - SEGMENT_TRIP_COLS, - TIMESTAMP_COL - ) - - low_speeds_recalculated = low_speeds_recalculated.assign( - flag_recalculated = 1, - meters_elapsed = low_speeds_recalculated.straight_distance - ) - - keep_cols = SEGMENT_TRIP_COLS + [ - "vp_idx", "prior_vp_idx", - f"{TIMESTAMP_COL}_sec", f"prior_{TIMESTAMP_COL}_sec", - "meters_elapsed", - "sec_elapsed", - "pct_segment", - "speed_mph", - "flag_recalculated", - ] - - speeds = pd.concat([ - ok_speeds, - low_speeds_recalculated - ], axis=0).sort_values(SEGMENT_IDENTIFIER_COLS + ["trip_instance_key"] - ).reset_index(drop=True) - - speeds = speeds.assign( - flag_recalculated = speeds.flag_recalculated.fillna(0).astype("int8") - )[keep_cols] - - time3 = datetime.datetime.now() - logger.info(f"recalculate speeds and get final: {time3 - time2}") - - speeds.to_parquet( - f"{SEGMENT_GCS}{EXPORT_FILE}_{analysis_date}_df.parquet", - ) - - time4 = datetime.datetime.now() - logger.info(f"execution time: {time4 - time0}") - -if __name__ == "__main__": - - STOP_SEG_DICT = helpers.get_parameters(CONFIG_PATH, "stop_segments") - - linear_referencing_and_speed_by_segment(analysis_date, STOP_SEG_DICT) \ No newline at end of file From 56690f7f98b45ac9a989507b7c1b3eabdb19b7b4 Mon Sep 17 00:00:00 2001 From: tiffanychu90 Date: Fri, 22 Sep 2023 16:41:09 +0000 Subject: [PATCH 13/13] rerun mar-sep from sjoin postprocessing onward --- rt_segment_speeds/logs/avg_speeds.log | 14 ++++ rt_segment_speeds/logs/sjoin_vp_segments.log | 8 +++ .../logs/speeds_by_segment_trip.log | 32 +++++++++ .../logs/valid_vehicle_positions.log | 24 +++++++ .../scripts/B1_speeds_by_segment_trip.py | 6 +- .../scripts/B2_avg_speeds_by_segment.py | 70 ++++++------------- rt_segment_speeds/scripts/B3_export.py | 2 +- rt_segment_speeds/scripts/Makefile | 6 +- rt_segment_speeds/scripts/config.yml | 3 +- 9 files changed, 110 insertions(+), 55 deletions(-) diff --git a/rt_segment_speeds/logs/avg_speeds.log b/rt_segment_speeds/logs/avg_speeds.log index 866fd2852..d568f1f2b 100644 --- a/rt_segment_speeds/logs/avg_speeds.log +++ b/rt_segment_speeds/logs/avg_speeds.log @@ -12,3 +12,17 @@ 2023-08-18 14:53:19.222 | INFO | __main__::165 - execution time: 0:05:17.742302 2023-08-24 14:56:29.366 | INFO | __main__::143 - Analysis date: 2023-08-15 2023-08-24 15:02:21.592 | INFO | __main__::165 - execution time: 0:05:52.222770 +2023-09-21 14:37:32.197 | INFO | __main__::167 - Analysis date: 2023-09-13 +2023-09-21 14:43:35.784 | INFO | __main__::189 - execution time: 0:06:03.584427 +2023-09-21 15:49:19.146 | INFO | __main__::167 - Analysis date: 2023-06-14 +2023-09-21 15:54:51.743 | INFO | __main__::189 - execution time: 0:05:32.595873 +2023-09-21 17:54:03.991 | INFO | __main__::167 - Analysis date: 2023-05-17 +2023-09-21 18:00:12.730 | INFO | __main__::189 - execution time: 0:06:08.738161 +2023-09-21 18:40:52.924 | INFO | __main__::159 - Analysis date: 2023-04-12 +2023-09-21 18:47:14.179 | INFO | __main__::181 - execution time: 0:06:21.254666 +2023-09-21 19:28:44.034 | INFO | __main__::159 - Analysis date: 2023-03-15 +2023-09-21 19:34:39.639 | INFO | __main__::181 - execution time: 0:05:55.604394 +2023-09-21 19:36:10.116 | INFO | __main__::159 - Analysis date: 2023-07-12 +2023-09-21 19:41:33.957 | INFO | __main__::181 - execution time: 0:05:23.840096 +2023-09-21 19:43:00.511 | INFO | __main__::159 - Analysis date: 2023-08-15 +2023-09-21 19:49:14.412 | INFO | __main__::181 - execution time: 0:06:13.900161 diff --git a/rt_segment_speeds/logs/sjoin_vp_segments.log b/rt_segment_speeds/logs/sjoin_vp_segments.log index 3ff93bd3b..f6e542da7 100644 --- a/rt_segment_speeds/logs/sjoin_vp_segments.log +++ b/rt_segment_speeds/logs/sjoin_vp_segments.log @@ -48,3 +48,11 @@ 2023-09-21 12:19:13.216 | INFO | __main__::464 - remove erroneous sjoin results: 0:07:17.337816 2023-09-21 12:58:44.489 | INFO | __main__::456 - Analysis date: 2023-07-12 2023-09-21 13:07:33.610 | INFO | __main__::464 - remove erroneous sjoin results: 0:08:49.120980 +2023-09-21 15:22:02.693 | INFO | __main__::456 - Analysis date: 2023-06-14 +2023-09-21 15:29:14.363 | INFO | __main__::464 - remove erroneous sjoin results: 0:07:11.668827 +2023-09-21 17:26:50.733 | INFO | __main__::456 - Analysis date: 2023-05-17 +2023-09-21 17:33:37.420 | INFO | __main__::464 - remove erroneous sjoin results: 0:06:46.686094 +2023-09-21 18:03:52.838 | INFO | __main__::456 - Analysis date: 2023-04-12 +2023-09-21 18:11:04.091 | INFO | __main__::464 - remove erroneous sjoin results: 0:07:11.251722 +2023-09-21 18:55:33.230 | INFO | __main__::456 - Analysis date: 2023-03-15 +2023-09-21 19:05:26.800 | INFO | __main__::464 - remove erroneous sjoin results: 0:09:53.569193 diff --git a/rt_segment_speeds/logs/speeds_by_segment_trip.log b/rt_segment_speeds/logs/speeds_by_segment_trip.log index cabecc11e..855f2090e 100644 --- a/rt_segment_speeds/logs/speeds_by_segment_trip.log +++ b/rt_segment_speeds/logs/speeds_by_segment_trip.log @@ -64,3 +64,35 @@ 2023-09-21 13:29:08.771 | INFO | __main__:linear_referencing_and_speed_by_segment:352 - recalculate speeds and get final: 0:02:10.977540 2023-09-21 13:29:21.791 | INFO | __main__::375 - speeds for stop segments: 0:12:43.599595 2023-09-21 13:29:21.792 | INFO | __main__::376 - execution time: 0:12:43.600710 +2023-09-21 15:36:54.009 | INFO | __main__::369 - Analysis date: 2023-06-14 +2023-09-21 15:36:59.961 | INFO | __main__:linear_referencing_vp_against_line:58 - set up merged vp with segments: 0:00:00.156330 +2023-09-21 15:37:00.016 | INFO | __main__:linear_referencing_vp_against_line:76 - linear referencing: 0:00:00.055084 +2023-09-21 15:40:58.894 | INFO | __main__:linear_referencing_and_speed_by_segment:302 - linear referencing: 0:04:04.880927 +2023-09-21 15:46:35.541 | INFO | __main__:linear_referencing_and_speed_by_segment:313 - make wide and get initial speeds: 0:05:36.646767 +2023-09-21 15:48:52.854 | INFO | __main__:linear_referencing_and_speed_by_segment:354 - recalculate speeds and get final: 0:02:17.313285 +2023-09-21 15:49:04.953 | INFO | __main__::377 - speeds for stop segments: 0:12:10.943637 +2023-09-21 15:49:04.954 | INFO | __main__::378 - execution time: 0:12:10.944518 +2023-09-21 17:41:01.371 | INFO | __main__::369 - Analysis date: 2023-05-17 +2023-09-21 17:41:07.436 | INFO | __main__:linear_referencing_vp_against_line:58 - set up merged vp with segments: 0:00:00.155348 +2023-09-21 17:41:07.465 | INFO | __main__:linear_referencing_vp_against_line:76 - linear referencing: 0:00:00.028611 +2023-09-21 17:45:03.306 | INFO | __main__:linear_referencing_and_speed_by_segment:302 - linear referencing: 0:04:01.914759 +2023-09-21 17:51:13.000 | INFO | __main__:linear_referencing_and_speed_by_segment:313 - make wide and get initial speeds: 0:06:09.693714 +2023-09-21 17:53:38.147 | INFO | __main__:linear_referencing_and_speed_by_segment:354 - recalculate speeds and get final: 0:02:25.147277 +2023-09-21 17:53:49.471 | INFO | __main__::377 - speeds for stop segments: 0:12:48.084151 +2023-09-21 17:53:49.474 | INFO | __main__::378 - execution time: 0:12:48.087006 +2023-09-21 18:19:33.226 | INFO | __main__::369 - Analysis date: 2023-04-12 +2023-09-21 18:19:39.961 | INFO | __main__:linear_referencing_vp_against_line:58 - set up merged vp with segments: 0:00:00.173198 +2023-09-21 18:19:39.999 | INFO | __main__:linear_referencing_vp_against_line:76 - linear referencing: 0:00:00.037629 +2023-09-21 18:23:57.932 | INFO | __main__:linear_referencing_and_speed_by_segment:302 - linear referencing: 0:04:24.702955 +2023-09-21 18:30:30.758 | INFO | __main__:linear_referencing_and_speed_by_segment:313 - make wide and get initial speeds: 0:06:32.825103 +2023-09-21 18:32:55.850 | INFO | __main__:linear_referencing_and_speed_by_segment:354 - recalculate speeds and get final: 0:02:25.091989 +2023-09-21 18:33:07.969 | INFO | __main__::377 - speeds for stop segments: 0:13:34.743167 +2023-09-21 18:33:07.972 | INFO | __main__::378 - execution time: 0:13:34.745381 +2023-09-21 19:14:16.648 | INFO | __main__::369 - Analysis date: 2023-03-15 +2023-09-21 19:14:22.748 | INFO | __main__:linear_referencing_vp_against_line:58 - set up merged vp with segments: 0:00:00.170221 +2023-09-21 19:14:22.792 | INFO | __main__:linear_referencing_vp_against_line:76 - linear referencing: 0:00:00.044492 +2023-09-21 19:18:37.589 | INFO | __main__:linear_referencing_and_speed_by_segment:302 - linear referencing: 0:04:20.919458 +2023-09-21 19:24:39.102 | INFO | __main__:linear_referencing_and_speed_by_segment:313 - make wide and get initial speeds: 0:06:01.513090 +2023-09-21 19:28:06.672 | INFO | __main__:linear_referencing_and_speed_by_segment:354 - recalculate speeds and get final: 0:03:27.569825 +2023-09-21 19:28:23.294 | INFO | __main__::377 - speeds for stop segments: 0:14:06.628501 +2023-09-21 19:28:23.296 | INFO | __main__::378 - execution time: 0:14:06.630732 diff --git a/rt_segment_speeds/logs/valid_vehicle_positions.log b/rt_segment_speeds/logs/valid_vehicle_positions.log index 0c4f402f0..684a0bcbe 100644 --- a/rt_segment_speeds/logs/valid_vehicle_positions.log +++ b/rt_segment_speeds/logs/valid_vehicle_positions.log @@ -103,3 +103,27 @@ 2023-09-21 13:16:20.491 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:08:27.552189 2023-09-21 13:16:20.493 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:08:28.942646 2023-09-21 13:16:20.493 | INFO | __main__::151 - execution time: 0:08:28.946383 +2023-09-21 15:29:36.092 | INFO | __main__::134 - Analysis date: 2023-06-14 +2023-09-21 15:29:37.363 | INFO | __main__:pare_down_vp_by_segment:103 - merge usable vp with sjoin results: 0:00:01.266538 +2023-09-21 15:29:37.460 | INFO | __main__:pare_down_vp_by_segment:112 - keep enter/exit points: 0:00:00.096562 +2023-09-21 15:36:38.508 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:07:01.048429 +2023-09-21 15:36:38.510 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:07:02.413058 +2023-09-21 15:36:38.510 | INFO | __main__::151 - execution time: 0:07:02.417441 +2023-09-21 17:33:53.360 | INFO | __main__::134 - Analysis date: 2023-05-17 +2023-09-21 17:33:54.621 | INFO | __main__:pare_down_vp_by_segment:103 - merge usable vp with sjoin results: 0:00:01.258114 +2023-09-21 17:33:54.702 | INFO | __main__:pare_down_vp_by_segment:112 - keep enter/exit points: 0:00:00.080633 +2023-09-21 17:40:45.339 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:06:50.637139 +2023-09-21 17:40:45.341 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:06:51.977448 +2023-09-21 17:40:45.341 | INFO | __main__::151 - execution time: 0:06:51.981031 +2023-09-21 18:11:20.644 | INFO | __main__::134 - Analysis date: 2023-04-12 +2023-09-21 18:11:21.907 | INFO | __main__:pare_down_vp_by_segment:103 - merge usable vp with sjoin results: 0:00:01.253286 +2023-09-21 18:11:21.982 | INFO | __main__:pare_down_vp_by_segment:112 - keep enter/exit points: 0:00:00.074969 +2023-09-21 18:19:17.733 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:07:55.751630 +2023-09-21 18:19:17.735 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:07:57.081328 +2023-09-21 18:19:17.735 | INFO | __main__::151 - execution time: 0:07:57.086467 +2023-09-21 19:05:57.111 | INFO | __main__::134 - Analysis date: 2023-03-15 +2023-09-21 19:05:58.586 | INFO | __main__:pare_down_vp_by_segment:103 - merge usable vp with sjoin results: 0:00:01.446362 +2023-09-21 19:05:58.673 | INFO | __main__:pare_down_vp_by_segment:112 - keep enter/exit points: 0:00:00.087041 +2023-09-21 19:14:00.946 | INFO | __main__:pare_down_vp_by_segment:123 - exported: 0:08:02.272685 +2023-09-21 19:14:00.947 | INFO | __main__::148 - pare down vp by stop segments for all cases 0:08:03.807576 +2023-09-21 19:14:00.947 | INFO | __main__::151 - execution time: 0:08:03.812106 diff --git a/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py b/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py index a7e51b834..4078478f9 100644 --- a/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py +++ b/rt_segment_speeds/scripts/B1_speeds_by_segment_trip.py @@ -170,7 +170,8 @@ def recalculate_low_speeds_with_straight_distance( usable_vp = dd.read_parquet( f"{SEGMENT_GCS}vp_usable_{analysis_date}", - columns = ["trip_instance_key", "vp_idx", timestamp_col, "x", "y"] + columns = ["trip_instance_key", + "vp_idx", timestamp_col, "x", "y"] ) vp_idx_bounds = segment_calcs.get_usable_vp_bounds_by_trip(usable_vp) @@ -269,6 +270,7 @@ def linear_referencing_and_speed_by_segment( SEGMENT_IDENTIFIER_COLS = dict_inputs["segment_identifier_cols"] TIMESTAMP_COL = dict_inputs["timestamp_col"] EXPORT_FILE = dict_inputs["stage4"] + PCT_SEGMENT_MIN = dict_inputs["pct_segment_minimum"] # Keep subset of columns - don't need it all. we can get the # columns dropped through segments file @@ -312,7 +314,7 @@ def linear_referencing_and_speed_by_segment( ok_speeds, low_speeds = filter_for_unstable_speeds( initial_speeds, - pct_segment_threshold = 0.3 + pct_segment_threshold = PCT_SEGMENT_MIN ) low_speeds_recalculated = recalculate_low_speeds_with_straight_distance( diff --git a/rt_segment_speeds/scripts/B2_avg_speeds_by_segment.py b/rt_segment_speeds/scripts/B2_avg_speeds_by_segment.py index b06b0894b..657c9bdd8 100644 --- a/rt_segment_speeds/scripts/B2_avg_speeds_by_segment.py +++ b/rt_segment_speeds/scripts/B2_avg_speeds_by_segment.py @@ -29,7 +29,7 @@ def calculate_avg_speeds( avg = (grouped_df .agg({ "speed_mph": "median", - "trip_id": "nunique"}) + "trip_instance_key": "nunique"}) .reset_index() ) @@ -45,7 +45,7 @@ def calculate_avg_speeds( stats = pd.merge( avg.rename(columns = {"speed_mph": "p50_mph", - "trip_id": "n_trips"}), + "trip_instance_key": "n_trips"}), p20.rename(columns = {"speed_mph": "p20_mph"}), on = group_cols, how = "left" @@ -65,7 +65,6 @@ def speeds_with_segment_geom( analysis_date: str, max_speed_cutoff: int = 70, dict_inputs: dict = {}, - percent_segment_covered:float = 0.40, ) -> gpd.GeoDataFrame: """ Import the segment-trip table. @@ -90,53 +89,45 @@ def speeds_with_segment_geom( f"{SEGMENT_FILE}_{analysis_date}", columns = segment_cols_to_keep ) - - # CRS is 3310, calculate the length - segments["segment_length"] = segments.geometry.length - + # Read in speeds df = pd.read_parquet( - f"{SEGMENT_GCS}{SPEEDS_FILE}_{analysis_date}", - filters = [[("speed_mph", "<=", max_speed_cutoff)]]) + f"{SEGMENT_GCS}{SPEEDS_FILE}_{analysis_date}.parquet", + filters = [[ + ("speed_mph", "<=", max_speed_cutoff), + ("meters_elapsed", ">", 0), + ("sec_elapsed", ">", 0) + ]]) # Do a merge with segments - merge_cols = ['shape_array_key','stop_sequence','schedule_gtfs_dataset_key'] - df2 = pd.merge(segments, df, on = merge_cols, how = "inner") + df2 = pd.merge( + segments, + df, + on = SEGMENT_IDENTIFIER_COLS, + how = "inner" + ) # Keep only segments that have RT data. unique_segments = (df2[segment_cols_to_keep] .drop_duplicates() .reset_index(drop = True) - ) - - # Find percentage of meters elapsed vs. total segment length - df2 = df2.assign( - pct_seg = df2.meters_elapsed.divide(df2.segment_length) - ) - - # Filter out abnormally high and low speeds - # Threshold defaults to throwing away the bottom 20% of rows with low speeds - df3 = df2[(df2.pct_seg >= percent_segment_covered) & - (df2.speed_mph.notna()) & - (df2.sec_elapsed > 0) & - (df2.meters_elapsed > 0) - ] + ).to_crs(geography_utils.WGS84) time_of_day_df = sched_rt_utils.get_trip_time_buckets(analysis_date) - df4 = pd.merge( - df3, + df3 = pd.merge( + df2, time_of_day_df, on = "trip_instance_key", how = "inner" ) all_day = calculate_avg_speeds( - df4, + df3, SEGMENT_IDENTIFIER_COLS ) peak = calculate_avg_speeds( - df4[df4.time_of_day.isin(["AM Peak", "PM Peak"])], + df3[df3.time_of_day.isin(["AM Peak", "PM Peak"])], SEGMENT_IDENTIFIER_COLS ) @@ -145,29 +136,14 @@ def speeds_with_segment_geom( peak.assign(time_of_day = "peak") ], axis=0) - # Merge in segment geometry with a changed CRS - unique_segments = unique_segments.to_crs(geography_utils.WGS84) - - # Merge in segment geometry - segments = helpers.import_segments( - SEGMENT_GCS, - f"{SEGMENT_FILE}_{analysis_date}", - columns = SEGMENT_IDENTIFIER_COLS + [ - "schedule_gtfs_dataset_key", - "stop_id", - "loop_or_inlining", - "geometry", - "district_name" - ] - ).to_crs(geography_utils.WGS84) + # Merge in segment geometry gdf = pd.merge( unique_segments, stats, on = SEGMENT_IDENTIFIER_COLS, how = "left" - ) - + ).sort_values(SEGMENT_IDENTIFIER_COLS + ["time_of_day"]).reset_index(drop=True) return gdf @@ -187,7 +163,6 @@ def speeds_with_segment_geom( EXPORT_FILE = f'{STOP_SEG_DICT["stage5"]}_{analysis_date}' MAX_SPEED = 70 - MIN_SEGMENT_PERCENT = 0.40 # Average the speeds for segment for entire day # Drop speeds above our max cutoff @@ -195,7 +170,6 @@ def speeds_with_segment_geom( analysis_date, max_speed_cutoff = MAX_SPEED, dict_inputs = STOP_SEG_DICT, - percent_segment_covered = MIN_SEGMENT_PERCENT ) utils.geoparquet_gcs_export( diff --git a/rt_segment_speeds/scripts/B3_export.py b/rt_segment_speeds/scripts/B3_export.py index d51637287..17147560c 100644 --- a/rt_segment_speeds/scripts/B3_export.py +++ b/rt_segment_speeds/scripts/B3_export.py @@ -137,7 +137,7 @@ def finalize_df_for_export(gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame: final_gdf.drop(columns = "geometry").to_parquet( f"{SEGMENT_GCS}export/{INPUT_FILE}_tabular.parquet" ) - + utils.geoparquet_gcs_export( final_gdf[keep_cols], f"{SEGMENT_GCS}export/", diff --git a/rt_segment_speeds/scripts/Makefile b/rt_segment_speeds/scripts/Makefile index d0ae2caa9..f81ef8ebb 100644 --- a/rt_segment_speeds/scripts/Makefile +++ b/rt_segment_speeds/scripts/Makefile @@ -12,11 +12,11 @@ speeds_pipeline: python A2_sjoin_postprocessing.py python A3_valid_vehicle_positions.py python B1_speeds_by_segment_trip.py - #python B2_avg_speeds_by_segment.py - #python B3_export.py + python B2_avg_speeds_by_segment.py + python B3_export.py #python C2_triangulate_vp.py #python C3_trip_route_speed.py - + download_roads: #pip install esridump diff --git a/rt_segment_speeds/scripts/config.yml b/rt_segment_speeds/scripts/config.yml index 0e58d7c31..e69d68a9e 100644 --- a/rt_segment_speeds/scripts/config.yml +++ b/rt_segment_speeds/scripts/config.yml @@ -24,4 +24,5 @@ stop_segments: grouping_col: "shape_array_key" segment_identifier_cols: ["shape_array_key", "stop_sequence"] timestamp_col: "location_timestamp_local" - time_min_cutoff: 10 \ No newline at end of file + time_min_cutoff: 10 + pct_segment_minimum: 0.3 \ No newline at end of file