diff --git a/_shared_utils/shared_utils/gtfs_analytics_data.yml b/_shared_utils/shared_utils/gtfs_analytics_data.yml index 633e2a91a..32c574344 100644 --- a/_shared_utils/shared_utils/gtfs_analytics_data.yml +++ b/_shared_utils/shared_utils/gtfs_analytics_data.yml @@ -64,7 +64,7 @@ digest_tables: stop_segments: dir: ${gcs_paths.SEGMENT_GCS} - stage1: ${speeds_tables.usable_vp_file} + stage1: ${speeds_tables.usable_vp} stage2: "nearest/nearest_vp_shape_segments" stage3: "stop_arrivals" stage4: "speeds_stop_segments" @@ -77,7 +77,7 @@ stop_segments: rt_stop_times: dir: ${gcs_paths.SEGMENT_GCS} - stage1: ${speeds_tables.usable_vp_file} + stage1: ${speeds_tables.usable_vp} stage2: "nearest/nearest_vp_rt_stop_times" stage3: "rt_stop_times/stop_arrivals" stage4: "rt_stop_times/speeds" @@ -92,7 +92,7 @@ rt_stop_times: road_segments: dir: ${gcs_paths.SEGMENT_GCS} - stage1: ${speeds_tables.usable_vp_file} + stage1: ${speeds_tables.usable_vp} stage2: "nearest/nearest_vp_roads" stage3: "road_segments/stop_arrivals" stage4: "speeds_road_segments" diff --git a/_shared_utils/shared_utils/rt_dates.py b/_shared_utils/shared_utils/rt_dates.py index 379b251a8..93c4de5cd 100644 --- a/_shared_utils/shared_utils/rt_dates.py +++ b/_shared_utils/shared_utils/rt_dates.py @@ -52,6 +52,7 @@ "jan2024": "2024-01-17", "feb2024": "2024-02-14", "mar2024": "2024-03-13", + "apr2024": "2024-04-17", } y2023_dates = [ diff --git a/gtfs_funnel/Makefile b/gtfs_funnel/Makefile index 3deead58f..4d7a52596 100644 --- a/gtfs_funnel/Makefile +++ b/gtfs_funnel/Makefile @@ -16,7 +16,7 @@ preprocess: python cleanup.py python vp_condenser.py make route_typologies_data - python operator_scheduled_stats.py + python operator_scheduled_stats.py route_typologies_data: @@ -36,4 +36,4 @@ monthly_scheduled_data: funnel_gtfs_data: - make download_gtfs_data && make preprocess && make timeseries_preprocessing \ No newline at end of file + make download_gtfs_data && make preprocess && make timeseries_preprocessing diff --git a/gtfs_funnel/cleanup.py b/gtfs_funnel/cleanup.py index a9d71ff2a..db150d8ec 100644 --- a/gtfs_funnel/cleanup.py +++ b/gtfs_funnel/cleanup.py @@ -12,7 +12,7 @@ for analysis_date in analysis_date_list: - INPUT_FILE = GTFS_DATA_DICT.speed_tables.usable_vp + INPUT_FILE = GTFS_DATA_DICT.speeds_tables.usable_vp helpers.if_exists_then_delete( f"{SEGMENT_GCS}{INPUT_FILE}_{analysis_date}_stage" diff --git a/gtfs_funnel/logs/download_data.log b/gtfs_funnel/logs/download_data.log index 502032847..2b3e62837 100644 --- a/gtfs_funnel/logs/download_data.log +++ b/gtfs_funnel/logs/download_data.log @@ -320,3 +320,19 @@ 2024-03-14 11:44:27.599 | INFO | __main__:download_one_day:56 - execution time: 0:01:22.625555 2024-03-19 16:50:51.742 | INFO | __main__:download_one_year:35 - execution time: 0:00:43.062868 2024-04-02 09:03:24.949 | INFO | __main__:download_one_year:35 - execution time: 0:00:14.017626 +2024-04-18 10:27:32.761 | INFO | __main__:download_one_day:45 - Analysis date: 2024-04-17 +2024-04-18 10:27:35.561 | INFO | __main__:download_one_day:52 - # operators to run: 198 +2024-04-18 10:27:35.562 | INFO | __main__:download_one_day:56 - *********** Download trips data *********** +2024-04-18 10:28:15.325 | INFO | __main__:download_one_day:86 - execution time: 0:00:42.561678 +2024-04-18 10:28:35.712 | INFO | __main__:download_one_day:22 - Analysis date: 2024-04-17 +2024-04-18 10:28:38.053 | INFO | __main__:download_one_day:29 - # operators to run: 198 +2024-04-18 10:28:38.054 | INFO | __main__:download_one_day:33 - *********** Download stops data *********** +2024-04-18 10:28:51.192 | INFO | __main__:download_one_day:64 - execution time: 0:00:15.478316 +2024-04-18 10:29:14.197 | INFO | __main__:download_one_day:22 - Analysis date: 2024-04-17 +2024-04-18 10:29:16.683 | INFO | __main__:download_one_day:29 - # operators to run: 198 +2024-04-18 10:29:16.683 | INFO | __main__:download_one_day:33 - *********** Download routelines data *********** +2024-04-18 10:32:02.705 | INFO | __main__:download_one_day:63 - execution time: 0:02:48.507170 +2024-04-18 10:32:26.078 | INFO | __main__:download_one_day:21 - Analysis date: 2024-04-17 +2024-04-18 10:32:27.682 | INFO | __main__:download_one_day:29 - # operators to run: 171 +2024-04-18 10:32:27.683 | INFO | __main__:download_one_day:33 - *********** Download st data *********** +2024-04-18 10:34:57.448 | INFO | __main__:download_one_day:56 - execution time: 0:02:31.368995 diff --git a/gtfs_funnel/logs/download_vp_v2.log b/gtfs_funnel/logs/download_vp_v2.log index c555431b9..b312d0a69 100644 --- a/gtfs_funnel/logs/download_vp_v2.log +++ b/gtfs_funnel/logs/download_vp_v2.log @@ -207,3 +207,14 @@ 2024-03-14 11:58:41.151 | INFO | __main__::110 - export concatenated vp: 0:03:05.913001 2024-03-14 12:01:43.033 | INFO | __main__::132 - remove batched parquets 2024-03-14 12:01:43.035 | INFO | __main__::135 - execution time: 0:06:14.791580 +2024-04-18 10:35:22.856 | INFO | __main__::148 - Analysis date: 2024-04-17 +2024-04-18 10:38:25.816 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 0 to GCS: 0:03:02.950348 +2024-04-18 10:39:56.253 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 1 to GCS: 0:01:30.434267 +2024-04-18 10:45:13.007 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 2 to GCS: 0:05:16.754172 +2024-04-18 10:47:08.452 | INFO | __main__:loop_through_batches_and_download_vp:111 - exported batch 3 to GCS: 0:01:55.425286 +2024-04-18 10:47:08.453 | INFO | __main__::155 - execution time: 0:11:45.586863 +2024-04-18 10:47:29.679 | INFO | __main__::97 - Analysis date: 2024-04-17 +2024-04-18 10:47:37.286 | INFO | __main__::105 - concat and filter batched data: 0:00:07.606835 +2024-04-18 10:51:44.225 | INFO | __main__::112 - export concatenated vp: 0:04:06.939409 +2024-04-18 10:56:04.569 | INFO | __main__::134 - remove batched parquets +2024-04-18 10:56:04.570 | INFO | __main__::137 - execution time: 0:08:34.890652 diff --git a/gtfs_funnel/logs/vp_preprocessing.log b/gtfs_funnel/logs/vp_preprocessing.log index f678e8d84..9fe06b202 100644 --- a/gtfs_funnel/logs/vp_preprocessing.log +++ b/gtfs_funnel/logs/vp_preprocessing.log @@ -20,3 +20,11 @@ 2024-03-14 12:16:08.742 | INFO | __main__::202 - 2024-03-13: vp_direction script execution time: 0:07:35.556127 2024-03-14 12:43:58.062 | INFO | __main__::153 - 2024-03-13: condense vp for trip 0:04:45.267623 2024-03-14 12:56:43.421 | INFO | __main__::161 - 2024-03-13: prepare vp to use in nearest neighbor: 0:12:45.358549 +2024-04-18 11:04:43.747 | INFO | __main__::169 - 2024-04-17: pare down vp: 0:02:22.947760 +2024-04-18 11:08:55.978 | INFO | __main__:attach_prior_vp_add_direction:90 - persist vp gddf: 0:03:51.080485 +2024-04-18 11:13:14.285 | INFO | __main__:attach_prior_vp_add_direction:122 - np vectorize arrays for direction: 0:04:18.306848 +2024-04-18 11:13:22.726 | INFO | __main__::194 - 2024-04-17: export vp direction: 0:08:17.828237 +2024-04-18 11:14:49.190 | INFO | __main__::200 - 2024-04-17: export usable vp with direction: 0:01:26.463779 +2024-04-18 11:14:49.191 | INFO | __main__::203 - 2024-04-17: vp_direction script execution time: 0:09:44.292016 +2024-04-18 11:34:37.603 | INFO | __main__::120 - 2024-04-17: condense vp for trip 0:06:16.809244 +2024-04-18 11:52:47.882 | INFO | __main__::128 - 2024-04-17: prepare vp to use in nearest neighbor: 0:18:10.278588 diff --git a/gtfs_funnel/operator_scheduled_stats.py b/gtfs_funnel/operator_scheduled_stats.py index 1d5287675..48c233c48 100644 --- a/gtfs_funnel/operator_scheduled_stats.py +++ b/gtfs_funnel/operator_scheduled_stats.py @@ -121,26 +121,19 @@ def operator_typology_breakdown(df: pd.DataFrame) -> pd.DataFrame: Get a count of how many routes (not route-dir) have a certain primary typology. """ - df2 = (df - .groupby( - ["schedule_gtfs_dataset_key", "primary_typology"]) - .agg({"route_id": "nunique"}) - .reset_index() - ) - - df_wide = df2.pivot( - index="schedule_gtfs_dataset_key", - columns = "primary_typology", - values="route_id" - ).reset_index().fillna(0) - - typology_values = ["downtown_local", "local", - "rapid", "coverage"] - - df_wide[typology_values] = df_wide[typology_values].astype(int) + typology_values = [ + f"is_{i}" for i in + ["downtown_local", "local", "rapid", "coverage"] + ] - rename_dict = {old_name: f"n_{old_name}_routes" + df_wide = (df.groupby("schedule_gtfs_dataset_key") + .agg({**{c: "sum" for c in typology_values}}) + .reset_index() + ) + + rename_dict = {old_name: f"n_{old_name.replace('is_', '')}_routes" for old_name in typology_values} + df_wide = df_wide.rename(columns = rename_dict) return df_wide @@ -152,7 +145,7 @@ def operator_typology_breakdown(df: pd.DataFrame) -> pd.DataFrame: ROUTE_TYPOLOGY = GTFS_DATA_DICT.schedule_tables.route_typologies OPERATOR_EXPORT = GTFS_DATA_DICT.schedule_tables.operator_scheduled_stats - OPERATOR_ROUTE_EXPORT = GTFS_DATA_DICT.schedule_tables.operator_route + OPERATOR_ROUTE_EXPORT = GTFS_DATA_DICT.schedule_tables.operator_routes for analysis_date in analysis_date_list: start = datetime.datetime.now() diff --git a/gtfs_funnel/route_typologies.py b/gtfs_funnel/route_typologies.py index fd7610db5..6cf71efd5 100644 --- a/gtfs_funnel/route_typologies.py +++ b/gtfs_funnel/route_typologies.py @@ -244,7 +244,7 @@ def primary_secondary_typology( start = datetime.datetime.now() - #roads = delayed(prep_roads)(GTFS_DATA_DICT) + roads = delayed(prep_roads)(GTFS_DATA_DICT) ROAD_BUFFER_METERS = 20 TYPOLOGY_THRESHOLD = 0.10 diff --git a/gtfs_funnel/update_vars.py b/gtfs_funnel/update_vars.py index 9bc49defd..9db8a39ab 100644 --- a/gtfs_funnel/update_vars.py +++ b/gtfs_funnel/update_vars.py @@ -4,7 +4,7 @@ rt_dates.oct_week + rt_dates.apr_week) analysis_date_list = [ - rt_dates.DATES["mar2024"] + rt_dates.DATES["apr2024"] ] GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data") diff --git a/high_quality_transit_areas/logs/hqta_processing.log b/high_quality_transit_areas/logs/hqta_processing.log index fcbefd361..8d9a1ee54 100644 --- a/high_quality_transit_areas/logs/hqta_processing.log +++ b/high_quality_transit_areas/logs/hqta_processing.log @@ -87,3 +87,10 @@ 2024-03-21 12:04:01.449 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-03-13 execution time: 0:00:19.553787 2024-03-21 12:04:42.807 | INFO | __main__::295 - D1_assemble_hqta_points 2024-03-13 execution time: 0:00:22.988739 2024-03-21 12:05:20.102 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-03-13 execution time: 0:00:19.166756 +2024-04-18 12:02:44.870 | INFO | __main__::354 - A1_rail_ferry_brt_stops 2024-04-17 execution time: 0:00:59.115933 +2024-04-18 12:09:06.425 | INFO | __main__::256 - B2_sjoin_stops_to_segments 2024-04-17 execution time: 0:00:50.678918 +2024-04-18 12:09:36.340 | INFO | __main__::142 - C1_prep_pairwise_intersections 2024-04-17 execution time: 0:00:07.719892 +2024-04-18 12:10:31.226 | INFO | __main__::125 - C2_find_intersections 2024-04-17 execution time: 0:00:33.802270 +2024-04-18 12:11:31.609 | INFO | __main__::163 - C3_create_bus_hqta_types 2024-04-17 execution time: 0:00:37.330690 +2024-04-18 12:12:28.853 | INFO | __main__::296 - D1_assemble_hqta_points 2024-04-17 execution time: 0:00:31.955298 +2024-04-18 12:13:36.294 | INFO | __main__::167 - D2_assemble_hqta_polygons 2024-04-17 execution time: 0:00:40.596021 diff --git a/high_quality_transit_areas/update_vars.py b/high_quality_transit_areas/update_vars.py index 4dd1ee5f2..f59fe393c 100644 --- a/high_quality_transit_areas/update_vars.py +++ b/high_quality_transit_areas/update_vars.py @@ -1,6 +1,6 @@ from shared_utils import rt_dates -analysis_date = rt_dates.DATES["mar2024"] +analysis_date = rt_dates.DATES["apr2024"] GCS_FILE_PATH = ("gs://calitp-analytics-data/data-analyses/" "high_quality_transit_areas/") diff --git a/open_data/update_vars.py b/open_data/update_vars.py index 1bb037036..1be161dc9 100644 --- a/open_data/update_vars.py +++ b/open_data/update_vars.py @@ -1,7 +1,7 @@ from pathlib import Path from shared_utils import rt_dates -analysis_date = rt_dates.DATES["mar2024"] +analysis_date = rt_dates.DATES["apr2024"] GCS_FILE_PATH = "gs://calitp-analytics-data/data-analyses/" COMPILED_CACHED_VIEWS = f"{GCS_FILE_PATH}rt_delay/compiled_cached_views/" diff --git a/rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log b/rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log index 0ee893322..8576a9fcb 100644 --- a/rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log +++ b/rt_scheduled_v_ran/logs/rt_v_scheduled_route_metrics.log @@ -49,3 +49,4 @@ 2024-03-29 12:11:15.513 | INFO | __main__:route_metrics:47 - route aggregation 2023-04-14: 0:00:01.444968 2024-03-29 12:11:16.851 | INFO | __main__:route_metrics:47 - route aggregation 2023-04-15: 0:00:01.334524 2024-03-29 12:11:18.456 | INFO | __main__:route_metrics:47 - route aggregation 2023-04-16: 0:00:01.601076 +2024-04-18 13:34:04.718 | INFO | __main__:route_metrics:74 - route aggregation 2024-04-17: 0:00:02.236708 diff --git a/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log b/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log index dd8b0f65d..8a7cc8332 100644 --- a/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log +++ b/rt_scheduled_v_ran/logs/rt_v_scheduled_trip_metrics.log @@ -397,3 +397,7 @@ 2024-03-29 12:01:25.746 | INFO | __main__:rt_schedule_trip_metrics:337 - Total run time for metrics on 2023-10-12: 0:01:03.980200 2024-03-29 12:02:28.269 | INFO | __main__:rt_schedule_trip_metrics:337 - Total run time for metrics on 2023-10-13: 0:01:02.513314 2024-03-29 12:03:14.207 | INFO | __main__:rt_schedule_trip_metrics:337 - Total run time for metrics on 2023-10-14: 0:00:45.927788 +2024-04-18 12:23:17.745 | INFO | __main__:rt_schedule_trip_metrics:280 - tabular trip metrics 2024-04-17: 0:04:28.804318 +2024-04-18 13:02:41.637 | INFO | __main__:rt_schedule_trip_metrics:280 - tabular trip metrics 2024-04-17: 0:03:45.566584 +2024-04-18 13:32:29.183 | INFO | __main__:rt_schedule_trip_metrics:285 - spatial trip metrics 2024-04-17: 0:29:47.546487 +2024-04-18 13:33:34.374 | INFO | __main__:rt_schedule_trip_metrics:333 - Total run time for metrics on 2024-04-17: 0:34:38.304007 diff --git a/rt_scheduled_v_ran/scripts/Makefile b/rt_scheduled_v_ran/scripts/Makefile index 7a41cb268..cb6a04631 100644 --- a/rt_scheduled_v_ran/scripts/Makefile +++ b/rt_scheduled_v_ran/scripts/Makefile @@ -4,5 +4,6 @@ rt_sched_pipeline: python rt_v_scheduled_routes.py +# this can be run after rt_segment_speeds make rt_stop_times_pipeline is run schedule_rt_stop_times_table: python rt_stop_times.py diff --git a/rt_scheduled_v_ran/scripts/rt_stop_times.py b/rt_scheduled_v_ran/scripts/rt_stop_times.py index 363d63da8..1570cb61a 100644 --- a/rt_scheduled_v_ran/scripts/rt_stop_times.py +++ b/rt_scheduled_v_ran/scripts/rt_stop_times.py @@ -6,7 +6,7 @@ import pandas as pd from segment_speed_utils import helpers, segment_calcs -from update_vars import SEGMENT_GCS, RT_SCHED_GCS +from update_vars import GTFS_DATA_DICT, SEGMENT_GCS, RT_SCHED_GCS def prep_scheduled_stop_times( analysis_date: str diff --git a/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py b/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py index 190c387cc..1b2c83b7d 100644 --- a/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py +++ b/rt_scheduled_v_ran/scripts/rt_v_scheduled_trip.py @@ -160,7 +160,7 @@ def buffer_shapes( crs=PROJECT_CRS, get_pandas=True, **kwargs - ).dropna( + ).pipe(helpers.remove_shapes_outside_ca).dropna( subset="geometry" ).query("shape_array_key not in @amtrak_outside_ca") diff --git a/rt_scheduled_v_ran/scripts/update_vars.py b/rt_scheduled_v_ran/scripts/update_vars.py index a31c37186..6f843ee48 100644 --- a/rt_scheduled_v_ran/scripts/update_vars.py +++ b/rt_scheduled_v_ran/scripts/update_vars.py @@ -4,7 +4,9 @@ oct_week = rt_dates.get_week("oct2023", exclude_wed=True) apr_week = rt_dates.get_week("apr2023", exclude_wed=True) -analysis_date_list = rt_dates.y2024_dates + rt_dates.y2023_dates + oct_week + apr_week +analysis_date_list = [ + rt_dates.DATES["apr2024"] +] GTFS_DATA_DICT = catalog_utils.get_catalog("gtfs_analytics_data") diff --git a/rt_segment_speeds/logs/avg_speeds.log b/rt_segment_speeds/logs/avg_speeds.log index 04b12af5b..a0e577382 100644 --- a/rt_segment_speeds/logs/avg_speeds.log +++ b/rt_segment_speeds/logs/avg_speeds.log @@ -132,3 +132,10 @@ 2024-03-28 18:45:04.105 | INFO | __main__::304 - average rollups for ['2023-10-09', '2023-10-10', '2023-10-11', '2023-10-12', '2023-10-13', '2023-10-14', '2023-10-15']: 0:07:22.839496 2024-03-28 18:52:09.308 | INFO | __main__:multi_day_segment_averages:240 - multi day segment execution time: 0:07:05.201271 2024-03-28 18:52:09.420 | INFO | __main__::304 - average rollups for ['2023-04-10', '2023-04-11', '2023-04-12', '2023-04-13', '2023-04-14', '2023-04-15', '2023-04-16']: 0:07:05.312950 +2024-04-18 15:42:58.828 | INFO | __main__:single_day_summary_averages:82 - trip avg 0:00:14.291404 +2024-04-18 15:43:09.833 | INFO | __main__:single_day_summary_averages:122 - route dir avg: 0:00:11.005300 +2024-04-18 15:43:09.834 | INFO | __main__:single_day_summary_averages:123 - single day summary speed execution time: 0:00:25.296704 +2024-04-18 15:43:09.912 | INFO | __main__::246 - average rollups for 2024-04-17: 0:00:25.384157 +2024-04-18 15:49:30.073 | INFO | __main__:single_day_segment_averages:167 - shape seg avg 0:05:04.862313 +2024-04-18 15:53:11.195 | INFO | __main__:single_day_segment_averages:183 - route dir seg avg 0:03:41.121253 +2024-04-18 15:53:11.196 | INFO | __main__:single_day_segment_averages:184 - single day segment execution time: 0:08:45.983566 diff --git a/rt_segment_speeds/logs/cut_stop_segments.log b/rt_segment_speeds/logs/cut_stop_segments.log index 689d5919a..bd5f3fc92 100644 --- a/rt_segment_speeds/logs/cut_stop_segments.log +++ b/rt_segment_speeds/logs/cut_stop_segments.log @@ -1,2 +1,3 @@ 0:15:29.6603982024-02-15 13:36:56.379 | INFO | __main__::156 - cut segments 2024-02-14: 0:13:55.835925 2024-03-14 13:33:18.528 | INFO | __main__::156 - cut segments 2024-03-13: 0:26:02.810762 +2024-04-18 12:44:11.808 | INFO | __main__::155 - cut segments 2024-04-17: 0:33:24.198635 diff --git a/rt_segment_speeds/logs/interpolate_stop_arrival.log b/rt_segment_speeds/logs/interpolate_stop_arrival.log index 5e358198e..5dcd6f608 100644 --- a/rt_segment_speeds/logs/interpolate_stop_arrival.log +++ b/rt_segment_speeds/logs/interpolate_stop_arrival.log @@ -1 +1,5 @@ 2024-02-15 14:18:47.082 | INFO | __main__:interpolate_stop_arrivals:110 - get stop arrivals 2024-02-14: 0:09:37.655118 +2024-04-18 14:30:37.401 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:203 - get stop arrivals 2024-04-17: 0:18:48.633416 +2024-04-18 14:36:29.213 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:214 - interpolate arrivals for stop_segments 2024-04-17: 2024-04-17: 0:24:40.445216 +2024-04-18 15:40:32.431 | INFO | interpolate_stop_arrival:interpolate_stop_arrivals:214 - interpolate arrivals for rt_stop_times 2024-04-17: 2024-04-17: 0:18:11.754862 +2024-04-18 15:42:16.358 | INFO | diff --git a/rt_segment_speeds/logs/nearest_vp.log b/rt_segment_speeds/logs/nearest_vp.log index 03724a3f0..ff8545a7e 100644 --- a/rt_segment_speeds/logs/nearest_vp.log +++ b/rt_segment_speeds/logs/nearest_vp.log @@ -1,2 +1,5 @@ 2024-02-15 13:48:26.270 | INFO | __main__:nearest_neighbor_shape_segments:203 - shape segments 2024-02-14: 0:10:31.474349 2024-02-15 14:08:52.783 | INFO | __main__:nearest_neighbor_rt_stop_times:135 - RT stop times 2024-02-14: 0:11:07.883083 +2024-04-18 13:19:11.258 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:134 - nearest neighbor for stop_segments 2024-04-17: 0:27:28.612602 +2024-04-18 15:22:16.322 | INFO | nearest_vp_to_stop:nearest_neighbor_for_stop:134 - nearest neighbor for rt_stop_times 2024-04-17: 0:17:44.714181 + diff --git a/rt_segment_speeds/logs/speeds_by_segment_trip.log b/rt_segment_speeds/logs/speeds_by_segment_trip.log index 1fdc5243f..0b7875449 100644 --- a/rt_segment_speeds/logs/speeds_by_segment_trip.log +++ b/rt_segment_speeds/logs/speeds_by_segment_trip.log @@ -1,3 +1,5 @@ 2024-02-15 14:19:36.965 | INFO | __main__:calculate_speed_from_stop_arrivals:132 - speeds by segment: 2024-02-14: 0:00:31.989535 2024-02-15 14:45:36.035 | INFO | __main__:calculate_speed_from_stop_arrivals:132 - speeds by segment: 2024-01-17: 0:00:32.109610 2024-03-28 13:11:29.272 | INFO | __main__:calculate_speed_from_stop_arrivals:148 - speeds by segment for stop_segments 2024-03-13: 0:01:36.402022 +2024-04-18 15:04:52.606 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:148 - speeds by segment for stop_segments 2024-04-17: 0:01:48.092886 +2024-04-18 15:42:16.358 | INFO | stop_arrivals_to_speed:calculate_speed_from_stop_arrivals:148 - speeds by segment for rt_stop_times 2024-04-17: 0:01:43.666310 diff --git a/rt_segment_speeds/scripts/average_segment_speeds.py b/rt_segment_speeds/scripts/average_segment_speeds.py index f0a00b4fb..e49aa1189 100644 --- a/rt_segment_speeds/scripts/average_segment_speeds.py +++ b/rt_segment_speeds/scripts/average_segment_speeds.py @@ -8,7 +8,6 @@ from dask import delayed, compute from loguru import logger -from pathlib import Path from typing import Literal from calitp_data_analysis.geography_utils import WGS84 @@ -240,33 +239,6 @@ def multi_day_segment_averages(analysis_date_list: list, dict_inputs: dict): logger.info(f"multi day segment execution time: {end - start}") return - -def stage_open_data_exports(analysis_date: str, dict_inputs: dict): - """ - For the datasets we publish to Geoportal, - export them to a stable GCS URL so we can always - read it in open_data/catalog.yml. - """ - datasets = [ - dict_inputs["route_dir_single_segment"], - dict_inputs["route_dir_single_summary"] - ] - - for d in datasets: - gdf = gpd.read_parquet( - f"{SEGMENT_GCS}{d}_{analysis_date}.parquet" - ) - - utils.geoparquet_gcs_export( - gdf, - f"{SEGMENT_GCS}export/", - f"{Path(d).stem}" - ) - del gdf - - print(f"overwrite {datasets}") - - return if __name__ == "__main__": @@ -287,7 +259,6 @@ def stage_open_data_exports(analysis_date: str, dict_inputs: dict): start = datetime.datetime.now() single_day_segment_averages(analysis_date, STOP_SEG_DICT) - stage_open_data_exports(analysis_date, STOP_SEG_DICT) end = datetime.datetime.now() diff --git a/rt_segment_speeds/scripts/cut_stop_segments.py b/rt_segment_speeds/scripts/cut_stop_segments.py index d7f778460..a04bc295c 100644 --- a/rt_segment_speeds/scripts/cut_stop_segments.py +++ b/rt_segment_speeds/scripts/cut_stop_segments.py @@ -16,7 +16,7 @@ from calitp_data_analysis import utils from calitp_data_analysis.geography_utils import WGS84 -from segment_speed_utils import gtfs_schedule_wrangling +from segment_speed_utils import gtfs_schedule_wrangling, helpers from update_vars import GTFS_DATA_DICT, SEGMENT_GCS from segment_speed_utils.project_vars import PROJECT_CRS diff --git a/rt_segment_speeds/scripts/interpolate_stop_arrival.py b/rt_segment_speeds/scripts/interpolate_stop_arrival.py index cfbf51668..7b9a8111c 100644 --- a/rt_segment_speeds/scripts/interpolate_stop_arrival.py +++ b/rt_segment_speeds/scripts/interpolate_stop_arrival.py @@ -12,7 +12,7 @@ from pathlib import Path from typing import Literal, Optional -from segment_speed_utils import (array_utils, +from segment_speed_utils import (array_utils, helpers, segment_calcs, wrangle_shapes) from update_vars import SEGMENT_GCS, GTFS_DATA_DICT from segment_speed_utils.project_vars import PROJECT_CRS, SEGMENT_TYPES diff --git a/rt_segment_speeds/scripts/pipe.py b/rt_segment_speeds/scripts/pipe.py index 6754d7429..d2464a626 100644 --- a/rt_segment_speeds/scripts/pipe.py +++ b/rt_segment_speeds/scripts/pipe.py @@ -4,6 +4,8 @@ interpolate_stop_arrivals.py, and calculate_speed_from_stop_arrivals.py """ +import sys +from loguru import logger from pathlib import Path from typing import Literal, Optional @@ -24,6 +26,7 @@ def nearest_neigbor_to_speed( interpolation of stop arrival, deriving segment speeds between stops. """ + LOG_FILE = "../logs/nearest_vp.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, @@ -47,7 +50,7 @@ def nearest_neigbor_to_speed( segment_type = segment_type, config_path = config_path ) - + LOG_FILE = "../logs/speeds_by_segment_trip.log" logger.add(LOG_FILE, retention="3 months") logger.add(sys.stderr, diff --git a/rt_segment_speeds/scripts/publish_open_data.py b/rt_segment_speeds/scripts/publish_open_data.py new file mode 100644 index 000000000..5f6922368 --- /dev/null +++ b/rt_segment_speeds/scripts/publish_open_data.py @@ -0,0 +1,46 @@ +""" +Export speeds for open data portal. +""" +import geopandas as gpd +import pandas as pd + +from pathlib import Path + +from calitp_data_analysis import utils +from update_vars import GTFS_DATA_DICT, SEGMENT_GCS + + +def stage_open_data_exports(analysis_date: str): + """ + For the datasets we publish to Geoportal, + export them to a stable GCS URL so we can always + read it in open_data/catalog.yml. + """ + datasets = [ + GTFS_DATA_DICT.stop_segments.route_dir_single_segment, + GTFS_DATA_DICT.rt_stop_times.route_dir_single_summary + ] + + for d in datasets: + gdf = gpd.read_parquet( + f"{SEGMENT_GCS}{d}_{analysis_date}.parquet" + ) + + utils.geoparquet_gcs_export( + gdf, + f"{SEGMENT_GCS}export/", + f"{Path(d).stem}" + ) + + print(f"overwrite {datasets}") + + return + + +if __name__ == "__main__": + + from segment_speed_utils.project_vars import analysis_date_list + + for analysis_date in analysis_date_list: + + stage_open_data_exports(analysis_date) diff --git a/rt_segment_speeds/scripts/stop_arrivals_to_speed.py b/rt_segment_speeds/scripts/stop_arrivals_to_speed.py index 8fa0c3418..3d203450c 100644 --- a/rt_segment_speeds/scripts/stop_arrivals_to_speed.py +++ b/rt_segment_speeds/scripts/stop_arrivals_to_speed.py @@ -51,7 +51,7 @@ def attach_operator_natural_identifiers( if segment_type == "stop_segments": SEGMENT_FILE = GTFS_DATA_DICT[segment_type].segments_file trip_used_for_shape = pd.read_parquet( - f"{SEGMENT_GCS}segment_options/" + f"{SEGMENT_GCS}" f"{SEGMENT_FILE}_{analysis_date}.parquet", columns = ["st_trip_instance_key"] ).st_trip_instance_key.unique() diff --git a/rt_segment_speeds/segment_speed_utils/project_vars.py b/rt_segment_speeds/segment_speed_utils/project_vars.py index 4bc9a96c2..2bde6761d 100644 --- a/rt_segment_speeds/segment_speed_utils/project_vars.py +++ b/rt_segment_speeds/segment_speed_utils/project_vars.py @@ -11,7 +11,7 @@ SHARED_GCS = GTFS_DATA_DICT.gcs_paths.SHARED_GCS PUBLIC_GCS = GTFS_DATA_DICT.gcs_paths.PUBLIC_GCS -analysis_date = rt_dates.DATES["mar2024"] +analysis_date = rt_dates.DATES["apr2024"] oct_week = rt_dates.get_week("oct2023", exclude_wed=True) apr_week = rt_dates.get_week("apr2023", exclude_wed=True)