|
10 | 10 | import yaml
|
11 | 11 | from utils.constants import DOCKER_INTERNAL_PATH
|
12 | 12 | from utils.logging import create_logger
|
13 |
| -from utils.times import TravelTimeCalculator, TravelTimeConfig |
| 13 | +from utils.times import ( |
| 14 | + TravelTimeCalculator, |
| 15 | + TravelTimeConfig, |
| 16 | + TravelTimeInputs, |
| 17 | +) |
14 | 18 | from utils.utils import format_time, get_md5_hash
|
15 | 19 |
|
16 | 20 | logger = create_logger(__name__)
|
@@ -75,15 +79,48 @@ def main() -> None:
|
75 | 79 |
|
76 | 80 | # Extract missing pairs to a separate DataFrame
|
77 | 81 | missing_pairs_df = results_df[results_df["duration_sec"].isnull()]
|
78 |
| - missing_pairs_df = ( |
79 |
| - pd.DataFrame(missing_pairs_df) |
80 |
| - .drop(columns=["duration_sec", "distance_km"]) |
81 |
| - .sort_values(by=["origin_id", "destination_id"]) |
82 |
| - ) |
| 82 | + |
| 83 | + # If there are missing pairs, rerun the routing for only those pairs |
| 84 | + # using a more aggressive (but time-consuming) second-pass approach |
| 85 | + if len(missing_pairs_df) > 0: |
| 86 | + logger.info( |
| 87 | + "Found %s missing pairs. Rerouting with a more aggressive method", |
| 88 | + len(missing_pairs_df), |
| 89 | + ) |
| 90 | + actor_sp = valhalla.Actor((Path.cwd() / "valhalla_sp.json").as_posix()) |
| 91 | + |
| 92 | + # Create new inputs, keeping only origins/destinations in unroutable pairs |
| 93 | + inputs_sp = TravelTimeInputs( |
| 94 | + origins=inputs.origins[ |
| 95 | + inputs.origins["id"].isin( |
| 96 | + missing_pairs_df.index.get_level_values("origin_id") |
| 97 | + ) |
| 98 | + ].reset_index(drop=True), |
| 99 | + destinations=inputs.destinations[ |
| 100 | + inputs.destinations["id"].isin( |
| 101 | + missing_pairs_df.index.get_level_values("destination_id") |
| 102 | + ) |
| 103 | + ].reset_index(drop=True), |
| 104 | + chunk=None, |
| 105 | + max_split_size_origins=inputs.max_split_size_origins, |
| 106 | + max_split_size_destinations=inputs.max_split_size_destinations, |
| 107 | + ) |
| 108 | + |
| 109 | + # Route using the more aggressive settings and update the results |
| 110 | + tt_calc_sp = TravelTimeCalculator(actor_sp, config, inputs_sp) |
| 111 | + results_df.update(tt_calc_sp.get_times()) |
| 112 | + |
| 113 | + # Extract the missing pairs again since they may have changed |
| 114 | + missing_pairs_df = results_df[results_df["duration_sec"].isnull()] |
83 | 115 |
|
84 | 116 | # Drop missing pairs and sort for more efficient compression
|
85 |
| - results_df = results_df.dropna(subset=["duration_sec"]).sort_values( |
86 |
| - by=["origin_id", "destination_id"] |
| 117 | + missing_pairs_df = ( |
| 118 | + missing_pairs_df.drop(columns=["duration_sec", "distance_km"]) |
| 119 | + .sort_index() |
| 120 | + .reset_index() |
| 121 | + ) |
| 122 | + results_df = ( |
| 123 | + results_df.dropna(subset=["duration_sec"]).sort_index().reset_index() |
87 | 124 | )
|
88 | 125 |
|
89 | 126 | # Loop through files and write to both local and remote paths
|
|
0 commit comments