Skip to content

Commit 9e94055

Browse files
committed
Add second pass for times calculation
1 parent 44f99d2 commit 9e94055

File tree

3 files changed

+50
-9
lines changed

3 files changed

+50
-9
lines changed

data/src/calculate_times.py

Lines changed: 45 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,11 @@
1010
import yaml
1111
from utils.constants import DOCKER_INTERNAL_PATH
1212
from utils.logging import create_logger
13-
from utils.times import TravelTimeCalculator, TravelTimeConfig
13+
from utils.times import (
14+
TravelTimeCalculator,
15+
TravelTimeConfig,
16+
TravelTimeInputs,
17+
)
1418
from utils.utils import format_time, get_md5_hash
1519

1620
logger = create_logger(__name__)
@@ -75,15 +79,48 @@ def main() -> None:
7579

7680
# Extract missing pairs to a separate DataFrame
7781
missing_pairs_df = results_df[results_df["duration_sec"].isnull()]
78-
missing_pairs_df = (
79-
pd.DataFrame(missing_pairs_df)
80-
.drop(columns=["duration_sec", "distance_km"])
81-
.sort_values(by=["origin_id", "destination_id"])
82-
)
82+
83+
# If there are missing pairs, rerun the routing for only those pairs
84+
# using a more aggressive (but time consuming) second pass approach
85+
if len(missing_pairs_df) > 0:
86+
logger.info(
87+
"Found %s missing pairs. Rerouting with a more aggressive method",
88+
len(missing_pairs_df),
89+
)
90+
actor_sp = valhalla.Actor((Path.cwd() / "valhalla_sp.json").as_posix())
91+
92+
# Create a new input class, keeping only pairs that were unroutable
93+
inputs_sp = TravelTimeInputs(
94+
origins=inputs.origins[
95+
inputs.origins["id"].isin(
96+
missing_pairs_df.index.get_level_values("origin_id")
97+
)
98+
].reset_index(drop=True),
99+
destinations=inputs.destinations[
100+
inputs.destinations["id"].isin(
101+
missing_pairs_df.index.get_level_values("destination_id")
102+
)
103+
].reset_index(drop=True),
104+
chunk=None,
105+
max_split_size_origins=inputs.max_split_size_origins,
106+
max_split_size_destinations=inputs.max_split_size_destinations,
107+
)
108+
109+
# Route using the more aggressive settings and update the results
110+
tt_calc_sp = TravelTimeCalculator(actor_sp, config, inputs_sp)
111+
results_df.update(tt_calc_sp.get_times())
112+
113+
# Extract the missing pairs again since they may have changed
114+
missing_pairs_df = results_df[results_df["duration_sec"].isnull()]
83115

84116
# Drop missing pairs and sort for more efficient compression
85-
results_df = results_df.dropna(subset=["duration_sec"]).sort_values(
86-
by=["origin_id", "destination_id"]
117+
missing_pairs_df = (
118+
missing_pairs_df.drop(columns=["duration_sec", "distance_km"])
119+
.sort_index()
120+
.reset_index()
121+
)
122+
results_df = (
123+
results_df.dropna(subset=["duration_sec"]).sort_index().reset_index()
87124
)
88125

89126
# Loop through files and write to both local and remote paths

data/src/utils/times.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -507,7 +507,9 @@ def get_times(self) -> pd.DataFrame:
507507
)
508508
)
509509

510-
results_df = pd.concat(results, ignore_index=True)
510+
results_df = pd.concat(results, ignore_index=True).set_index(
511+
["origin_id", "destination_id"]
512+
)
511513
del results
512514

513515
return results_df

docker-compose.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@ services:
4545
- ./data/build/:/custom_files:rw
4646
- ./data/valhalla.json:/custom_files/valhalla.json:rw
4747
- ./data/valhalla.json:/data/valhalla.json:rw
48+
- ./data/valhalla_sp.json:/data/valhalla_sp.json:rw
49+
- ./data/valhalla_sp.json:/custom_files/valhalla_sp.json:rw
4850
- ./data/output:/data/output:rw
4951
# Read only volumes to serve data to the container
5052
- $HOME/.aws/credentials:/home/valhalla/.aws/credentials:ro

0 commit comments

Comments (0)