Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use estimated travel distances instead of euclidian distance #80

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions config/base.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,13 @@ optional_columns = [
]
n_matches = 10 # What is the maximum number of NTS matches we want for each SPC household?

[feasible_assignment]
# detour factor when converting euclidian distance to actual travel distance
detour_factor = 1.56
# decay rate when converting euclidian to travel distance (0.0001 is a good value)
# actual_distance = distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))
decay_rate = 0.0001

[work_assignment]
commute_level = "OA"
use_percentages = true # if true, optimization problem will try to minimize percentage difference at OD level (not absolute numbers). Recommended to set it to true
Expand Down
14 changes: 11 additions & 3 deletions scripts/3.1_assign_primary_feasible_zones.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,11 @@ def main(config_file):
logger.info("Creating estimated travel times matrix")
# Create a new travel time matrix based on distances between zones
travel_time_estimates = zones_to_time_matrix(
zones=boundaries, id_col=config.zone_id, time_units="m"
zones=boundaries,
id_col=config.zone_id,
time_units="m",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)
logger.info("Travel time estimates created")

Expand Down Expand Up @@ -222,7 +226,9 @@ def main(config_file):
zone_id=config.zone_id,
filter_by_activity=True,
activity_col="education_type",
time_tolerance=0.3,
time_tolerance=0.2,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for education activities")
Expand All @@ -249,7 +255,9 @@ def main(config_file):
zone_id=config.zone_id,
filter_by_activity=True,
activity_col="dact",
time_tolerance=0.3,
time_tolerance=0.2,
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
)

logger.info("Saving feasible zones for work activities")
Expand Down
12 changes: 9 additions & 3 deletions scripts/3.3_assign_facility_all.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,10 @@ def main(config_file):
x_col="TripDisIncSW",
y_col="length",
x_label="Reported Travel Distance (km)",
y_label="Actual Distance - Euclidian (km)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=50,
crs=f"EPSG:{config.output_crs}",
title_prefix=f"Scatter plot of TripDisIncSW vs. Length for {activity_type}",
save_dir=acbm.root_path / "data/processed/plots/assigning/",
Expand All @@ -344,8 +347,11 @@ def main(config_file):
activity_type_col="destination activity",
x_col="TripTotalTime",
y_col="length",
x_label="Reported Travel TIme (min)",
y_label="Actual Distance - Euclidian (km)",
x_label="Reported Travel Time (min)",
y_label="Actual Distance - Estimated (km)",
detour_factor=config.feasible_assignment.detour_factor,
decay_rate=config.feasible_assignment.decay_rate,
x_axis_max=180,
crs=f"EPSG:{config.output_crs}",
title_prefix="Scatter plot of TripTotalTime vs. Length",
save_dir=acbm.root_path / "data/processed/plots/assigning/",
Expand Down
2 changes: 2 additions & 0 deletions scripts/4_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,8 @@ def main(config_file):
start_wkt_col="start_location_geometry_wkt",
end_wkt_col="end_location_geometry_wkt",
crs_epsg=config.output_crs,
detour_factor=1.56,
decay_rate=0.0001,
)

# Plot: Aggregate
Expand Down
17 changes: 15 additions & 2 deletions src/acbm/assigning/feasible_zones_primary.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ def get_possible_zones(
travel_times: Optional[pd.DataFrame] = None,
filter_by_activity: bool = False,
time_tolerance: float = 0.2,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> dict:
"""
Get possible zones for all activity chains in the dataset. This function loops over the travel_times dataframe and filters by mode, time of day and weekday/weekend.
Expand Down Expand Up @@ -104,6 +106,13 @@ def get_possible_zones(
activity chain's travel time (which is stored in "TripTotalTime"). Allowable travel_times are those that fall in the range of:
travel_time_reported * (1 - time_tolerance) <= travel_time_reported <= travel_time_reported * (1 + time_tolerance)
Default = 0.2
detour_factor: float
The detour factor used to calculate travel distances between zones from euclidian distances.
Default = 1.56
decay_rate: float
The decay rate used to calculate travel times between zones from travel distances. Detours make up a smaller portion of
longer distance trips. Default = 0.0001


Returns
-------
Expand Down Expand Up @@ -135,7 +144,11 @@ def get_possible_zones(
if travel_times is None:
logger.info("Travel time matrix not provided: Creating travel times estimates")
travel_times = zones_to_time_matrix(
zones=boundaries, id_col=zone_id, time_units="m"
zones=boundaries,
id_col=zone_id,
time_units="m",
detour_factor=detour_factor,
decay_rate=decay_rate,
)

list_of_modes = activity_chains["mode"].unique()
Expand Down Expand Up @@ -224,7 +237,7 @@ def get_possible_zones(
travel_times_filtered_mode_time_day = travel_times_filtered_mode
# if mode = car_passenger, we compare to the travel time for car (we don't
# have travel times for car_passenger)
if mode == "car_passenger":
if mode in ("car_passenger", "taxi"):
activity_chains_filtered = activity_chains[
(activity_chains["mode"] == "car")
]
Expand Down
19 changes: 19 additions & 0 deletions src/acbm/assigning/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from matplotlib import patches as mpatches
from shapely.geometry import LineString

from acbm.assigning.utils import _adjust_distance


def plot_workzone_assignment_line(
assignment_results: pd.DataFrame,
Expand Down Expand Up @@ -387,7 +389,10 @@ def plot_scatter_actual_reported(
title_prefix: str,
activity_type: str,
activity_type_col: str,
detour_factor: float,
decay_rate: float,
crs: str,
x_axis_max: float,
save_dir: str | Path | None = None,
display: bool = False,
):
Expand All @@ -404,7 +409,10 @@ def plot_scatter_actual_reported(
- title_prefix: Prefix for the plot titles.
- activity_type: Type of activity to plot.
- activity_type_col: Column name for the activity type.
- detour_factor: Detour factor for the distance adjustment. (see `_adjust_distance`)
- decay_rate: Decay rate for the distance adjustment.
- crs: Coordinate reference system (CRS) of the activities data.
- x_axis_max: Maximum value for the x-axis. (use to remove very high reported distances / times)
- save_dir: Directory to save the plots. If None, plots are not saved.
- display: Whether to display plots by calling `plt.show()`.
"""
Expand Down Expand Up @@ -433,6 +441,13 @@ def plot_scatter_actual_reported(
# calculate the length of the line_geometry in meters
activity_chains_plot["length"] = activity_chains_plot["line_geometry"].length

# Create a column for estimated distance
activity_chains_plot["length"] = activity_chains_plot["length"].apply(
lambda d: _adjust_distance(
d, detour_factor=detour_factor, decay_rate=decay_rate
)
)

# Calculate the number of rows and columns for the subplots. It is a function of the number of modes
nrows = math.ceil(len(activity_chains_plot["mode"].unique()) / 2)
ncols = 2
Expand All @@ -459,6 +474,10 @@ def plot_scatter_actual_reported(
p = np.poly1d(z)
ax.plot(subset_mode[x_col], p(subset_mode[x_col]), "r--")

# Set x-axis limit
if x_axis_max:
ax.set_xlim(0, x_axis_max)

ax.set_title(f"{title_prefix} for mode: {mode}")
ax.set_xlabel(x_label)
ax.set_ylabel(y_label)
Expand Down
39 changes: 38 additions & 1 deletion src/acbm/assigning/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -284,10 +284,39 @@ def _get_activities_per_zone_df(activities_per_zone: dict) -> pd.DataFrame:
return pd.concat(activities_per_zone.values())


def _adjust_distance(
distance: float,
detour_factor: float,
decay_rate: float,
) -> float:
"""
Adjusts euclidian distances by adding a detour factor. We use minkowski distance
and a decay rate, as longer detour makes up a smaller proportion of the total
distance as the distance increases.

Parameters
----------
distance : float
The original distance.
detour_factor : float
The detour factor to be applied.
decay_rate : float
The decay rate to be applied.

Returns
-------
float
The adjusted distance.
"""
return distance * (1 + ((detour_factor - 1) * np.exp(-decay_rate * distance)))


def zones_to_time_matrix(
zones: gpd.GeoDataFrame,
time_units: str,
id_col: Optional[str] = None,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> pd.DataFrame:
"""
Calculates the distance matrix between the centroids of the given zones and returns it as a DataFrame. The matrix also adds
Expand All @@ -305,6 +334,8 @@ def zones_to_time_matrix(
The name of the column in the zones GeoDataFrame to use as the ID. If None, the index values are used. Default is None.
time_units: str, optional
The units to use for the travel time. Options are 's' for seconds and 'm' for minutes.
detour_factor: float, optional
The detour factor to apply to the distance. Default is 1.56.
Returns
-------
pd.DataFrame
Expand All @@ -321,6 +352,11 @@ def zones_to_time_matrix(
distances = distances.stack().reset_index()
distances.columns = [f"{id_col}_from", f"{id_col}_to", "distance"]

# adjust distance column by adding a detour factor
distances["distance"] = distances["distance"].apply(
lambda d: _adjust_distance(d, detour_factor, decay_rate)
)

# replace the index values with the id_col values if id_col is specified
if id_col is not None:
# create a mapping from index to id_col values
Expand Down Expand Up @@ -447,7 +483,8 @@ def intrazone_time(zones: gpd.GeoDataFrame, key_column: str) -> dict:
# Calculate the area of each zone
zones["area"] = zones["geometry"].area
# Calculate the average distance within each zone
zones["average_dist"] = np.sqrt(zones["area"]) / 2
# sqrt(area) / 2 would be radius
zones["average_dist"] = np.sqrt(zones["area"]) / 1.5

# Mode speeds in m/s
mode_speeds_mps = {
Expand Down
11 changes: 10 additions & 1 deletion src/acbm/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@ class MatchingParams(BaseModel):
chunk_size: int = 50_000


@dataclass(frozen=True)
class FeasibleAssignmentParams(BaseModel):
detour_factor: float
decay_rate: float


@dataclass(frozen=True)
class WorkAssignmentParams(BaseModel):
use_percentages: bool
Expand All @@ -51,10 +57,13 @@ class Postprocessing(BaseModel):

class Config(BaseModel):
parameters: Parameters = Field(description="Config: parameters.")
matching: MatchingParams = Field(description="Config: parameters for matching.")
feasible_assignment: FeasibleAssignmentParams = Field(
description="Config: parameters for assignment of feasible zones."
)
work_assignment: WorkAssignmentParams = Field(
description="Config: parameters for work assignment."
)
matching: MatchingParams = Field(description="Config: parameters for matching.")
postprocessing: Postprocessing = Field(
description="Config: parameters for postprocessing."
)
Expand Down
23 changes: 21 additions & 2 deletions src/acbm/validating/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@
import pandas as pd
from shapely import wkt

from acbm.assigning.utils import _adjust_distance


def process_sequences(
df: pd.DataFrame,
Expand Down Expand Up @@ -76,6 +78,8 @@ def calculate_od_distances(
end_wkt_col: str,
crs_epsg: int,
projected_epsg: int = 3857,
detour_factor: float = 1.56,
decay_rate: float = 0.0001,
) -> pd.DataFrame:
"""
Calculate distances between start and end geometries in a DataFrame.
Expand All @@ -94,6 +98,11 @@ def calculate_od_distances(
projected_epsg: int
EPSG code for the projected CRS (default is 3857). We need a metric crs
to calculte distances in meters.
detour_factor: float
Factor to adjust the estimated distance.
decay_rate: float
Decay rate for the distance adjustment. Detours are a smaller proportion of
the direct distance for longer trips.

Returns
-------
Expand All @@ -120,7 +129,17 @@ def calculate_od_distances(
gdf = gdf.to_crs(epsg=projected_epsg)
end_gdf = end_gdf.to_crs(epsg=projected_epsg)

# Calculate the distance between start and end geometries (in km)
gdf["distance"] = round(gdf.geometry.distance(end_gdf.geometry) / 1000, 1)
# Calculate the distance between start and end geometries (in m)
gdf["distance"] = gdf.geometry.distance(end_gdf.geometry)

# Estimate actual travel distance
gdf["distance"] = gdf["distance"].apply(
lambda d: _adjust_distance(
d, detour_factor=detour_factor, decay_rate=decay_rate
)
)

# convert distance to km
gdf["distance"] = round(gdf["distance"] / 1000, 1)

return gdf
Loading