Decouple job clustering api from problem model
reinterpretcat committed Oct 1, 2024
1 parent 3fd4091 commit e4c60b7
Showing 4 changed files with 52 additions and 27 deletions.
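
The change decouples `create_job_clusters` from the `Problem` model: callers now pass the job slice and the fleet explicitly, plus a closure that yields a job's neighbours with their costs, instead of handing over the whole problem. A minimal sketch of a call site under the new API, assuming an already built `problem: Arc<Problem>` (the wrapper function `cluster_jobs`, the `Some(3)` min_points value, and returning the cluster count are illustrative, not part of this commit):

use std::sync::Arc;
use vrp_core::construction::clustering::dbscan::create_job_clusters;
use vrp_core::models::common::Timestamp;
use vrp_core::models::Problem;
use vrp_core::prelude::GenericResult;

// Illustrative wrapper around the decoupled clustering API.
fn cluster_jobs(problem: Arc<Problem>) -> GenericResult<usize> {
    let clusters = create_job_clusters(
        problem.jobs.all(), // jobs are passed as a slice, not via Problem
        &problem.fleet,     // fleet is passed explicitly
        Some(3),            // min_points
        None,               // epsilon: None lets the function estimate it
        // neighbour lookup is injected as a closure, so the DBSCAN code
        // no longer depends on the Problem model itself
        |profile, job| problem.jobs.neighbors(profile, job, Timestamp::default()),
    )?;
    Ok(clusters.len())
}

The same pattern appears in each updated call site below (the vrp-cli analysis extension, the cluster removal ruin operator, and the unit tests); only the neighbour closure is supplied by the caller.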
5 changes: 4 additions & 1 deletion vrp-cli/src/extensions/analyze/clusters.rs
@@ -5,6 +5,7 @@ mod clusters_test;
 use std::io::{BufReader, BufWriter, Read};
 use std::sync::Arc;
 use vrp_core::construction::clustering::dbscan::create_job_clusters;
+use vrp_core::models::common::Timestamp;
 use vrp_core::models::problem::{get_job_locations, JobIdDimension};
 use vrp_core::models::Problem;
 use vrp_core::prelude::{Float, GenericResult};
@@ -24,7 +25,9 @@ pub fn get_clusters<F: Read>(
     let coord_index = problem.extras.get_coord_index().expect("cannot find coord index");
     let coord_index = coord_index.as_ref();
 
-    let clusters = create_job_clusters(problem.as_ref(), min_points, epsilon)?;
+    let clusters = create_job_clusters(problem.jobs.all(), &problem.fleet, min_points, epsilon, |profile, job| {
+        problem.jobs.neighbors(profile, job, Timestamp::default())
+    })?;
 
     let locations = clusters
         .iter()
52 changes: 31 additions & 21 deletions vrp-core/src/construction/clustering/dbscan/mod.rs
@@ -6,32 +6,36 @@ mod dbscan_test;
 
 use crate::algorithms::clustering::dbscan::create_clusters;
 use crate::algorithms::geometry::Point;
-use crate::models::common::Timestamp;
+use crate::models::common::Profile;
 use crate::models::problem::{Job, Single};
-use crate::models::Problem;
+use crate::prelude::{Cost, Fleet};
 use rosomaxa::prelude::*;
 use std::cmp::Ordering;
 use std::collections::HashSet;
 use std::sync::Arc;
 
 /// Creates clusters of jobs using DBSCAN algorithm.
-pub fn create_job_clusters(
-    problem: &Problem,
+pub fn create_job_clusters<'a, FN, IR>(
+    jobs: &[Job],
+    fleet: &Fleet,
     min_points: Option<usize>,
     epsilon: Option<Float>,
-) -> GenericResult<Vec<HashSet<Job>>> {
+    neighbour_fn: FN,
+) -> GenericResult<Vec<HashSet<Job>>>
+where
+    FN: Fn(&Profile, &Job) -> IR + 'a,
+    IR: Iterator<Item = (&'a Job, Cost)> + 'a,
+{
     let min_points = min_points.unwrap_or(3).max(2);
-    let epsilon = epsilon.unwrap_or_else(|| estimate_epsilon(problem, min_points));
+    let epsilon = epsilon.unwrap_or_else(|| estimate_epsilon(jobs, fleet, min_points, &neighbour_fn));
 
-    // get main parameters with some randomization
-    let profile = problem.fleet.profiles.first().ok_or_else(|| GenericError::from("cannot find any profile"))?;
+    // NOTE use always first profile. It is not yet clear what would be a better way to handle multiple profiles here.
+    let profile = fleet.profiles.first().ok_or_else(|| GenericError::from("cannot find any profile"))?;
     // exclude jobs without locations from clustering
-    let jobs = problem.jobs.all().iter().filter(|j| job_has_locations(j)).cloned().collect::<Vec<_>>();
+    let jobs = jobs.iter().filter(|j| job_has_locations(j)).cloned().collect::<Vec<_>>();
 
     let neighbor_fn = move |job| {
-        problem
-            .jobs
-            .neighbors(profile, job, 0.)
+        neighbour_fn(profile, job)
            .filter(move |(job, _)| job_has_locations(job))
            .take_while(move |(_, cost)| *cost < epsilon)
            .map(|(job, _)| job)
@@ -44,21 +48,27 @@ pub fn create_job_clusters(
 }
 
 /// Estimates DBSCAN epsilon parameter.
-fn estimate_epsilon(problem: &Problem, min_points: usize) -> Float {
-    let costs = get_average_costs(problem, min_points);
+fn estimate_epsilon<'a, FN, IR>(jobs: &[Job], fleet: &Fleet, min_points: usize, neighbour_fn: &FN) -> Float
+where
+    FN: Fn(&Profile, &Job) -> IR + 'a,
+    IR: Iterator<Item = (&'a Job, Cost)> + 'a,
+{
+    let costs = get_average_costs(jobs, fleet, min_points, neighbour_fn);
     let curve = costs.into_iter().enumerate().map(|(idx, cost)| Point::new(idx as Float, cost)).collect::<Vec<_>>();
 
     // get max curvature approximation and return it as a guess for optimal epsilon value
     get_max_curvature(curve.as_slice())
 }
 
 /// Gets average costs across all profiles.
-fn get_average_costs(problem: &Problem, min_points: usize) -> Vec<Float> {
-    let jobs = problem.jobs.as_ref();
-    let mut costs = problem.fleet.profiles.iter().fold(vec![0.; jobs.size()], |mut acc, profile| {
-        jobs.all().iter().enumerate().for_each(|(idx, job)| {
-            let (sum, count) = jobs
-                .neighbors(profile, job, Timestamp::default())
+fn get_average_costs<'a, FN, IR>(jobs: &[Job], fleet: &Fleet, min_points: usize, neighbour_fn: &FN) -> Vec<Float>
+where
+    FN: Fn(&Profile, &Job) -> IR + 'a,
+    IR: Iterator<Item = (&'a Job, Cost)> + 'a,
+{
+    let mut costs = fleet.profiles.iter().fold(vec![0.; jobs.len()], |mut acc, profile| {
+        jobs.iter().enumerate().for_each(|(idx, job)| {
+            let (sum, count) = neighbour_fn(profile, job)
                .filter(|(j, _)| job_has_locations(j))
                .take(min_points)
                .map(|(_, cost)| cost)
@@ -69,7 +79,7 @@ fn get_average_costs(problem: &Problem, min_points: usize) -> Vec<Float> {
         acc
     });
 
-    costs.iter_mut().for_each(|cost| *cost /= problem.fleet.profiles.len() as Float);
+    costs.iter_mut().for_each(|cost| *cost /= fleet.profiles.len() as Float);
 
     // sort all distances in ascending order
     costs.sort_unstable_by(compare_floats_refs);
6 changes: 5 additions & 1 deletion vrp-core/src/solver/search/ruin/cluster_removal.rs
@@ -5,6 +5,7 @@ mod cluster_removal_test;
 use super::*;
 use crate::construction::clustering::dbscan::create_job_clusters;
 use crate::construction::heuristics::InsertionContext;
+use crate::models::common::Timestamp;
 use crate::models::problem::Job;
 use crate::models::Problem;
 use crate::solver::search::{get_route_jobs, JobRemovalTracker, TabuList};
@@ -26,7 +27,10 @@ impl ClusterRemoval {
         min_items: usize,
         limits: RemovalLimits,
     ) -> GenericResult<Self> {
-        let clusters = create_job_clusters(problem.as_ref(), Some(min_items), None)?;
+        let clusters =
+            create_job_clusters(problem.jobs.all(), problem.fleet.as_ref(), Some(min_items), None, |profile, job| {
+                problem.jobs.neighbors(profile, job, Timestamp::default())
+            })?;
         let mut clusters =
             clusters.into_iter().map(|cluster| cluster.into_iter().collect::<Vec<_>>()).collect::<Vec<_>>();
 
16 changes: 12 additions & 4 deletions vrp-core/tests/unit/construction/clustering/dbscan_test.rs
@@ -4,7 +4,7 @@ use crate::helpers::construction::clustering::p;
 use crate::helpers::models::domain::TestGoalContextBuilder;
 use crate::helpers::models::problem::TestSingleBuilder;
 use crate::helpers::solver::{generate_matrix_distances_from_points, generate_matrix_routes};
-use crate::models::common::Location;
+use crate::models::common::{Location, Timestamp};
 use crate::models::{Extras, GoalContext};
 use crate::prelude::{ActivityCost, TransportCost};
 
@@ -51,7 +51,10 @@ fn can_estimate_epsilon_impl(matrix: (usize, usize), nth_neighbor: usize, matrix
         matrix_modify,
     );
 
-    assert_eq!((estimate_epsilon(&problem, nth_neighbor) * 1000.).round() / 1000., expected);
+    let epsilon = estimate_epsilon(problem.jobs.all(), problem.fleet.as_ref(), nth_neighbor, &|profile, job| {
+        problem.jobs.neighbors(profile, job, Timestamp::default())
+    });
+    assert_eq!((epsilon * 1000.).round() / 1000., expected);
 }
 
 parameterized_test! {can_estimate_epsilon_having_zero_costs, min_points, {
@@ -88,7 +91,9 @@ fn can_estimate_epsilon_having_zero_costs_impl(min_points: usize) {
         },
     );
 
-    let costs = get_average_costs(&problem, min_points);
+    let costs = get_average_costs(problem.jobs.all(), &problem.fleet, min_points, &|profile, job| {
+        problem.jobs.neighbors(profile, job, Timestamp::default())
+    });
 
     assert!(!costs.is_empty());
 }
@@ -115,7 +120,10 @@ fn can_create_job_clusters_impl(param: (usize, Float), expected: &[Vec<Location>
         |_| (vec![0.; 64], create_test_distances()),
     );
 
-    let clusters = create_job_clusters(&problem, Some(min_points), Some(epsilon))
+    let clusters =
+        create_job_clusters(problem.jobs.all(), &problem.fleet, Some(min_points), Some(epsilon), |profile, job| {
+            problem.jobs.neighbors(profile, job, Timestamp::default())
+        })
        .expect("cannot create job clusters")
        .iter()
        .map(|cluster| {
