From e4c60b7440bfd7b0a925e5d95b6ce5d81c89360b Mon Sep 17 00:00:00 2001 From: reinterpretcat Date: Tue, 1 Oct 2024 22:46:33 +0200 Subject: [PATCH] Decouple job clustering api from problem model --- vrp-cli/src/extensions/analyze/clusters.rs | 5 +- .../src/construction/clustering/dbscan/mod.rs | 52 +++++++++++-------- .../src/solver/search/ruin/cluster_removal.rs | 6 ++- .../construction/clustering/dbscan_test.rs | 16 ++++-- 4 files changed, 52 insertions(+), 27 deletions(-) diff --git a/vrp-cli/src/extensions/analyze/clusters.rs b/vrp-cli/src/extensions/analyze/clusters.rs index 89c6164f8..3b7c8a506 100644 --- a/vrp-cli/src/extensions/analyze/clusters.rs +++ b/vrp-cli/src/extensions/analyze/clusters.rs @@ -5,6 +5,7 @@ mod clusters_test; use std::io::{BufReader, BufWriter, Read}; use std::sync::Arc; use vrp_core::construction::clustering::dbscan::create_job_clusters; +use vrp_core::models::common::Timestamp; use vrp_core::models::problem::{get_job_locations, JobIdDimension}; use vrp_core::models::Problem; use vrp_core::prelude::{Float, GenericResult}; @@ -24,7 +25,9 @@ pub fn get_clusters( let coord_index = problem.extras.get_coord_index().expect("cannot find coord index"); let coord_index = coord_index.as_ref(); - let clusters = create_job_clusters(problem.as_ref(), min_points, epsilon)?; + let clusters = create_job_clusters(problem.jobs.all(), &problem.fleet, min_points, epsilon, |profile, job| { + problem.jobs.neighbors(profile, job, Timestamp::default()) + })?; let locations = clusters .iter() diff --git a/vrp-core/src/construction/clustering/dbscan/mod.rs b/vrp-core/src/construction/clustering/dbscan/mod.rs index 8b95141bf..6bb7aae59 100644 --- a/vrp-core/src/construction/clustering/dbscan/mod.rs +++ b/vrp-core/src/construction/clustering/dbscan/mod.rs @@ -6,32 +6,36 @@ mod dbscan_test; use crate::algorithms::clustering::dbscan::create_clusters; use crate::algorithms::geometry::Point; -use crate::models::common::Timestamp; +use crate::models::common::Profile; use crate::models::problem::{Job, Single}; -use crate::models::Problem; +use crate::prelude::{Cost, Fleet}; use rosomaxa::prelude::*; use std::cmp::Ordering; use std::collections::HashSet; use std::sync::Arc; /// Creates clusters of jobs using DBSCAN algorithm. -pub fn create_job_clusters( - problem: &Problem, +pub fn create_job_clusters<'a, FN, IR>( + jobs: &[Job], + fleet: &Fleet, min_points: Option, epsilon: Option, -) -> GenericResult>> { + neighbour_fn: FN, +) -> GenericResult>> +where + FN: Fn(&Profile, &Job) -> IR + 'a, + IR: Iterator + 'a, +{ let min_points = min_points.unwrap_or(3).max(2); - let epsilon = epsilon.unwrap_or_else(|| estimate_epsilon(problem, min_points)); + let epsilon = epsilon.unwrap_or_else(|| estimate_epsilon(jobs, fleet, min_points, &neighbour_fn)); - // get main parameters with some randomization - let profile = problem.fleet.profiles.first().ok_or_else(|| GenericError::from("cannot find any profile"))?; + // NOTE use always first profile. It is not yet clear what would be a better way to handle multiple profiles here. + let profile = fleet.profiles.first().ok_or_else(|| GenericError::from("cannot find any profile"))?; // exclude jobs without locations from clustering - let jobs = problem.jobs.all().iter().filter(|j| job_has_locations(j)).cloned().collect::>(); + let jobs = jobs.iter().filter(|j| job_has_locations(j)).cloned().collect::>(); let neighbor_fn = move |job| { - problem - .jobs - .neighbors(profile, job, 0.) + neighbour_fn(profile, job) .filter(move |(job, _)| job_has_locations(job)) .take_while(move |(_, cost)| *cost < epsilon) .map(|(job, _)| job) @@ -44,8 +48,12 @@ pub fn create_job_clusters( } /// Estimates DBSCAN epsilon parameter. -fn estimate_epsilon(problem: &Problem, min_points: usize) -> Float { - let costs = get_average_costs(problem, min_points); +fn estimate_epsilon<'a, FN, IR>(jobs: &[Job], fleet: &Fleet, min_points: usize, neighbour_fn: &FN) -> Float +where + FN: Fn(&Profile, &Job) -> IR + 'a, + IR: Iterator + 'a, +{ + let costs = get_average_costs(jobs, fleet, min_points, neighbour_fn); let curve = costs.into_iter().enumerate().map(|(idx, cost)| Point::new(idx as Float, cost)).collect::>(); // get max curvature approximation and return it as a guess for optimal epsilon value @@ -53,12 +61,14 @@ fn estimate_epsilon(problem: &Problem, min_points: usize) -> Float { } /// Gets average costs across all profiles. -fn get_average_costs(problem: &Problem, min_points: usize) -> Vec { - let jobs = problem.jobs.as_ref(); - let mut costs = problem.fleet.profiles.iter().fold(vec![0.; jobs.size()], |mut acc, profile| { - jobs.all().iter().enumerate().for_each(|(idx, job)| { - let (sum, count) = jobs - .neighbors(profile, job, Timestamp::default()) +fn get_average_costs<'a, FN, IR>(jobs: &[Job], fleet: &Fleet, min_points: usize, neighbour_fn: &FN) -> Vec +where + FN: Fn(&Profile, &Job) -> IR + 'a, + IR: Iterator + 'a, +{ + let mut costs = fleet.profiles.iter().fold(vec![0.; jobs.len()], |mut acc, profile| { + jobs.iter().enumerate().for_each(|(idx, job)| { + let (sum, count) = neighbour_fn(profile, job) .filter(|(j, _)| job_has_locations(j)) .take(min_points) .map(|(_, cost)| cost) @@ -69,7 +79,7 @@ fn get_average_costs(problem: &Problem, min_points: usize) -> Vec { acc }); - costs.iter_mut().for_each(|cost| *cost /= problem.fleet.profiles.len() as Float); + costs.iter_mut().for_each(|cost| *cost /= fleet.profiles.len() as Float); // sort all distances in ascending order costs.sort_unstable_by(compare_floats_refs); diff --git a/vrp-core/src/solver/search/ruin/cluster_removal.rs b/vrp-core/src/solver/search/ruin/cluster_removal.rs index c2a30c2c1..05d1a3bd9 100644 --- a/vrp-core/src/solver/search/ruin/cluster_removal.rs +++ b/vrp-core/src/solver/search/ruin/cluster_removal.rs @@ -5,6 +5,7 @@ mod cluster_removal_test; use super::*; use crate::construction::clustering::dbscan::create_job_clusters; use crate::construction::heuristics::InsertionContext; +use crate::models::common::Timestamp; use crate::models::problem::Job; use crate::models::Problem; use crate::solver::search::{get_route_jobs, JobRemovalTracker, TabuList}; @@ -26,7 +27,10 @@ impl ClusterRemoval { min_items: usize, limits: RemovalLimits, ) -> GenericResult { - let clusters = create_job_clusters(problem.as_ref(), Some(min_items), None)?; + let clusters = + create_job_clusters(problem.jobs.all(), problem.fleet.as_ref(), Some(min_items), None, |profile, job| { + problem.jobs.neighbors(profile, job, Timestamp::default()) + })?; let mut clusters = clusters.into_iter().map(|cluster| cluster.into_iter().collect::>()).collect::>(); diff --git a/vrp-core/tests/unit/construction/clustering/dbscan_test.rs b/vrp-core/tests/unit/construction/clustering/dbscan_test.rs index 24690d0d7..5e87df77c 100644 --- a/vrp-core/tests/unit/construction/clustering/dbscan_test.rs +++ b/vrp-core/tests/unit/construction/clustering/dbscan_test.rs @@ -4,7 +4,7 @@ use crate::helpers::construction::clustering::p; use crate::helpers::models::domain::TestGoalContextBuilder; use crate::helpers::models::problem::TestSingleBuilder; use crate::helpers::solver::{generate_matrix_distances_from_points, generate_matrix_routes}; -use crate::models::common::Location; +use crate::models::common::{Location, Timestamp}; use crate::models::{Extras, GoalContext}; use crate::prelude::{ActivityCost, TransportCost}; @@ -51,7 +51,10 @@ fn can_estimate_epsilon_impl(matrix: (usize, usize), nth_neighbor: usize, matrix matrix_modify, ); - assert_eq!((estimate_epsilon(&problem, nth_neighbor) * 1000.).round() / 1000., expected); + let epsilon = estimate_epsilon(problem.jobs.all(), problem.fleet.as_ref(), nth_neighbor, &|profile, job| { + problem.jobs.neighbors(profile, job, Timestamp::default()) + }); + assert_eq!((epsilon * 1000.).round() / 1000., expected); } parameterized_test! {can_estimate_epsilon_having_zero_costs, min_points, { @@ -88,7 +91,9 @@ fn can_estimate_epsilon_having_zero_costs_impl(min_points: usize) { }, ); - let costs = get_average_costs(&problem, min_points); + let costs = get_average_costs(problem.jobs.all(), &problem.fleet, min_points, &|profile, job| { + problem.jobs.neighbors(profile, job, Timestamp::default()) + }); assert!(!costs.is_empty()); } @@ -115,7 +120,10 @@ fn can_create_job_clusters_impl(param: (usize, Float), expected: &[Vec |_| (vec![0.; 64], create_test_distances()), ); - let clusters = create_job_clusters(&problem, Some(min_points), Some(epsilon)) + let clusters = + create_job_clusters(problem.jobs.all(), &problem.fleet, Some(min_points), Some(epsilon), |profile, job| { + problem.jobs.neighbors(profile, job, Timestamp::default()) + }) .expect("cannot create job clusters") .iter() .map(|cluster| {