From ef047383083377b1279ba74939c93e723509d23e Mon Sep 17 00:00:00 2001
From: parijatkawale <parijat.kawale@gmail.com>
Date: Tue, 7 May 2024 08:27:55 -0400
Subject: [PATCH] Fixed execution for testcase for regressor

---
 algorithms/linfa-ensemble/Cargo.toml          |   4 +-
 .../examples/adaboost_regressor.rs            | 140 ++++++-----
 .../examples/random_forest_regressor.rs       | 232 +++++++-----------
 3 files changed, 156 insertions(+), 220 deletions(-)

diff --git a/algorithms/linfa-ensemble/Cargo.toml b/algorithms/linfa-ensemble/Cargo.toml
index 033f11b37..18c475e52 100644
--- a/algorithms/linfa-ensemble/Cargo.toml
+++ b/algorithms/linfa-ensemble/Cargo.toml
@@ -26,7 +26,7 @@ features = ["std", "derive"]
 linfa = { version = "0.7.0", path = "../.." }
 linfa-trees = { version = "0.7.0", path = "../linfa-trees"}
 serde = { version = "1.0", features = ["derive","std"] }
-linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist", "boston"] }
+linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist", "boston", "diabetes"] }
 ndarray = { version = "0.15" , features = ["rayon", "approx"]}
 ndarray-rand = "0.14"
 rand = { version = "0.8", features = ["small_rng"] }
@@ -39,7 +39,7 @@ csv = "1.1"
 
 [dev-dependencies]
 rand = { version = "0.8", features = ["small_rng"] }
-linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist", "boston"] }
+linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist", "boston", "diabetes"] }
 rayon = {version = "1.10.0"}
 approx = {version = "0.5"}
 
diff --git a/algorithms/linfa-ensemble/examples/adaboost_regressor.rs b/algorithms/linfa-ensemble/examples/adaboost_regressor.rs
index 03042e04a..4c3580d09 100644
--- a/algorithms/linfa-ensemble/examples/adaboost_regressor.rs
+++ b/algorithms/linfa-ensemble/examples/adaboost_regressor.rs
@@ -1,74 +1,72 @@
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use ndarray::{Array2, Array1, s};
-    use ndarray_csv::Array2Reader;
-    use std::fs::File;
-    use rand::rngs::StdRng;
-    use rand::SeedableRng;
-    use csv::ReaderBuilder;
-    use linfa_ensemble::AdaBoostRegressor;
-    use linfa_datasets::{boston, diabetes};
-
-    #[test]
-    fn test_adaboost_with_boston_housing() {
-        // Load the dataset
-        let dataset = boston();  // dataset now contains both features and targets
-
-        // Parameters for AdaBoost
-        let n_estimators = 50;
-        let learning_rate = 1.0;
-        let max_depth = 4;
-        let min_samples_split = 10;
-        let random_state = 42; // Random state for reproducibility
-
-        // Create AdaBoostRegressor instance
-        let mut regressor = AdaBoostRegressor::new(n_estimators, learning_rate, random_state, max_depth, min_samples_split);
-
-        // Fit the regressor to the Boston Housing dataset
-        regressor.fit(dataset.records(), dataset.targets());
-
-        // Make predictions
-        let predictions = regressor.predict(dataset.records());
-
-        // Calculate Mean Squared Error
-        let mse = (dataset.targets() - &predictions).mapv(|a| a.powi(2)).mean().unwrap_or(0.0);  // Calculate Mean Squared Error
-        let rmse = mse.sqrt();  // Calculate Root Mean Squared Error
-        println!("Root Mean Squared Error: {}", rmse);
-
-        // Assert to check if RMSE is below a threshold
-        assert!(rmse < 25.0, "The RMSE should be lower than 25.0, but it was {}", rmse);
-    }
-
-    #[test]
-    fn test_adaboost_with_diabetes() {
-        // Load the dataset
-        let dataset = diabetes();
-
-        // Parameters for AdaBoost
-        let n_estimators = 100;  
-        let learning_rate = 0.5;
-        let max_depth = 3;        
-        let min_samples_split = 5; 
-        let random_state = 42;  
-
-        // Create AdaBoostRegressor instance
-        let mut regressor = AdaBoostRegressor::new(n_estimators, learning_rate, random_state, max_depth, min_samples_split);
-
-        // Fit the regressor to the Diabetes dataset
-        regressor.fit(dataset.records(), dataset.targets());
-
-        // Make predictions
-        let predictions = regressor.predict(dataset.records());
-
-        // Calculate Mean Squared Error
-        let mse = (dataset.targets() - &predictions).mapv(|a| a.powi(2)).mean().unwrap_or(0.0);  // Calculate Mean Squared Error
-        let rmse = mse.sqrt();  // Calculate Root Mean Squared Error
-        println!("Root Mean Squared Error: {}", rmse);
-
-        // Assert to check if RMSE is below a threshold
-        assert!(rmse < 200.0, "The RMSE should be lower than 200.0, but it was {}", rmse);
-    }
+use ndarray::{Array2, Array1, s};
+use ndarray_csv::Array2Reader;
+use std::fs::File;
+use rand::rngs::StdRng;
+use rand::SeedableRng;
+use csv::ReaderBuilder;
+use linfa_ensemble::AdaBoostRegressor;
+use linfa_datasets::{boston, diabetes};
+
+/// Fits AdaBoost on the Boston housing data and asserts that the RMSE on that same data is below 25.0.
+pub fn test_adaboost_with_boston_housing() {
+    // Load the dataset; it carries both the features (`records()`) and the targets (`targets()`)
+    let dataset = boston();
+
+    // Parameters for AdaBoost
+    let n_estimators = 50;
+    let learning_rate = 1.0;
+    let max_depth = 4;
+    let min_samples_split = 10;
+    let random_state = 42; // Random state for reproducibility
+
+    // Create AdaBoostRegressor instance
+    let mut regressor = AdaBoostRegressor::new(n_estimators, learning_rate, random_state, max_depth, min_samples_split);
+
+    // Fit the regressor to the Boston Housing dataset, then predict on the records it was fitted on
+    regressor.fit(dataset.records(), dataset.targets());
+    let predictions = regressor.predict(dataset.records());
+
+    // `mean()` is `None` for an empty array; defaulting to 0.0 would make the threshold check below
+    // pass on no data at all, so fail loudly instead.
+    let mse = (dataset.targets() - &predictions).mapv(|a| a.powi(2)).mean().expect("cannot compute RMSE: no samples to average");
+    let rmse = mse.sqrt();  // Calculate Root Mean Squared Error
+    println!("Root Mean Squared Error for Boston Housing Dataset: {}", rmse);
+
+    // Assert to check if RMSE is below a threshold
+    assert!(rmse < 25.0, "The RMSE should be lower than 25.0, but it was {}", rmse);
+}
+
+/// Fits AdaBoost on the diabetes data and asserts that the RMSE on that same data is below 200.0.
+pub fn test_adaboost_with_diabetes() {
+    let dataset = diabetes();
+
+    // Parameters for AdaBoost
+    let n_estimators = 100;
+    let learning_rate = 0.5;
+    let max_depth = 3;
+    let min_samples_split = 5;
+    let random_state = 42; // Random state for reproducibility
+
+    // Create AdaBoostRegressor instance
+    let mut regressor = AdaBoostRegressor::new(n_estimators, learning_rate, random_state, max_depth, min_samples_split);
 
+    // Fit the regressor to the full Diabetes dataset (no hold-out split is made here)
+    regressor.fit(dataset.records(), dataset.targets());
 
+    // Make predictions on the same records the regressor was fitted on
+    let predictions = regressor.predict(dataset.records());
+
+    // `mean()` is `None` for an empty array; defaulting to 0.0 would let the check below pass on no data
+    let mse = (dataset.targets() - &predictions).mapv(|a| a.powi(2)).mean().expect("cannot compute RMSE: no samples to average");
+    let rmse = mse.sqrt();  // Calculate Root Mean Squared Error
+    println!("Root Mean Squared Error for diabetes: {}", rmse);
+
+    // Assert to check if RMSE is below a threshold
+    assert!(rmse < 200.0, "The RMSE should be lower than 200.0, but it was {}", rmse);
 }
+
+
+fn main(){ // Example entry point: runs both AdaBoost checks in order.
+    test_adaboost_with_boston_housing(); // panics if the Boston housing RMSE assertion fails
+    test_adaboost_with_diabetes(); // panics if the diabetes RMSE assertion fails
+}
\ No newline at end of file
diff --git a/algorithms/linfa-ensemble/examples/random_forest_regressor.rs b/algorithms/linfa-ensemble/examples/random_forest_regressor.rs
index 7b34fedf0..f8ca9e9f7 100644
--- a/algorithms/linfa-ensemble/examples/random_forest_regressor.rs
+++ b/algorithms/linfa-ensemble/examples/random_forest_regressor.rs
@@ -1,164 +1,102 @@
-// use linfa_ensemble::RandomForestRegressor;
-// use ndarray::{Array1, Axis};
-// use rand::seq::SliceRandom;
-// use rand::thread_rng;
-// use linfa_ensemble::visualization;
-
-// fn main() {
-//     // Number of trees in the forest
-//     let num_trees = 100;
-//     // Number of features to consider for each split
-//     let max_features = 4; // Set to the number of features in your dataset or adjust as needed
-//     // Maximum depth of each tree
-//     let max_depth = 10;
-//     // Minimum number of samples required to split a node
-//     let min_samples_split = 5;
-
-//     // Load the Iris dataset
-//     let iris = linfa_datasets::diabetes();
-//     let iris_cloned = iris.clone();
-
-//     // Extract features and targets
-//     let features = iris_cloned.records();
-//     let targets = iris.targets().mapv(|x| x as f64);
-
-//     // Shuffle and split the data into train and test
-//     let mut rng = thread_rng();
-//     let mut indices: Vec<usize> = (0..features.nrows()).collect();
-//     indices.shuffle(&mut rng);
-//     let split_index = (features.nrows() as f64 * 0.8) as usize; // 60% train, 40% test
-//     let train_indices = &indices[..split_index];
-//     let test_indices = &indices[split_index..];
-
-//     let train_features = features.select(Axis(0), train_indices);
-//     let train_targets = targets.select(Axis(0), train_indices);
-//     let test_features = features.select(Axis(0), test_indices);
-//     let test_targets = targets.select(Axis(0), test_indices);
-
-//     // Train random forest regressor
-//     let mut forest = RandomForestRegressor::new(num_trees, max_features, max_depth, min_samples_split);
-//     forest.fit(&train_features, &train_targets);
-
-//     // Predict on test dataset
-//     let predictions = forest.predict(&test_features);
-
-//     // Evaluate performance
-//     let mse = mean_squared_error(&test_targets, &predictions);
-//     println!("Mean Squared Error: {}", mse);
-
-
-
-//     println!("Generated graph");
-// }
-
-// fn mean_squared_error(actual: &Array1<f64>, predicted: &Array1<f64>) -> f64 {
-//     let errors = actual - predicted;
-//     let squared_errors = errors.mapv(|x| x.powi(2));
-//     squared_errors.mean().unwrap()
-// }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use approx::assert_relative_eq;
-    use linfa_datasets::{iris, diabetes};
-    use linfa_ensemble::RandomForestRegressor;
-    use ndarray::{Array1, Array2, Axis}; // For floating-point assertions
-    use linfa_ensemble::visualization;
-
-
-    fn calculate_rmse(actual: &Array1<f64>, predicted: &Array1<f64>) -> f64 {
-        let errors = actual - predicted;
-        let mse = errors.mapv(|e| e.powi(2)).mean().unwrap();
-        mse.sqrt()
-    }
-
-    fn load_iris_data() -> (Array2<f64>, Array1<f64>) {
-        // Load the dataset
-        let dataset = iris();
+use approx::assert_relative_eq;
+use linfa_datasets::{iris, diabetes};
+use linfa_ensemble::RandomForestRegressor;
+use ndarray::{Array1, Array2, Axis}; // For floating-point assertions
+use linfa_ensemble::visualization;
+
+fn calculate_rmse(actual: &Array1<f64>, predicted: &Array1<f64>) -> f64 {
+    // Root-mean-square of the element-wise differences; panics when `mean()` yields `None`.
+    let squared_residuals = (actual - predicted).mapv(|diff| diff.powi(2));
+    squared_residuals.mean().unwrap().sqrt()
+}
 
-        // Extract features; assuming all rows and all but the last column if last is target
-        let features = dataset.records().clone();
+fn load_iris_data() -> (Array2<f64>, Array1<f64>) {
+    // Load the iris dataset via `linfa_datasets::iris`
+    let dataset = iris();
 
-        let targets = dataset.targets().mapv(|x| x as f64);
+    // Copy the whole record matrix out of the dataset; targets are read separately below
+    let features = dataset.records().clone();
 
-        (features, targets)
-    }
+    let targets = dataset.targets().mapv(|x| x as f64); // cast each target to f64 to match the return type
 
-    fn load_diabetes_data() -> (Array2<f64>, Array1<f64>) {
-        let dataset = diabetes();
+    (features, targets)
+}
 
-        let features = dataset.records().clone();
-        let targets = dataset.targets().mapv(|x| x as f64);
+fn load_diabetes_data() -> (Array2<f64>, Array1<f64>) { // Returns (records, targets) as owned arrays
+    let dataset = diabetes();
 
-        let features = dataset.records().clone();
-        let targets = dataset.targets().mapv(|x| x as f64);
+    let features = dataset.records().clone(); // copy of the whole record matrix
+    let targets = dataset.targets().mapv(|x| x as f64); // NOTE(review): cast is a no-op if targets are already f64 — confirm
 
-    #[test]
-    fn test_random_forest_with_diabetes() {
-         let (features, targets) = load_diabetes_data();
-
-        // Split data into training and testing sets
-        let split_ratio = 0.7; // Using 70% of the data for training
-        let split_index = (features.nrows() as f64 * split_ratio) as usize;
-        let (train_features, test_features) = features.view().split_at(Axis(0), split_index);
-        let (train_targets, test_targets) = targets.view().split_at(Axis(0), split_index);
-
-        let mut forest = RandomForestRegressor::new(100, 10, 5, 10);
-        // Convert views to owned arrays before passing to fit
-        forest.fit(&train_features.to_owned(), &train_targets.to_owned());
-        let train_predictions = forest.predict(&train_features.to_owned());
-        let test_predictions = forest.predict(&test_features.to_owned());
-
-        // Evaluate the performance on the test set
-        let test_rmse = calculate_rmse(&test_targets.to_owned(), &test_predictions);
-        println!("Test RMSE for Diabetes Dataset: {:?}", test_rmse);
-
-        // Assert that the RMSE is below an acceptable threshold
-        assert!(test_rmse < 70.0, "The RMSE should be lower than 60.0");
-
-        // Visualization of training and testing results
-        visualization::plot_scatter(
-            &train_targets.to_owned(),
-            &train_predictions,
-            &test_targets.to_owned(),
-            &test_predictions,
-            "diabetes_rf_scatter.png",
-        ).unwrap();
-    }
+    (features, targets)
+}
 
+/// Trains a random forest on the first 70% of the diabetes rows, asserts that the RMSE on the
+/// remaining rows is below 70.0, then hands both sets to `visualization::plot_scatter`.
+fn test_random_forest_with_diabetes() {
+    let (features, targets) = load_diabetes_data();
+
+    // Positional split along the row axis: rows before `split_index` train, the rest test.
+    let split_ratio = 0.7; // Using 70% of the data for training
+    let split_index = (features.nrows() as f64 * split_ratio) as usize;
+    let (train_features, test_features) = features.view().split_at(Axis(0), split_index);
+    let (train_targets, test_targets) = targets.view().split_at(Axis(0), split_index);
+    // Own each view once instead of re-copying it at every call site below.
+    let (train_features, test_features) = (train_features.to_owned(), test_features.to_owned());
+    let (train_targets, test_targets) = (train_targets.to_owned(), test_targets.to_owned());
+
+    let mut forest = RandomForestRegressor::new(150, 10, 5, 10);
+    forest.fit(&train_features, &train_targets);
+    let train_predictions = forest.predict(&train_features);
+    let test_predictions = forest.predict(&test_features);
+
+    // Evaluate on the held-out rows; the failure message states the bound that is actually enforced.
+    let test_rmse = calculate_rmse(&test_targets, &test_predictions);
+    println!("Test RMSE for Diabetes Dataset: {:?}", test_rmse);
+    assert!(test_rmse < 70.0, "The RMSE should be lower than 70.0, but it was {}", test_rmse);
+
+    // Visualization of training and testing results
+    visualization::plot_scatter(
+        &train_targets, &train_predictions, &test_targets, &test_predictions,
+        "diabetes_rf_scatter.png",
+    ).unwrap();
+}
 
-    #[test]
-    fn test_random_forest_with_iris() {
-        let (features, targets) = load_iris_data();
+fn test_random_forest_with_iris() { // Fits on all iris rows, then prints hit counts and RMSE for those same rows
+    let (features, targets) = load_iris_data();
 
-        let mut forest = RandomForestRegressor::new(100, 10, 3, 10);
-        forest.fit(&features, &targets);
-        let predictions = forest.predict(&features);
+    let mut forest = RandomForestRegressor::new(100, 10, 3, 10);
+    forest.fit(&features, &targets);
+    let predictions = forest.predict(&features);
 
-        // Define a tolerance level
-        let tolerance = 0.1; // Tolerance level for correct classification
-        let mut correct = 0;
-        let mut incorrect = 0;
+    // A prediction counts as correct when it lies strictly within `tolerance` of its target
+    let tolerance = 0.1; // Tolerance level for correct classification
+    let mut correct = 0;
+    let mut incorrect = 0;
 
-        // Count correct and incorrect predictions
-        for (&actual, &predicted) in targets.iter().zip(predictions.iter()) {
-            if (predicted - actual).abs() < tolerance {
-                correct += 1;
-            } else {
-                incorrect += 1;
-            }
+    // Count correct and incorrect predictions
+    for (&actual, &predicted) in targets.iter().zip(predictions.iter()) {
+        if (predicted - actual).abs() < tolerance {
+            correct += 1;
+        } else {
+            incorrect += 1;
         }
+    }
 
-        println!("Correct predictions: {}", correct);
-        println!("Incorrect predictions: {}", incorrect);
+    println!("Correct predictions: {}", correct);
+    println!("Incorrect predictions: {}", incorrect);
 
-        let rmse = (&predictions - &targets)
-            .mapv(|a| a.powi(2))
-            .mean()
-            .unwrap()
-            .sqrt();
+    let rmse = (&predictions - &targets) // RMSE over the rows used for fitting; nothing is asserted on it
+        .mapv(|a| a.powi(2))
+        .mean()
+        .unwrap()
+        .sqrt();
 
-        println!("RMSE: {:?}", rmse);
-    }
+    println!("Test RMSE for Iris Dataset: {:?}", rmse);
+}
+
+
+fn main() { // Example entry point: runs the iris demo first, then the diabetes check.
+    test_random_forest_with_iris(); // prints counts and RMSE only; contains no assertion
+    test_random_forest_with_diabetes(); // panics if its RMSE assertion or the plot call's unwrap fails
 }