diff --git a/algorithms/linfa-ensemble/Cargo.toml b/algorithms/linfa-ensemble/Cargo.toml index 033f11b37..18c475e52 100644 --- a/algorithms/linfa-ensemble/Cargo.toml +++ b/algorithms/linfa-ensemble/Cargo.toml @@ -26,7 +26,7 @@ features = ["std", "derive"] linfa = { version = "0.7.0", path = "../.." } linfa-trees = { version = "0.7.0", path = "../linfa-trees"} serde = { version = "1.0", features = ["derive","std"] } -linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist", "boston"] } +linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist", "boston", "diabetes"] } ndarray = { version = "0.15" , features = ["rayon", "approx"]} ndarray-rand = "0.14" rand = { version = "0.8", features = ["small_rng"] } @@ -39,7 +39,7 @@ csv = "1.1" [dev-dependencies] rand = { version = "0.8", features = ["small_rng"] } -linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist", "boston"] } +linfa-datasets = { version = "0.7.0", path = "../../datasets/", features = ["iris", "mnist", "boston", "diabetes"] } rayon = {version = "1.10.0"} approx = {version = "0.5"} diff --git a/algorithms/linfa-ensemble/examples/adaboost_regressor.rs b/algorithms/linfa-ensemble/examples/adaboost_regressor.rs index 03042e04a..4c3580d09 100644 --- a/algorithms/linfa-ensemble/examples/adaboost_regressor.rs +++ b/algorithms/linfa-ensemble/examples/adaboost_regressor.rs @@ -1,74 +1,72 @@ -#[cfg(test)] -mod tests { - use super::*; - use ndarray::{Array2, Array1, s}; - use ndarray_csv::Array2Reader; - use std::fs::File; - use rand::rngs::StdRng; - use rand::SeedableRng; - use csv::ReaderBuilder; - use linfa_ensemble::AdaBoostRegressor; - use linfa_datasets::{boston, diabetes}; - - #[test] - fn test_adaboost_with_boston_housing() { - // Load the dataset - let dataset = boston(); // dataset now contains both features and targets - - // Parameters for AdaBoost - let n_estimators = 50; - let learning_rate = 1.0; - let max_depth = 4; - let min_samples_split = 10; - let random_state = 42; // Random state for reproducibility - - // Create AdaBoostRegressor instance - let mut regressor = AdaBoostRegressor::new(n_estimators, learning_rate, random_state, max_depth, min_samples_split); - - // Fit the regressor to the Boston Housing dataset - regressor.fit(dataset.records(), dataset.targets()); - - // Make predictions - let predictions = regressor.predict(dataset.records()); - - // Calculate Mean Squared Error - let mse = (dataset.targets() - &predictions).mapv(|a| a.powi(2)).mean().unwrap_or(0.0); // Calculate Mean Squared Error - let rmse = mse.sqrt(); // Calculate Root Mean Squared Error - println!("Root Mean Squared Error: {}", rmse); - - // Assert to check if RMSE is below a threshold - assert!(rmse < 25.0, "The RMSE should be lower than 25.0, but it was {}", rmse); - } - - #[test] - fn test_adaboost_with_diabetes() { - // Load the dataset - let dataset = diabetes(); - - // Parameters for AdaBoost - let n_estimators = 100; - let learning_rate = 0.5; - let max_depth = 3; - let min_samples_split = 5; - let random_state = 42; - - // Create AdaBoostRegressor instance - let mut regressor = AdaBoostRegressor::new(n_estimators, learning_rate, random_state, max_depth, min_samples_split); - - // Fit the regressor to the Diabetes dataset - regressor.fit(dataset.records(), dataset.targets()); - - // Make predictions - let predictions = regressor.predict(dataset.records()); - - // Calculate Mean Squared Error - let mse = (dataset.targets() - &predictions).mapv(|a| a.powi(2)).mean().unwrap_or(0.0); // Calculate Mean Squared Error - let rmse = mse.sqrt(); // Calculate Root Mean Squared Error - println!("Root Mean Squared Error: {}", rmse); - - // Assert to check if RMSE is below a threshold - assert!(rmse < 200.0, "The RMSE should be lower than 200.0, but it was {}", rmse); - } +use ndarray::{Array2, Array1, s}; +use ndarray_csv::Array2Reader; +use std::fs::File; +use rand::rngs::StdRng; +use rand::SeedableRng; +use csv::ReaderBuilder; +use linfa_ensemble::AdaBoostRegressor; +use linfa_datasets::{boston, diabetes}; + +pub fn test_adaboost_with_boston_housing() { + // Load the dataset + let dataset = boston(); // dataset now contains both features and targets + + // Parameters for AdaBoost + let n_estimators = 50; + let learning_rate = 1.0; + let max_depth = 4; + let min_samples_split = 10; + let random_state = 42; // Random state for reproducibility + + // Create AdaBoostRegressor instance + let mut regressor = AdaBoostRegressor::new(n_estimators, learning_rate, random_state, max_depth, min_samples_split); + + // Fit the regressor to the Boston Housing dataset + regressor.fit(dataset.records(), dataset.targets()); + + // Make predictions + let predictions = regressor.predict(dataset.records()); + + // Calculate Mean Squared Error + let mse = (dataset.targets() - &predictions).mapv(|a| a.powi(2)).mean().unwrap_or(0.0); // Calculate Mean Squared Error + let rmse = mse.sqrt(); // Calculate Root Mean Squared Error + println!("Root Mean Squared Error for Boston Housing Dataset: {}", rmse); + + // Assert to check if RMSE is below a threshold + assert!(rmse < 25.0, "The RMSE should be lower than 25.0, but it was {}", rmse); +} + +pub fn test_adaboost_with_diabetes() { + // Load the dataset + let dataset = diabetes(); + + // Parameters for AdaBoost + let n_estimators = 100; + let learning_rate = 0.5; + let max_depth = 3; + let min_samples_split = 5; + let random_state = 42; + + // Create AdaBoostRegressor instance + let mut regressor = AdaBoostRegressor::new(n_estimators, learning_rate, random_state, max_depth, min_samples_split); + // Fit the regressor to the Diabetes dataset + regressor.fit(dataset.records(), dataset.targets()); + // Make predictions + let predictions = regressor.predict(dataset.records()); + + // Calculate Mean Squared Error + let mse = (dataset.targets() - &predictions).mapv(|a| a.powi(2)).mean().unwrap_or(0.0); // Calculate Mean Squared Error + let rmse = mse.sqrt(); // Calculate Root Mean Squared Error + println!("Root Mean Squared Error for diabetes: {}", rmse); + + // Assert to check if RMSE is below a threshold + assert!(rmse < 200.0, "The RMSE should be lower than 200.0, but it was {}", rmse); } + + +fn main(){ + test_adaboost_with_boston_housing(); + test_adaboost_with_diabetes(); +} \ No newline at end of file diff --git a/algorithms/linfa-ensemble/examples/random_forest_regressor.rs b/algorithms/linfa-ensemble/examples/random_forest_regressor.rs index 7b34fedf0..f8ca9e9f7 100644 --- a/algorithms/linfa-ensemble/examples/random_forest_regressor.rs +++ b/algorithms/linfa-ensemble/examples/random_forest_regressor.rs @@ -1,164 +1,102 @@ -// use linfa_ensemble::RandomForestRegressor; -// use ndarray::{Array1, Axis}; -// use rand::seq::SliceRandom; -// use rand::thread_rng; -// use linfa_ensemble::visualization; - -// fn main() { -// // Number of trees in the forest -// let num_trees = 100; -// // Number of features to consider for each split -// let max_features = 4; // Set to the number of features in your dataset or adjust as needed -// // Maximum depth of each tree -// let max_depth = 10; -// // Minimum number of samples required to split a node -// let min_samples_split = 5; - -// // Load the Iris dataset -// let iris = linfa_datasets::diabetes(); -// let iris_cloned = iris.clone(); - -// // Extract features and targets -// let features = iris_cloned.records(); -// let targets = iris.targets().mapv(|x| x as f64); - -// // Shuffle and split the data into train and test -// let mut rng = thread_rng(); -// let mut indices: Vec = (0..features.nrows()).collect(); -// indices.shuffle(&mut rng); -// let split_index = (features.nrows() as f64 * 0.8) as usize; // 60% train, 40% test -// let train_indices = &indices[..split_index]; -// let test_indices = &indices[split_index..]; - -// let train_features = features.select(Axis(0), train_indices); -// let train_targets = targets.select(Axis(0), train_indices); -// let test_features = features.select(Axis(0), test_indices); -// let test_targets = targets.select(Axis(0), test_indices); - -// // Train random forest regressor -// let mut forest = RandomForestRegressor::new(num_trees, max_features, max_depth, min_samples_split); -// forest.fit(&train_features, &train_targets); - -// // Predict on test dataset -// let predictions = forest.predict(&test_features); - -// // Evaluate performance -// let mse = mean_squared_error(&test_targets, &predictions); -// println!("Mean Squared Error: {}", mse); - - - -// println!("Generated graph"); -// } - -// fn mean_squared_error(actual: &Array1, predicted: &Array1) -> f64 { -// let errors = actual - predicted; -// let squared_errors = errors.mapv(|x| x.powi(2)); -// squared_errors.mean().unwrap() -// } - -#[cfg(test)] -mod tests { - use super::*; - use approx::assert_relative_eq; - use linfa_datasets::{iris, diabetes}; - use linfa_ensemble::RandomForestRegressor; - use ndarray::{Array1, Array2, Axis}; // For floating-point assertions - use linfa_ensemble::visualization; - - - fn calculate_rmse(actual: &Array1, predicted: &Array1) -> f64 { - let errors = actual - predicted; - let mse = errors.mapv(|e| e.powi(2)).mean().unwrap(); - mse.sqrt() - } - - fn load_iris_data() -> (Array2, Array1) { - // Load the dataset - let dataset = iris(); +use approx::assert_relative_eq; +use linfa_datasets::{iris, diabetes}; +use linfa_ensemble::RandomForestRegressor; +use ndarray::{Array1, Array2, Axis}; // For floating-point assertions +use linfa_ensemble::visualization; + +fn calculate_rmse(actual: &Array1, predicted: &Array1) -> f64 { + let errors = actual - predicted; + let mse = errors.mapv(|e| e.powi(2)).mean().unwrap(); + mse.sqrt() +} - // Extract features; assuming all rows and all but the last column if last is target - let features = dataset.records().clone(); +fn load_iris_data() -> (Array2, Array1) { + // Load the dataset + let dataset = iris(); - let targets = dataset.targets().mapv(|x| x as f64); + // Extract features; assuming all rows and all but the last column if last is target + let features = dataset.records().clone(); - (features, targets) - } + let targets = dataset.targets().mapv(|x| x as f64); - fn load_diabetes_data() -> (Array2, Array1) { - let dataset = diabetes(); + (features, targets) +} - let features = dataset.records().clone(); - let targets = dataset.targets().mapv(|x| x as f64); +fn load_diabetes_data() -> (Array2, Array1) { + let dataset = diabetes(); - (features, targets) - } + let features = dataset.records().clone(); + let targets = dataset.targets().mapv(|x| x as f64); - #[test] - fn test_random_forest_with_diabetes() { - let (features, targets) = load_diabetes_data(); - - // Split data into training and testing sets - let split_ratio = 0.7; // Using 70% of the data for training - let split_index = (features.nrows() as f64 * split_ratio) as usize; - let (train_features, test_features) = features.view().split_at(Axis(0), split_index); - let (train_targets, test_targets) = targets.view().split_at(Axis(0), split_index); - - let mut forest = RandomForestRegressor::new(100, 10, 5, 10); - // Convert views to owned arrays before passing to fit - forest.fit(&train_features.to_owned(), &train_targets.to_owned()); - let train_predictions = forest.predict(&train_features.to_owned()); - let test_predictions = forest.predict(&test_features.to_owned()); - - // Evaluate the performance on the test set - let test_rmse = calculate_rmse(&test_targets.to_owned(), &test_predictions); - println!("Test RMSE for Diabetes Dataset: {:?}", test_rmse); - - // Assert that the RMSE is below an acceptable threshold - assert!(test_rmse < 70.0, "The RMSE should be lower than 60.0"); - - // Visualization of training and testing results - visualization::plot_scatter( - &train_targets.to_owned(), - &train_predictions, - &test_targets.to_owned(), - &test_predictions, - "diabetes_rf_scatter.png", - ).unwrap(); - } + (features, targets) +} +fn test_random_forest_with_diabetes() { + let (features, targets) = load_diabetes_data(); + + // Split data into training and testing sets + let split_ratio = 0.7; // Using 70% of the data for training + let split_index = (features.nrows() as f64 * split_ratio) as usize; + let (train_features, test_features) = features.view().split_at(Axis(0), split_index); + let (train_targets, test_targets) = targets.view().split_at(Axis(0), split_index); + + let mut forest = RandomForestRegressor::new(150, 10, 5, 10); + forest.fit(&train_features.to_owned(), &train_targets.to_owned()); + let train_predictions = forest.predict(&train_features.to_owned()); + let test_predictions = forest.predict(&test_features.to_owned()); + + // Evaluate the performance on the test set + let test_rmse = calculate_rmse(&test_targets.to_owned(), &test_predictions); + println!("Test RMSE for Diabetes Dataset: {:?}", test_rmse); + + // Assert that the RMSE is below an acceptable threshold + assert!(test_rmse < 70.0, "The RMSE should be lower than 60.0"); + + // Visualization of training and testing results + visualization::plot_scatter( + &train_targets.to_owned(), + &train_predictions, + &test_targets.to_owned(), + &test_predictions, + "diabetes_rf_scatter.png", + ).unwrap(); +} - #[test] - fn test_random_forest_with_iris() { - let (features, targets) = load_iris_data(); +fn test_random_forest_with_iris() { + let (features, targets) = load_iris_data(); - let mut forest = RandomForestRegressor::new(100, 10, 3, 10); - forest.fit(&features, &targets); - let predictions = forest.predict(&features); + let mut forest = RandomForestRegressor::new(100, 10, 3, 10); + forest.fit(&features, &targets); + let predictions = forest.predict(&features); - // Define a tolerance level - let tolerance = 0.1; // Tolerance level for correct classification - let mut correct = 0; - let mut incorrect = 0; + // Define a tolerance level + let tolerance = 0.1; // Tolerance level for correct classification + let mut correct = 0; + let mut incorrect = 0; - // Count correct and incorrect predictions - for (&actual, &predicted) in targets.iter().zip(predictions.iter()) { - if (predicted - actual).abs() < tolerance { - correct += 1; - } else { - incorrect += 1; - } + // Count correct and incorrect predictions + for (&actual, &predicted) in targets.iter().zip(predictions.iter()) { + if (predicted - actual).abs() < tolerance { + correct += 1; + } else { + incorrect += 1; } + } - println!("Correct predictions: {}", correct); - println!("Incorrect predictions: {}", incorrect); + println!("Correct predictions: {}", correct); + println!("Incorrect predictions: {}", incorrect); - let rmse = (&predictions - &targets) - .mapv(|a| a.powi(2)) - .mean() - .unwrap() - .sqrt(); + let rmse = (&predictions - &targets) + .mapv(|a| a.powi(2)) + .mean() + .unwrap() + .sqrt(); - println!("RMSE: {:?}", rmse); - } + println!("Test RMSE for Iris Dataset: {:?}", rmse); +} + + +fn main() { + test_random_forest_with_iris(); + test_random_forest_with_diabetes(); }