Linfa's 0.4.0 release (#127)

* Bump version 0.4.0, add release notes * Add t-SNE example to release notes * Add example for t-SNE and start writing preprocessing example * Fix typo * Add example for TF-IDF text preprocessing * Explain that target construction is omitted and traits changes * Add contribute section on the new lapack traits * Link to crates.io in the base crate documentation * Link to crates.io in the base crate documentation * Add mnist example * Run rustfmt * Fix mnist example
rust-ml · Apr 28, 2021 · ce8a815 · ce8a815
1 parent a5a479f
commit ce8a815
Show file tree

Hide file tree

Showing 25 changed files with 349 additions and 53 deletions.
diff --git a/CONTRIBUTE.md b/CONTRIBUTE.md
@@ -128,3 +128,15 @@ fn main() {
     /// ...
 }
 ```
+
+## Use the lapack trait bound
+
+When you want to implement an algorithm which requires the [Lapack](https://docs.rs/ndarray-linalg/0.13.1/ndarray_linalg/types/trait.Lapack.html) bound, you could add the trait bound to the standard `linfa::Float` bound, e.g. `F: Float + Scalar + Lapack`. If you do that, you will currently run into conflicting function definitions of [num_traits::Float](https://docs.rs/num-traits/0.2.14/num_traits/float/trait.Float.html) and [cauchy::Scalar](https://docs.rs/cauchy/0.4.0/cauchy/trait.Scalar.html), with the first defined for real values and the second for complex values.
+
+If you want to avoid that, you can use the `linfa::dataset::{WithLapack, WithoutLapack}` traits, which add the lapack trait bound for a block and then remove it again so that the conflicts are avoided. For example:
+```rust
+let decomp = covariance.with_lapack().cholesky(UPLO::Lower)?;
+let sol = decomp
+     .solve_triangular(UPLO::Lower, Diag::NonUnit, &Array::eye(n_features))?
+     .without_lapack();
+```
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa"
-version = "0.3.1"
+version = "0.4.0"
 authors = [
     "Luca Palmieri <rust@lpalmieri.com>",
     "Lorenz Schmidt <bytesnake@mailbox.org>",

diff --git a/algorithms/linfa-bayes/Cargo.toml b/algorithms/linfa-bayes/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-bayes"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["VasanthakumarV <vasanth260m12@gmail.com>"]
 description = "Collection of Naive Bayes Algorithms"
 edition = "2018"
@@ -15,8 +15,8 @@ ndarray = { version = "0.14" , features = ["blas", "approx"]}
 ndarray-stats = "0.4"
 thiserror = "1"
 
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
 approx = "0.4"
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["winequality"] }
diff --git a/algorithms/linfa-clustering/Cargo.toml b/algorithms/linfa-clustering/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-clustering"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2018"
 authors = [
     "Luca Palmieri <rust@lpalmieri.com>",
@@ -36,7 +36,8 @@ num-traits = "0.2"
 rand_isaac = "0.3"
 thiserror = "1"
 partitions = "0.2.4"
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
+
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
 
 [dev-dependencies]
 ndarray-npy = { version = "0.7", default-features = false }

diff --git a/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs b/algorithms/linfa-clustering/src/gaussian_mixture/algorithm.rs
@@ -257,8 +257,6 @@ impl<F: Float> GaussianMixtureModel<F> {
         let n_features = covariances.shape()[1];
         let mut precisions_chol = Array::zeros((n_clusters, n_features, n_features));
         for (k, covariance) in covariances.outer_iter().enumerate() {
-            dbg!(&covariance.shape());
-            dbg!(&covariance.with_lapack().shape());
             let decomp = covariance.with_lapack().cholesky(UPLO::Lower)?;
             let sol = decomp
                 .solve_triangular(UPLO::Lower, Diag::NonUnit, &Array::eye(n_features))?

diff --git a/algorithms/linfa-elasticnet/Cargo.toml b/algorithms/linfa-elasticnet/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-elasticnet"
-version = "0.3.1"
+version = "0.4.0"
 authors = [
     "Paul Körbitz / Google <koerbitz@google.com>",
     "Lorenz Schmidt <bytesnake@mailbox.org>"
@@ -35,9 +35,9 @@ num-traits = "0.2"
 approx = "0.4"
 thiserror = "1"
 
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["diabetes"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["diabetes"] }
 ndarray-rand = "0.13"
 rand_isaac = "0.3"
diff --git a/algorithms/linfa-hierarchical/Cargo.toml b/algorithms/linfa-hierarchical/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-hierarchical"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt <lorenz.schmidt@mailbox.org>"]
 edition = "2018"
 
@@ -17,10 +17,10 @@ categories = ["algorithms", "mathematics", "science"]
 ndarray = { version = "0.14", default-features = false }
 kodama = "0.2"
 
-linfa = { version = "0.3.1", path = "../.." }
-linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
+linfa = { version = "0.4.0", path = "../.." }
+linfa-kernel = { version = "0.4.0", path = "../linfa-kernel" }
 
 [dev-dependencies]
 rand = "0.8"
 ndarray-rand = "0.13"
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["iris"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["iris"] }
diff --git a/algorithms/linfa-ica/Cargo.toml b/algorithms/linfa-ica/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-ica"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["VasanthakumarV <vasanth260m12@gmail.com>"]
 description = "A collection of Independent Component Analysis (ICA) algorithms"
 edition = "2018"
@@ -32,7 +32,7 @@ num-traits = "0.2"
 rand_isaac = "0.3"
 thiserror = "1"
 
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
 
 [dev-dependencies]
 ndarray-npy = { version = "0.7", default-features = false }

diff --git a/algorithms/linfa-kernel/Cargo.toml b/algorithms/linfa-kernel/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-kernel"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt <bytesnake@mailbox.org>"]
 description = "Kernel methods for non-linear algorithms"
 edition = "2018"
@@ -30,4 +30,4 @@ sprs = { version="0.9.4", default-features = false }
 hnsw = "0.6"
 space = "0.10"
 
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
diff --git a/algorithms/linfa-linear/Cargo.toml b/algorithms/linfa-linear/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-linear"
-version = "0.3.1"
+version = "0.4.0"
 authors = [
     "Paul Körbitz / Google <koerbitz@google.com>",
     "VasanthakumarV <vasanth260m12@gmail.com>"
@@ -25,8 +25,8 @@ argmin = { version = "0.4", features = ["ndarrayl"] }
 serde = { version = "1.0", default-features = false, features = ["derive"] }
 thiserror = "1"
 
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["diabetes"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["diabetes"] }
 approx = "0.4"
diff --git a/algorithms/linfa-logistic/Cargo.toml b/algorithms/linfa-logistic/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-logistic"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Paul Körbitz / Google <koerbitz@google.com>"]
 
 description = "A Machine Learning framework for Rust"
@@ -21,8 +21,8 @@ argmin = { version = "0.4", features = ["ndarrayl"] }
 serde = "1.0"
 thiserror = "1"
 
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
 approx = "0.4"
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["winequality"] }
diff --git a/algorithms/linfa-pls/Cargo.toml b/algorithms/linfa-pls/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-pls"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2018"
 authors = ["relf <remi.lafage@onera.fr>"]
 description = "Partial Least Squares family methods"
@@ -32,9 +32,9 @@ rand_isaac = "0.3"
 num-traits = "0.2"
 paste = "1.0"
 thiserror = "1"
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["linnerud"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["linnerud"] }
 rand_isaac = "0.3"
 approx = "0.4"
diff --git a/algorithms/linfa-pls/README.md b/algorithms/linfa-pls/README.md
@@ -0,0 +1,27 @@
+# Partial Least Squares
+
+`linfa-pls` provides a pure Rust implementation of the partial least squares algorithm family.
+
+## The Big Picture
+
+`linfa-pls` is a crate in the [`linfa`](https://crates.io/crates/linfa) ecosystem, an effort to create a toolkit for classical Machine Learning implemented in pure Rust, akin to Python's `scikit-learn`.
+
+## Current state
+
+`linfa-pls` currently provides an implementation of the following methods: 
+
+ - Partial Least Squares
+
+## Examples
+
+There is a usage example in the `examples/` directory. The example uses a BLAS backend; to run it with the `intel-mkl` library, do:
+
+```bash
+$ cargo run --example pls_regression --features linfa/intel-mkl-system
+```
+
+## License
+Dual-licensed to be compatible with the Rust project.
+
+Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be copied, modified, or distributed except according to those terms.
+
diff --git a/algorithms/linfa-preprocessing/Cargo.toml b/algorithms/linfa-preprocessing/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-preprocessing"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Sauro98 <ivadonadi98@gmail.com>"]
 
 description = "A Machine Learning framework for Rust"
@@ -17,7 +17,7 @@ categories = ["algorithms", "mathematics", "science"]
 
 [dependencies]
 
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
 ndarray = { version = "0.14", default-features = false, features = ["approx", "blas"] }
 ndarray-linalg = { version = "0.13" }
 ndarray-stats = "0.4"
@@ -30,8 +30,8 @@ encoding = "0.2"
 sprs =  { version="0.9.4", default-features = false }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["diabetes", "winequality"] }
-linfa-bayes = { version = "0.3.1", path = "../linfa-bayes" }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["diabetes", "winequality"] }
+linfa-bayes = { version = "0.4.0", path = "../linfa-bayes" }
 iai = "0.1" 
 curl = "0.4.35"
 flate2 = "1.0.20"

diff --git a/algorithms/linfa-reduction/Cargo.toml b/algorithms/linfa-reduction/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-reduction"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt <bytesnake@mailbox.org>"]
 description = "A collection of dimensionality reduction techniques"
 edition = "2018"
@@ -31,11 +31,11 @@ ndarray-rand = "0.13"
 num-traits = "0.2"
 thiserror = "1"
 
-linfa = { version = "0.3.1", path = "../..", features = ["ndarray-linalg"] }
-linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
+linfa = { version = "0.4.0", path = "../..", features = ["ndarray-linalg"] }
+linfa-kernel = { version = "0.4.0", path = "../linfa-kernel" }
 
 [dev-dependencies]
 rand = { version = "0.8", features = ["small_rng"] }
 ndarray-npy = { version = "0.7", default-features = false }
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["iris"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["iris"] }
 approx = { version = "0.4", default-features = false, features = ["std"] }
diff --git a/algorithms/linfa-svm/Cargo.toml b/algorithms/linfa-svm/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-svm"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2018"
 authors = ["Lorenz Schmidt <lorenz.schmidt@mailbox.org>"]
 description = "Support Vector Machines"
@@ -29,9 +29,9 @@ ndarray-rand = "0.13"
 num-traits = "0.2"
 thiserror = "1"
 
-linfa = { version = "0.3.1", path = "../.." }
-linfa-kernel = { version = "0.3.1", path = "../linfa-kernel" }
+linfa = { version = "0.4.0", path = "../.." }
+linfa-kernel = { version = "0.4.0", path = "../linfa-kernel" }
 
 [dev-dependencies]
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["winequality", "diabetes"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["winequality", "diabetes"] }
 rand_isaac = "0.3"
diff --git a/algorithms/linfa-trees/Cargo.toml b/algorithms/linfa-trees/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-trees"
-version = "0.3.1"
+version = "0.4.0"
 edition = "2018"
 authors = ["Moss Ebeling <moss@banay.me>"]
 description = "A collection of tree-based algorithms"
@@ -27,14 +27,14 @@ features = ["std", "derive"]
 ndarray = { version = "0.14" , features = ["rayon", "approx"]}
 ndarray-rand = "0.13"
 
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
 rand = { version = "0.8", features = ["small_rng"] }
 criterion = "0.3"
 approx = "0.4"
 
-linfa-datasets = { version = "0.3.1", path = "../../datasets/", features = ["iris"] }
+linfa-datasets = { version = "0.4.0", path = "../../datasets/", features = ["iris"] }
 
 [[bench]]
 name = "decision_tree"

diff --git a/algorithms/linfa-tsne/Cargo.toml b/algorithms/linfa-tsne/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "linfa-tsne"
-version = "0.3.1"
+version = "0.4.0"
 authors = ["Lorenz Schmidt <lorenz.schmidt@mailbox.org>"]
 edition = "2018"
 
@@ -10,7 +10,7 @@ license = "MIT/Apache-2.0"
 repository = "https://github.com/rust-ml/linfa"
 readme = "README.md"
 
-keywords = ["tsne", "data visualization", "clustering", "machine-learning", "linfa"]
+keywords = ["tsne", "visualization", "clustering", "machine-learning", "linfa"]
 categories = ["algorithms", "mathematics", "science"]
 
 [dependencies]
@@ -19,11 +19,12 @@ ndarray = { version = "0.14", default-features = false }
 ndarray-rand = "0.13"
 bhtsne = "0.4.0"
 
-linfa = { version = "0.3.1", path = "../.." }
+linfa = { version = "0.4.0", path = "../.." }
 
 [dev-dependencies]
 rand = "0.8"
 approx = "0.4"
+mnist = { version = "0.4", features = ["download"] }
 
-linfa-datasets = { version = "0.3.1", path = "../../datasets", features = ["iris"] }
-linfa-reduction = { version = "0.3.1", path = "../linfa-reduction" }
+linfa-datasets = { version = "0.4.0", path = "../../datasets", features = ["iris"] }
+linfa-reduction = { version = "0.4.0", path = "../linfa-reduction" }