Merge pull request #65 from Marco-Congedo/dev
v0.3.6
Marco-Congedo authored Feb 13, 2020
2 parents 224f108 + 611284c commit 5c775a1
Showing 5 changed files with 59 additions and 33 deletions.
2 changes: 1 addition & 1 deletion Project.toml
@@ -1,7 +1,7 @@
name = "PosDefManifoldML"
uuid = "a07f4532-e2c9-11e9-2ea2-6d98fe4a1f21"
authors = ["Marco-Congedo <marco.congedo@gmail.com>"]
-version = "0.3.5"
+version = "0.3.6"

[deps]
Dates = "ade2ca70-3891-5945-98fb-dc099432e06a"
29 changes: 22 additions & 7 deletions README.md
@@ -1,12 +1,12 @@
# PosDefManifoldML.jl

| **Documentation** |
|:---------------------------------------:|
| [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://Marco-Congedo.github.io/PosDefManifoldML.jl/dev) |

**PosDefManifoldML** is a [**Julia**](https://julialang.org/) package for classifying data in the [**Riemannian manifolds**](https://en.wikipedia.org/wiki/Riemannian_manifold) **P** of real or complex [**positive definite matrices**](https://en.wikipedia.org/wiki/Definiteness_of_a_matrix). It is based on the [PosDefManifold.jl](https://github.com/Marco-Congedo/PosDefManifold.jl) and [GLMNet.jl](https://github.com/JuliaStats/GLMNet.jl) packages.

[Machine learning](https://en.wikipedia.org/wiki/Machine_learning) (ML) in **P** can either operate directly on the manifold, which requires dedicated Riemannian methods, or the data can be projected onto the **tangent space**, where standard (Euclidean) machine learning methods apply (e.g., linear discriminant analysis, support-vector machines, logistic regression, random forests, deep neural networks, etc.).

![](/docs/src/assets/Fig1.jpg)

@@ -82,6 +82,24 @@ model=fit(ENLR(), PTr, yTr; alpha=0.5)
# average accuracy obtained by 10-fold cross-validation:
cv = cvAcc(ENLR(), PTr, yTr; alpha=0.5)
# (1)
# create and fit (train) an SVM model
# finding the best model by cross-validation:
model=fit(SVM(), PTr, yTr)
#
# predict labels (classify the testing set) using the 'best' model:
yPred=predict(model, PTe, :l)
#
# prediction error in percent
predictErr(yTe, yPred)
#
# ...
# (2)
# average accuracy obtained by 10-fold cross-validation:
cv = cvAcc(SVM(), PTr, yTr)
```
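As a side note for readers new to the API: the percent error that `predictErr` reports in the snippet above can be sketched in a couple of lines. This is a hypothetical re-implementation for illustration only; the function name `percent_error` is not part of the package:

```julia
# Sketch of a percent-misclassification measure, analogous to what
# `predictErr` reports (illustrative only; this name is hypothetical).
percent_error(yTrue, yPred) = 100 * count(yTrue .!= yPred) / length(yTrue)

yTrue = [1, 1, 2, 2, 1]
yPred = [1, 2, 2, 2, 1]
percent_error(yTrue, yPred)  # → 20.0 (1 error out of 5 labels)
```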

## About the Authors
@@ -92,9 +110,6 @@ author, is a research scientist of [CNRS](http://www.cnrs.fr/en) (Centre National de la Recherche Scientifique)
Saloni Jain is a student at the
[Indian Institute of Technology, Kharagpur](http://www.iitkgp.ac.in/), India.

| **Documentation** |
|:---------------------------------------:|
| [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://Marco-Congedo.github.io/PosDefManifoldML.jl/dev) |



2 changes: 1 addition & 1 deletion docs/Project.toml
@@ -1,5 +1,5 @@
authors = ["Marco Congedo, Saloni Jain, Anton Andreev"]
-version = "0.3.5"
+version = "0.3.6"

[deps]
Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4"
2 changes: 1 addition & 1 deletion src/PosDefManifoldML.jl
@@ -1,5 +1,5 @@
# Unit "PosDefManifoldML.jl" of the PosDefManifoldML Package for Julia language
-# v 0.3.5 - last update 31st of January 2020
+# v 0.3.6 - last update February 13 2020
#
# MIT License
# Copyright (c) 2019,
57 changes: 34 additions & 23 deletions src/cv.jl
@@ -80,14 +80,15 @@ CVacc(s::String)=CVacc(s, nothing, nothing, nothing, nothing, nothing, nothing,

"""
```
function cvAcc(model     :: MLmodel,
               𝐏Tr       :: ℍVector,
               yTr       :: IntVector;
               nFolds    :: Int    = min(10, length(yTr)÷3),
               scoring   :: Symbol = :b,
               shuffle   :: Bool   = false,
               verbose   :: Bool   = true,
               outModels :: Bool   = false,
               ⏩        :: Bool   = true,
               fitArgs...)
```
Cross-validation accuracy for a machine learning `model`:
@@ -115,10 +116,14 @@ If `verbose` is true (default), information is printed in the REPL.
This option is included to allow repeated calls to this function
without crowding the REPL.
-if `outModels` is true return a 2-tuple holding a [`CVacc`](@ref) structure
+If `outModels` is true return a 2-tuple holding a [`CVacc`](@ref) structure
and a `nFolds`-vector of the model fitted for each fold,
otherwise (default), return only a [`CVacc`](@ref) structure.
If `⏩` is true (default), the folds and some other computations
are multi-threaded. Set it to false if you run into problems
executing this function.
`fitArgs` are optional keyword arguments that are passed to the
[`fit`](@ref) function called for each fold of the cross-validation.
For each machine learning model, all optional keyword arguments of
Expand Down Expand Up @@ -176,22 +181,23 @@ cv=cvAcc(ENLR(Fisher), PTr, yTr; shuffle=true, nFolds=8, w=:b)
```
"""
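For context on the `scoring` option in the docstring above: `:b` refers to balanced accuracy, the mean of per-class recalls computed from a confusion matrix. A minimal sketch of that quantity (illustrative only, not the package implementation; the name `balanced_accuracy` is hypothetical):

```julia
# Balanced accuracy from a confusion matrix CM, where CM[i, j] counts
# observations of class i predicted as class j (illustrative sketch).
balanced_accuracy(CM) =
    sum(CM[i, i] / sum(CM[i, :]) for i in 1:size(CM, 1)) / size(CM, 1)

CM = [3 1; 0 4]        # two-class example: recalls 3/4 and 4/4
balanced_accuracy(CM)  # → 0.875
```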
function cvAcc(model     :: MLmodel,
               𝐏Tr       :: ℍVector,
               yTr       :: IntVector;
               nFolds    :: Int    = min(10, length(yTr)÷3),
               scoring   :: Symbol = :b,
               shuffle   :: Bool   = false,
               verbose   :: Bool   = true,
               outModels :: Bool   = false,
               ⏩        :: Bool   = true,
               fitArgs...)

    ⌚ = now()
verbose && println(greyFont, "\nPerforming $(nFolds)-fold cross-validation...")

z = length(unique(yTr)) # number of classes
𝐐 = [ℍ[] for i=1:z] # data arranged by class
-    for j=1:length(𝐏Tr) @inbounds push!(𝐐[yTr[j]], 𝐏Tr[j]) end
+    for j=1:length(𝐏Tr) push!(𝐐[yTr[j]], 𝐏Tr[j]) end

# pre-allocated memory
𝐐Tr = [ℍ[] for f=1:nFolds] # training data in 1 vector per folds
@@ -205,7 +211,11 @@
    ℳ=Vector{MLmodel}(undef, nFolds) # ML models

    # get indices for all CVs (separated for each class)
-    @threads for i=1:z indTr[i], indTe[i] = cvSetup(length(𝐐[i]), nFolds; shuffle=shuffle) end
+    if ⏩
+        @threads for i=1:z indTr[i], indTe[i] = cvSetup(length(𝐐[i]), nFolds; shuffle=shuffle) end
+    else
+        for i=1:z indTr[i], indTe[i] = cvSetup(length(𝐐[i]), nFolds; shuffle=shuffle) end
+    end

fitArgs✔=()
    # make sure the user doesn't pass arguments that screw up the cv
@@ -230,16 +240,16 @@
# This is a quick approximation since the initialization is not critical,
# but it hastens the computation time since itera. alg. require less iters.
        if model.metric in (Fisher, logdet0)
-            M0=means(logEuclidean, 𝐐; ⏩=true)
-            if model isa TSmodel M0=mean(logEuclidean, M0; ⏩=true) end
+            M0=means(logEuclidean, 𝐐; ⏩=⏩)
+            if model isa TSmodel M0=mean(logEuclidean, M0; ⏩=⏩) end
        elseif model.metric == Wasserstein
-            M0=ℍVector([generalizedMean(𝐐[i], 0.5; ⏩=true) for i=1:length(𝐐)])
-            if model isa TSmodel M0=generalizedMean(M0, 0.5; ⏩=true) end
+            M0=ℍVector([generalizedMean(𝐐[i], 0.5; ⏩=⏩) for i=1:length(𝐐)])
+            if model isa TSmodel M0=generalizedMean(M0, 0.5; ⏩=⏩) end
        else M0=nothing
        end

# perform cv
-    @threads for f=1:nFolds
+    function fold(f::Int)
@static if VERSION >= v"1.3" print(defaultFont, rand(dice), " ") end # print a random dice in the REPL

# get testing data for current fold
@@ -270,8 +280,9 @@
s[f] = 𝚺(CM[f][i, i] for i=1:z)/ sumCM

CM[f]/=sumCM # confusion matrices in proportions

end

    ⏩ ? (@threads for f=1:nFolds fold(f) end) : (for f=1:nFolds fold(f) end)
verbose && println(greyFont, "\nDone in ", defaultFont, now()-⌚)

# compute mean and sd (balanced) accuracy
