diff --git a/Project.toml b/Project.toml index b49a5cd..70ad553 100644 --- a/Project.toml +++ b/Project.toml @@ -1,7 +1,7 @@ name = "PosDefManifoldML" uuid = "a07f4532-e2c9-11e9-2ea2-6d98fe4a1f21" authors = ["Marco-Congedo "] -version = "0.3.5" +version = "0.3.6" [deps] Dates = "ade2ca70-3891-5945-98fb-dc099432e06a" diff --git a/README.md b/README.md index 6c9c794..707e4b2 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,12 @@ # PosDefManifoldML.jl -| **Documentation** | +| **Documentation** | |:---------------------------------------:| | [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://Marco-Congedo.github.io/PosDefManifoldML.jl/dev) | -**PosDefManifoldML** is a [**Julia**](https://julialang.org/) package for classifying data in the [**Riemannian manifolds**](https://en.wikipedia.org/wiki/Riemannian_manifold) **P** of real or complex [**positive definite matrices**](https://en.wikipedia.org/wiki/Definiteness_of_a_matrix). It is based on the [PosDefManifold.jl](https://github.com/Marco-Congedo/PosDefManifold.jl) and [GLMNet.jl](https://github.com/JuliaStats/GLMNet.jl) packages. +**PosDefManifoldML** is a [**Julia**](https://julialang.org/) package for classifying data in the [**Riemannian manifolds**](https://en.wikipedia.org/wiki/Riemannian_manifold) **P** of real or complex [**positive definite matrices**](https://en.wikipedia.org/wiki/Definiteness_of_a_matrix). It is based on the [PosDefManifold.jl](https://github.com/Marco-Congedo/PosDefManifold.jl) and [GLMNet.jl](https://github.com/JuliaStats/GLMNet.jl) packages. -[Machine learning](https://en.wikipedia.org/wiki/Machine_learning) (ML) in **P** can either operate directly on the manifold, which requires dedicated Riemannian methods, or the data can be projected onto the **tangent space**, where standard (Euclidean) machine learning methods apply (e.g., linear discriminant analysis, support-vector machine, logistic regression, random forest, deep neuronal networks, etc). 
+[Machine learning](https://en.wikipedia.org/wiki/Machine_learning) (ML) in **P** can either operate directly on the manifold, which requires dedicated Riemannian methods, or the data can be projected onto the **tangent space**, where standard (Euclidean) machine learning methods apply (e.g., linear discriminant analysis, support-vector machine, logistic regression, random forest, deep neural networks, etc). ![](/docs/src/assets/Fig1.jpg) @@ -82,6 +82,24 @@ model=fit(ENLR(), PTr, yTr; alpha=0.5) # average accuracy obtained by 10-fold cross-validation: cv = cvAcc(ENLR(), PTr, yTr; alpha=0.5) +# (1) +# create and fit (train) an SVM model +# finding the best model by cross-validation: +model=fit(SVM(), PTr, yTr) +# +# predict labels (classify the testing set) using the 'best' model: +yPred=predict(model, PTe, :l) +# +# prediction error in percent +predictErr(yTe, yPred) +# +# ... + +# (2) +# average accuracy obtained by 10-fold cross-validation: +cv = cvAcc(SVM(), PTr, yTr) + + ``` ## About the Authors @@ -92,9 +110,6 @@ author, is a research scientist of [CNRS](http://www.cnrs.fr/en) (Centre Nationa Saloni Jain is a student at the [Indian Institute of Technology, Kharagpur](http://www.iitkgp.ac.in/), India. 
-| **Documentation** | +| **Documentation** | |:---------------------------------------:| | [![](https://img.shields.io/badge/docs-dev-blue.svg)](https://Marco-Congedo.github.io/PosDefManifoldML.jl/dev) | - - - diff --git a/docs/Project.toml b/docs/Project.toml index cf0b32b..865d68b 100644 --- a/docs/Project.toml +++ b/docs/Project.toml @@ -1,5 +1,5 @@ authors = ["Marco Congedo, Saloni Jain, Anton Andreev"] -version = "0.3.5" +version = "0.3.6" [deps] Documenter = "e30172f5-a6a5-5a46-863b-614d45cd2de4" diff --git a/src/PosDefManifoldML.jl b/src/PosDefManifoldML.jl index 6aeb31b..65f1221 100644 --- a/src/PosDefManifoldML.jl +++ b/src/PosDefManifoldML.jl @@ -1,5 +1,5 @@ # Unit "simulations.jl" of the PosDefManifoldML Package for Julia language -# v 0.3.5 - last update 31st of January 2020 +# v 0.3.6 - last update February 13 2020 # # MIT License # Copyright (c) 2019, diff --git a/src/cv.jl b/src/cv.jl index 8dec6bf..be1b80b 100644 --- a/src/cv.jl +++ b/src/cv.jl @@ -80,14 +80,15 @@ CVacc(s::String)=CVacc(s, nothing, nothing, nothing, nothing, nothing, nothing, """ ``` -function cvAcc(model :: MLmodel, +function cvAcc(model :: MLmodel, 𝐏Tr :: ℍVector, - yTr :: IntVector; - nFolds :: Int = min(10, length(yTr)÷3), - scoring :: Symbol = :b, - shuffle :: Bool = false, - verbose :: Bool = true, - outModels :: Bool = false, + yTr :: IntVector; + nFolds :: Int = min(10, length(yTr)÷3), + scoring :: Symbol = :b, + shuffle :: Bool = false, + verbose :: Bool = true, + outModels :: Bool = false, + ⏩ :: Bool = true, fitArgs...) ``` Cross-validation accuracy for a machine learning `model`: @@ -115,10 +116,14 @@ If `verbose` is true (default), information is printed in the REPL. This option is included to allow repeated calls to this function without crowding the REPL. 
-if `outModels` is true return a 2-tuple holding a [`CVacc`](@ref) structure +If `outModels` is true return a 2-tuple holding a [`CVacc`](@ref) structure and a `nFolds`-vector of the model fitted for each fold, otherwise (default), return only a [`CVacc`](@ref) structure. +If `⏩` is true, the folds and some other computations are multi-threaded. +It is true by default. Set it to false if there are problems in running +this function. + `fitArgs` are optional keyword arguments that are passed to the [`fit`](@ref) function called for each fold of the cross-validation. For each machine learning model, all optional keyword arguments of @@ -176,14 +181,15 @@ cv=cvAcc(ENLR(Fisher), PTr, yTr; shuffle=true, nFolds=8, w=:b) ``` """ -function cvAcc(model :: MLmodel, 𝐏Tr :: ℍVector, - yTr :: IntVector; - nFolds :: Int = min(10, length(yTr)÷3), - scoring :: Symbol = :b, - shuffle :: Bool = false, - verbose :: Bool = true, - outModels :: Bool = false, + yTr :: IntVector; + nFolds :: Int = min(10, length(yTr)÷3), + scoring :: Symbol = :b, + shuffle :: Bool = false, + verbose :: Bool = true, + outModels :: Bool = false, + ⏩ :: Bool = true, fitArgs...) 
⌚ = now() @@ -191,7 +197,7 @@ function cvAcc(model :: MLmodel, z = length(unique(yTr)) # number of classes 𝐐 = [ℍ[] for i=1:z] # data arranged by class - for j=1:length(𝐏Tr) @inbounds push!(𝐐[yTr[j]], 𝐏Tr[j]) end + for j=1:length(𝐏Tr) push!(𝐐[yTr[j]], 𝐏Tr[j]) end # pre-allocated memory 𝐐Tr = [ℍ[] for f=1:nFolds] # training data in 1 vector per folds @@ -205,7 +211,11 @@ function cvAcc(model :: MLmodel, ℳ=Vector{MLmodel}(undef, nFolds) # ML models # get indeces for all CVs (separated for each class) - @threads for i=1:z indTr[i], indTe[i] = cvSetup(length(𝐐[i]), nFolds; shuffle=shuffle) end + if ⏩ + @threads for i=1:z indTr[i], indTe[i] = cvSetup(length(𝐐[i]), nFolds; shuffle=shuffle) end + else + for i=1:z indTr[i], indTe[i] = cvSetup(length(𝐐[i]), nFolds; shuffle=shuffle) end + end fitArgs✔=() # make sure the user doesn't pass arguments that skrew up the cv @@ -230,16 +240,16 @@ function cvAcc(model :: MLmodel, # This is a quick approximation since the initialization is not critical, # but it hastens the computation time since itera. alg. require less iters. if model.metric in (Fisher, logdet0) - M0=means(logEuclidean, 𝐐; ⏩=true) - if model isa TSmodel M0=mean(logEuclidean, M0; ⏩=true) end + M0=means(logEuclidean, 𝐐; ⏩=⏩) + if model isa TSmodel M0=mean(logEuclidean, M0; ⏩=⏩) end elseif model.metric == Wasserstein - M0=ℍVector([generalizedMean(𝐐[i], 0.5; ⏩=true) for i=1:length(𝐐)]) - if model isa Tsmodel M0=generalizedMean(M0, 0.5; ⏩=true) end + M0=ℍVector([generalizedMean(𝐐[i], 0.5; ⏩=⏩) for i=1:length(𝐐)]) + if model isa TSmodel M0=generalizedMean(M0, 0.5; ⏩=⏩) end else M0=nothing; end # perform cv - @threads for f=1:nFolds + function fold(f::Int) @static if VERSION >= v"1.3" print(defaultFont, rand(dice), " ") end # print a random dice in the REPL # get testing data for current fold @@ -270,8 +280,9 @@ function cvAcc(model :: MLmodel, s[f] = 𝚺(CM[f][i, i] for i=1:z)/ sumCM CM[f]/=sumCM # confusion matrices in proportions - end + + ⏩ ? 
(@threads for f=1:nFolds fold(f) end) : (for f=1:nFolds fold(f) end) verbose && println(greyFont, "\nDone in ", defaultFont, now()-⌚) # compute mean and sd (balanced) accuracy