From d60506cff6ecb2c3ca36da394846840ac73afb62 Mon Sep 17 00:00:00 2001 From: Michael BAUDIN Date: Sun, 25 Jun 2023 19:23:17 +0200 Subject: [PATCH] Create methods to compute Linear Model LOO and K-Fold residuals --- .../FunctionalChaosValidation.cxx | 1 - .../LinearModel/LinearModelValidation.cxx | 68 +++++++++++++------ .../openturns/LinearModelValidation.hxx | 10 ++- 3 files changed, 55 insertions(+), 24 deletions(-) diff --git a/lib/src/Uncertainty/Algorithm/MetaModel/FunctionalChaos/FunctionalChaosValidation.cxx b/lib/src/Uncertainty/Algorithm/MetaModel/FunctionalChaos/FunctionalChaosValidation.cxx index 270b1e4cc7b..773b975231d 100644 --- a/lib/src/Uncertainty/Algorithm/MetaModel/FunctionalChaos/FunctionalChaosValidation.cxx +++ b/lib/src/Uncertainty/Algorithm/MetaModel/FunctionalChaos/FunctionalChaosValidation.cxx @@ -164,7 +164,6 @@ Sample FunctionalChaosValidation::computeLeaveOneOutResiduals() const const Point marginalOutputPoint(marginalOutputSample.asPoint()); const Sample outputMetamodelMarginal(outputMetamodel.getMarginal(j)); const Point residuals(marginalOutputPoint - outputMetamodelMarginal.asPoint()); - Sample squaredCorrectedResiduals(sampleSize, 1); for (UnsignedInteger i = 0; i < sampleSize; ++i) residualsLOO(i, j) = residuals[i] / (1.0 - diagonalH[i]); } // For output marginal indices diff --git a/lib/src/Uncertainty/Algorithm/MetaModel/LinearModel/LinearModelValidation.cxx b/lib/src/Uncertainty/Algorithm/MetaModel/LinearModel/LinearModelValidation.cxx index d0a193b8a87..2b2f1929d27 100644 --- a/lib/src/Uncertainty/Algorithm/MetaModel/LinearModel/LinearModelValidation.cxx +++ b/lib/src/Uncertainty/Algorithm/MetaModel/LinearModel/LinearModelValidation.cxx @@ -82,6 +82,18 @@ LinearModelResult LinearModelValidation::getLinearModelResult() const /* Compute LOO mean squared error */ Scalar LinearModelValidation::computeLeaveOneOutMeanSquaredError() const +{ + const UnsignedInteger sampleSize = linearModelResult_.getSampleResiduals().getSize(); + const Sample looResiduals(computeLeaveOneOutResiduals()); + Sample squaredDelta(sampleSize, 1); + for (UnsignedInteger i = 0; i < sampleSize; ++i) + squaredDelta(i, 0) = std::pow(looResiduals(i, 0), 2); + const Scalar looError(squaredDelta.computeMean()[0]); + return looError; +} + +/* Compute LOO residuals */ +Sample LinearModelValidation::computeLeaveOneOutResiduals() const { const Sample residualsSample(linearModelResult_.getSampleResiduals()); const UnsignedInteger sampleSize = residualsSample.getSize(); @@ -89,21 +101,39 @@ Scalar LinearModelValidation::computeLeaveOneOutMeanSquaredError() const if (basisSize > sampleSize) throw InvalidArgumentException(HERE) << "Error: the sample size is: " << sampleSize << " which is lower than the basis size: " << basisSize; - const Function lmMetamodel(linearModelResult_.getMetaModel()); const Point diagonalH(linearModelResult_.getLeverages()); const Sample residuals(linearModelResult_.getSampleResiduals()); - Sample squaredDelta(sampleSize, 1); + Sample looResiduals(sampleSize, 1); for (UnsignedInteger i = 0; i < sampleSize; ++i) + looResiduals(i, 0) = residuals(i, 0) / (1.0 - diagonalH[i]); + return looResiduals; +} + +/* Compute K-Fold mean squared error */ +Scalar LinearModelValidation::computeKFoldMeanSquaredError(const UnsignedInteger & kParameter) const +{ + const Sample residualsSample(linearModelResult_.getSampleResiduals()); + const UnsignedInteger sampleSize = residualsSample.getSize(); + const UnsignedInteger basisSize = linearModelResult_.getBasis().getSize(); + KFoldSplitter splitter(sampleSize, kParameter); + Sample kfoldResiduals(computeKFoldResiduals(kParameter)); + Sample squaredResidualsKFoldSample(kParameter, 1); + for (UnsignedInteger foldIndex = 0; foldIndex < kParameter; ++foldIndex) { - const Scalar correctedResidual = residuals(i, 0) / (1.0 - diagonalH[i]); - squaredDelta(i, 0) = std::pow(correctedResidual, 2); + Indices indicesTest; + const Indices indicesTrain(splitter.generate(indicesTest)); + const UnsignedInteger foldDimension = indicesTest.getSize(); + Point kfoldResidualsPoint(foldDimension); + for (UnsignedInteger i1 = 0; i1 < foldDimension; ++i1) + kfoldResidualsPoint[i1] = kfoldResiduals(indicesTest[i1], 0); + squaredResidualsKFoldSample(foldIndex, 0) = kfoldResidualsPoint.normSquare() / indicesTest.getSize(); } - const Scalar looError(squaredDelta.computeMean()[0]); - return looError; + const Scalar kfoldError = squaredResidualsKFoldSample.computeMean()[0]; + return kfoldError; } -/* Compute K-Fold mean squared error */ -Scalar LinearModelValidation::computeKFoldMeanSquaredError(const UnsignedInteger & k) const +/* Compute K-Fold residuals */ +Sample LinearModelValidation::computeKFoldResiduals(const UnsignedInteger & kParameter) const { const Sample residualsSample(linearModelResult_.getSampleResiduals()); const UnsignedInteger sampleSize = residualsSample.getSize(); @@ -112,29 +142,25 @@ Scalar LinearModelValidation::computeKFoldMeanSquaredError(const UnsignedInteger throw InvalidArgumentException(HERE) << "Error: the sample size is: " << sampleSize << " which is lower than the basis size: " << basisSize; SymmetricMatrix projectionMatrix(linearModelResult_.computeProjectionMatrix()); - KFoldSplitter splitter(sampleSize, k); - Sample squaredResidualsKFoldSample(k, 1); - for (UnsignedInteger foldIndex = 0; foldIndex < k; ++foldIndex) + KFoldSplitter splitter(sampleSize, kParameter); + Sample kfoldResiduals(sampleSize, 1); + Sample squaredResidualsKFoldSample(kParameter, 1); + for (UnsignedInteger foldIndex = 0; foldIndex < kParameter; ++foldIndex) { Indices indicesTest; const Indices indicesTrain(splitter.generate(indicesTest)); - // Use global lmAlgorithm to derive the estimate of K-Fold error const UnsignedInteger foldDimension = indicesTest.getSize(); SymmetricMatrix projectionKFoldMatrix(foldDimension); for (UnsignedInteger i1 = 0; i1 < foldDimension; ++i1) - { for (UnsignedInteger i2 = 0; i2 < 1 + i1; ++i2) - { projectionKFoldMatrix(i1, i2) = projectionMatrix(indicesTest[i1], indicesTest[i2]); - } - } const SymmetricMatrix reducedMatrix(IdentityMatrix(indicesTest.getSize()) - projectionKFoldMatrix); const Sample residualsSampleKFoldTest(residualsSample.select(indicesTest)); const Point residualsKFold(reducedMatrix.solveLinearSystem(residualsSampleKFoldTest.asPoint())); - squaredResidualsKFoldSample(foldIndex, 0) = residualsKFold.normSquare() / indicesTest.getSize(); + for (UnsignedInteger i1 = 0; i1 < foldDimension; ++i1) + kfoldResiduals(indicesTest[i1], 0) = residualsKFold[i1]; } - const Scalar kfoldError = squaredResidualsKFoldSample.computeMean()[0]; - return kfoldError; + return kfoldResiduals; } /* Compute LOO R2 score */ @@ -148,10 +174,10 @@ Point LinearModelValidation::computeLeaveOneOutR2Score() const } /* Compute K-Fold R2 score */ -Point LinearModelValidation::computeKFoldR2Score(const UnsignedInteger & k) const +Point LinearModelValidation::computeKFoldR2Score(const UnsignedInteger & kParameter) const { Point meanSquaredError(1); - meanSquaredError[0] = computeKFoldMeanSquaredError(k); + meanSquaredError[0] = computeKFoldMeanSquaredError(kParameter); Sample outputSample(linearModelResult_.getOutputSample()); Point r2Score(MetaModelValidation::ComputeR2Score(meanSquaredError, outputSample)); return r2Score; diff --git a/lib/src/Uncertainty/Algorithm/MetaModel/LinearModel/openturns/LinearModelValidation.hxx b/lib/src/Uncertainty/Algorithm/MetaModel/LinearModel/openturns/LinearModelValidation.hxx index dbaab0ddcd2..96232998473 100644 --- a/lib/src/Uncertainty/Algorithm/MetaModel/LinearModel/openturns/LinearModelValidation.hxx +++ b/lib/src/Uncertainty/Algorithm/MetaModel/LinearModel/openturns/LinearModelValidation.hxx @@ -59,17 +59,23 @@ public: /** Linear model accessor */ LinearModelResult getLinearModelResult() const; + /** Leave-one-out residuals accessor */ + Sample computeLeaveOneOutResiduals() const; + /** Leave-one-out error accessor */ Scalar computeLeaveOneOutMeanSquaredError() const; /** K-Fold error accessor */ - Scalar computeKFoldMeanSquaredError(const UnsignedInteger & k = ResourceMap::GetAsUnsignedInteger("LinearModelValidation-DefaultKFoldParameter")) const; + Scalar computeKFoldMeanSquaredError(const UnsignedInteger & kParameter = ResourceMap::GetAsUnsignedInteger("LinearModelValidation-DefaultKFoldParameter")) const; /** Compute LOO R2 score */ Point computeLeaveOneOutR2Score() const; + /** Compute K-Fold residuals */ + Sample computeKFoldResiduals(const UnsignedInteger & kParameter = ResourceMap::GetAsUnsignedInteger("LinearModelValidation-DefaultKFoldParameter")) const; + /** Compute K-Fold R2 score */ - Point computeKFoldR2Score(const UnsignedInteger & k) const; + Point computeKFoldR2Score(const UnsignedInteger & kParameter = ResourceMap::GetAsUnsignedInteger("LinearModelValidation-DefaultKFoldParameter")) const; /** Method save() stores the object through the StorageManager */ void save(Advocate & adv) const override;