From 544254668051efc11c0a9eb2d87b338bc3ade2f9 Mon Sep 17 00:00:00 2001 From: Michael BAUDIN Date: Wed, 1 May 2024 11:58:10 +0200 Subject: [PATCH] FCValidation and LMValidation: Python unit tests --- python/test/CMakeLists.txt | 2 + .../test/t_FunctionalChaosValidation_std.py | 281 ++++++++++++++++++ python/test/t_LinearModelValidation_std.py | 118 ++++++++ 3 files changed, 401 insertions(+) create mode 100644 python/test/t_FunctionalChaosValidation_std.py create mode 100644 python/test/t_LinearModelValidation_std.py diff --git a/python/test/CMakeLists.txt b/python/test/CMakeLists.txt index 7f47550f34..9dbf08b9f3 100644 --- a/python/test/CMakeLists.txt +++ b/python/test/CMakeLists.txt @@ -747,6 +747,7 @@ ot_pyinstallcheck_test (FunctionalChaos_gsobol_sparse) ot_pyinstallcheck_test (FunctionalChaos_mixed IGNOREOUT) ot_pyinstallcheck_test (FunctionalChaos_nd) ot_pyinstallcheck_test (FunctionalChaosSobolIndices_std) +ot_pyinstallcheck_test (FunctionalChaosValidation_std IGNOREOUT) ot_pyinstallcheck_test (LeastSquaresExpansion_std IGNOREOUT) ot_pyinstallcheck_test (IntegrationExpansion_std IGNOREOUT) ot_pyinstallcheck_test (KrigingAlgorithm_std) @@ -766,6 +767,7 @@ endif () ot_pyinstallcheck_test (LinearModelAlgorithm_std) ot_pyinstallcheck_test (LinearModelAnalysis_std) ot_pyinstallcheck_test (LinearModelStepwiseAlgorithm_std) +ot_pyinstallcheck_test (LinearModelValidation_std IGNOREOUT) ot_pyinstallcheck_test (KrigingAlgorithm_isotropic_std IGNOREOUT) ot_pyinstallcheck_test (FieldToPointFunctionalChaosAlgorithm_std IGNOREOUT) ot_pyinstallcheck_test (FieldFunctionalChaosSobolIndices_std IGNOREOUT) diff --git a/python/test/t_FunctionalChaosValidation_std.py b/python/test/t_FunctionalChaosValidation_std.py new file mode 100644 index 0000000000..4e7663f65b --- /dev/null +++ b/python/test/t_FunctionalChaosValidation_std.py @@ -0,0 +1,281 @@ +#! 
def computeMSENaiveLOO(
    inputSample,
    outputSample,
    inputDistribution,
    adaptiveStrategy,
    projectionStrategy,
):
    """
    Estimate the mean squared error with a brute-force leave-one-out loop.

    For each point of the dataset, a polynomial chaos expansion is re-fitted
    on the remaining points and evaluated at the point left out; the squared
    residuals are then averaged marginal by marginal.

    Parameters
    ----------
    inputSample : Sample(size, input_dimension)
        The input dataset.
    outputSample : Sample(size, output_dimension)
        The output dataset.
    inputDistribution : ot.Distribution
        The distribution of the input random vector.
    adaptiveStrategy : ot.AdaptiveStrategy
        The method to select relevant coefficients.
    projectionStrategy : ot.ProjectionStrategy
        The method to compute the coefficients.

    Returns
    -------
    mse : Point(output_dimension)
        The leave-one-out mean squared error, one entry per output marginal.
    """
    size = inputSample.getSize()
    outputDimension = outputSample.getDimension()
    looResiduals = ot.Sample(size, outputDimension)
    # Each iteration of the splitter leaves exactly one point out,
    # so the test sub-samples below have a single row (index 0).
    for row, (indicesTrain, indicesTest) in enumerate(ot.LeaveOneOutSplitter(size)):
        refitAlgo = ot.FunctionalChaosAlgorithm(
            inputSample[indicesTrain],
            outputSample[indicesTrain],
            inputDistribution,
            adaptiveStrategy,
            projectionStrategy,
        )
        refitAlgo.run()
        metamodel = refitAlgo.getResult().getMetaModel()
        prediction = metamodel(inputSample[indicesTest])
        observed = outputSample[indicesTest]
        for j in range(outputDimension):
            looResiduals[row, j] = observed[0, j] - prediction[0, j]
    # Average the squared residuals for each output marginal.
    return ot.Point(
        [
            looResiduals.getMarginal(j).asPoint().normSquare() / size
            for j in range(outputDimension)
        ]
    )
def computeMSENaiveKFold(
    inputSample,
    outputSample,
    inputDistribution,
    adaptiveStrategy,
    projectionStrategy,
    kParameter=5,
):
    """
    Estimate the mean squared error with a brute-force K-Fold loop.

    For each of the K folds, a polynomial chaos expansion is re-fitted on the
    other folds and evaluated on the held-out fold; the squared residuals are
    scattered back to their global row indices and averaged.

    Parameters
    ----------
    inputSample : Sample(size, input_dimension)
        The input dataset.
    outputSample : Sample(size, output_dimension)
        The output dataset.
    inputDistribution : ot.Distribution
        The distribution of the input random vector.
    adaptiveStrategy : ot.AdaptiveStrategy
        The method to select relevant coefficients.
    projectionStrategy : ot.ProjectionStrategy
        The method to compute the coefficients.
    kParameter : int, in (2, sampleSize)
        The number of folds K.

    Returns
    -------
    mse : Point(output_dimension)
        The K-Fold mean squared error, one entry per output marginal.
    """
    size = inputSample.getSize()
    outputDimension = outputSample.getDimension()
    squaredResiduals = ot.Sample(size, outputDimension)
    for indicesTrain, indicesTest in ot.KFoldSplitter(size, kParameter):
        refitAlgo = ot.FunctionalChaosAlgorithm(
            inputSample[indicesTrain],
            outputSample[indicesTrain],
            inputDistribution,
            adaptiveStrategy,
            projectionStrategy,
        )
        refitAlgo.run()
        metamodel = refitAlgo.getResult().getMetaModel()
        foldResiduals = outputSample[indicesTest] - metamodel(inputSample[indicesTest])
        # Scatter this fold's squared residuals back to their global rows.
        for i in range(indicesTest.getSize()):
            for j in range(outputDimension):
                squaredResiduals[indicesTest[i], j] = foldResiduals[i, j] ** 2
    # The sample mean over all rows correctly weights folds of unequal sizes.
    return squaredResiduals.computeMean()
kFoldParameter = 10
foldSampleSize = 20
# The extra point makes sampleSize not a multiple of K, so the folds do not
# all have the same number of points.
sampleSize = foldSampleSize * kFoldParameter + 1

# Build a full (non-sparse) degree-5 polynomial chaos expansion of the
# Ishigami model on a Legendre tensorized basis.
degree = 5
enumerateFunction = ot.LinearEnumerateFunction(dimension)
basisSize = enumerateFunction.getBasisSizeFromTotalDegree(degree)
print("basisSize = ", basisSize)
productBasis = ot.OrthogonalProductPolynomialFactory(
    [ot.LegendreFactory()] * dimension, enumerateFunction
)
adaptiveStrategy = ot.FixedStrategy(productBasis, basisSize)
selectionAlgorithm = (
    ot.PenalizedLeastSquaresAlgorithmFactory()
)  # Get a full PCE: do not use model selection.
projectionStrategy = ot.LeastSquaresStrategy(selectionAlgorithm)
inputSample = im.distributionX.getSample(sampleSize)
outputSample = im.model(inputSample)
algo = ot.FunctionalChaosAlgorithm(
    inputSample, outputSample, im.distributionX, adaptiveStrategy, projectionStrategy
)
algo.run()
chaosResult = algo.getResult()

# 1. Compare the analytical (fast) leave-one-out MSE with the naive
# re-fitting loop implemented in computeMSENaiveLOO.
print("1. Analytical leave-one-out")
splitterLOO = ot.LeaveOneOutSplitter(sampleSize)
validationLOO = ot.FunctionalChaosValidation(
    chaosResult, splitterLOO
)
mseLOOAnalytical = validationLOO.computeMeanSquaredError()
print("Analytical LOO MSE = ", mseLOOAnalytical)
assert validationLOO.getSplitter().getN() == sampleSize

# Naive leave-one-out
mseLOOnaive = computeMSENaiveLOO(
    inputSample,
    outputSample,
    im.distributionX,
    adaptiveStrategy,
    projectionStrategy,
)
print("Naive LOO MSE = ", mseLOOnaive)

# Test
rtolLOO = 1.0e-8
atolLOO = 0.0
assert_almost_equal(mseLOOAnalytical, mseLOOnaive, rtolLOO, atolLOO)

# Check LOO R2 against the reference value 1 - MSE / Var(Y).
r2ScoreLOO = validationLOO.computeR2Score()
print("Analytical LOO R2 score = ", r2ScoreLOO)
sampleVariance = outputSample.computeCentralMoment(2)
print("sampleVariance = ", sampleVariance)
r2ScoreReference = 1.0 - mseLOOAnalytical[0] / sampleVariance[0]
print("Computed R2 score = ", r2ScoreReference)
rtolLOO = 1.0e-12
atolLOO = 0.0
assert_almost_equal(r2ScoreReference, r2ScoreLOO[0], rtolLOO, atolLOO)

# 2. Same comparison for the analytical K-Fold cross-validation estimator.
print("2. Analytical K-Fold")
splitterKF = ot.KFoldSplitter(sampleSize, kFoldParameter)
validationKFold = ot.FunctionalChaosValidation(
    chaosResult, splitterKF
)
print("KFold with K = ", kFoldParameter)
assert validationKFold.getSplitter().getN() == sampleSize
# TODO: fix this
# assert validationKFold.getSplitter().getSize() == kFoldParameter

# Compute mean squared error
mseKFoldAnalytical = validationKFold.computeMeanSquaredError()
print("Analytical KFold MSE = ", mseKFoldAnalytical)

# Naive KFold
mseKFoldnaive = computeMSENaiveKFold(
    inputSample,
    outputSample,
    im.distributionX,
    adaptiveStrategy,
    projectionStrategy,
    kFoldParameter,
)
print("Naive KFold MSE = ", mseKFoldnaive)

# Test
rtolKFold = 1.0e-5
atolKFold = 0.0
assert_almost_equal(mseKFoldAnalytical, mseKFoldnaive, rtolKFold, atolKFold)

# Check K-Fold R2 against the reference value 1 - MSE / Var(Y).
r2ScoreKFold = validationKFold.computeR2Score()
print("Analytical K-Fold R2 score = ", r2ScoreKFold)
r2ScoreReference = 1.0 - mseKFoldAnalytical[0] / sampleVariance[0]
print("Computed R2 score = ", r2ScoreReference)
rtolKFold = 1.0e-12
atolKFold = 0.0
assert_almost_equal(r2ScoreReference, r2ScoreKFold[0], rtolKFold, atolKFold)

# 3. Repeat the LOO check on a sparse PCE. This flag enables LOO CV on a
# model obtained with model selection.
print("3. Setting FunctionalChaosValidation-ModelSelection to true")
ot.ResourceMap.SetAsBool("FunctionalChaosValidation-ModelSelection", True)
selectionAlgorithm = (
    ot.LeastSquaresMetaModelSelectionFactory()
)  # Get a sparse PCE (i.e. with model selection).
+projectionStrategy = ot.LeastSquaresStrategy(selectionAlgorithm) +inputSample = im.distributionX.getSample(sampleSize) +outputSample = im.model(inputSample) +algo = ot.FunctionalChaosAlgorithm( + inputSample, outputSample, im.distributionX, adaptiveStrategy, projectionStrategy +) +algo.run() +chaosResult = algo.getResult() + +# Analytical leave-one-out +splitterLOO = ot.LeaveOneOutSplitter(sampleSize) +validationLOO = ot.FunctionalChaosValidation( + chaosResult, splitterLOO +) +mseLOOAnalytical = validationLOO.computeMeanSquaredError() +print("Analytical LOO MSE = ", mseLOOAnalytical) +# Naive leave-one-out +mseLOOnaive = computeMSENaiveLOO( + inputSample, + outputSample, + im.distributionX, + adaptiveStrategy, + projectionStrategy, +) +print("Naive LOO MSE = ", mseLOOnaive) +# Test +rtolLOO = 1.0e-1 # We cannot have more accuracy, as the MSE estimator is then biased +atolLOO = 0.0 +assert_almost_equal(mseLOOAnalytical, mseLOOnaive, rtolLOO, atolLOO) diff --git a/python/test/t_LinearModelValidation_std.py b/python/test/t_LinearModelValidation_std.py new file mode 100644 index 0000000000..642baa94e8 --- /dev/null +++ b/python/test/t_LinearModelValidation_std.py @@ -0,0 +1,118 @@ +#! /usr/bin/env python + +import openturns as ot +from openturns.testing import assert_almost_equal + +ot.TESTPREAMBLE() + +print("Fit y ~ 3 - 2 x1 + x2 + epsilon") +kFoldParameter = 4 +foldRootSize = 3 +# Makes so that k does not divide the sample size. +# In this case, we must take into account for the different weight of +# each fold. 
# The +1 makes the sample size not a multiple of K, so the naive K-Fold loop
# below exercises folds of unequal sizes.
sampleSize = foldRootSize * kFoldParameter + 1
print("sampleSize = ", sampleSize)
aCollection = []
marginal1 = ot.Uniform(-1.0, 1.0)
aCollection.append(marginal1)
aCollection.append(marginal1)
distribution = ot.JointDistribution(aCollection)
inputSample = distribution.getSample(sampleSize)
print("inputSample=", inputSample)
g = ot.SymbolicFunction(["x1", "x2"], ["3 - 2 * x1 + x2"])
noise = ot.Normal(0.0, 0.5)
outputSample = g(inputSample) + noise.getSample(sampleSize)
print("outputSample=", outputSample)
lmAlgo = ot.LinearModelAlgorithm(inputSample, outputSample)
result = lmAlgo.getResult()

# Create LOO validation
splitterLOO = ot.LeaveOneOutSplitter(sampleSize)
validationLOO = ot.LinearModelValidation(result, splitterLOO)
print(validationLOO)

# Compute analytical LOO MSE
print("Compute Analytical LOO MSE")
mseLOOAnalytical = validationLOO.computeMeanSquaredError()
print("Analytical LOO MSE =", mseLOOAnalytical[0])

# Compute naive leave-one-out: re-fit the linear model without each point in
# turn and predict that point. Each test sub-sample has a single row.
residualsLOO = ot.Point(sampleSize)
j = 0
for indicesTrain, indicesTest in splitterLOO:
    inputSampleTrainLOO = inputSample[indicesTrain]
    inputSampleLOOTest = inputSample[indicesTest]
    outputSampleTrainLOO = outputSample[indicesTrain]
    outputSampleLOOTest = outputSample[indicesTest]
    lmAlgoLOO = ot.LinearModelAlgorithm(inputSampleTrainLOO, outputSampleTrainLOO)
    resultLOO = lmAlgoLOO.getResult()
    metamodelLOO = resultLOO.getMetaModel()
    predictionLOOTest = metamodelLOO(inputSampleLOOTest)
    residualsLOOTest = predictionLOOTest.asPoint() - outputSampleLOOTest.asPoint()
    residualsLOO[j] = residualsLOOTest[0]
    j += 1

mseLOOnaive = residualsLOO.normSquare() / sampleSize
print("Naive LOO MSE =", mseLOOnaive)

# Test
rtolLOO = 1.0e-12
atolLOO = 0.0
assert_almost_equal(mseLOOAnalytical[0], mseLOOnaive, rtolLOO, atolLOO)

# Check LOO R2 against the reference value 1 - MSE / Var(Y).
r2ScoreLOO = validationLOO.computeR2Score()
print("Analytical LOO R2 score = ", r2ScoreLOO[0])
sampleVariance = outputSample.computeCentralMoment(2)
r2ScoreReference = 1.0 - mseLOOAnalytical[0] / sampleVariance[0]
print("Computed R2 score = ", r2ScoreReference)
rtolLOO = 1.0e-12
atolLOO = 0.0
assert_almost_equal(r2ScoreReference, r2ScoreLOO[0], rtolLOO, atolLOO)

# Create KFold validation
splitterKFold = ot.KFoldSplitter(sampleSize, kFoldParameter)
validationKFold = ot.LinearModelValidation(
    result, splitterKFold
)
print(validationKFold)

# Compute analytical KFold MSE
mseKFoldAnalytical = validationKFold.computeMeanSquaredError()
print("Analytical KFold MSE =", mseKFoldAnalytical[0])

# Naive KFold: re-fit on K-1 folds and scatter the held-out residuals back
# to their global row indices.
residualsKFold = ot.Point(sampleSize)
for indicesTrain, indicesTest in splitterKFold:
    inputSampleKFoldTrain = inputSample[indicesTrain]
    outputSampleKFoldTrain = outputSample[indicesTrain]
    inputSampleKFoldTest = inputSample[indicesTest]
    outputSampleKFoldTest = outputSample[indicesTest]
    lmAlgoKFold = ot.LinearModelAlgorithm(inputSampleKFoldTrain, outputSampleKFoldTrain)
    resultKFold = lmAlgoKFold.getResult()
    metamodelKFold = resultKFold.getMetaModel()
    predictionKFoldTest = metamodelKFold(inputSampleKFoldTest)
    residualsKFoldTest = predictionKFoldTest.asPoint() - outputSampleKFoldTest.asPoint()
    foldSize = indicesTest.getSize()
    for k in range(foldSize):
        residualsKFold[indicesTest[k]] = residualsKFoldTest[k]

mseKFoldnaive = residualsKFold.normSquare() / sampleSize
print("Naive KFold MSE =", mseKFoldnaive)

# Test
rtolKFold = 1.0e-7
atolKFold = 0.0
assert_almost_equal(mseKFoldAnalytical[0], mseKFoldnaive, rtolKFold, atolKFold)

# Check K-Fold R2 against the reference value 1 - MSE / Var(Y).
r2ScoreKFold = validationKFold.computeR2Score()
print("Analytical K-Fold R2 score =", r2ScoreKFold[0])
r2ScoreReference = 1.0 - mseKFoldAnalytical[0] / sampleVariance[0]
print("Computed R2 score =", r2ScoreReference)
rtolKFold = 1.0e-12
atolKFold = 0.0
# Fixed: this K-Fold check previously passed the LOO tolerances
# (rtolLOO, atolLOO) by mistake; use the K-Fold tolerances set just above.
assert_almost_equal(r2ScoreReference, r2ScoreKFold[0], rtolKFold, atolKFold)