diff --git a/README.md b/README.md
index 1c1b4d5..ca28a01 100644
--- a/README.md
+++ b/README.md
@@ -128,9 +128,8 @@ more robust classification network.
 This repository introduces the following functions that are used throughout
 the examples:
 
-- [`buildConstrainedNetwork`](conslearn/buildConstrainedNetwork.m) - Build a
-  network with specific constraints induced on the architecture and
-  initialization of the weights.
+- [`buildConstrainedNetwork`](conslearn/buildConstrainedNetwork.m) - Build a multi-layer perceptron (MLP) with constraints on the architecture and initialization of the weights.
+- [`buildConvexCNN`](conslearn/buildConvexCNN.m) - Build a fully-input convex convolutional neural network (CNN).
 - [`trainConstrainedNetwork`](conslearn/trainConstrainedNetwork.m) - Train a
   constrained network and maintain the constraint during training.
 - [`lipschitzUpperBound`](conslearn/lipschitzUpperBound.m) - Compute an upper
diff --git a/conslearn/+conslearn/+convex/+cnn/buildImageFICCNN.m b/conslearn/+conslearn/+convex/+cnn/buildImageFICCNN.m
new file mode 100644
index 0000000..6459eda
--- /dev/null
+++ b/conslearn/+conslearn/+convex/+cnn/buildImageFICCNN.m
@@ -0,0 +1,154 @@
+function net = buildImageFICCNN(inputSize, outputSize, filterSize, numFilters, options)
+% BUILDIMAGEFICCNN Construct a fully-input convex convolutional neural
+% network for image inputs.
+%
+% NET = BUILDIMAGEFICCNN(INPUTSIZE, OUTPUTSIZE, FILTERSIZE, NUMFILTERS)
+% creates a fully-input convex dlnetwork object, NET.
+%
+% INPUTSIZE is a row vector of integers [h w c], where h, w, and c
+% correspond to the height, width, and number of channels, respectively.
+%
+% OUTPUTSIZE is an integer indicating the number of neurons in the
+% output fully connected layer.
+%
+% FILTERSIZE is a matrix with two columns specifying the height and width
+% for each convolutional layer. The network will have as many
+% convolutional layers as there are rows in FILTERSIZE. If FILTERSIZE is
+% provided as a column vector, it is assumed that the filters are square.
+%
+% NUMFILTERS is a column vector of integers that specifies the number of
+% filters for each convolutional layer. It must have the same number of
+% rows as FILTERSIZE.
+%
+% NET = BUILDIMAGEFICCNN(__, NAME=VALUE) specifies additional options
+% using one or more name-value arguments.
+%
+%   Stride                         - Stride for each convolutional
+%                                    layer, specified as a two-column
+%                                    matrix where the first column is
+%                                    the stride height, and the second
+%                                    column is the stride width. If
+%                                    Stride is specified as a column
+%                                    vector, a square stride is assumed.
+%                                    The default value is 1 for all
+%                                    layers.
+%
+%   DilationFactor                 - Dilation factor for each
+%                                    convolutional layer, specified as a
+%                                    two-column matrix where the first
+%                                    column is the dilation height and
+%                                    the second column is the dilation
+%                                    width. If DilationFactor is a column
+%                                    vector, a square dilation factor is
+%                                    assumed. The default value is 1 for
+%                                    all layers.
+%
+%   Padding                        - Padding method for each
+%                                    convolutional layer, specified as
+%                                    "same". Padding must be a string
+%                                    array with the same number of rows
+%                                    as FILTERSIZE. The default value is
+%                                    "same" for all layers.
+%
+%   PaddingValue                   - Padding value for each convolutional
+%                                    layer, specified as a column vector
+%                                    with the same number of rows as
+%                                    FILTERSIZE. The default value is 0
+%                                    for all layers.
+%
+%   ConvexNonDecreasingActivation  - Convex non-decreasing activation
+%                                    function, specified as "softplus"
+%                                    or "relu". The default value is
+%                                    "relu".
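+%
+%   Example (illustrative sketch; assumes the conslearn package is on
+%   the MATLAB path):
+%       % Two convolutional layers with 3x3 and 5x5 filters
+%       net = conslearn.convex.cnn.buildImageFICCNN([32 32 3], 10, ...
+%           [3 3; 5 5], [16; 32]);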
+ +% Copyright 2024 The MathWorks, Inc. + +arguments + inputSize (1,:) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeTwoOrThreeRowVector(inputSize, "inputSize")} + outputSize (1,1) {mustBeReal, mustBeInteger, mustBePositive} + filterSize {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeOneOrTwoColumn(filterSize, "filterSize")} + numFilters (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeEqualLength(filterSize, numFilters, "numFilters")} + options.Stride {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeOneOrTwoColumn(options.Stride, "Stride"), mustBeEqualLength(filterSize, options.Stride, "Stride")} = ones(numel(numFilters), 2) + options.DilationFactor {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeOneOrTwoColumn(options.DilationFactor, "DilationFactor"), mustBeEqualLength(filterSize, options.DilationFactor, "DilationFactor")} = ones(numel(numFilters), 2) + options.Padding (:,1) {mustBeNonzeroLengthText, mustBeMember(options.Padding, "same"), mustBeEqualLength(filterSize, options.Padding, "Padding")} = repelem("same", numel(numFilters)); + options.PaddingValue (:,1) {mustBeNonempty, mustBeReal, mustBeEqualLength(filterSize, options.PaddingValue, "PaddingValue")} = zeros(numel(numFilters), 1); + options.ConvexNonDecreasingActivation {mustBeNonzeroLengthText, mustBeTextScalar, mustBeMember(options.ConvexNonDecreasingActivation, ["relu", "softplus"])} = "relu" +end + + + +% Select the activation function based on user input +switch options.ConvexNonDecreasingActivation + case "relu" + activationLayer = @(name) reluLayer(Name=name); + case "softplus" + activationLayer = @(name) softplusLayer(Name=name); +end + +% Build the input layer +layers = [ + imageInputLayer(inputSize, Name="input", Normalization="none") + ]; + +% Build the convolutional layers +for ii = 1:numel(numFilters) + convLayerName = "conv2d_+_" + ii; + activationLayerName = "cnd_" + ii; + batchNormLayerName = "batchnorm_+_" + ii; + + convLayer = convolution2dLayer(filterSize(ii, :), numFilters(ii), ... + Stride=options.Stride(ii, :), ... + DilationFactor=options.DilationFactor(ii, :), ... + Padding=options.Padding(ii), ... + PaddingValue=options.PaddingValue(ii), ... 
+        Name=convLayerName);
+
+    layers = [
+        layers;
+        convLayer;
+        activationLayer(activationLayerName);
+        batchNormalizationLayer(Name=batchNormLayerName)
+        ]; %#ok
+end
+
+% Modify the name of the first convolutional layer to remove constraints
+layers(2).Name = "conv2d_1";
+
+% Add final pooling and fully connected layers
+layers = [
+    layers;
+    globalAveragePooling2dLayer(Name="global_avg_pool");
+    fullyConnectedLayer(outputSize, Name="fc_+_end")
+    ];
+
+% Initialize the dlnetwork
+net = dlnetwork(layers);
+
+% Make the network convex
+net = conslearn.convex.makeNetworkConvex(net);
+
+end
+
+function mustBeTwoOrThreeRowVector(x, name)
+if ~(isrow(x) && (numel(x) == 2 || numel(x) == 3))
+    error("'%s' must be a row vector with two or three elements.", name);
+end
+end
+
+function mustBeOneOrTwoColumn(x, name)
+if ~(size(x, 2) == 1 || size(x, 2) == 2)
+    error("'%s' must be an array with one or two columns.", name);
+end
+end
+
+function mustBeEqualLength(filterSize, otherVar, otherVarName)
+if ~isequal(size(filterSize, 1), size(otherVar, 1))
+    error("'%s' must have the same number of rows as the filter size value.", otherVarName);
+end
+end
diff --git a/conslearn/+conslearn/+convex/+cnn/buildSequenceFICCNN.m b/conslearn/+conslearn/+convex/+cnn/buildSequenceFICCNN.m
new file mode 100644
index 0000000..9030bba
--- /dev/null
+++ b/conslearn/+conslearn/+convex/+cnn/buildSequenceFICCNN.m
@@ -0,0 +1,130 @@
+function net = buildSequenceFICCNN(inputSize, outputSize, filterSize, numFilters, options)
+% BUILDSEQUENCEFICCNN Construct a fully-input convex convolutional
+% neural network for sequence inputs.
+%
+% NET = BUILDSEQUENCEFICCNN(INPUTSIZE, OUTPUTSIZE, FILTERSIZE,
+% NUMFILTERS) creates a fully-input convex dlnetwork object, NET.
+%
+% INPUTSIZE is an integer indicating the number of features.
+%
+% OUTPUTSIZE is an integer indicating the number of neurons in the
+% output fully connected layer.
+%
+% FILTERSIZE is a column vector of integer filter sizes. The network
+% will have as many convolutional layers as there are rows in FILTERSIZE.
+%
+% NUMFILTERS is a column vector of integers that specifies the number of
+% filters for each convolutional layer. It must have the same number of
+% rows as FILTERSIZE.
+%
+% NET = BUILDSEQUENCEFICCNN(__, NAME=VALUE) specifies additional options
+% using one or more name-value arguments.
+%
+%   Stride                         - Stride for each convolutional
+%                                    layer, specified as a column vector
+%                                    of integers with the same number of
+%                                    rows as FILTERSIZE. The default
+%                                    value is 1 for all layers.
+%
+%   DilationFactor                 - Dilation factor for each
+%                                    convolutional layer, specified as a
+%                                    column vector with the same number
+%                                    of rows as FILTERSIZE. The default
+%                                    value is 1 for all layers.
+%
+%   Padding                        - Padding method for each
+%                                    convolutional layer, specified as
+%                                    "same" or "causal". Padding must be
+%                                    a string array with the same
+%                                    number of rows as FILTERSIZE. The
+%                                    default value is "causal" for all
+%                                    layers.
+%
+%   PaddingValue                   - Padding value for each convolutional
+%                                    layer, specified as a column vector
+%                                    with the same number of rows as
+%                                    FILTERSIZE. The default value is 0
+%                                    for all layers.
+%
+%   ConvexNonDecreasingActivation  - Convex non-decreasing activation
+%                                    function, specified as "softplus"
+%                                    or "relu". The default value is
+%                                    "relu".
+
+% Copyright 2024 The MathWorks, Inc.
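+
+% Example (illustrative sketch, not part of the shipped help text):
+%   % 1-D FICCNN over 3-channel sequences with two convolutional layers
+%   net = conslearn.convex.cnn.buildSequenceFICCNN(3, 1, [5; 3], [16; 32]);
+%   % Layers named with the "_+_" tag carry the nonnegativity constraint
+%   idx = conslearn.convex.getConvexParameterIdx(net);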
+ +arguments + inputSize (1,1) {mustBeReal, mustBeInteger, mustBePositive} + outputSize (1,1) {mustBeReal, mustBeInteger, mustBePositive} + filterSize (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive} + numFilters (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeEqualLength(filterSize, numFilters, "numFilters")} + options.Stride (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeEqualLength(filterSize, options.Stride, "Stride")} = ones(numel(filterSize), 1) + options.DilationFactor (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeEqualLength(filterSize, options.DilationFactor, "DilationFactor")} = ones(numel(filterSize), 1) + options.Padding (:,1) {mustBeNonzeroLengthText, mustBeMember(options.Padding, ["same", "causal"]), mustBeEqualLength(filterSize, options.Padding, "Padding")} = repelem("causal", numel(filterSize)) + options.PaddingValue (:,1) {mustBeNonempty, mustBeReal, mustBeEqualLength(filterSize, options.PaddingValue, "PaddingValue")} = zeros(numel(filterSize), 1) + options.ConvexNonDecreasingActivation {mustBeNonzeroLengthText, mustBeTextScalar, mustBeMember(options.ConvexNonDecreasingActivation, ["relu", "softplus"])} = "relu" +end + +% Select the activation function based on user input +switch options.ConvexNonDecreasingActivation + case "relu" + activationLayer = @(name) reluLayer(Name=name); + case "softplus" + activationLayer = @(name) softplusLayer(Name=name); +end + +% Build the input layer +layers = [ + sequenceInputLayer(inputSize, Name="input", Normalization="none") + ]; + +% Build the convolutional layers +for ii = 1:numel(numFilters) + convLayerName = "conv1d_+_" + ii; + activationLayerName = "cnd_" + ii; + batchNormLayerName = "batchnorm_+_" + ii; + + convLayer = convolution1dLayer(filterSize(ii), numFilters(ii), ... + Stride=options.Stride(ii), ... + DilationFactor=options.DilationFactor(ii), ... + Padding=options.Padding(ii), ... + PaddingValue=options.PaddingValue(ii), ... + Name=convLayerName); + + layers = [ + layers; + convLayer; + activationLayer(activationLayerName); + batchNormalizationLayer(Name=batchNormLayerName) + ]; %#ok +end + +% Modify the name of the first convolutional layer to remove constraints +layers(2).Name = "conv1d_1"; + +% Add final pooling and fully connected layers +layers = [ + layers; + globalAveragePooling1dLayer(Name="global_avg_pool"); + fullyConnectedLayer(outputSize, Name="fc_+_end") + ]; + +% Initialize the dlnetwork +net = dlnetwork(layers); + +% Make the network convex +net = conslearn.convex.makeNetworkConvex(net); + +end + +function mustBeEqualLength(filterSize, otherVar, otherVarName) +if ~isequal(size(filterSize, 1), size(otherVar, 1)) + error("'%s' must have the same number of rows as the filter size value.", otherVarName); +end +end diff --git a/conslearn/+conslearn/+convex/getConvexParameterIdx.m b/conslearn/+conslearn/+convex/getConvexParameterIdx.m index d19ab41..f5b5959 100644 --- a/conslearn/+conslearn/+convex/getConvexParameterIdx.m +++ b/conslearn/+conslearn/+convex/getConvexParameterIdx.m @@ -1,8 +1,8 @@ function convexParameterIdx = getConvexParameterIdx(net) % GETCONVEXPARAMETERIDX Returns the indices in the learnable parameter % table of the network that correspond to weights with convex constraints. -% The network *must* be created using the buildConstrainedNetwork function -% with a convex constraint type. 
+% The network *must* be created using the buildConstrainedNetwork or +% buildConvexCNN function with a convex constraint type. % Copyright 2024 The MathWorks, Inc. @@ -11,6 +11,6 @@ end convexParameterIdx = contains(net.Learnables.Layer,"_+_") & ... - contains(net.Learnables.Parameter,"Weight"); + ( contains(net.Learnables.Parameter,"Weight") | contains(net.Learnables.Parameter,"Scale")); end diff --git a/conslearn/+conslearn/+convex/makeNetworkConvex.m b/conslearn/+conslearn/+convex/makeNetworkConvex.m index 86036ba..42c3e81 100644 --- a/conslearn/+conslearn/+convex/makeNetworkConvex.m +++ b/conslearn/+conslearn/+convex/makeNetworkConvex.m @@ -1,8 +1,8 @@ function net = makeNetworkConvex(net) % MAKENETWORKCONVEX Constrain the weights of a convex network to ensure % convexity of the outputs with respect to the network inputs. The network -% *must* be created using the buildConstrainedNetwork function with a convex -% constraint type. +% *must* be created using the buildConstrainedNetwork or +% buildConvexCNN function with a convex constraint type. % Copyright 2024 The MathWorks, Inc. diff --git a/conslearn/buildConstrainedNetwork.m b/conslearn/buildConstrainedNetwork.m index 1a3d852..cea1a38 100644 --- a/conslearn/buildConstrainedNetwork.m +++ b/conslearn/buildConstrainedNetwork.m @@ -1,47 +1,54 @@ function net = buildConstrainedNetwork(constraint, inputSize, numHiddenUnits, options) -% BUILDCONSTRAINEDNETWORK Construct a constrained neural network. +% BUILDCONSTRAINEDNETWORK Construct a constrained multi-layer +% perceptron. % % NET = BUILDCONSTRAINEDNETWORK(CONSTRAINT, INPUTSIZE, NUMHIDDENUNITS) -% creates an initialized dlnetwork object, NET, which has the -% constraint specified by CONSTRAINT, where CONSTRAINT is specified as: +% creates an initialized dlnetwork object, NET, which has the constraint +% specified by CONSTRAINT, where CONSTRAINT is specified as: % "fully-convex", "partially-convex", "fully-monotonic", -% "partially-monotonic", or "lipschitz". The network has a -% featureInputLayer or an imageInputLayer, depending on if INPUTSIZE -% is a scalar or a vector with three elements. NUMHIDDENUNITS is a vector of -% integers that corresponds to the sizes and number of fully connected -% layers in the network. +% "partially-monotonic", or "lipschitz". % -% NET = BUILDCONSTRAINEDNETWORK(__,NAME=VALUE) specifies additional +% The network includes either a featureInputLayer or an imageInputLayer, +% depending on INPUTSIZE: +% +% - If INPUTSIZE is a scalar, then the network has a featureInputLayer. +% +% - If INPUTSIZE is a vector with three elements, then the network has an +% imageInputLayer. +% +% NUMHIDDENUNITS is a vector of integers that corresponds to the sizes +% and number of fully connected layers in the network. +% +% NET = BUILDCONSTRAINEDNETWORK(__, NAME=VALUE) specifies additional % options using one or more name-value arguments. % % BUILDCONSTRAINEDNETWORK name-value arguments depend on the value of -% CONSTRAINT. +% CONSTRAINT. % % These options and default values apply to convex constrained networks: % -% ConvexNonDecreasingActivation - Convex, non-decreasing -% ("fully-convex") activation functions. -% ("partially-convex") The options are "softplus" or "relu". -% The default is "softplus". +% ConvexNonDecreasingActivation - Convex, non-decreasing +% ("fully-convex") activation functions. +% ("partially-convex") The options are "softplus" or +% "relu". The default is "softplus". % Activation - Network activation function. 
% ("partially-convex") The options are "tanh", "relu" or -% "fullsort". -% The default is "tanh". +% "fullsort". The default is "tanh". % ConvexChannelIdx - Channel indices for the inputs that -% ("partially-convex") carry convex dependency with the -% output, specified as a vector of -% positive integers. For image inputs, -% the convex channel indices correspond -% to the indices in the flattened image -% input. -% The default value is 1. +% ("partially-convex") carry convex dependency with the +% output, specified as a vector of +% positive integers. For image +% inputs, the convex channel indices +% correspond to the indices in the +% flattened image input. The default +% value is 1. % % These options and default values apply to monotonic constrained % networks: % -% Activation - Network activation function. +% Activation - Network activation function. % ("fully-monotonic") The options are "tanh", "relu" or -% ("partially-monotonic") "fullsort". +% ("partially-monotonic") "fullsort". % The default is "fullsort". % ResidualScaling - The scale factor applied to the sum % ("fully-monotonic") of the inputs that carry monotonic @@ -58,33 +65,34 @@ % The default value is Inf. % MonotonicChannelIdx - Channel indices for the inputs that % ("partially-monotonic") carry monotonic dependency with the -% output, specified as a vector of -% positive integers. For image inputs, -% the monotonic channel indices -% correspond to the indices in the -% flattened image input. -% The default value is 1. +% output, specified as a vector of +% positive integers. For image +% inputs, the monotonic channel +% indices correspond to the indices +% in the flattened image input. The +% default value is 1. % % The following options and default values apply to Lipschitz constrained % networks: % -% Activation - Network activation function. +% Activation - Network activation function. % The options are "tanh", "relu" or -% "fullsort". -% The default is "fullsort". -% UpperBoundLipschitzConstant - Upper bound on the Lipschitz constant -% for the network, as a positive real -% number. -% The default value is 1. +% "fullsort". The default is +% "fullsort". +% UpperBoundLipschitzConstant - Upper bound on the Lipschitz +% constant for the network, as a +% positive real number. The default +% value is 1. % pNorm - p-norm value for measuring % distance with respect to the % Lipschitz continuity definition. % The default value is 1. % % [1] Amos, Brandon, et al. Input Convex Neural Networks. arXiv:1609.07152, -% arXiv, 14 June 2017. arXiv.org, https://doi.org/10.48550/arXiv.1609.07152. -% [2] Kitouni, Ouail, et al. Expressive Monotonic Neural Networks. -% arXiv:2307.07512, arXiv, 14 July 2023. arXiv.org, http://arxiv.org/abs/2307.07512. +% arXiv, 14 June 2017. arXiv.org, +% https://doi.org/10.48550/arXiv.1609.07152. [2] Kitouni, Ouail, et al. +% Expressive Monotonic Neural Networks. arXiv:2307.07512, arXiv, 14 July +% 2023. arXiv.org, http://arxiv.org/abs/2307.07512. % Copyright 2024 The MathWorks, Inc. @@ -273,7 +281,7 @@ function iValidateConstraintWithActivation(param, constraint) function iValidatePNorm(param) if (~isequal(param,1) && ~isequal(param,2) && ~isequal(param,Inf)) && ~isempty(param) -error("Invalid 'PNorm' value. Value must be 1, 2, or Inf.") + error("Invalid 'PNorm' value. 
 end
 end
diff --git a/conslearn/buildConvexCNN.m b/conslearn/buildConvexCNN.m
new file mode 100644
index 0000000..f085988
--- /dev/null
+++ b/conslearn/buildConvexCNN.m
@@ -0,0 +1,126 @@
+function net = buildConvexCNN(inputSize, varargin)
+% BUILDCONVEXCNN Construct a fully input convex convolutional neural
+% network.
+%
+% NET = BUILDCONVEXCNN(INPUTSIZE, OUTPUTSIZE, FILTERSIZE, NUMFILTERS)
+% creates a fully input convex initialized dlnetwork object, NET.
+%
+% The network includes either a sequenceInputLayer or an imageInputLayer,
+% depending on INPUTSIZE:
+%
+% - If INPUTSIZE is a scalar, then the network has a sequenceInputLayer.
+% - If INPUTSIZE is a vector with two or three elements, then the
+%   network has an imageInputLayer.
+%
+% OUTPUTSIZE is an integer indicating the number of neurons in the output
+% fully connected layer.
+%
+% FILTERSIZE defines the filter sizes for the convolutional layers. The
+% network will have as many convolutional layers as there are rows in
+% FILTERSIZE.
+%
+% - If INPUTSIZE is a scalar, FILTERSIZE must be a column vector of
+%   integers specifying the length of the filters for each
+%   convolution1dLayer.
+% - If INPUTSIZE is a vector, FILTERSIZE is a matrix with two columns
+%   specifying the height and width of the filters for each
+%   convolution2dLayer. If FILTERSIZE is provided as a column vector, it
+%   is assumed that the filters are square.
+%
+% NUMFILTERS is a column vector of integers that specifies the number of
+% filters for each convolutional layer. It must have the same number of
+% rows as FILTERSIZE.
+%
+% NET = BUILDCONVEXCNN(__, NAME=VALUE) specifies additional options using
+% one or more name-value arguments.
+%
+% These options and default values apply to fully-convex constrained
+% networks:
+%
+%   Stride                         - Stride for each convolutional
+%                                    layer, specified as a matrix with
+%                                    the same number of rows as
+%                                    FILTERSIZE. If INPUTSIZE is a
+%                                    scalar, Stride must be a column
+%                                    vector specifying the stride
+%                                    length. If INPUTSIZE is a vector,
+%                                    Stride is a two-column matrix
+%                                    specifying the stride height and
+%                                    width. If INPUTSIZE is a vector
+%                                    and Stride is a column vector, a
+%                                    square stride is assumed. The
+%                                    default value is 1 for all layers.
+%
+%   DilationFactor                 - Dilation factor for each
+%                                    convolutional layer, specified as a
+%                                    matrix with the same number of rows
+%                                    as FILTERSIZE. If INPUTSIZE is a
+%                                    scalar, DilationFactor must be a
+%                                    column vector specifying the
+%                                    dilation length. If INPUTSIZE is a
+%                                    vector, DilationFactor is a
+%                                    two-column matrix specifying the
+%                                    dilation height and width. If
+%                                    INPUTSIZE is a vector and
+%                                    DilationFactor is a column vector,
+%                                    a square dilation factor is
+%                                    assumed. The default value is 1 for
+%                                    all layers.
+%
+%   Padding                        - Padding method for each
+%                                    convolutional layer, specified as
+%                                    "same" or "causal". Padding must be
+%                                    a string array with the same number
+%                                    of rows as FILTERSIZE. If INPUTSIZE
+%                                    is a scalar, the default value is
+%                                    "causal" for all layers. If
+%                                    INPUTSIZE is a vector, the default
+%                                    value is "same" for all layers.
+%
+%   PaddingValue                   - Padding value for each convolutional
+%                                    layer, specified as a column vector
+%                                    with the same number of rows as
+%                                    FILTERSIZE. The default value is 0
+%                                    for all layers.
+%
+%   ConvexNonDecreasingActivation  - Convex non-decreasing activation
+%                                    function, specified as "softplus"
+%                                    or "relu". The default value is
+%                                    "relu".
+
+% Copyright 2024 The MathWorks, Inc.
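+
+% Example (illustrative sketch):
+%   % Image input: 28x28 grayscale, two layers of square 3x3 filters
+%   net = buildConvexCNN([28 28 1], 10, [3; 3], [8; 16]);
+%   % Sequence input: four features; 1-D convolutions default to causal padding
+%   net = buildConvexCNN(4, 1, [5; 3], [16; 32]);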
+ +arguments + inputSize (1,:) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive} +end + +arguments(Repeating) + varargin +end + +switch numel(inputSize) + case 1 + try + net = conslearn.convex.cnn.buildSequenceFICCNN(inputSize, varargin{:}); + catch ME + throwAsCaller(ME); + end + case {2, 3} + try + net = conslearn.convex.cnn.buildImageFICCNN(inputSize, varargin{:}); + catch ME + throwAsCaller(ME); + end + otherwise + error("Input size must be a vector with one, two, or three elements."); +end + +end + + diff --git a/conslearn/functionSignatures.json b/conslearn/functionSignatures.json new file mode 100644 index 0000000..2c890a7 --- /dev/null +++ b/conslearn/functionSignatures.json @@ -0,0 +1,33 @@ +{ + "buildConvexCNN": + { + "inputs": + [ + {"name":"inputSize", "kind":"required", "type":["numeric", "scalar"]}, + {"name":"outputSize", "kind":"required", "type":"numeric"}, + {"name":"filterSize", "kind":"required", "type":"numeric"}, + {"name":"numFilters", "kind":"required", "type":"numeric"}, + {"name":"Stride", "kind":"namevalue", "type":"numeric"}, + {"name":"DilationFactor", "kind":"namevalue", "type":"numeric"}, + {"name":"Padding", "kind":"namevalue", "type":"choices={'causal', 'same'}"}, + {"name":"PaddingValue", "kind":"namevalue", "type":"numeric"}, + {"name":"ConvexNonDecreasingActivation", "kind":"namevalue", "type":"choices={'relu', 'softplus'}"} + ] + }, + + "buildConvexCNN": + { + "inputs": + [ + {"name":"inputSize", "kind":"required", "type":"numeric"}, + {"name":"outputSize", "kind":"required", "type":"numeric"}, + {"name":"filterSize", "kind":"required", "type":"numeric"}, + {"name":"numFilters", "kind":"required", "type":"numeric"}, + {"name":"Stride", "kind":"namevalue", "type":"numeric"}, + {"name":"DilationFactor", "kind":"namevalue", "type":"numeric"}, + {"name":"Padding", "kind":"namevalue", "type":"choices={'same'}"}, + {"name":"PaddingValue", "kind":"namevalue", "type":"numeric"}, + {"name":"ConvexNonDecreasingActivation", "kind":"namevalue", "type":"choices={'relu', 'softplus'}"} + ] + } +} diff --git a/tests/system/tFullyInputConvexNetwork.m b/tests/system/tFullyInputConvexNetwork.m index 953c096..2035931 100644 --- a/tests/system/tFullyInputConvexNetwork.m +++ b/tests/system/tFullyInputConvexNetwork.m @@ -8,7 +8,7 @@ end methods (Test) - function verifyNetworkOutputIsFullyConvex(testCase, PndActivationFunctionSet, TargetGeneratorFunctionSet) + function verifyNetworkOutputIsFullyConvexMLP(testCase, PndActivationFunctionSet, TargetGeneratorFunctionSet) % Create training data xTrain = -2:0.01:2; tTrain = TargetGeneratorFunctionSet(xTrain); @@ -63,6 +63,135 @@ function verifyNetworkOutputIsFullyConvex(testCase, PndActivationFunctionSet, Ta testCase.verifyLessThanOrEqual(f_c, m*c + y0 + epsilon); end end + + function verifyNetworkOutputIsFullyConvexForFeatureInputCNN(testCase, TargetGeneratorFunctionSet) + % Create training data + xTrain = -2:0.01:2; + tTrain = TargetGeneratorFunctionSet(xTrain); + + % Package dataset into a minibatchqueue that can be used by + % 'trainConstrainedNetwork' + xds = arrayDatastore(xTrain'); + tds = arrayDatastore(tTrain'); + cds = combine(xds, tds); + mbqTrain = minibatchqueue(cds, 2, ... + MiniBatchSize=length(xTrain), ... + OutputAsDlarray=[1 1], ... + MiniBatchFormat=["BTC", "BC"], ... + OutputEnvironment="cpu"); + + % Build fully convex network + inputSize = 1; + outputSize = 1; + filterSize = 2; + numFilters = 8; + ficnn = buildConvexCNN( ... + inputSize, ... + outputSize, ... + filterSize, ... 
+ numFilters); + + % Train fully convex network. Use just 1 epoch. + maxEpochs = 1; + initialLearnRate = 0.05; + decay = 0.01; + ficnn = trainConstrainedNetwork("fully-convex", ficnn, mbqTrain, ... + TrainingMonitor=false, ... + MaxEpochs=maxEpochs, ... + InitialLearnRate=initialLearnRate, ... + Decay=decay, ... + LossMetric="mae"); + + % Check convexity. Take points A = (a, f(a)) and B = (b, f(b)), + % where a