Skip to content

Commit

Permalink
Adds capability to build and train fully input convex CNNs.
Browse files Browse the repository at this point in the history
Adds capability to build and train fully input convex CNNs.
  • Loading branch information
jkeeley-MW committed Nov 11, 2024
1 parent b75bb78 commit fc585b1
Show file tree
Hide file tree
Showing 14 changed files with 1,099 additions and 51 deletions.
5 changes: 2 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,9 +128,8 @@ more robust classification network.
This repository introduces the following functions that are used throughout the
examples:

- [`buildConstrainedNetwork`](conslearn/buildConstrainedNetwork.m) - Build a
network with specific constraints induced on the architecture and
initialization of the weights.
- [`buildConstrainedNetwork`](conslearn/buildConstrainedNetwork.m) - Build a multi-layer perceptron (MLP) with constraints on the architecture and initialization of the weights.
- [`buildConvexCNN`](conslearn/buildConvexCNN.m) - Build a fully-input convex convolutional neural network (CNN).
- [`trainConstrainedNetwork`](conslearn/trainConstrainedNetwork.m) - Train a
constrained network and maintain the constraint during training.
- [`lipschitzUpperBound`](conslearn/lipschitzUpperBound.m) - Compute an upper
Expand Down
148 changes: 148 additions & 0 deletions conslearn/+conslearn/+convex/+cnn/buildImageFICCNN.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
function net = buildImageFICCNN(inputSize, outputSize, filterSize, numFilters, options)
% BUILDIMAGEFICCNN Construct a fully-input convex convolutional neural
% network for image inputs.
%
% NET = BUILDIMAGEFICCNN(INPUTSIZE, OUTPUTSIZE, FILTERSIZE, NUMFILTERS)
% creates a fully-input convex dlnetwork object, NET.
%
% INPUTSIZE is a row vector of integers [h w c], where h, w, and c
% correspond to the height, width, and number of channels respectively.
%
% OUTPUTSIZE is an integer indicating the number of neurons in the
% output fully connected layer.
%
% FILTERSIZE is a matrix with two columns specifying the height and width
% for each convolutional layer. The network will have as many
% convolutional layers as there are rows in FILTERSIZE. If FILTERSIZE is
% provided as a column vector, it is assumed that the filters are square.
%
% NUMFILTERS is a column vector of integers that specifies the number of
% filters for each convolutional layer. It must have the same number of
% rows as FILTERSIZE.
%
% NET = BUILDIMAGEFICCNN(__, NAME=VALUE) specifies additional options
% using one or more name-value arguments.
%
% Stride                        - Stride for each convolutional
%                                 layer, specified as a two-column
%                                 matrix where the first column is
%                                 the stride height, and the second
%                                 column is the stride width. If
%                                 Stride is specified as a column
%                                 vector, a square stride is assumed.
%                                 The default value is 1 for all
%                                 layers.
%
% DilationFactor                - Dilation factor for each
%                                 convolutional layer, specified as a
%                                 two-column matrix where the first
%                                 column is the dilation factor height
%                                 and the second column is the
%                                 dilation factor width. If
%                                 DilationFactor is a column vector, a
%                                 square dilation factor is assumed.
%                                 The default value is 1 for all
%                                 layers.
%
% Padding                       - Padding method for each
%                                 convolutional layer, specified as
%                                 "same". Padding must be a string
%                                 array with the same number of rows
%                                 as FILTERSIZE. The default value is
%                                 "same" for all layers.
%
% PaddingValue                  - Padding value for each convolutional
%                                 layer, specified as a column vector
%                                 with the same number of rows as
%                                 FILTERSIZE. The default value is 0
%                                 for all layers.
%
% ConvexNonDecreasingActivation - Convex non-decreasing activation
%                                 function, specified as "softplus"
%                                 or "relu". The default value is
%                                 "relu".

%   Copyright 2024 The MathWorks, Inc.

arguments
    inputSize (1,:) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeTwoOrThreeRowVector(inputSize, "inputSize")}
    outputSize (1,1) {mustBeReal, mustBeInteger, mustBePositive}
    filterSize {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeOneOrTwoColumn(filterSize, "filterSize")}
    numFilters (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeEqualLength(filterSize, numFilters, "numFilters")}
    options.Stride {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeOneOrTwoColumn(options.Stride, "Stride"), mustBeEqualLength(filterSize, options.Stride, "Stride")} = ones(numel(numFilters), 2)
    options.DilationFactor {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeOneOrTwoColumn(options.DilationFactor, "DilationFactor"), mustBeEqualLength(filterSize, options.DilationFactor, "DilationFactor")} = ones(numel(numFilters), 2)
    options.Padding (:,1) {mustBeNonzeroLengthText, mustBeMember(options.Padding, "same"), mustBeEqualLength(filterSize, options.Padding, "Padding")} = repelem("same", numel(numFilters));
    options.PaddingValue (:,1) {mustBeNonempty, mustBeReal, mustBeEqualLength(filterSize, options.PaddingValue, "PaddingValue")} = zeros(numel(numFilters), 1);
    options.ConvexNonDecreasingActivation {mustBeNonzeroLengthText, mustBeTextScalar, mustBeMember(options.ConvexNonDecreasingActivation, ["relu", "softplus"])} = "relu"
end

% Select the activation function based on user input
switch options.ConvexNonDecreasingActivation
    case "relu"
        activationLayer = @(name) reluLayer(Name=name);
    case "softplus"
        activationLayer = @(name) softplusLayer(Name=name);
end

% Build the input layer
layers = [
    imageInputLayer(inputSize, Name="input", Normalization="none")
    ];

% Build the convolutional layers. Layers whose names contain "_+_" are
% subject to the convexity (non-negative weight) constraint applied by
% conslearn.convex.makeNetworkConvex.
for ii = 1:numel(numFilters)
    convLayerName = "conv2d_+_" + ii;
    activationLayerName = "cnd_" + ii;
    batchNormLayerName = "batchnorm_+_" + ii;

    convLayer = convolution2dLayer(filterSize(ii, :), numFilters(ii), ...
        Stride=options.Stride(ii, :), ...
        DilationFactor=options.DilationFactor(ii, :), ...
        Padding=options.Padding(ii), ...
        PaddingValue=options.PaddingValue(ii), ...
        Name=convLayerName);

    layers = [
        layers;
        convLayer;
        activationLayer(activationLayerName);
        batchNormalizationLayer(Name=batchNormLayerName)
        ]; %#ok<AGROW>
end

% Modify the name of the first convolutional layer to remove constraints.
% The first layer acts on the raw input, so its weights need not be
% constrained for the network outputs to remain convex in the inputs.
layers(2).Name = "conv2d_1";

% Add final pooling and fully connected layers
layers = [
    layers;
    globalAveragePooling2dLayer(Name="global_avg_pool");
    fullyConnectedLayer(outputSize, Name="fc_+_end")
    ];

% Initialize the dlnetwork
net = dlnetwork(layers);

% Make the network convex
net = conslearn.convex.makeNetworkConvex(net);

end

function mustBeTwoOrThreeRowVector(x, name)
% Validate that x is a row vector with exactly two or three elements.
isValidShape = isrow(x) && any(numel(x) == [2 3]);
if ~isValidShape
    error("'%s' must be a row vector with two or three elements.", name);
end
end

function mustBeOneOrTwoColumn(x, name)
% Validate that x has exactly one or two columns.
numCols = size(x, 2);
if ~any(numCols == [1 2])
    error("'%s' must be an array with one or two columns.", name);
end
end

function mustBeEqualLength(filterSize, otherVar, otherVarName)
% Validate that otherVar has the same number of rows as filterSize.
if size(otherVar, 1) ~= size(filterSize, 1)
    error("'%s' must have the same number of rows as the filter size value.", otherVarName);
end
end
124 changes: 124 additions & 0 deletions conslearn/+conslearn/+convex/+cnn/buildSequenceFICCNN.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
function net = buildSequenceFICCNN(inputSize, outputSize, filterSize, numFilters, options)
% BUILDSEQUENCEFICCNN Construct a fully-input convex convolutional
% neural network for sequence inputs.
%
% NET = BUILDSEQUENCEFICCNN(INPUTSIZE, OUTPUTSIZE, FILTERSIZE,
% NUMFILTERS) creates a fully-input convex dlnetwork object, NET.
%
% INPUTSIZE is an integer indicating the number of features.
%
% OUTPUTSIZE is an integer indicating the number of neurons in the
% output fully connected layer.
%
% FILTERSIZE is a column vector of integer filter sizes. The network will
% have as many convolutional layers as there are rows in FILTERSIZE.
%
% NUMFILTERS is a column vector of integers that specifies the number of
% filters for each convolutional layer. It must have the same number of
% rows as FILTERSIZE.
%
% NET = BUILDSEQUENCEFICCNN(__, NAME=VALUE) specifies additional options
% using one or more name-value arguments.
%
% Stride                        - Stride for each convolutional
%                                 layer, specified as a column vector
%                                 of integers with the same number of
%                                 rows as FILTERSIZE. The default
%                                 value is 1 for all layers.
%
% DilationFactor                - Dilation factor for each
%                                 convolutional layer, specified as a
%                                 column vector with the same number
%                                 of rows as FILTERSIZE. The default
%                                 value is 1 for all layers.
%
% Padding                       - Padding method for each
%                                 convolutional layer, specified as
%                                 "same" or "causal". Padding must be
%                                 a string array with the same number
%                                 of rows as FILTERSIZE. The default
%                                 value is "causal" for all layers.
%
% PaddingValue                  - Padding value for each convolutional
%                                 layer, specified as a column vector
%                                 with the same number of rows as
%                                 FILTERSIZE. The default value is 0
%                                 for all layers.
%
% ConvexNonDecreasingActivation - Convex non-decreasing activation
%                                 function, specified as "softplus"
%                                 or "relu". The default value is
%                                 "relu".

%   Copyright 2024 The MathWorks, Inc.

arguments
    inputSize (1,1) {mustBeReal, mustBeInteger, mustBePositive}
    outputSize (1,1) {mustBeReal, mustBeInteger, mustBePositive}
    filterSize (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive}
    numFilters (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeEqualLength(filterSize, numFilters, "numFilters")}
    options.Stride (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeEqualLength(filterSize, options.Stride, "Stride")} = ones(numel(filterSize), 1)
    options.DilationFactor (:,1) {mustBeNonempty, mustBeReal, mustBeInteger, mustBePositive, mustBeEqualLength(filterSize, options.DilationFactor, "DilationFactor")} = ones(numel(filterSize), 1)
    options.Padding (:,1) {mustBeNonzeroLengthText, mustBeMember(options.Padding, ["same", "causal"]), mustBeEqualLength(filterSize, options.Padding, "Padding")} = repelem("causal", numel(filterSize))
    options.PaddingValue (:,1) {mustBeNonempty, mustBeReal, mustBeEqualLength(filterSize, options.PaddingValue, "PaddingValue")} = zeros(numel(filterSize), 1)
    options.ConvexNonDecreasingActivation {mustBeNonzeroLengthText, mustBeTextScalar, mustBeMember(options.ConvexNonDecreasingActivation, ["relu", "softplus"])} = "relu"
end

% Select the activation function based on user input
switch options.ConvexNonDecreasingActivation
    case "relu"
        activationLayer = @(name) reluLayer(Name=name);
    case "softplus"
        activationLayer = @(name) softplusLayer(Name=name);
end

% Build the input layer
layers = [
    sequenceInputLayer(inputSize, Name="input", Normalization="none")
    ];

% Build the convolutional layers. Layers whose names contain "_+_" are
% subject to the convexity (non-negative weight) constraint applied by
% conslearn.convex.makeNetworkConvex.
for ii = 1:numel(numFilters)
    convLayerName = "conv1d_+_" + ii;
    activationLayerName = "cnd_" + ii;
    batchNormLayerName = "batchnorm_+_" + ii;

    convLayer = convolution1dLayer(filterSize(ii), numFilters(ii), ...
        Stride=options.Stride(ii), ...
        DilationFactor=options.DilationFactor(ii), ...
        Padding=options.Padding(ii), ...
        PaddingValue=options.PaddingValue(ii), ...
        Name=convLayerName);

    layers = [
        layers;
        convLayer;
        activationLayer(activationLayerName);
        batchNormalizationLayer(Name=batchNormLayerName)
        ]; %#ok<AGROW>
end

% Modify the name of the first convolutional layer to remove constraints.
% The first layer acts on the raw input, so its weights need not be
% constrained for the network outputs to remain convex in the inputs.
layers(2).Name = "conv1d_1";

% Add final pooling and fully connected layers
layers = [
    layers;
    globalAveragePooling1dLayer(Name="global_avg_pool");
    fullyConnectedLayer(outputSize, Name="fc_+_end")
    ];

% Initialize the dlnetwork
net = dlnetwork(layers);

% Make the network convex
net = conslearn.convex.makeNetworkConvex(net);

end

function mustBeEqualLength(filterSize, otherVar, otherVarName)
% Validate that otherVar has the same number of rows as filterSize.
rowsMatch = size(filterSize, 1) == size(otherVar, 1);
if ~rowsMatch
    error("'%s' must have the same number of rows as the filter size value.", otherVarName);
end
end
6 changes: 3 additions & 3 deletions conslearn/+conslearn/+convex/getConvexParameterIdx.m
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
function convexParameterIdx = getConvexParameterIdx(net)
% GETCONVEXPARAMETERIDX Returns the indices in the learnable parameter
% table of the network that correspond to weights with convex constraints.
% The network *must* be created using the buildConstrainedNetwork function
% with a convex constraint type.
% The network *must* be created using the buildConstrainedNetwork or
% buildConvexCNN function with a convex constraint type.

% Copyright 2024 The MathWorks, Inc.

Expand All @@ -11,6 +11,6 @@
end

convexParameterIdx = contains(net.Learnables.Layer,"_+_") & ...
contains(net.Learnables.Parameter,"Weight");
( contains(net.Learnables.Parameter,"Weight") | contains(net.Learnables.Parameter,"Scale"));
end

4 changes: 2 additions & 2 deletions conslearn/+conslearn/+convex/makeNetworkConvex.m
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
function net = makeNetworkConvex(net)
% MAKENETWORKCONVEX Constrain the weights of a convex network to ensure
% convexity of the outputs with respect to the network inputs. The network
% *must* be created using the buildConstrainedNetwork function with a convex
% constraint type.
% *must* be created using the buildConstrainedNetwork or
% buildConvexCNN function with a convex constraint type.

% Copyright 2024 The MathWorks, Inc.

Expand Down
Loading

0 comments on commit fc585b1

Please sign in to comment.