
Commit

Fixed pernicious convlayer error calculation error. Speed and accuracy are vastly improved.
qdm097 committed May 23, 2020
1 parent b329a56 commit de28ee2
Showing 6 changed files with 66 additions and 107 deletions.
45 changes: 10 additions & 35 deletions CNN1/ConvolutionLayer.cs
@@ -17,9 +17,7 @@ class ConvolutionLayer : iLayer
public int InputLength { get; set; }
double[,] RMSGrad { get; set; }
public double[] Errors { get; set; }
public double[] IntermediaryErrors { get; set; }
public double[] ZVals { get; set; }
double[,] Input { get; set; }
public double[] Values { get; set; }
public double AvgUpdate { get; set; }
public static int StepSize = 1;
@@ -73,66 +71,44 @@ public void Descend(int batchsize)
}
Gradients = new double[KernelSize, KernelSize];
}
public void Backprop(double[] input, bool useless)
{
Gradients = new double[KernelSize, KernelSize];
for (int k = 0; k < KernelSize; k++)
{
for (int j = 0; j < KernelSize; j++)
{
Gradients[k, j] += input[j] * Maths.TanhDerriv(ZVals[k]) * Errors[k];
if (NN.UseMomentum)
{
Momentums[k, j] = (Momentums[k, j] * NN.Momentum) - (NN.LearningRate * Gradients[k, j]);
Gradients[k, j] += Momentums[k, j];
}
}
}
}
/// <summary>
/// Calculates the errors of the convolution
/// </summary>
/// <param name="outputlayer">The layer which comes after the convolutional layer</param>
public void CalcError(iLayer outputlayer)
public void Backprop(double[] input, iLayer outputlayer, bool uselessbool, int uselessint)
{
//Calc errors
double[,] Input = Maths.Convert(input);
if (outputlayer is FullyConnectedLayer)
{
//Errors with respect to the output of the convolution
//dl/do
IntermediaryErrors = new double[outputlayer.InputLength];
Errors = new double[outputlayer.InputLength];
for (int k = 0; k < outputlayer.Length; k++)
{
for (int j = 0; j < outputlayer.InputLength; j++)
{
IntermediaryErrors[j] += outputlayer.Weights[k, j] * Maths.TanhDerriv(outputlayer.ZVals[k]) * outputlayer.Errors[k];
Errors[j] += outputlayer.Weights[k, j] * Maths.TanhDerriv(outputlayer.ZVals[k]) * outputlayer.Errors[k];
}
}
//Errors with respect to the filter
Errors = Maths.Convert(Convolve(Maths.Convert(IntermediaryErrors), Input));
}
if (outputlayer is ConvolutionLayer)
{
var CLOutput = outputlayer as ConvolutionLayer;
//Flipped?
IntermediaryErrors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.IntermediaryErrors)));
Errors = Maths.Convert(Convolve(Maths.Convert(IntermediaryErrors), Input));
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors)));
}
if (outputlayer is PoolingLayer)
{
var PLOutput = outputlayer as PoolingLayer;
int iterator = 0;
IntermediaryErrors = new double[ZVals.Length];
Errors = new double[ZVals.Length];
for (int i = 0; i < ZVals.Length; i++)
{
if (PLOutput.Mask[i] == 0) { continue; }
IntermediaryErrors[i] = PLOutput.Errors[iterator];
Errors[i] = PLOutput.Errors[iterator];
iterator++;
}
//Errors with respect to the filter
Errors = Maths.Convert(Convolve(Maths.Convert(IntermediaryErrors), Input));
}
//Calc gradients (errors with respect to the filter)
Gradients = Convolve(Maths.Convert(Errors), Input);
}
public void CalcError(double useless) { throw new Exception("The convolution layer is never an output layer"); }
/// <summary>
/// Calculates the dot product of the kernel and input matrix.
/// Matrices should be size [x, y] and [y], respectively, where x is the output size and y is the latent space's size
@@ -151,7 +127,6 @@ public void Calculate(double[] input, bool isoutput)
/// <param name="isoutput"></param>
public void Calculate(double[,] input, bool isoutput)
{
Input = input;
var output = Convolve(Weights, input);
ZVals = Maths.Convert(output);
if (!isoutput) { output = Maths.Tanh(output); }
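The merged Backprop above finishes with Gradients = Convolve(Maths.Convert(Errors), Input): for a stride-1, unpadded convolution, the gradient of the loss with respect to each kernel weight is itself a valid convolution of the layer's input with its error map. A minimal self-contained sketch of that identity (KernelGradient is a hypothetical name, not the repo's Convolve; square matrices and stride 1 assumed):

// Kernel-gradient identity assumed by the diff above (hypothetical helper):
// dL/dW[a,b] = sum over output positions (x,y) of error[x,y] * input[x+a, y+b],
// valid for stride 1 and no padding, where error is the same size as the output.
static double[,] KernelGradient(double[,] error, double[,] input, int kernelSize)
{
    var grad = new double[kernelSize, kernelSize];
    for (int a = 0; a < kernelSize; a++)
        for (int b = 0; b < kernelSize; b++)
            for (int x = 0; x < error.GetLength(0); x++)
                for (int y = 0; y < error.GetLength(1); y++)
                    grad[a, b] += error[x, y] * input[x + a, y + b];
    return grad;
}

The conv-to-conv branch instead propagates error backwards with FullConvolve; in the textbook derivation that full convolution also uses the kernel rotated 180°, which is presumably what the "//Flipped?" comment in the removed code was asking.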
98 changes: 44 additions & 54 deletions CNN1/FullyConnectedLayer.cs
@@ -112,12 +112,54 @@ public void Descend(int batchsize)
BiasGradient = new double[Length];
}
/// <summary>
/// Descent for other layers
/// Backpropagation of error and calculation of gradients
/// </summary>
/// <param name="input">Previous layer's values</param>
/// <param name="isoutput">Whether the layer is the output layer</param>
public void Backprop(double[] input, bool isoutput)
public void Backprop(double[] input, iLayer outputlayer, bool isoutput, int correct)
{
//Calculate error
if (isoutput)
{
Errors = new double[Length];
for (int i = 0; i < Length; i++)
{
Errors[i] = 2d * ((i == correct ? 1d : 0d) - Values[i]);
}
}
else
{
if (outputlayer is FullyConnectedLayer)
{
var FCLOutput = outputlayer as FullyConnectedLayer;
Errors = new double[Length];
for (int k = 0; k < FCLOutput.Length; k++)
{
for (int j = 0; j < Length; j++)
{
Errors[j] += FCLOutput.Weights[k, j] * Maths.TanhDerriv(outputlayer.ZVals[k]) * FCLOutput.Errors[k];
}
}
}
if (outputlayer is ConvolutionLayer)
{
var CLOutput = outputlayer as ConvolutionLayer;
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors)));
}
if (outputlayer is PoolingLayer)
{
var PLOutput = outputlayer as PoolingLayer;
int iterator = 0;
Errors = new double[Length];
for (int i = 0; i < Length; i++)
{
if (PLOutput.Mask[i] == 0) { continue; }
Errors[i] = PLOutput.Errors[iterator];
iterator++;
}
}
}
//Calculate gradients
for (int i = 0; i < Length; i++)
{
for (int ii = 0; ii < InputLength; ii++)
@@ -140,58 +182,6 @@ public void Backprop(double[] input, bool isoutput)
}
}
}
/// <summary>
/// I used the following intuition to work out this method of backpropagation,
/// because I could not find an explanation anywhere online:
/// "Error is how much you're wrong, adjusted for how much your superior cares and how much he's wrong"
/// I then realized that this applies to convolution as much as it does to ordinary layers.
/// So the error, with respect to any given input value, is defined the same way as usual.
/// In other words, <i>you can use the same formula as normal, but calculate it with convolution</i>
/// This is done like so: "Error += output.weight * output.error * tanhderriv(output.zval)"
/// With respect to the given indices: i, ii, j, jj.
/// All adjusted for convolution, as demonstrated below.
/// </summary>
/// <param name="outputlayer"></param>
public void CalcError(iLayer outputlayer)
{
if (outputlayer is FullyConnectedLayer)
{
var FCLOutput = outputlayer as FullyConnectedLayer;
Errors = new double[Length];
for (int k = 0; k < FCLOutput.Length; k++)
{
for (int j = 0; j < Length; j++)
{
Errors[j] += FCLOutput.Weights[k, j] * Maths.TanhDerriv(outputlayer.ZVals[k]) * FCLOutput.Errors[k];
}
}
}
if (outputlayer is ConvolutionLayer)
{
var CLOutput = outputlayer as ConvolutionLayer;
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.IntermediaryErrors)));
}
if (outputlayer is PoolingLayer)
{
var PLOutput = outputlayer as PoolingLayer;
int iterator = 0;
Errors = new double[Length];
for (int i = 0; i < Length; i++)
{
if (PLOutput.Mask[i] == 0) { continue; }
Errors[i] = PLOutput.Errors[iterator];
iterator++;
}
}
}
public void CalcError(double correct)
{
Errors = new double[Length];
for (int i = 0; i < Length; i++)
{
Errors[i] = 2d * ((i == correct ? 1d : 0d) - Values[i]);
}
}
public void Calculate(double[] input, bool output)
{
var vals = new double[Length];
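The error terms the merged Backprop computes correspond to the usual delta rules. A sketch, assuming the squared-error loss and one-hot target implied by the output-layer branch:

L = \sum_i (y_i - v_i)^2, \qquad y_i = 1 \text{ if } i = \text{correct, else } 0

\frac{\partial L}{\partial v_i} = -2\,(y_i - v_i) \quad\Rightarrow\quad \text{Errors}[i] = 2\,(y_i - v_i) = -\frac{\partial L}{\partial v_i}

and, for a hidden layer feeding a fully connected layer:

\delta_j = \sum_k W_{kj}\,\tanh'(z_k)\,\delta_k

Under that reading, Errors stores the negative loss gradient, with the sign absorbed by the descent step.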
14 changes: 5 additions & 9 deletions CNN1/NN.cs
@@ -69,17 +69,13 @@ public void Run(double[] input, int correct, bool testing)
}
if (!testing)
{
//Errors
Layers[NumLayers - 1].CalcError(correct);
for (int i = NumLayers - 2; i >= 0; i--)
{
Layers[i].CalcError(Layers[i + 1]);
}
//Backprop
Layers[0].Backprop(input, NumLayers == 1);
for (int i = 1; i < NumLayers; i++)
for (int i = NumLayers - 1; i >= 0; i--)
{
Layers[i].Backprop(Layers[i - 1].Values, i == Layers.Count - 1); continue;
bool isoutput = i == Layers.Count - 1;
iLayer outputlayer = isoutput ? null : Layers[i + 1];
double[] inputvals = i == 0 ? input : Layers[i - 1].Values;
Layers[i].Backprop(inputvals, outputlayer, isoutput, correct);
}
}
//Report values
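With the error and gradient passes merged into one loop, a training or evaluation step is a single call per sample. The loop must run from NumLayers - 1 down to 0 because each layer's errors depend on the already-computed errors of the layer after it. A minimal usage sketch (net, image, and label are illustrative names, not from the repo):

net.Run(image, label, testing: false); // forward pass, then one backward sweep from the output layer down
net.Run(image, label, testing: true);  // forward pass only; no errors or gradients are computed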
10 changes: 5 additions & 5 deletions CNN1/PoolingLayer.cs
@@ -16,9 +16,7 @@ class PoolingLayer : iLayer
public int Length { get; set; }
public int InputLength { get; set; }
public void Descend(int anint) { }
public void Backprop(double[] anarray, bool abool) { }
public iLayer Init(Random arandom, bool abool) { return this; }
public void CalcError(double adouble) { }
//Pooling stuff
public int PoolSize { get; set; }
public double[] Values { get; set; }
@@ -30,8 +28,9 @@ public PoolingLayer(int poolsize, int priorsize)
Values = new double[Length];
ZVals = new double[Length];
}
public void CalcError(iLayer outputlayer)
public void Backprop(double[] input, iLayer outputlayer, bool uselessbool, int uselessint)
{
//Calc errors
if (outputlayer is FullyConnectedLayer)
{
var FCLOutput = outputlayer as FullyConnectedLayer;
@@ -47,7 +46,7 @@ public void CalcError(iLayer outputlayer)
if (outputlayer is ConvolutionLayer)
{
var CLOutput = outputlayer as ConvolutionLayer;
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.IntermediaryErrors)));
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors)));
}
if (outputlayer is PoolingLayer)
{
@@ -61,6 +60,7 @@
iterator++;
}
}
//There are no gradients with respect to a pooling layer (it has no weights)
}
public void Calculate(double[] input, bool useless)
{
@@ -70,7 +70,7 @@ public void Calculate(double[,] input, bool useless)
{

if (input.GetLength(0) % PoolSize != 0 || input.GetLength(1) % PoolSize != 0)
{ throw new Exception("Unclean divide in PoolSizeing"); }
{ throw new Exception("Unclean divide in PoolSizing"); }
double[,] output = new double[input.GetLength(0) / PoolSize, input.GetLength(1) / PoolSize];
var mask = new double[input.GetLength(0), input.GetLength(1)];
int currentx = 0, currenty = 0;
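The Calculate overload above (truncated in this view) pairs with the mask-based error routing in Backprop: the forward pass records which input won each max, and backprop sends each downstream error only to that input. A self-contained sketch under the same assumptions (max pooling with stride equal to PoolSize and a 0/1 mask; MaxPool is a hypothetical helper, not the repo's code):

// Hypothetical max-pool forward pass: pools PoolSize x PoolSize blocks and
// records the argmax of each block in a 0/1 mask for use during backprop.
static (double[,] output, double[,] mask) MaxPool(double[,] input, int poolSize)
{
    int h = input.GetLength(0) / poolSize, w = input.GetLength(1) / poolSize;
    var output = new double[h, w];
    var mask = new double[input.GetLength(0), input.GetLength(1)];
    for (int i = 0; i < h; i++)
    {
        for (int j = 0; j < w; j++)
        {
            double max = double.NegativeInfinity; int mx = 0, my = 0;
            for (int a = 0; a < poolSize; a++)
                for (int b = 0; b < poolSize; b++)
                {
                    double v = input[i * poolSize + a, j * poolSize + b];
                    if (v > max) { max = v; mx = i * poolSize + a; my = j * poolSize + b; }
                }
            output[i, j] = max;  // pooled value
            mask[mx, my] = 1;    // remember which input produced it
        }
    }
    return (output, mask);
}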
2 changes: 1 addition & 1 deletion CNN1/WBs.txt

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions CNN1/iLayer.cs
@@ -23,9 +23,7 @@ interface iLayer
/// </summary>
/// <param name="input">Previous layer's values</param>
/// <param name="output">Whether the layer is the output layer</param>
void Backprop(double[] input, bool output);
void CalcError(iLayer output);
void CalcError(double correct);
void Backprop(double[] input, iLayer outputlayer, bool isoutput, int correct);
void Calculate(double[] input, bool output);
void Calculate(double[,] input, bool output);
}
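The single Backprop signature replaces the three old entry points (Backprop, CalcError(iLayer), CalcError(double)). The calling contract, as implied by the NN.Run loop above:

// Output layer:  layer.Backprop(prevValues, null,      isoutput: true,  correct);
//                computes the output error from 'correct', then its gradients.
// Hidden layer:  layer.Backprop(prevValues, nextLayer, isoutput: false, correct);
//                pulls error back from 'nextLayer'; 'correct' is ignored.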
