
Commit

Fixed pernicious convlayer error calculation error. Speed and accuracy are vastly improved.
qdm097 committed May 23, 2020
1 parent b329a56 commit de28ee2
Showing 6 changed files with 66 additions and 107 deletions.
45 changes: 10 additions & 35 deletions CNN1/ConvolutionLayer.cs
@@ -17,9 +17,7 @@ class ConvolutionLayer : iLayer
public int InputLength { get; set; }
double[,] RMSGrad { get; set; }
public double[] Errors { get; set; }
public double[] IntermediaryErrors { get; set; }
public double[] ZVals { get; set; }
double[,] Input { get; set; }
public double[] Values { get; set; }
public double AvgUpdate { get; set; }
public static int StepSize = 1;
@@ -73,66 +71,44 @@ public void Descend(int batchsize)
}
Gradients = new double[KernelSize, KernelSize];
}
public void Backprop(double[] input, bool useless)
{
Gradients = new double[KernelSize, KernelSize];
for (int k = 0; k < KernelSize; k++)
{
for (int j = 0; j < KernelSize; j++)
{
Gradients[k, j] += input[j] * Maths.TanhDerriv(ZVals[k]) * Errors[k];
if (NN.UseMomentum)
{
Momentums[k, j] = (Momentums[k, j] * NN.Momentum) - (NN.LearningRate * Gradients[k, j]);
Gradients[k, j] += Momentums[k, j];
}
}
}
}
/// <summary>
/// Calculates the errors of the convolution
/// </summary>
/// <param name="outputlayer">The layer which comes after the convolutional layer</param>
public void CalcError(iLayer outputlayer)
public void Backprop(double[] input, iLayer outputlayer, bool uselessbool, int uselessint)
{
//Calc errors
double[,] Input = Maths.Convert(input);
if (outputlayer is FullyConnectedLayer)
{
//Errors with respect to the output of the convolution
//dl/do
IntermediaryErrors = new double[outputlayer.InputLength];
Errors = new double[outputlayer.InputLength];
for (int k = 0; k < outputlayer.Length; k++)
{
for (int j = 0; j < outputlayer.InputLength; j++)
{
IntermediaryErrors[j] += outputlayer.Weights[k, j] * Maths.TanhDerriv(outputlayer.ZVals[k]) * outputlayer.Errors[k];
Errors[j] += outputlayer.Weights[k, j] * Maths.TanhDerriv(outputlayer.ZVals[k]) * outputlayer.Errors[k];
}
}
//Errors with respect to the filter
Errors = Maths.Convert(Convolve(Maths.Convert(IntermediaryErrors), Input));
}
if (outputlayer is ConvolutionLayer)
{
var CLOutput = outputlayer as ConvolutionLayer;
//Flipped?
IntermediaryErrors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.IntermediaryErrors)));
Errors = Maths.Convert(Convolve(Maths.Convert(IntermediaryErrors), Input));
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors)));
}
if (outputlayer is PoolingLayer)
{
var PLOutput = outputlayer as PoolingLayer;
int iterator = 0;
IntermediaryErrors = new double[ZVals.Length];
Errors = new double[ZVals.Length];
for (int i = 0; i < ZVals.Length; i++)
{
if (PLOutput.Mask[i] == 0) { continue; }
IntermediaryErrors[i] = PLOutput.Errors[iterator];
Errors[i] = PLOutput.Errors[iterator];
iterator++;
}
//Errors with respect to the filter
Errors = Maths.Convert(Convolve(Maths.Convert(IntermediaryErrors), Input));
}
//Calc gradients (errors with respect to the filter)
Gradients = Convolve(Maths.Convert(Errors), Input);
}
public void CalcError(double useless) { throw new Exception("The convolution layer is never an output layer"); }
/// <summary>
/// Calculates the dot product of the kernel and input matrix.
/// Matrices should be size [x, y] and [y], respectively, where x is the output size and y is the latent space's size
@@ -151,7 +127,6 @@ public void Calculate(double[] input, bool isoutput)
/// <param name="isoutput"></param>
public void Calculate(double[,] input, bool isoutput)
{
Input = input;
var output = Convolve(Weights, input);
ZVals = Maths.Convert(output);
if (!isoutput) { output = Maths.Tanh(output); }
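The merged Backprop above finishes with Gradients = Convolve(Maths.Convert(Errors), Input): for a stride-1, unpadded convolution, the gradient of the loss with respect to each kernel weight is itself a valid convolution of the layer's input with its error map. A minimal self-contained sketch of that identity (KernelGradient is a hypothetical name, not the repo's Convolve; square matrices and stride 1 assumed):

// Kernel-gradient identity assumed by the diff above (hypothetical helper):
// dL/dW[a,b] = sum over output positions (x,y) of error[x,y] * input[x+a, y+b],
// valid for stride 1 and no padding, where error is the same size as the output.
static double[,] KernelGradient(double[,] error, double[,] input, int kernelSize)
{
    var grad = new double[kernelSize, kernelSize];
    for (int a = 0; a < kernelSize; a++)
        for (int b = 0; b < kernelSize; b++)
            for (int x = 0; x < error.GetLength(0); x++)
                for (int y = 0; y < error.GetLength(1); y++)
                    grad[a, b] += error[x, y] * input[x + a, y + b];
    return grad;
}

The conv-to-conv branch instead propagates error backwards with FullConvolve; in the textbook derivation that full convolution also uses the kernel rotated 180°, which is presumably what the "//Flipped?" comment in the removed code was asking.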
98 changes: 44 additions & 54 deletions CNN1/FullyConnectedLayer.cs
@@ -112,12 +112,54 @@ public void Descend(int batchsize)
BiasGradient = new double[Length];
}
/// <summary>
/// Descent for other layers
/// Backpropagation of error and calculation of gradients
/// </summary>
/// <param name="input">Previous layer's values</param>
/// <param name="isoutput">Whether the layer is the output layer</param>
public void Backprop(double[] input, bool isoutput)
public void Backprop(double[] input, iLayer outputlayer, bool isoutput, int correct)
{
//Calculate error
if (isoutput)
{
Errors = new double[Length];
for (int i = 0; i < Length; i++)
{
Errors[i] = 2d * ((i == correct ? 1d : 0d) - Values[i]);
}
}
else
{
if (outputlayer is FullyConnectedLayer)
{
var FCLOutput = outputlayer as FullyConnectedLayer;
Errors = new double[Length];
for (int k = 0; k < FCLOutput.Length; k++)
{
for (int j = 0; j < Length; j++)
{
Errors[j] += FCLOutput.Weights[k, j] * Maths.TanhDerriv(outputlayer.ZVals[k]) * FCLOutput.Errors[k];
}
}
}
if (outputlayer is ConvolutionLayer)
{
var CLOutput = outputlayer as ConvolutionLayer;
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors)));
}
if (outputlayer is PoolingLayer)
{
var PLOutput = outputlayer as PoolingLayer;
int iterator = 0;
Errors = new double[Length];
for (int i = 0; i < Length; i++)
{
if (PLOutput.Mask[i] == 0) { continue; }
Errors[i] = PLOutput.Errors[iterator];
iterator++;
}
}
}
//Calculate gradients
for (int i = 0; i < Length; i++)
{
for (int ii = 0; ii < InputLength; ii++)
@@ -140,58 +182,6 @@ public void Backprop(double[] input, bool isoutput)
}
}
}
/// <summary>
/// I used the following intuition to work out this method of backpropagation,
/// because I could not find an explanation anywhere online:
/// "Error is how much you're wrong, adjusted for how much your superior cares and how much he's wrong"
/// I then realized that this applies to convolution as much as it does to ordinary layers.
/// So the error, with respect to any given input value, is defined the same way as usual.
/// In other words, <i>you can use the same formula as normal, but calculate it with convolution</i>
/// This is done like so: "Error += output.weight * output.error * tanhderriv(output.zval)"
/// With respect to the given indices: i, ii, j, jj.
/// All adjusted for convolution, as demonstrated below.
/// </summary>
/// <param name="outputlayer"></param>
public void CalcError(iLayer outputlayer)
{
if (outputlayer is FullyConnectedLayer)
{
var FCLOutput = outputlayer as FullyConnectedLayer;
Errors = new double[Length];
for (int k = 0; k < FCLOutput.Length; k++)
{
for (int j = 0; j < Length; j++)
{
Errors[j] += FCLOutput.Weights[k, j] * Maths.TanhDerriv(outputlayer.ZVals[k]) * FCLOutput.Errors[k];
}
}
}
if (outputlayer is ConvolutionLayer)
{
var CLOutput = outputlayer as ConvolutionLayer;
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.IntermediaryErrors)));
}
if (outputlayer is PoolingLayer)
{
var PLOutput = outputlayer as PoolingLayer;
int iterator = 0;
Errors = new double[Length];
for (int i = 0; i < Length; i++)
{
if (PLOutput.Mask[i] == 0) { continue; }
Errors[i] = PLOutput.Errors[iterator];
iterator++;
}
}
}
public void CalcError(double correct)
{
Errors = new double[Length];
for (int i = 0; i < Length; i++)
{
Errors[i] = 2d * ((i == correct ? 1d : 0d) - Values[i]);
}
}
public void Calculate(double[] input, bool output)
{
var vals = new double[Length];
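The error terms the merged Backprop computes correspond to the usual delta rules. A sketch, assuming the squared-error loss and one-hot target implied by the output-layer branch:

L = \sum_i (y_i - v_i)^2, \qquad y_i = 1 \text{ if } i = \text{correct, else } 0

\frac{\partial L}{\partial v_i} = -2\,(y_i - v_i) \quad\Rightarrow\quad \text{Errors}[i] = 2\,(y_i - v_i) = -\frac{\partial L}{\partial v_i}

and, for a hidden layer feeding a fully connected layer:

\delta_j = \sum_k W_{kj}\,\tanh'(z_k)\,\delta_k

Under that reading, Errors stores the negative loss gradient, with the sign absorbed by the descent step.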
14 changes: 5 additions & 9 deletions CNN1/NN.cs
@@ -69,17 +69,13 @@ public void Run(double[] input, int correct, bool testing)
}
if (!testing)
{
//Errors
Layers[NumLayers - 1].CalcError(correct);
for (int i = NumLayers - 2; i >= 0; i--)
{
Layers[i].CalcError(Layers[i + 1]);
}
//Backprop
Layers[0].Backprop(input, NumLayers == 1);
for (int i = 1; i < NumLayers; i++)
for (int i = NumLayers - 1; i >= 0; i--)
{
Layers[i].Backprop(Layers[i - 1].Values, i == Layers.Count - 1); continue;
bool isoutput = i == Layers.Count - 1;
iLayer outputlayer = isoutput ? null : Layers[i + 1];
double[] inputvals = i == 0 ? input : Layers[i - 1].Values;
Layers[i].Backprop(inputvals, outputlayer, isoutput, correct);
}
}
//Report values
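With the error and gradient passes merged into one loop, a training or evaluation step is a single call per sample. The loop must run from NumLayers - 1 down to 0 because each layer's errors depend on the already-computed errors of the layer after it. A minimal usage sketch (net, image, and label are illustrative names, not from the repo):

net.Run(image, label, testing: false); // forward pass, then one backward sweep from the output layer down
net.Run(image, label, testing: true);  // forward pass only; no errors or gradients are computed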
10 changes: 5 additions & 5 deletions CNN1/PoolingLayer.cs
@@ -16,9 +16,7 @@ class PoolingLayer : iLayer
public int Length { get; set; }
public int InputLength { get; set; }
public void Descend(int anint) { }
public void Backprop(double[] anarray, bool abool) { }
public iLayer Init(Random arandom, bool abool) { return this; }
public void CalcError(double adouble) { }
//Pooling stuff
public int PoolSize { get; set; }
public double[] Values { get; set; }
@@ -30,8 +28,9 @@ public PoolingLayer(int poolsize, int priorsize)
Values = new double[Length];
ZVals = new double[Length];
}
public void CalcError(iLayer outputlayer)
public void Backprop(double[] input, iLayer outputlayer, bool uselessbool, int uselessint)
{
//Calc errors
if (outputlayer is FullyConnectedLayer)
{
var FCLOutput = outputlayer as FullyConnectedLayer;
@@ -47,7 +46,7 @@ public void CalcError(iLayer outputlayer)
if (outputlayer is ConvolutionLayer)
{
var CLOutput = outputlayer as ConvolutionLayer;
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.IntermediaryErrors)));
Errors = Maths.Convert(CLOutput.FullConvolve(CLOutput.Weights, Maths.Convert(CLOutput.Errors)));
}
if (outputlayer is PoolingLayer)
{
@@ -61,6 +60,7 @@
iterator++;
}
}
//There are no gradients with respect to a pooling layer (it has no weights)
}
public void Calculate(double[] input, bool useless)
{
@@ -70,7 +70,7 @@ public void Calculate(double[,] input, bool useless)
{

if (input.GetLength(0) % PoolSize != 0 || input.GetLength(1) % PoolSize != 0)
{ throw new Exception("Unclean divide in PoolSizeing"); }
{ throw new Exception("Unclean divide in PoolSizing"); }
double[,] output = new double[input.GetLength(0) / PoolSize, input.GetLength(1) / PoolSize];
var mask = new double[input.GetLength(0), input.GetLength(1)];
int currentx = 0, currenty = 0;
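The Calculate overload above (truncated in this view) pairs with the mask-based error routing in Backprop: the forward pass records which input won each max, and backprop sends each downstream error only to that input. A self-contained sketch under the same assumptions (max pooling with stride equal to PoolSize and a 0/1 mask; MaxPool is a hypothetical helper, not the repo's code):

// Hypothetical max-pool forward pass: pools PoolSize x PoolSize blocks and
// records the argmax of each block in a 0/1 mask for use during backprop.
static (double[,] output, double[,] mask) MaxPool(double[,] input, int poolSize)
{
    int h = input.GetLength(0) / poolSize, w = input.GetLength(1) / poolSize;
    var output = new double[h, w];
    var mask = new double[input.GetLength(0), input.GetLength(1)];
    for (int i = 0; i < h; i++)
    {
        for (int j = 0; j < w; j++)
        {
            double max = double.NegativeInfinity; int mx = 0, my = 0;
            for (int a = 0; a < poolSize; a++)
                for (int b = 0; b < poolSize; b++)
                {
                    double v = input[i * poolSize + a, j * poolSize + b];
                    if (v > max) { max = v; mx = i * poolSize + a; my = j * poolSize + b; }
                }
            output[i, j] = max;  // pooled value
            mask[mx, my] = 1;    // remember which input produced it
        }
    }
    return (output, mask);
}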
2 changes: 1 addition & 1 deletion CNN1/WBs.txt

Large diffs are not rendered by default.

4 changes: 1 addition & 3 deletions CNN1/iLayer.cs
@@ -23,9 +23,7 @@ interface iLayer
/// </summary>
/// <param name="input">Previous layer's values</param>
/// <param name="output">Whether the layer is the output layer</param>
void Backprop(double[] input, bool output);
void CalcError(iLayer output);
void CalcError(double correct);
void Backprop(double[] input, iLayer outputlayer, bool isoutput, int correct);
void Calculate(double[] input, bool output);
void Calculate(double[,] input, bool output);
}
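The single Backprop signature replaces the three old entry points (Backprop, CalcError(iLayer), CalcError(double)). The calling contract, as implied by the NN.Run loop above:

// Output layer:  layer.Backprop(prevValues, null,      isoutput: true,  correct);
//                computes the output error from 'correct', then its gradients.
// Hidden layer:  layer.Backprop(prevValues, nextLayer, isoutput: false, correct);
//                pulls error back from 'nextLayer'; 'correct' is ignored.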
