diff --git a/README.md b/README.md index c33f793..20d34f6 100644 --- a/README.md +++ b/README.md @@ -29,13 +29,12 @@ Read the paper [here](https://arxiv.org/abs/1902.06714). |------------|------------------|------------------------|----------------------|--------------|---------------| | Input | `input` | n/a | 1, 3 | n/a | n/a | | Dense (fully-connected) | `dense` | `input1d`, `flatten` | 1 | ✅ | ✅ | -| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ❌ | +| Convolutional (2-d) | `conv2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅(*) | | Max-pooling (2-d) | `maxpool2d` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 3 | ✅ | ✅ | | Flatten | `flatten` | `input3d`, `conv2d`, `maxpool2d`, `reshape` | 1 | ✅ | ✅ | | Reshape (1-d to 3-d) | `reshape` | `input1d`, `dense`, `flatten` | 3 | ✅ | ✅ | -**Note:** The training of convolutional layers has been discovered to be broken -as of release 0.13.0. This will be fixed in a future (hopefully next) release. +(*) See Issue [#145](https://github.com/modern-fortran/neural-fortran/issues/145) regarding non-converging CNN training on the MNIST dataset. ## Getting started diff --git a/src/nf/nf_layer_constructors_submodule.f90 b/src/nf/nf_layer_constructors_submodule.f90 index 002a83b..91de36e 100644 --- a/src/nf/nf_layer_constructors_submodule.f90 +++ b/src/nf/nf_layer_constructors_submodule.f90 @@ -8,7 +8,7 @@ use nf_input3d_layer, only: input3d_layer use nf_maxpool2d_layer, only: maxpool2d_layer use nf_reshape_layer, only: reshape3d_layer - use nf_activation, only: activation_function, sigmoid + use nf_activation, only: activation_function, relu, sigmoid implicit none @@ -27,7 +27,7 @@ pure module function conv2d(filters, kernel_size, activation) result(res) if (present(activation)) then allocate(activation_tmp, source=activation) else - allocate(activation_tmp, source=sigmoid()) + allocate(activation_tmp, source=relu()) end if res % activation = activation_tmp % get_name() diff --git a/src/nf/nf_network_submodule.f90 b/src/nf/nf_network_submodule.f90 index 5bafb7c..a35064b 100644 --- a/src/nf/nf_network_submodule.f90 +++ b/src/nf/nf_network_submodule.f90 @@ -305,12 +305,14 @@ pure module subroutine backward(self, output) select type(next_layer => self % layers(n + 1) % p) type is(dense_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) - type is(flatten_layer) - call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(conv2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(flatten_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) type is(maxpool2d_layer) call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) + type is(reshape3d_layer) + call self % layers(n) % backward(self % layers(n - 1), next_layer % gradient) end select end if diff --git a/test/test_conv2d_layer.f90 b/test/test_conv2d_layer.f90 index ae0474e..5840063 100644 --- a/test/test_conv2d_layer.f90 +++ b/test/test_conv2d_layer.f90 @@ -24,9 +24,9 @@ program test_conv2d_layer write(stderr, '(a)') 'conv2d layer should not be marked as initialized yet.. failed' end if - if (.not. conv_layer % activation == 'sigmoid') then + if (.not. conv_layer % activation == 'relu') then ok = .false. - write(stderr, '(a)') 'conv2d layer is defaults to sigmoid activation.. failed' + write(stderr, '(a)') 'conv2d layer defaults to relu activation.. failed' end if input_layer = input([3, 32, 32]) @@ -62,7 +62,7 @@ program test_conv2d_layer call conv_layer % forward(input_layer) call conv_layer % get_output(output) - if (.not. all(abs(output - 0.5) < tolerance)) then + if (.not. all(abs(output) < tolerance)) then ok = .false. write(stderr, '(a)') 'conv2d layer with zero input and sigmoid function must forward to all 0.5.. failed' end if diff --git a/test/test_conv2d_network.f90 b/test/test_conv2d_network.f90 index 4d87a52..47c9a81 100644 --- a/test/test_conv2d_network.f90 +++ b/test/test_conv2d_network.f90 @@ -1,7 +1,7 @@ program test_conv2d_network use iso_fortran_env, only: stderr => error_unit - use nf, only: conv2d, input, network + use nf, only: conv2d, input, network, dense, sgd, maxpool2d implicit none @@ -21,6 +21,7 @@ program test_conv2d_network ok = .false. end if + ! Test for output shape allocate(sample_input(3, 32, 32)) sample_input = 0 @@ -32,6 +33,115 @@ program test_conv2d_network ok = .false. end if + deallocate(sample_input, output) + + training1: block + + type(network) :: cnn + real :: y(1) + real :: tolerance = 1e-5 + integer :: n + integer, parameter :: num_iterations = 1000 + + ! Test training of a minimal constant mapping + allocate(sample_input(1, 5, 5)) + call random_number(sample_input) + + cnn = network([ & + input(shape(sample_input)), & + conv2d(filters=1, kernel_size=3), & + conv2d(filters=1, kernel_size=3), & + dense(1) & + ]) + + y = [0.1234567] + + do n = 1, num_iterations + call cnn % forward(sample_input) + call cnn % backward(y) + call cnn % update(optimizer=sgd(learning_rate=1.)) + if (all(abs(cnn % predict(sample_input) - y) < tolerance)) exit + end do + + if (.not. n <= num_iterations) then + write(stderr, '(a)') & + 'convolutional network 1 should converge in simple training.. failed' + ok = .false. + end if + + end block training1 + + training2: block + + type(network) :: cnn + real :: x(1, 8, 8) + real :: y(1) + real :: tolerance = 1e-5 + integer :: n + integer, parameter :: num_iterations = 1000 + + call random_number(x) + y = [0.1234567] + + cnn = network([ & + input(shape(x)), & + conv2d(filters=1, kernel_size=3), & + maxpool2d(pool_size=2), & + conv2d(filters=1, kernel_size=3), & + dense(1) & + ]) + + do n = 1, num_iterations + call cnn % forward(x) + call cnn % backward(y) + call cnn % update(optimizer=sgd(learning_rate=1.)) + if (all(abs(cnn % predict(x) - y) < tolerance)) exit + end do + + if (.not. n <= num_iterations) then + write(stderr, '(a)') & + 'convolutional network 2 should converge in simple training.. failed' + ok = .false. + end if + + end block training2 + + training3: block + + type(network) :: cnn + real :: x(1, 12, 12) + real :: y(9) + real :: tolerance = 1e-5 + integer :: n + integer, parameter :: num_iterations = 5000 + + call random_number(x) + y = [0.12345, 0.23456, 0.34567, 0.45678, 0.56789, 0.67890, 0.78901, 0.89012, 0.90123] + + cnn = network([ & + input(shape(x)), & + conv2d(filters=1, kernel_size=3), & ! 1x12x12 input, 1x10x10 output + maxpool2d(pool_size=2), & ! 1x10x10 input, 1x5x5 output + conv2d(filters=1, kernel_size=3), & ! 1x5x5 input, 1x3x3 output + dense(9) & ! 9 outputs + ]) + + do n = 1, num_iterations + call cnn % forward(x) + call cnn % backward(y) + call cnn % update(optimizer=sgd(learning_rate=1.)) + if (all(abs(cnn % predict(x) - y) < tolerance)) exit + end do + + if (.not. n <= num_iterations) then + write(stderr, '(a)') & + 'convolutional network 3 should converge in simple training.. failed' + ok = .false. + end if + + end block training3 + + if (ok) then print '(a)', 'test_conv2d_network: All tests passed.' else