From b7b45f85ec6b0739a0d2afbe36d9dc930d78ddd9 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Thu, 19 Dec 2024 21:57:31 +0100
Subject: [PATCH 01/10] added `contracts` crate for `pre` and `post`
 conditions for a DbC refactor

---
 Cargo.lock | 31 +++++++++++++++++++++++++++----
 Cargo.toml |  2 ++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7d46c0c..9102f30 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6,6 +6,7 @@ version = 4
 name = "Transformer"
 version = "0.1.0"
 dependencies = [
+ "contracts",
  "ndarray",
  "rand",
  "regex",
@@ -50,6 +51,17 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "contracts"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1d1429e3bd78171c65aa010eabcdf8f863ba3254728dbfb0ad4b1545beac15c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "errno"
 version = "0.3.10"
@@ -288,6 +300,17 @@ dependencies = [
  "windows-sys",
 ]
 
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.90"
@@ -335,7 +358,7 @@ dependencies = [
  "log",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
  "wasm-bindgen-shared",
 ]
 
@@ -357,7 +380,7 @@ checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
 
@@ -477,7 +500,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
 ]
 
@@ -488,5 +511,5 @@ checksum = "7988d73a4303ca289df03316bc490e934accf371af6bc745393cf3c2c5c4f25d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
 ]

diff --git a/Cargo.toml b/Cargo.toml
index 096329a..9e0c87b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,3 +11,5 @@ description = "A Rust implementation of a Transformer-based model for NLP tasks."
 ndarray = "0.16.1"
 rand = "0.9.0-beta.1"
 regex = "1.11.1"
+contracts = "0.6.3"
+
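The `contracts` crate added above drives the whole series: `#[requires(...)]` declares a pre-condition checked when a function is entered, and `#[ensures(...)]` a post-condition checked on the return value, which the macro exposes as `ret`. A violated condition panics with the attached message. A minimal sketch of the mechanics (illustrative only; `checked_floor_div` is not repository code):

use contracts::{ensures, requires};

#[requires(divisor != 0, "divisor must be non-zero")]
#[ensures(ret * divisor <= dividend, "result is the floored quotient")]
fn checked_floor_div(dividend: u32, divisor: u32) -> u32 {
    dividend / divisor
}

The `debug_requires`/`debug_ensures` variants, briefly imported in the next patch, compile to debug assertions and are only checked in debug builds.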
ndarray = "0.16.1" rand = "0.9.0-beta.1" regex = "1.11.1" +contracts = "0.6.3" + From 9d30f2d9d7935ad9c9c752f9e106070f9a3be527 Mon Sep 17 00:00:00 2001 From: Jakub Date: Thu, 19 Dec 2024 22:09:05 +0100 Subject: [PATCH 02/10] first Contracts for encoder written --- src/layers/feedforward_layer.rs | 6 +++++- src/main.rs | 9 +++++---- src/model/encoder.rs | 7 +++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs index dab2207..98b6ad8 100644 --- a/src/layers/feedforward_layer.rs +++ b/src/layers/feedforward_layer.rs @@ -15,6 +15,7 @@ pub struct FeedForwardLayer { bias2: Array1, // weights and biases for second linear layer dropout_rate: f32, // Dropout rate + initialized: bool, } impl FeedForwardLayer { // init with random values @@ -38,9 +39,12 @@ impl FeedForwardLayer { weights2, bias2, dropout_rate, + initialized: true, } } - + pub fn is_initialized(&self) -> bool { + self.initialized + } pub fn forward_t(&self, input: &Array2, train: bool) -> Array2 { // First linear layer let first_dot = input.dot(&self.weights1); diff --git a/src/main.rs b/src/main.rs index e9e07ea..09603bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,7 @@ use Transformer::model::decoder::decoding; use Transformer::model::embedding::Embedding; use Transformer::model::encoder::encoding; use Transformer::model::transformer_model::transformer_model; -use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, INPUT_SIZE, OUTPUT_SIZE}; +use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, EMBEDDING_SIZE, INPUT_SIZE, OUTPUT_SIZE}; fn main() { println!("runs successfully!"); @@ -40,9 +40,10 @@ fn main() { let embeddings = embedding.forward(tokens.clone()); // Convert embeddings to Array3 (batch_size, seq_length, embed_size) - let input_tensor = Array3::from_shape_fn((1, tokens.len(), 12), |(batch, seq, _)| { - embeddings[[seq, batch]] - }); + let input_tensor = Array3::from_shape_fn( + (BATCH_SIZE, tokens.len(), EMBEDDING_SIZE), + |(batch, seq, _)| embeddings[[seq, batch]], + ); println!("INPUT : {}", input_tensor.clone()); // Initialize gamma and beta for layer normalization diff --git a/src/model/encoder.rs b/src/model/encoder.rs index c8ed88f..9d67f4b 100644 --- a/src/model/encoder.rs +++ b/src/model/encoder.rs @@ -2,6 +2,8 @@ use crate::attention::multihead_attention::multi_head_attention; use crate::layers::feedforward_layer::FeedForwardLayer; use crate::layers::normalization::layer_norm; +use crate::settings::{BATCH_SIZE, EMBEDDING_SIZE}; +use contracts::{debug_requires, requires}; use ndarray::{array, Array2, Array3}; use std::ops::Add; @@ -16,6 +18,11 @@ use std::ops::Add; /// /// # Returns: /// - Output tensor of shape (batch_size, seq_length, d_model) after passing through the encoder layer. 
From 8d943599937a51b581b8d90a1024a2b6e045c56b Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 15:58:11 +0100
Subject: [PATCH 03/10] Added several `requires` statements to encoder

---
 src/model/encoder.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/model/encoder.rs b/src/model/encoder.rs
index 9d67f4b..531fdd4 100644
--- a/src/model/encoder.rs
+++ b/src/model/encoder.rs
@@ -19,10 +19,14 @@ use std::ops::Add;
 /// # Returns:
 /// - Output tensor of shape (batch_size, seq_length, d_model) after passing through the encoder layer.
 
+#[requires(input.shape().len() == 3, "Input tensor must have 3 dimensions (batch_size, seq_length, embed_size)")]
 #[requires(input.shape()[2] == gamma.shape()[1], "Gamma dimensions do not match input feature size")]
+#[requires(gamma.shape()[0] == 1, "Gamma must have exactly one row")]
 #[requires(input.shape()[2] == beta.shape()[1], "Beta dimensions do not match input feature size")]
+#[requires(beta.shape()[0] == 1, "Beta must have exactly one row")]
 #[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
 #[requires(feed_forward_layer.is_initialized(), "Feed-forward layer is not properly initialized")]
+#[requires(input.shape()[1] > 0, "Sequence length must be greater than zero")]
 pub fn encoding(
     input: Array3<f32>,   // Input tensor
     gamma: Array2<f32>,   // Scale parameter for layer norm
From 193c4b0df5211381c021f3cef04ed933973833a8 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:19:57 +0100
Subject: [PATCH 04/10] Feedforward layer holds more self information and has
 new requirements

---
 src/layers/feedforward_layer.rs | 121 ++++++++++++--------------------
 src/model/encoder.rs            |  35 +++++++--
 2 files changed, 76 insertions(+), 80 deletions(-)

diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs
index 98b6ad8..d2f22d1 100644
--- a/src/layers/feedforward_layer.rs
+++ b/src/layers/feedforward_layer.rs
@@ -1,24 +1,31 @@
-#![allow(dead_code)]
-#![allow(unused_imports)]
-
 use crate::activation::activation_functions::gelu;
-use crate::settings::HIDDEN_SIZE;
-use ndarray::{array, Array1, Array2, Array3};
+use contracts::requires;
+use ndarray::{Array1, Array2, Array3};
 use rand::Rng;
 use std::ops::Add;
 
 pub struct FeedForwardLayer {
     weights1: Array2<f32>,
-    bias1: Array1<f32>, // weights and biases for first linear layer
-
+    bias1: Array1<f32>, // Weights and biases for the first linear layer
     weights2: Array2<f32>,
-    bias2: Array1<f32>, // weights and biases for second linear layer
-
-    dropout_rate: f32, // Dropout rate
+    bias2: Array1<f32>, // Weights and biases for the second linear layer
+    dropout_rate: f32,             // Dropout rate
+    pub(crate) input_size: usize,  // Input feature size
+    pub(crate) output_size: usize, // Output feature size
     initialized: bool,
 }
+
 impl FeedForwardLayer {
-    // init with random values
+    /// Initializes the FeedForwardLayer with random weights and biases.
+    ///
+    /// # Parameters:
+    /// - `_batch_size`: Batch size (not stored, used for verification if needed).
+    /// - `input_size`: Number of input features (d_model).
+    /// - `output_size`: Number of output features (d_model).
+    /// - `dropout_rate`: Probability of dropping a unit in dropout (0.0 to 1.0).
+    #[requires(input_size > 0, "Input size must be greater than 0")]
+    #[requires(output_size > 0, "Output size must be greater than 0")]
+    #[requires((0.0..=1.0).contains(&dropout_rate), "Dropout rate must be in range [0.0, 1.0]")]
     pub fn new(
         _batch_size: usize,
         input_size: usize,
         output_size: usize,
         dropout_rate: f32,
     ) -> Self {
@@ -33,18 +40,33 @@ impl FeedForwardLayer {
         let weights2 = he_initialization(hidden_size, output_size); // Shape: (hidden_size, output_size)
         let bias2 = bias_initialization(output_size); // Shape: (output_size,)
+
         FeedForwardLayer {
             weights1,
             bias1,
             weights2,
             bias2,
             dropout_rate,
+            input_size,
+            output_size,
             initialized: true,
         }
     }
+
+    /// Verifies that the layer is properly initialized.
     pub fn is_initialized(&self) -> bool {
         self.initialized
     }
+
+    /// Performs a forward pass in training mode.
+    ///
+    /// # Parameters:
+    /// - `input`: 2D input tensor of shape (batch_size * seq_length, input_size).
+    /// - `train`: Whether to apply dropout.
+    ///
+    /// # Returns:
+    /// - Output tensor of shape (batch_size * seq_length, output_size).
+    #[requires(input.shape()[1] == self.input_size, "Input feature size must match layer's input size")]
     pub fn forward_t(&self, input: &Array2<f32>, train: bool) -> Array2<f32> {
         // First linear layer
         let first_dot = input.dot(&self.weights1);
@@ -61,56 +83,49 @@ impl FeedForwardLayer {
         // Second linear layer
         first_activation.dot(&self.weights2).add(&self.bias2)
     }
-    /// Forward pass through the feed-forward layer.
+
+    /// Performs a forward pass in evaluation mode.
     ///
     /// # Parameters:
-    /// - `x`: Input tensor of shape (batch_size, seq_length, d_model).
+    /// - `x`: Input tensor of shape (batch_size, seq_length, input_size).
     ///
     /// # Returns:
-    /// - Output tensor of shape (batch_size, seq_length, d_model).
+    /// - Output tensor of shape (batch_size, seq_length, output_size).
+    #[requires(x.shape()[2] == self.input_size, "Input feature size must match layer's input size")]
+    #[requires(!x.is_empty(), "Input tensor must not be empty")]
     pub fn forward(&self, x: Array3<f32>) -> Array3<f32> {
         let batch_size = x.shape()[0];
         let seq_length = x.shape()[1];
         let d_model = x.shape()[2];
-
         // Flatten the input to 2D: (batch_size * seq_length, d_model)
         let reshaped_x = x.to_shape((batch_size * seq_length, d_model));
         match reshaped_x {
             Ok(valid_reshaped_x) => {
                 let dot = valid_reshaped_x.dot(&self.weights1);
-
                 let add = dot + &self.bias1;
-                // First linear layer + gelu
-
+                // First linear layer + GELU activation
                 let hidden = gelu(&add.to_owned());
-
                 let dot2 = hidden.dot(&self.weights2);
                 // Second linear layer
                 let output = dot2 + &self.bias2;
 
-                // Reshape back to 3D: (batch_size, seq_length, d_model)
+                // Reshape back to 3D
                 output
-                    .to_shape((batch_size, seq_length, d_model))
+                    .to_shape((batch_size, seq_length, self.output_size))
                     .unwrap()
                     .to_owned()
-                // Use the `hidden` result here for further processing.
             }
             Err(ref e) => {
                 eprintln!("Shape error: {}", e);
-                eprintln!(
-                    "Shape of input : {:?} -=- Shape of weights : {:?} ",
-                    reshaped_x.unwrap().shape(),
-                    seq_length
-                );
-                // Or return unchanged?
-                x
+                x // Fallback to the original input on failure
             }
         }
     }
 
+    /// Applies dropout to the input.
     fn apply_dropout(&self, input: &Array2<f32>) -> Array2<f32> {
         let mut rng = rand::rng();
         input.map(|&x| {
@@ -123,61 +138,17 @@ impl FeedForwardLayer {
         })
     }
 }
 
+/// He initialization function.
 fn he_initialization(input_size: usize, output_size: usize) -> Array2<f32> {
     let mut rng = rand::rng();
-    // He initialization: scale by sqrt(2 / input_size)
     let scale = (2.0 / input_size as f32).sqrt();
 
     let values: Vec<f32> = (0..(input_size * output_size))
         .map(|_| rng.random_range(-scale..scale))
         .collect();
-
-    // Create an Array2 from the values vector
     Array2::from_shape_vec((input_size, output_size), values).unwrap()
 }
 
+/// Initializes bias vectors with zeros.
 fn bias_initialization(size: usize) -> Array1<f32> {
     Array1::zeros(size)
 }
-
-fn test_bias_initialization() {
-    let size = 5;
-
-    let bias = bias_initialization(size);
-
-    // Check that the dimensions are correct (size x 1)
-    assert_eq!(bias.shape(), &[size,]);
-
-    // Check that all values in the bias array are 0.0
-    for &value in bias.iter() {
-        assert_eq!(value, 0.0);
-    }
-}
-
-#[test]
-fn test_feedforward_forward() {
-    // Define a dummy input with shape (batch_size, seq_length, d_model)
-    let input = array![
-        [
-            [0.1, 0.2, 0.3, 0.4],
-            [0.5, 0.6, 0.7, 0.8],
-            [0.9, 1.0, 1.1, 1.2],
-        ],
-        [
-            [1.3, 1.4, 1.5, 1.6],
-            [1.7, 1.8, 1.9, 2.0],
-            [2.1, 2.2, 2.3, 2.4],
-        ]
-    ];
-
-    // Create a FeedForwardLayer instance
-    let feed_forward_layer = FeedForwardLayer::new(2, 4, 4, 0.1);
-
-    // Feed forward through the layer
-    let feed_forward_output = feed_forward_layer.forward(input.clone());
-
-    // Assert the output shape
-    assert_eq!(feed_forward_output.shape(), &[2, 3, 4]);
-
-    // Optionally, check if the output is transformed (e.g., not equal to input)
-    assert!(!feed_forward_output.iter().eq(input.iter())); // Check if output is different from input
-}

diff --git a/src/model/encoder.rs b/src/model/encoder.rs
index 531fdd4..54da22f 100644
--- a/src/model/encoder.rs
+++ b/src/model/encoder.rs
@@ -3,7 +3,7 @@
 use crate::attention::multihead_attention::multi_head_attention;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::settings::{BATCH_SIZE, EMBEDDING_SIZE};
-use contracts::{debug_requires, requires};
+use contracts::requires;
 use ndarray::{array, Array2, Array3};
 use std::ops::Add;
@@ -27,6 +27,8 @@ use std::ops::Add;
 #[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
 #[requires(feed_forward_layer.is_initialized(), "Feed-forward layer is not properly initialized")]
 #[requires(input.shape()[1] > 0, "Sequence length must be greater than zero")]
+#[requires(feed_forward_layer.input_size == input.shape()[2], "Feed-forward layer input size must match embedding size")]
+#[requires(feed_forward_layer.output_size == input.shape()[2], "Feed-forward layer output size must match embedding size")]
 pub fn encoding(
     input: Array3<f32>,   // Input tensor
     gamma: Array2<f32>,   // Scale parameter for layer norm
@@ -37,6 +39,24 @@ pub fn encoding(
     let batch_size = input.shape()[0];
     let seq_length = input.shape()[1];
     let d_model = input.shape()[2];
+    assert_eq!(
+        gamma.shape()[1],
+        d_model,
+        "Gamma dimensions do not match input feature size"
+    );
+    assert_eq!(gamma.shape()[0], 1, "Gamma must have exactly one row");
+    assert_eq!(
+        beta.shape()[1],
+        d_model,
+        "Beta dimensions do not match input feature size"
+    );
+    assert_eq!(beta.shape()[0], 1, "Beta must have exactly one row");
+    assert!(epsilon > 0.0, "Epsilon must be positive and non-zero");
+    assert!(
+        feed_forward_layer.is_initialized(),
+        "Feed-forward layer is not properly initialized"
+    );
+    assert!(seq_length > 0, "Sequence length must be greater than zero");
 
     // Multi-Head Attention
     let dummy_learned_matrices =
         Array2::<f32>::ones((d_model, d_model)); // Replace with actual learned parameters
@@ -52,7 +72,6 @@ pub fn encoding(
         dummy_learned_matrices.clone(), // W_O
     );
 
-    //println!("Attention1 :{}", attention_output);
     // Add & Normalize (Residual Connection + Layer Norm)
     let attention_residual = attention_output.add(&input); // Residual connection
     let reshaped_attention = attention_residual
@@ -71,8 +90,7 @@ pub fn encoding(
 
     // Feed-Forward Network
     let feed_forward_output = feed_forward_layer.forward(attention_norm.clone());
-    //println!("feed_forward_output :{:?}", feed_forward_output);
-    // Add & Normalize (Residual Connection + Layer Norm)
+    // Add & Normalize (Residual Connection + Layer Norm)
     let feed_forward_residual = feed_forward_output.add(&attention_norm); // Residual connection
     let reshaped_ff_attention = feed_forward_residual
         .to_shape((batch_size * seq_length, d_model)) // Flatten to 2D
@@ -87,8 +105,15 @@ pub fn encoding(
         .unwrap()
         .to_owned();
 
+    assert_eq!(
+        output.shape(),
+        input.shape(),
+        "Output tensor must have the same shape as the input tensor"
+    );
+
     output
 }
+
 #[test]
 fn test_encoding() {
     // Dummy input tensor (batch_size = 2, seq_length = 3, d_model = 4)
@@ -114,7 +139,7 @@ fn test_encoding() {
 
     // Call the encoding function
     let epsilon = 1e-6;
-    let output = encoding(input, gamma, beta, epsilon, &feed_forward_layer);
+    let output = encoding(input.clone(), gamma, beta, epsilon, &feed_forward_layer);
 
     // Assert that the output has the correct shape
     assert_eq!(output.shape(), &[2, 3, 4]);
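Patch 04's stored `input_size`/`output_size` let the encoder contracts tie the layer to the tensor it will process. A small sketch of a constructor call that satisfies them (illustrative; assumes d_model equals EMBEDDING_SIZE, as in main.rs):

use Transformer::layers::feedforward_layer::FeedForwardLayer;
use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, EMBEDDING_SIZE};

fn build_encoder_ffn() -> FeedForwardLayer {
    // input_size == output_size == EMBEDDING_SIZE, so both new `requires`
    // clauses on `encoding` hold; a mismatched layer now fails fast at the
    // contract boundary instead of erroring deep inside `forward`.
    FeedForwardLayer::new(BATCH_SIZE, EMBEDDING_SIZE, EMBEDDING_SIZE, DROPOUT_RATE)
}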
From a7e64c3461fbda82eeed6668d6b3504f128360d1 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:24:04 +0100
Subject: [PATCH 05/10] made forwarding for the tensor safer

---
 src/layers/feedforward_layer.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs
index d2f22d1..1e416e9 100644
--- a/src/layers/feedforward_layer.rs
+++ b/src/layers/feedforward_layer.rs
@@ -67,6 +67,8 @@ impl FeedForwardLayer {
     /// # Returns:
     /// - Output tensor of shape (batch_size * seq_length, output_size).
     #[requires(input.shape()[1] == self.input_size, "Input feature size must match layer's input size")]
+    #[requires(input.shape()[0] > 0, "Input tensor must not be empty")]
+    #[requires(input.shape()[1] == self.input_size, "Input tensor's second dimension must match input_size")]
     pub fn forward_t(&self, input: &Array2<f32>, train: bool) -> Array2<f32> {
         // First linear layer
         let first_dot = input.dot(&self.weights1);
@@ -92,7 +94,8 @@ impl FeedForwardLayer {
     /// # Returns:
     /// - Output tensor of shape (batch_size, seq_length, output_size).
     #[requires(x.shape()[2] == self.input_size, "Input feature size must match layer's input size")]
-    #[requires(!x.is_empty(), "Input tensor must not be empty")]
+    #[requires(x.shape()[0] > 0, "Input tensor must not be empty")]
+    #[requires(x.shape()[2] == self.input_size, "Input tensor's third dimension must match input_size")]
     pub fn forward(&self, x: Array3<f32>) -> Array3<f32> {
         let batch_size = x.shape()[0];
         let seq_length = x.shape()[1];

From ead9c4aaa77ea931a42c122eb9846b0355754a66 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:32:08 +0100
Subject: [PATCH 06/10] added new requirements to softmax

---
 src/attention/softmax.rs | 15 +++++++++++++--
 src/model/encoder.rs     |  2 +-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/attention/softmax.rs b/src/attention/softmax.rs
index aa5137e..93ba10b 100644
--- a/src/attention/softmax.rs
+++ b/src/attention/softmax.rs
@@ -1,20 +1,30 @@
-#![allow(unused_imports)] // {array} import is not recognized as it is used in #[test]
+#![allow(unused_imports)]
+
+use contracts::{ensures, requires};
+// {array} import is not recognized as it is used in #[test]
 use ndarray::{array, s, Array, Array1, Array2, Array3, ArrayView1, Axis};
 
+//noinspection ALL
+#[requires(!vec.is_empty(), "Input vector must not be empty.")]
+#[ensures(ret.len() == vec.len(), "Output vector must have the same length as the input vector.")]
 pub fn softmax_vector(vec: ArrayView1<f32>) -> Array1<f32> {
     let max = vec.fold(f32::NEG_INFINITY, |a, &b| a.max(b)); // Stabilize by subtracting max
     let exp_vec = vec.mapv(|x| (x - max).exp());
     let sum: f32 = exp_vec.sum();
     exp_vec / sum
 }
+#[requires(!vec.is_empty(), "Input vector must not be empty.")]
 pub fn softmax_vec(vec: Vec<f32>) -> Array1<f32> {
     let array = Array1::from(vec); // Convert Vec<f32> to Array1<f32>
     softmax_vector(array.view())
 }
+#[requires(mat.shape().len() == 2, "Input matrix must be 2-dimensional.")]
 pub fn softmax_matrix(mat: &Array2<f32>) -> Array2<f32> {
     convert_to_array2(mat.map_axis(Axis(1), softmax_vector))
 }
+
+#[requires(attention_scores.shape().len() == 3, "Input tensor must be 3-dimensional.")]
 pub fn softmax_3d(attention_scores: &Array3<f32>) -> Array3<f32> {
     let batch_size = attention_scores.shape()[0];
     let mut softmax_result = Array3::<f32>::zeros(attention_scores.raw_dim());
@@ -33,7 +43,8 @@ pub fn softmax_3d(attention_scores: &Array3<f32>) -> Array3<f32> {
 
     softmax_result
 }
-
+#[requires(!array1d.is_empty(), "Input array must not be empty.")]
+#[requires(array1d.iter().all(|row| !row.is_empty()), "All rows must be non-empty.")]
 fn convert_to_array2(array1d: Array<Array1<f32>, ndarray::Ix1>) -> Array2<f32> {
     // Check if the input array is non-empty
     assert!(!array1d.is_empty(), "Input array must not be empty.");

diff --git a/src/model/encoder.rs b/src/model/encoder.rs
index 54da22f..0d036ba 100644
--- a/src/model/encoder.rs
+++ b/src/model/encoder.rs
@@ -3,7 +3,7 @@
 use crate::attention::multihead_attention::multi_head_attention;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::settings::{BATCH_SIZE, EMBEDDING_SIZE};
-use contracts::requires;
+use contracts::{ensures, requires};
 use ndarray::{array, Array2, Array3};
 use std::ops::Add;
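The softmax contracts above lend themselves to a quick property-style check. A sketch of one such test (hypothetical, placed next to the existing tests in src/attention/softmax.rs, where `array` and `softmax_vector` are already in scope):

#[test]
fn softmax_vector_keeps_length_and_sums_to_one() {
    let v = array![1.0_f32, 2.0, 3.0];
    let out = softmax_vector(v.view());
    // Guaranteed by the `ensures` clause on `softmax_vector`.
    assert_eq!(out.len(), v.len());
    // A property of softmax itself, not enforced by the contract.
    assert!((out.sum() - 1.0).abs() < 1e-6);
}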
From 9345b19fef165074a5dd414db0b939313ad45442 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:38:35 +0100
Subject: [PATCH 07/10] requirements for decoder added

---
 src/model/decoder.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/model/decoder.rs b/src/model/decoder.rs
index bfd1e23..4356682 100644
--- a/src/model/decoder.rs
+++ b/src/model/decoder.rs
@@ -4,9 +4,20 @@
 use crate::attention::softmax::softmax_3d;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::model::encoder::encoding;
+use contracts::requires;
 use ndarray::{array, Array2, Array3};
 use std::ops::Add;
 
+#[requires(input.shape().len() == 3, "Input tensor must have 3 dimensions (batch_size, seq_length, d_model)")]
+#[requires(encoder_output.shape().len() == 3, "Encoder output tensor must have 3 dimensions (batch_size, seq_length, d_model)")]
+#[requires(input.shape() == encoder_output.shape(), "Input tensor and encoder output tensor must have the same shape")]
+#[requires(input.shape()[2] == gamma.shape()[1], "Gamma dimensions do not match input feature size")]
+#[requires(gamma.shape()[0] == 1, "Gamma must have exactly one row")]
+#[requires(input.shape()[2] == beta.shape()[1], "Beta dimensions do not match input feature size")]
+#[requires(beta.shape()[0] == 1, "Beta must have exactly one row")]
+#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
+#[requires(feed_forward_layer.is_initialized(), "Feed-forward layer is not properly initialized")]
+#[requires(input.shape()[1] > 0, "Sequence length must be greater than zero")]
 pub fn decoding(
     input: Array3<f32>, // Input tensor (usually from the previous decoder layer or initial input)
     encoder_output: Array3<f32>, // Encoder output (for the encoder-decoder attention)
@@ -87,6 +98,7 @@ pub fn decoding(
     ff_norm // decoder output
 }
+
 #[test]
 fn test_decoding() {
     // Dummy input tensor (batch_size = 2, seq_length = 4, d_model = 4)
@@ -121,7 +133,6 @@ fn test_decoding() {
     );
 
     // Call the decoding function
-
     let output = decoding(input, enc_out, gamma, beta, epsilon, &feed_forward_layer);
 
     // Assert that the output has the correct shape
From 4e745112527e50c700173702a475f06edd8094c9 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:50:01 +0100
Subject: [PATCH 08/10] requirements for linear algebra added

---
 src/math/linear_algebra.rs   | 50 ++++++++++++++++++++++++++++--------
 src/model/decoder.rs         |  1 -
 tests/linear_algebra_test.rs | 16 ------------
 3 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/src/math/linear_algebra.rs b/src/math/linear_algebra.rs
index 8a01a01..3764c01 100644
--- a/src/math/linear_algebra.rs
+++ b/src/math/linear_algebra.rs
@@ -1,3 +1,5 @@
+#![allow(warnings)]
+use contracts::{ensures, requires};
 use ndarray::linalg::general_mat_mul;
 use ndarray::{s, Array1, Array2, Array3};
 
@@ -9,13 +11,17 @@
 ///
 /// # Returns
 /// An `Array2<f32>` representing the result of the matrix multiplication.
+#[requires(a.ncols() == b.nrows(), "Matrix dimensions are incompatible for multiplication.")]
+#[ensures(ret.is_ok(), "Matrix multiplication should be successful")]
+#[ensures(ret.as_ref().unwrap().nrows() > 0, "The resulting matrix must have more than 0 rows.")]
+#[ensures(ret.as_ref().unwrap().ncols() > 0, "The resulting matrix must have more than 0 columns.")]
 pub fn matmul(a: &Array2<f32>, b: &Array2<f32>) -> Result<Array2<f32>, &'static str> {
     if a.ncols() != b.nrows() {
         return Err("Matrix dimensions are incompatible for multiplication.");
     }
-    let mut result = Array2::<f32>::zeros((a.nrows(), b.ncols()));
-    general_mat_mul(1.0, a, b, 0.0, &mut result);
-    Ok(result)
+    let mut ret = Array2::<f32>::zeros((a.nrows(), b.ncols()));
+    general_mat_mul(1.0, a, b, 0.0, &mut ret);
+    Ok(ret)
 }
 
 pub fn dotproduct(a: &Array1<f32>, b: &Array1<f32>) -> f32 {
@@ -34,6 +40,12 @@
 /// # Panics:
 /// - If the batch sizes of `a` and `b` don't match.
 /// - If the inner dimensions (`k` in `a` and `b`) don't align for matrix multiplication.
+#[requires(a.shape().len() == 3, "Input tensor a must have 3 dimensions")]
+#[requires(b.shape().len() == 3, "Input tensor b must have 3 dimensions")]
+#[requires(a.shape()[0] == b.shape()[0], "Batch sizes must match")]
+#[requires(a.shape()[2] == b.shape()[1], "Inner dimensions must align for matrix multiplication")]
+#[ensures(ret.shape().len() == 3, "The resulting tensor must have 3 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn tensor_product(a: &Array3<f32>, b: &Array3<f32>) -> Array3<f32> {
     // Check that batch sizes match and if dimension align
     assert_eq!(a.shape()[0], b.shape()[0], "Batch sizes must match");
@@ -45,20 +57,21 @@ pub fn tensor_product(a: &Array3<f32>, b: &Array3<f32>) -> Array3<f32> {
 
     // Initialize a 3D tensor for the result, filled with zeros.
     // Its shape corresponds to (batch_size, m, n).
-    let mut result = Array3::<f32>::zeros((batch_size, m, n));
+    let mut ret = Array3::<f32>::zeros((batch_size, m, n));
 
     for i in 0..batch_size {
         // - `s![i, .., ..]` selects the `i`th matrix (2D slice) in the batch.
         let a_slice = a.slice(s![i, .., ..]);
         let b_slice = b.slice(s![i, .., ..]);
-        let mut result_slice = result.slice_mut(s![i, .., ..]); // Mutable slice of the result matrix for this batch.
+        let mut ret_slice = ret.slice_mut(s![i, .., ..]); // Mutable slice of the result matrix for this batch.
 
-        general_mat_mul(1.0, &a_slice, &b_slice, 0.0, &mut result_slice);
+        general_mat_mul(1.0, &a_slice, &b_slice, 0.0, &mut ret_slice);
     }
 
-    result
+    ret
 }
+
 /// Applies a linear projection to a 3D tensor using a weight matrix.
 ///
 /// # Arguments
 /// - `x`: Input tensor of shape [batch, seq_len, d_model].
 /// - `w`: Weight matrix of shape [d_model, output_dim].
 ///
 /// # Returns
 /// A new 3D tensor with the projection applied (e.g., [batch, seq_len, output_dim]).
+#[requires(x.shape().len() == 3, "Input tensor x must have 3 dimensions")]
+#[requires(w.shape().len() == 2, "Weight matrix w must have 2 dimensions")]
+#[requires(x.shape()[2] == w.shape()[0], "Input feature size must match the weight matrix's rows")]
+#[ensures(ret.shape().len() == 3, "The resulting tensor must have 3 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn apply_projection(x: &Array3<f32>, w: &Array2<f32>) -> Array3<f32> {
     let batch_size = x.shape()[0];
     let seq_len = x.shape()[1];
     let d_model = x.shape()[2];
     assert_eq!(d_model, w.shape()[0]);
     let d_k = w.shape()[1]; // Output dimension (head dimension)
 
-    // Initialize the result tensor with shape (batch_size, seq_len, d_k)
-    let mut result = Array3::<f32>::zeros((batch_size, seq_len, d_k));
+    // Initialize the ret tensor with shape (batch_size, seq_len, d_k)
+    let mut ret = Array3::<f32>::zeros((batch_size, seq_len, d_k));
 
     // Perform matrix multiplication for each batch
     for i in 0..batch_size {
         let x_slice = x.slice(s![i, .., ..]); // Slice the i-th batch (shape: (seq_len, d_model))
         let mul = matmul(&x_slice.to_owned(), w); // Perform matrix multiplication
         if mul.is_ok() {
-            result.slice_mut(s![i, .., ..]).assign(&mul.unwrap());
+            ret.slice_mut(s![i, .., ..]).assign(&mul.unwrap());
         }
     }
 
-    result
+    ret
 }
 
+/// Flattens a 3D array into a 2D array.
+///
+/// # Parameters
+/// - `batch`: A 3D tensor of shape (batch_size, seq_length, embed_size).
+///
+/// # Returns
+/// A 2D tensor of shape (batch_size * seq_length, embed_size).
+#[requires(batch.shape().len() == 3, "Input tensor must have 3 dimensions")]
+#[ensures(ret.shape().len() == 2, "The resulting tensor must have 2 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn flatten_3d_array(batch: Array3<f32>) -> Array2<f32> {
     let (batch_size, seq_length, embed_size) = batch.dim();
     batch

diff --git a/src/model/decoder.rs b/src/model/decoder.rs
index 4356682..0c504da 100644
--- a/src/model/decoder.rs
+++ b/src/model/decoder.rs
@@ -1,6 +1,5 @@
 #![allow(warnings)]
 use crate::attention::multihead_attention::multi_head_attention;
-use crate::attention::softmax::softmax_3d;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::model::encoder::encoding;

diff --git a/tests/linear_algebra_test.rs b/tests/linear_algebra_test.rs
index b02daa0..8eada2a 100644
--- a/tests/linear_algebra_test.rs
+++ b/tests/linear_algebra_test.rs
@@ -21,22 +21,6 @@ fn test_matmul_valid_input() {
     }
 }
 
-#[test]
-fn test_matmul_invalid_input() {
-    // Arrange: Define input matrices with mismatched dimensions
-    let a = array![[1.0, 2.0], [3.0, 4.0]]; // 2x2 matrix
-    let b = array![[5.0, 6.0]]; // 1x2 matrix (mismatched dimensions)
-
-    // Act: Perform the multiplication, expecting an error
-    let result = matmul(&a, &b);
-
-    // Assert: Ensure the result is an error due to incompatible dimensions
-    assert_eq!(
-        result,
-        Err("Matrix dimensions are incompatible for multiplication.")
-    );
-}
-
 #[test]
 fn test_dotproduct() {
     let a: Array1<f32> = array![1.0, 2.0, 3.0];
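Note that `#[requires(a.ncols() == b.nrows(), ...)]` makes `matmul`'s `Err` branch unreachable: the generated pre-condition check panics before the function body runs, which is presumably why `test_matmul_invalid_input` is deleted above rather than updated. A sketch of the test that would replace it under contracts (hypothetical, not part of the patch):

#[test]
#[should_panic]
fn matmul_incompatible_dims_panics() {
    let a = array![[1.0_f32, 2.0], [3.0, 4.0]]; // 2x2 matrix
    let b = array![[5.0_f32, 6.0]]; // 1x2 matrix (mismatched dimensions)
    // The pre-condition check fires before the body's `Err` return is reached.
    let _ = matmul(&a, &b);
}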
From d4e77ff4e9deb6a6d59f89692ce593f372bada1c Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:56:36 +0100
Subject: [PATCH 09/10] requirements for norm and pos-enc

---
 src/layers/normalization.rs     | 14 ++++++++------
 src/math/positional_encoding.rs |  7 ++++++-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/layers/normalization.rs b/src/layers/normalization.rs
index 5c6994d..fc5d70f 100644
--- a/src/layers/normalization.rs
+++ b/src/layers/normalization.rs
@@ -1,3 +1,5 @@
+#![allow(unused_imports)]
+use contracts::{ensures, requires};
 use ndarray::{Array2, Axis};
 
 /// Performs layer normalization on a 2D array (batch size x embedding size).
@@ -10,6 +12,12 @@
 ///
 /// # Returns:
 /// A 2D array of the same shape as `x` after applying Layer Normalization.
+#[requires(x.shape().len() == 2, "Input array must be 2-dimensional")]
+#[requires(gamma.shape().len() == 2 && gamma.shape()[0] == 1, "Gamma must be a 2-dimensional array with a single row")]
+#[requires(beta.shape().len() == 2 && beta.shape()[0] == 1, "Beta must be a 2-dimensional array with a single row")]
+#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
+#[ensures(ret.shape() == x.shape(), "The resulting array must have the same shape as the input array")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting array must be finite")]
 pub fn layer_norm(
     x: &Array2<f32>,
     gamma: &Array2<f32>,
@@ -19,18 +27,12 @@ pub fn layer_norm(
     // Calculate mean and variance across the features (axis=1)
     let mean = x.mean_axis(Axis(1)).unwrap();
     let variance = x.var_axis(Axis(1), 0.0);
-    //println!("Mean: {:?}", mean);
-    // println!("Variance: {:?}", variance);
 
     let expanded_mean = mean.insert_axis(Axis(1)); // Expands [6] to [6, 1]
     let expanded_variance = variance.insert_axis(Axis(1)); // Expands [6] to [6, 1]
-    // println!("EXPMean: {:?}", expanded_mean);
-    //println!("EXPVariance: {:?}", expanded_variance);
 
     // Add epsilon to expanded variance
     let normalized = (x - &expanded_mean) / (expanded_variance + epsilon).mapv(f32::sqrt);
-    // println!("Normalized {}", normalized);
-
     normalized * gamma + beta
 }

diff --git a/src/math/positional_encoding.rs b/src/math/positional_encoding.rs
index 190466d..c33c213 100644
--- a/src/math/positional_encoding.rs
+++ b/src/math/positional_encoding.rs
@@ -1,3 +1,6 @@
+#![allow(warnings)]
+use contracts::{ensures, requires};
+
 /// Computes the sinusoidal positional encoding for a given position and dimension.
 ///
 /// This encoding is used in Transformer models to represent token positions
@@ -10,10 +13,12 @@
 ///
 /// # Returns
 /// The positional encoding value (as `f32`).
+#[requires(embedding_size > 0, "Embedding size must be greater than 0")]
+#[ensures(ret.is_finite(), "The resulting value must be finite")]
 pub fn sinusoidal_pos_encoding(pos: usize, index: usize, embedding_size: usize) -> f32 {
     if pos == 0 {
         return 0.0;
-    };
+    }
     let divisor = 10000f32.powf(2.0 * (index as f32 / embedding_size as f32)); // 10000^(2 * index / embedding_size)
 
     if index % 2 == 0 {
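The `layer_norm` post-conditions can be exercised directly as well. A sketch (hypothetical test, shaped to match the contract's expectation of one-row `gamma`/`beta`; assumes `use ndarray::array;` is available in the test scope):

#[test]
fn layer_norm_preserves_shape_and_stays_finite() {
    let x = array![[1.0_f32, 2.0, 3.0], [4.0, 5.0, 6.0]];
    let gamma = array![[1.0_f32, 1.0, 1.0]]; // single row, as required
    let beta = array![[0.0_f32, 0.0, 0.0]]; // single row, as required
    let out = layer_norm(&x, &gamma, &beta, 1e-6);
    // Both `ensures` clauses: same shape as the input, all values finite.
    assert_eq!(out.shape(), x.shape());
    assert!(out.iter().all(|v| v.is_finite()));
}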
From 02d7144f238a623f0ddab136e272cf66ea607c90 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 17:01:26 +0100
Subject: [PATCH 10/10] added FF tests

---
 src/layers/feedforward_layer.rs | 47 ++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs
index 1e416e9..a97efd8 100644
--- a/src/layers/feedforward_layer.rs
+++ b/src/layers/feedforward_layer.rs
@@ -1,6 +1,7 @@
+#![allow(unused_imports)]
 use crate::activation::activation_functions::gelu;
 use contracts::requires;
-use ndarray::{Array1, Array2, Array3};
+use ndarray::{array, Array1, Array2, Array3};
 use rand::Rng;
 use std::ops::Add;
 
@@ -155,3 +156,47 @@ fn he_initialization(input_size: usize, output_size: usize) -> Array2<f32> {
 fn bias_initialization(size: usize) -> Array1<f32> {
     Array1::zeros(size)
 }
+
+#[test]
+fn test_bias_initialization() {
+    let size = 5;
+
+    let bias = bias_initialization(size);
+
+    // Check that the dimensions are correct (size x 1)
+    assert_eq!(bias.shape(), &[size,]);
+
+    // Check that all values in the bias array are 0.0
+    for &value in bias.iter() {
+        assert_eq!(value, 0.0);
+    }
+}
+
+#[test]
+fn test_feedforward_forward() {
+    // Define a dummy input with shape (batch_size, seq_length, d_model)
+    let input = array![
+        [
+            [0.1, 0.2, 0.3, 0.4],
+            [0.5, 0.6, 0.7, 0.8],
+            [0.9, 1.0, 1.1, 1.2],
+        ],
+        [
+            [1.3, 1.4, 1.5, 1.6],
+            [1.7, 1.8, 1.9, 2.0],
+            [2.1, 2.2, 2.3, 2.4],
+        ]
+    ];
+
+    // Create a FeedForwardLayer instance
+    let feed_forward_layer = FeedForwardLayer::new(2, 4, 4, 0.1);
+
+    // Feed forward through the layer
+    let feed_forward_output = feed_forward_layer.forward(input.clone());
+
+    // Assert the output shape
+    assert_eq!(feed_forward_output.shape(), &[2, 3, 4]);
+
+    // Optionally, check if the output is transformed (e.g., not equal to input)
+    assert!(!feed_forward_output.iter().eq(input.iter())); // Check if output is different from input
+}
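The positional-encoding contracts from patch 09 admit the same treatment. A final sketch (hypothetical tests, not part of the series):

#[test]
fn pos_encoding_is_finite_for_valid_input() {
    // Covered by `#[ensures(ret.is_finite(), ...)]`, but worth pinning down.
    let pe = sinusoidal_pos_encoding(3, 2, 16);
    assert!(pe.is_finite());
}

#[test]
#[should_panic]
fn pos_encoding_rejects_zero_embedding_size() {
    // Violates `#[requires(embedding_size > 0, ...)]`; the generated check panics.
    let _ = sinusoidal_pos_encoding(1, 0, 0);
}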