Merge pull request #11 from JakubSchwenkbeck/refactor/design-by-contract
Implement Design by Contract
JakubSchwenkbeck authored Dec 20, 2024
2 parents 23f9866 + 02d7144 commit 55fa876
Showing 11 changed files with 205 additions and 80 deletions.
31 changes: 27 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions Cargo.toml
@@ -11,3 +11,5 @@ description = "A Rust implementation of a Transformer-based model for NLP tasks.
ndarray = "0.16.1"
rand = "0.9.0-beta.1"
regex = "1.11.1"
contracts = "0.6.3"
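
The new dependency is the `contracts` crate, which provides the `#[requires]` and `#[ensures]` attribute macros used throughout this PR. A quick illustrative sketch (not part of this commit) of how they behave: `#[requires]` checks a precondition before the function body runs, `#[ensures]` checks a postcondition with the return value bound to `ret`, and a violated condition panics with the given message. The `checked_div` function below is hypothetical.

use contracts::{ensures, requires};

#[requires(divisor != 0, "divisor must be non-zero")]
#[ensures(ret * divisor <= dividend, "quotient must not overshoot the dividend")]
fn checked_div(dividend: u32, divisor: u32) -> u32 {
    dividend / divisor
}

fn main() {
    assert_eq!(checked_div(10, 3), 3);
    // checked_div(10, 0) would panic with "divisor must be non-zero".
}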

15 changes: 13 additions & 2 deletions src/attention/softmax.rs
@@ -1,20 +1,30 @@
#![allow(unused_imports)] // {array} import is not recognized as it is used in #[test]
#![allow(unused_imports)]

use contracts::{ensures, requires};
// {array} import is not recognized as it is used in #[test]
use ndarray::{array, s, Array, Array1, Array2, Array3, ArrayView1, Axis};

//noinspection ALL
#[requires(!vec.is_empty(), "Input vector must not be empty.")]
#[ensures(ret.len() == vec.len(), "Output vector must have the same length as the input vector.")]
pub fn softmax_vector(vec: ArrayView1<f32>) -> Array1<f32> {
let max = vec.fold(f32::NEG_INFINITY, |a, &b| a.max(b)); // Stabilize by subtracting max
let exp_vec = vec.mapv(|x| (x - max).exp());
let sum: f32 = exp_vec.sum();
exp_vec / sum
}
#[requires(!vec.is_empty(), "Input vector must not be empty.")]
pub fn softmax_vec(vec: Vec<f32>) -> Array1<f32> {
let array = Array1::from(vec); // Convert Vec<f32> to Array1<f32>
softmax_vector(array.view())
}

#[requires(mat.shape().len() == 2, "Input matrix must be 2-dimensional.")]
pub fn softmax_matrix(mat: &Array2<f32>) -> Array2<f32> {
convert_to_array2(mat.map_axis(Axis(1), softmax_vector))
}

#[requires(attention_scores.shape().len() == 3, "Input tensor must be 3-dimensional.")]
pub fn softmax_3d(attention_scores: &Array3<f32>) -> Array3<f32> {
let batch_size = attention_scores.shape()[0];
let mut softmax_result = Array3::<f32>::zeros(attention_scores.raw_dim());
@@ -33,7 +43,8 @@ pub fn softmax_3d(attention_scores: &Array3<f32>) -> Array3<f32> {

softmax_result
}

#[requires(!array1d.is_empty(), "Input array must not be empty.")]
#[requires(array1d.iter().all(|row| !row.is_empty()), "All rows must be non-empty.")]
fn convert_to_array2(array1d: Array<Array1<f32>, ndarray::Ix1>) -> Array2<f32> {
// Check if the input array is non-empty
assert!(!array1d.is_empty(), "Input array must not be empty.");
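
With these attributes, malformed input fails fast instead of propagating NaNs. A minimal usage sketch, assuming the module is exposed as `Transformer::attention::softmax` (the export path is inferred from the file location and is not shown in this diff):

use ndarray::array;
use Transformer::attention::softmax::{softmax_vec, softmax_vector}; // assumed path

fn main() {
    // Valid input: the #[ensures] clause guarantees output length == input length.
    let probs = softmax_vector(array![1.0_f32, 2.0, 3.0].view());
    assert_eq!(probs.len(), 3);
    assert!((probs.sum() - 1.0).abs() < 1e-6);

    // An empty vector violates the #[requires] clause and panics:
    // softmax_vec(vec![]); // "Input vector must not be empty."
}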
83 changes: 53 additions & 30 deletions src/layers/feedforward_layer.rs
@@ -1,23 +1,32 @@
#![allow(dead_code)]
#![allow(unused_imports)]

use crate::activation::activation_functions::gelu;
use crate::settings::HIDDEN_SIZE;
use contracts::requires;
use ndarray::{array, Array1, Array2, Array3};
use rand::Rng;
use std::ops::Add;

pub struct FeedForwardLayer {
weights1: Array2<f32>,
bias1: Array1<f32>, // weights and biases for first linear layer

bias1: Array1<f32>, // Weights and biases for the first linear layer
weights2: Array2<f32>,
bias2: Array1<f32>, // weights and biases for second linear layer

dropout_rate: f32, // Dropout rate
bias2: Array1<f32>, // Weights and biases for the second linear layer
dropout_rate: f32, // Dropout rate
pub(crate) input_size: usize, // Input feature size
pub(crate) output_size: usize, // Output feature size
initialized: bool,
}

impl FeedForwardLayer {
// init with random values
/// Initializes the FeedForwardLayer with random weights and biases.
///
/// # Parameters:
/// - `_batch_size`: Batch size (not stored, used for verification if needed).
/// - `input_size`: Number of input features (d_model).
/// - `output_size`: Number of output features (d_model).
/// - `dropout_rate`: Probability of dropping a unit in dropout (0.0 to 1.0).
#[requires(input_size > 0, "Input size must be greater than 0")]
#[requires(output_size > 0, "Output size must be greater than 0")]
#[requires((0.0..=1.0).contains(&dropout_rate), "Dropout rate must be in range [0.0, 1.0]")]
pub fn new(
_batch_size: usize,
input_size: usize,
@@ -32,15 +41,35 @@ impl FeedForwardLayer {

let weights2 = he_initialization(hidden_size, output_size); // Shape: (hidden_size, output_size)
let bias2 = bias_initialization(output_size); // Shape: (output_size,)

FeedForwardLayer {
weights1,
bias1,
weights2,
bias2,
dropout_rate,
input_size,
output_size,
initialized: true,
}
}

/// Verifies that the layer is properly initialized.
pub fn is_initialized(&self) -> bool {
self.initialized
}

/// Performs a forward pass in training mode.
///
/// # Parameters:
/// - `input`: 2D input tensor of shape (batch_size * seq_length, input_size).
/// - `train`: Whether to apply dropout.
///
/// # Returns:
/// - Output tensor of shape (batch_size * seq_length, output_size).
#[requires(input.shape()[1] == self.input_size, "Input feature size must match layer's input size")]
#[requires(input.shape()[0] > 0, "Input tensor must not be empty")]
#[requires(input.shape()[1] == self.input_size, "Input tensor's second dimension must match input_size")]
pub fn forward_t(&self, input: &Array2<f32>, train: bool) -> Array2<f32> {
// First linear layer
let first_dot = input.dot(&self.weights1);
@@ -57,56 +86,50 @@ impl FeedForwardLayer {
// Second linear layer
first_activation.dot(&self.weights2).add(&self.bias2)
}
/// Forward pass through the feed-forward layer.

/// Performs a forward pass in evaluation mode.
///
/// # Parameters:
/// - `x`: Input tensor of shape (batch_size, seq_length, d_model).
/// - `x`: Input tensor of shape (batch_size, seq_length, input_size).
///
/// # Returns:
/// - Output tensor of shape (batch_size, seq_length, d_model).
/// - Output tensor of shape (batch_size, seq_length, output_size).
#[requires(x.shape()[2] == self.input_size, "Input feature size must match layer's input size")]
#[requires(x.shape()[0] > 0, "Input tensor must not be empty")]
#[requires(x.shape()[2] == self.input_size, "Input tensor's third dimension must match input_size")]
pub fn forward(&self, x: Array3<f32>) -> Array3<f32> {
let batch_size = x.shape()[0];
let seq_length = x.shape()[1];
let d_model = x.shape()[2];

// Flatten the input to 2D: (batch_size * seq_length, d_model)
let reshaped_x = x.to_shape((batch_size * seq_length, d_model));

match reshaped_x {
Ok(valid_reshaped_x) => {
let dot = valid_reshaped_x.dot(&self.weights1);

let add = dot + &self.bias1;

// First linear layer + gelu

// First linear layer + GELU activation
let hidden = gelu(&add.to_owned());

let dot2 = hidden.dot(&self.weights2);

// Second linear layer
let output = dot2 + &self.bias2;

// Reshape back to 3D: (batch_size, seq_length, d_model)
// Reshape back to 3D
output
.to_shape((batch_size, seq_length, d_model))
.to_shape((batch_size, seq_length, self.output_size))
.unwrap()
.to_owned()
// Use the `hidden` result here for further processing.
}
Err(ref e) => {
eprintln!("Shape error: {}", e);
eprintln!(
"Shape of input : {:?} -=- Shape of weights : {:?} ",
reshaped_x.unwrap().shape(),
seq_length
);
// Or return unchanged?
x
x // Fallback to the original input on failure
}
}
}

/// Applies dropout to the input.
fn apply_dropout(&self, input: &Array2<f32>) -> Array2<f32> {
let mut rng = rand::rng();
input.map(|&x| {
@@ -119,22 +142,22 @@ impl FeedForwardLayer {
}
}

/// He initialization function.
fn he_initialization(input_size: usize, output_size: usize) -> Array2<f32> {
let mut rng = rand::rng();
// He initialization: scale by sqrt(2 / input_size)
let scale = (2.0 / input_size as f32).sqrt();
let values: Vec<f32> = (0..(input_size * output_size))
.map(|_| rng.random_range(-scale..scale))
.collect();

// Create an Array2 from the values vector
Array2::from_shape_vec((input_size, output_size), values).unwrap()
}

/// Initializes bias vectors with zeros.
fn bias_initialization(size: usize) -> Array1<f32> {
Array1::zeros(size)
}

#[test]
fn test_bias_initialization() {
let size = 5;

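
A usage sketch of the new constructor and forward-pass contracts, assuming the layer is exported as `Transformer::layers::feedforward_layer::FeedForwardLayer` (path inferred from the file location) and that `new` takes `(batch_size, input_size, output_size, dropout_rate)` as documented above:

use ndarray::Array2;
use Transformer::layers::feedforward_layer::FeedForwardLayer; // assumed path

fn main() {
    let layer = FeedForwardLayer::new(1, 8, 8, 0.1);
    assert!(layer.is_initialized());

    // Shape (batch_size * seq_length, input_size) satisfies the forward_t preconditions.
    let input = Array2::<f32>::zeros((4, 8));
    let output = layer.forward_t(&input, false);
    assert_eq!(output.shape(), &[4, 8]);

    // These calls would violate #[requires] clauses and panic:
    // FeedForwardLayer::new(1, 0, 8, 0.1); // input_size must be > 0
    // FeedForwardLayer::new(1, 8, 8, 1.5); // dropout rate outside [0.0, 1.0]
}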
14 changes: 8 additions & 6 deletions src/layers/normalization.rs
@@ -1,3 +1,5 @@
#![allow(unused_imports)]
use contracts::{ensures, requires};
use ndarray::{Array2, Axis};

/// Performs layer normalization on a 2D array (batch size x embedding size).
@@ -10,6 +12,12 @@ use ndarray::{Array2, Axis};
///
/// # Returns:
/// A 2D array of the same shape as `x` after applying Layer Normalization.
#[requires(x.shape().len() == 2, "Input array must be 2-dimensional")]
#[requires(gamma.shape().len() == 2 && gamma.shape()[0] == 1, "Gamma must be a 2-dimensional array with a single row")]
#[requires(beta.shape().len() == 2 && beta.shape()[0] == 1, "Beta must be a 2-dimensional array with a single row")]
#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
#[ensures(ret.shape() == x.shape(), "The resulting array must have the same shape as the input array")]
#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting array must be finite")]
pub fn layer_norm(
x: &Array2<f32>,
gamma: &Array2<f32>,
@@ -19,18 +27,12 @@ pub fn layer_norm(
// Calculate mean and variance across the features (axis=1)
let mean = x.mean_axis(Axis(1)).unwrap();
let variance = x.var_axis(Axis(1), 0.0);
//println!("Mean: {:?}", mean);
// println!("Variance: {:?}", variance);

let expanded_mean = mean.insert_axis(Axis(1)); // Expands [6] to [6, 1]
let expanded_variance = variance.insert_axis(Axis(1)); // Expands [6] to [6, 1]
// println!("EXPMean: {:?}", expanded_mean);
//println!("EXPVariance: {:?}", expanded_variance);

// Add epsilon to expanded variance
let normalized = (x - &expanded_mean) / (expanded_variance + epsilon).mapv(f32::sqrt);

// println!("Normalized {}", normalized);

normalized * gamma + beta
}
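
A small sketch exercising the strengthened `layer_norm` contract, assuming the function is exported as `Transformer::layers::normalization::layer_norm` (path inferred from the file location) and that the final parameter is the `epsilon: f32` named in the contracts:

use ndarray::{array, Array2};
use Transformer::layers::normalization::layer_norm; // assumed path

fn main() {
    let x: Array2<f32> = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
    // Gamma and beta must be single-row 2D arrays per the new #[requires] clauses.
    let gamma = Array2::<f32>::ones((1, 3));
    let beta = Array2::<f32>::zeros((1, 3));

    let normed = layer_norm(&x, &gamma, &beta, 1e-5);

    // The #[ensures] clauses: same shape as the input, all values finite.
    assert_eq!(normed.shape(), x.shape());
    assert!(normed.iter().all(|v| v.is_finite()));
}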
9 changes: 5 additions & 4 deletions src/main.rs
@@ -8,7 +8,7 @@ use Transformer::model::decoder::decoding;
use Transformer::model::embedding::Embedding;
use Transformer::model::encoder::encoding;
use Transformer::model::transformer_model::transformer_model;
use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, INPUT_SIZE, OUTPUT_SIZE};
use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, EMBEDDING_SIZE, INPUT_SIZE, OUTPUT_SIZE};

fn main() {
println!("runs successfully!");
@@ -40,9 +40,10 @@ fn main() {
let embeddings = embedding.forward(tokens.clone());

// Convert embeddings to Array3 (batch_size, seq_length, embed_size)
let input_tensor = Array3::from_shape_fn((1, tokens.len(), 12), |(batch, seq, _)| {
embeddings[[seq, batch]]
});
let input_tensor = Array3::from_shape_fn(
(BATCH_SIZE, tokens.len(), EMBEDDING_SIZE),
|(batch, seq, _)| embeddings[[seq, batch]],
);

println!("INPUT : {}", input_tensor.clone());
// Initialize gamma and beta for layer normalization
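
The hard-coded shape `(1, tokens.len(), 12)` now comes from the settings constants instead. A standalone sketch of the same `Array3::from_shape_fn` pattern, with made-up stand-ins for `BATCH_SIZE` and `EMBEDDING_SIZE` and per-token embeddings indexed here by sequence position and embedding dimension:

use ndarray::{Array2, Array3};

// Illustrative stand-ins for Transformer::settings::{BATCH_SIZE, EMBEDDING_SIZE}.
const BATCH_SIZE: usize = 1;
const EMBEDDING_SIZE: usize = 12;

fn main() {
    let seq_length = 4;
    // Fake per-token embeddings of shape (seq_length, EMBEDDING_SIZE).
    let embeddings = Array2::from_shape_fn((seq_length, EMBEDDING_SIZE), |(s, e)| {
        (s * EMBEDDING_SIZE + e) as f32
    });

    // Lift into (BATCH_SIZE, seq_length, EMBEDDING_SIZE), mirroring the new main.rs code.
    let input_tensor = Array3::from_shape_fn(
        (BATCH_SIZE, seq_length, EMBEDDING_SIZE),
        |(_batch, seq, emb)| embeddings[[seq, emb]],
    );
    assert_eq!(input_tensor.shape(), &[BATCH_SIZE, seq_length, EMBEDDING_SIZE]);
}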
