From b7b45f85ec6b0739a0d2afbe36d9dc930d78ddd9 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Thu, 19 Dec 2024 21:57:31 +0100
Subject: [PATCH 01/10] added `contracts` crate for `pre` and `post`
 conditions for a DbC refactor

---
 Cargo.lock | 31 +++++++++++++++++++++++++++----
 Cargo.toml |  2 ++
 2 files changed, 29 insertions(+), 4 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7d46c0c..9102f30 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6,6 +6,7 @@ version = 4
 name = "Transformer"
 version = "0.1.0"
 dependencies = [
+ "contracts",
  "ndarray",
  "rand",
  "regex",
@@ -50,6 +51,17 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
 
+[[package]]
+name = "contracts"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1d1429e3bd78171c65aa010eabcdf8f863ba3254728dbfb0ad4b1545beac15c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "errno"
 version = "0.3.10"
@@ -288,6 +300,17 @@ dependencies = [
  "windows-sys",
 ]
 
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.90"
@@ -335,7 +358,7 @@ dependencies = [
  "log",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
  "wasm-bindgen-shared",
 ]
 
@@ -357,7 +380,7 @@ checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
 
@@ -477,7 +500,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
 ]
 
@@ -488,5 +511,5 @@ checksum = "7988d73a4303ca289df03316bc490e934accf371af6bc745393cf3c2c5c4f25d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
 ]

diff --git a/Cargo.toml b/Cargo.toml
index 096329a..9e0c87b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,3 +11,5 @@ description = "A Rust implementation of a Transformer-based model for NLP tasks."
 ndarray = "0.16.1"
 rand = "0.9.0-beta.1"
 regex = "1.11.1"
+contracts = "0.6.3"
+
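The `contracts` crate added above drives the whole series: `#[requires(...)]` declares a pre-condition checked when a function is entered, and `#[ensures(...)]` a post-condition checked on the return value, which the macro exposes as `ret`. A violated condition panics with the attached message. A minimal sketch of the mechanics (illustrative only; `checked_floor_div` is not repository code):

use contracts::{ensures, requires};

#[requires(divisor != 0, "divisor must be non-zero")]
#[ensures(ret * divisor <= dividend, "result is the floored quotient")]
fn checked_floor_div(dividend: u32, divisor: u32) -> u32 {
    dividend / divisor
}

The `debug_requires`/`debug_ensures` variants, briefly imported in the next patch, compile to debug assertions and are only checked in debug builds.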
ndarray = "0.16.1" rand = "0.9.0-beta.1" regex = "1.11.1" +contracts = "0.6.3" + From 9d30f2d9d7935ad9c9c752f9e106070f9a3be527 Mon Sep 17 00:00:00 2001 From: Jakub Date: Thu, 19 Dec 2024 22:09:05 +0100 Subject: [PATCH 02/10] first Contracts for encoder written --- src/layers/feedforward_layer.rs | 6 +++++- src/main.rs | 9 +++++---- src/model/encoder.rs | 7 +++++++ 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs index dab2207..98b6ad8 100644 --- a/src/layers/feedforward_layer.rs +++ b/src/layers/feedforward_layer.rs @@ -15,6 +15,7 @@ pub struct FeedForwardLayer { bias2: Array1, // weights and biases for second linear layer dropout_rate: f32, // Dropout rate + initialized: bool, } impl FeedForwardLayer { // init with random values @@ -38,9 +39,12 @@ impl FeedForwardLayer { weights2, bias2, dropout_rate, + initialized: true, } } - + pub fn is_initialized(&self) -> bool { + self.initialized + } pub fn forward_t(&self, input: &Array2, train: bool) -> Array2 { // First linear layer let first_dot = input.dot(&self.weights1); diff --git a/src/main.rs b/src/main.rs index e9e07ea..09603bd 100644 --- a/src/main.rs +++ b/src/main.rs @@ -8,7 +8,7 @@ use Transformer::model::decoder::decoding; use Transformer::model::embedding::Embedding; use Transformer::model::encoder::encoding; use Transformer::model::transformer_model::transformer_model; -use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, INPUT_SIZE, OUTPUT_SIZE}; +use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, EMBEDDING_SIZE, INPUT_SIZE, OUTPUT_SIZE}; fn main() { println!("runs successfully!"); @@ -40,9 +40,10 @@ fn main() { let embeddings = embedding.forward(tokens.clone()); // Convert embeddings to Array3 (batch_size, seq_length, embed_size) - let input_tensor = Array3::from_shape_fn((1, tokens.len(), 12), |(batch, seq, _)| { - embeddings[[seq, batch]] - }); + let input_tensor = Array3::from_shape_fn( + (BATCH_SIZE, tokens.len(), EMBEDDING_SIZE), + |(batch, seq, _)| embeddings[[seq, batch]], + ); println!("INPUT : {}", input_tensor.clone()); // Initialize gamma and beta for layer normalization diff --git a/src/model/encoder.rs b/src/model/encoder.rs index c8ed88f..9d67f4b 100644 --- a/src/model/encoder.rs +++ b/src/model/encoder.rs @@ -2,6 +2,8 @@ use crate::attention::multihead_attention::multi_head_attention; use crate::layers::feedforward_layer::FeedForwardLayer; use crate::layers::normalization::layer_norm; +use crate::settings::{BATCH_SIZE, EMBEDDING_SIZE}; +use contracts::{debug_requires, requires}; use ndarray::{array, Array2, Array3}; use std::ops::Add; @@ -16,6 +18,11 @@ use std::ops::Add; /// /// # Returns: /// - Output tensor of shape (batch_size, seq_length, d_model) after passing through the encoder layer. 
From 8d943599937a51b581b8d90a1024a2b6e045c56b Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 15:58:11 +0100
Subject: [PATCH 03/10] Added several `requires` statements to encoder

---
 src/model/encoder.rs | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/model/encoder.rs b/src/model/encoder.rs
index 9d67f4b..531fdd4 100644
--- a/src/model/encoder.rs
+++ b/src/model/encoder.rs
@@ -19,10 +19,14 @@ use std::ops::Add;
 /// # Returns:
 /// - Output tensor of shape (batch_size, seq_length, d_model) after passing through the encoder layer.
 
+#[requires(input.shape().len() == 3, "Input tensor must have 3 dimensions (batch_size, seq_length, embed_size)")]
 #[requires(input.shape()[2] == gamma.shape()[1], "Gamma dimensions do not match input feature size")]
+#[requires(gamma.shape()[0] == 1, "Gamma must have exactly one row")]
 #[requires(input.shape()[2] == beta.shape()[1], "Beta dimensions do not match input feature size")]
+#[requires(beta.shape()[0] == 1, "Beta must have exactly one row")]
 #[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
 #[requires(feed_forward_layer.is_initialized(), "Feed-forward layer is not properly initialized")]
+#[requires(input.shape()[1] > 0, "Sequence length must be greater than zero")]
 pub fn encoding(
     input: Array3<f32>,   // Input tensor
     gamma: Array2<f32>,   // Scale parameter for layer norm
From 193c4b0df5211381c021f3cef04ed933973833a8 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:19:57 +0100
Subject: [PATCH 04/10] Feedforward layer holds more self information and has
 new requirements

---
 src/layers/feedforward_layer.rs | 121 ++++++++++++--------------------
 src/model/encoder.rs            |  35 +++++++--
 2 files changed, 76 insertions(+), 80 deletions(-)

diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs
index 98b6ad8..d2f22d1 100644
--- a/src/layers/feedforward_layer.rs
+++ b/src/layers/feedforward_layer.rs
@@ -1,24 +1,31 @@
-#![allow(dead_code)]
-#![allow(unused_imports)]
-
 use crate::activation::activation_functions::gelu;
-use crate::settings::HIDDEN_SIZE;
-use ndarray::{array, Array1, Array2, Array3};
+use contracts::requires;
+use ndarray::{Array1, Array2, Array3};
 use rand::Rng;
 use std::ops::Add;
 
 pub struct FeedForwardLayer {
     weights1: Array2<f32>,
-    bias1: Array1<f32>, // weights and biases for first linear layer
-
+    bias1: Array1<f32>, // Weights and biases for the first linear layer
     weights2: Array2<f32>,
-    bias2: Array1<f32>, // weights and biases for second linear layer
-
-    dropout_rate: f32, // Dropout rate
+    bias2: Array1<f32>, // Weights and biases for the second linear layer
+    dropout_rate: f32,             // Dropout rate
+    pub(crate) input_size: usize,  // Input feature size
+    pub(crate) output_size: usize, // Output feature size
     initialized: bool,
 }
+
 impl FeedForwardLayer {
-    // init with random values
+    /// Initializes the FeedForwardLayer with random weights and biases.
+    ///
+    /// # Parameters:
+    /// - `_batch_size`: Batch size (not stored, used for verification if needed).
+    /// - `input_size`: Number of input features (d_model).
+    /// - `output_size`: Number of output features (d_model).
+    /// - `dropout_rate`: Probability of dropping a unit in dropout (0.0 to 1.0).
+    #[requires(input_size > 0, "Input size must be greater than 0")]
+    #[requires(output_size > 0, "Output size must be greater than 0")]
+    #[requires((0.0..=1.0).contains(&dropout_rate), "Dropout rate must be in range [0.0, 1.0]")]
     pub fn new(
         _batch_size: usize,
         input_size: usize,
         output_size: usize,
         dropout_rate: f32,
     ) -> Self {
@@ -33,18 +40,33 @@ impl FeedForwardLayer {
         let weights2 = he_initialization(hidden_size, output_size); // Shape: (hidden_size, output_size)
         let bias2 = bias_initialization(output_size); // Shape: (output_size,)
+
         FeedForwardLayer {
             weights1,
             bias1,
             weights2,
             bias2,
             dropout_rate,
+            input_size,
+            output_size,
             initialized: true,
         }
     }
+
+    /// Verifies that the layer is properly initialized.
     pub fn is_initialized(&self) -> bool {
         self.initialized
     }
+
+    /// Performs a forward pass in training mode.
+    ///
+    /// # Parameters:
+    /// - `input`: 2D input tensor of shape (batch_size * seq_length, input_size).
+    /// - `train`: Whether to apply dropout.
+    ///
+    /// # Returns:
+    /// - Output tensor of shape (batch_size * seq_length, output_size).
+    #[requires(input.shape()[1] == self.input_size, "Input feature size must match layer's input size")]
     pub fn forward_t(&self, input: &Array2<f32>, train: bool) -> Array2<f32> {
         // First linear layer
         let first_dot = input.dot(&self.weights1);
@@ -61,56 +83,49 @@ impl FeedForwardLayer {
         // Second linear layer
         first_activation.dot(&self.weights2).add(&self.bias2)
     }
-    /// Forward pass through the feed-forward layer.
+
+    /// Performs a forward pass in evaluation mode.
     ///
     /// # Parameters:
-    /// - `x`: Input tensor of shape (batch_size, seq_length, d_model).
+    /// - `x`: Input tensor of shape (batch_size, seq_length, input_size).
     ///
     /// # Returns:
-    /// - Output tensor of shape (batch_size, seq_length, d_model).
+    /// - Output tensor of shape (batch_size, seq_length, output_size).
+    #[requires(x.shape()[2] == self.input_size, "Input feature size must match layer's input size")]
+    #[requires(!x.is_empty(), "Input tensor must not be empty")]
     pub fn forward(&self, x: Array3<f32>) -> Array3<f32> {
         let batch_size = x.shape()[0];
         let seq_length = x.shape()[1];
         let d_model = x.shape()[2];
-
         // Flatten the input to 2D: (batch_size * seq_length, d_model)
         let reshaped_x = x.to_shape((batch_size * seq_length, d_model));
         match reshaped_x {
             Ok(valid_reshaped_x) => {
                 let dot = valid_reshaped_x.dot(&self.weights1);
-
                 let add = dot + &self.bias1;
-                // First linear layer + gelu
-
+                // First linear layer + GELU activation
                 let hidden = gelu(&add.to_owned());
-
                 let dot2 = hidden.dot(&self.weights2);
                 // Second linear layer
                 let output = dot2 + &self.bias2;
 
-                // Reshape back to 3D: (batch_size, seq_length, d_model)
+                // Reshape back to 3D
                 output
-                    .to_shape((batch_size, seq_length, d_model))
+                    .to_shape((batch_size, seq_length, self.output_size))
                     .unwrap()
                     .to_owned()
-                // Use the `hidden` result here for further processing.
             }
             Err(ref e) => {
                 eprintln!("Shape error: {}", e);
-                eprintln!(
-                    "Shape of input : {:?} -=- Shape of weights : {:?} ",
-                    reshaped_x.unwrap().shape(),
-                    seq_length
-                );
-                // Or return unchanged?
-                x
+                x // Fallback to the original input on failure
             }
         }
     }
 
+    /// Applies dropout to the input.
     fn apply_dropout(&self, input: &Array2<f32>) -> Array2<f32> {
         let mut rng = rand::rng();
         input.map(|&x| {
@@ -123,61 +138,17 @@ impl FeedForwardLayer {
         })
     }
 }
 
+/// He initialization function.
 fn he_initialization(input_size: usize, output_size: usize) -> Array2<f32> {
     let mut rng = rand::rng();
-    // He initialization: scale by sqrt(2 / input_size)
     let scale = (2.0 / input_size as f32).sqrt();
 
     let values: Vec<f32> = (0..(input_size * output_size))
         .map(|_| rng.random_range(-scale..scale))
         .collect();
-
-    // Create an Array2 from the values vector
     Array2::from_shape_vec((input_size, output_size), values).unwrap()
 }
 
+/// Initializes bias vectors with zeros.
 fn bias_initialization(size: usize) -> Array1<f32> {
     Array1::zeros(size)
 }
-
-fn test_bias_initialization() {
-    let size = 5;
-
-    let bias = bias_initialization(size);
-
-    // Check that the dimensions are correct (size x 1)
-    assert_eq!(bias.shape(), &[size,]);
-
-    // Check that all values in the bias array are 0.0
-    for &value in bias.iter() {
-        assert_eq!(value, 0.0);
-    }
-}
-
-#[test]
-fn test_feedforward_forward() {
-    // Define a dummy input with shape (batch_size, seq_length, d_model)
-    let input = array![
-        [
-            [0.1, 0.2, 0.3, 0.4],
-            [0.5, 0.6, 0.7, 0.8],
-            [0.9, 1.0, 1.1, 1.2],
-        ],
-        [
-            [1.3, 1.4, 1.5, 1.6],
-            [1.7, 1.8, 1.9, 2.0],
-            [2.1, 2.2, 2.3, 2.4],
-        ]
-    ];
-
-    // Create a FeedForwardLayer instance
-    let feed_forward_layer = FeedForwardLayer::new(2, 4, 4, 0.1);
-
-    // Feed forward through the layer
-    let feed_forward_output = feed_forward_layer.forward(input.clone());
-
-    // Assert the output shape
-    assert_eq!(feed_forward_output.shape(), &[2, 3, 4]);
-
-    // Optionally, check if the output is transformed (e.g., not equal to input)
-    assert!(!feed_forward_output.iter().eq(input.iter())); // Check if output is different from input
-}

diff --git a/src/model/encoder.rs b/src/model/encoder.rs
index 531fdd4..54da22f 100644
--- a/src/model/encoder.rs
+++ b/src/model/encoder.rs
@@ -3,7 +3,7 @@
 use crate::attention::multihead_attention::multi_head_attention;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::settings::{BATCH_SIZE, EMBEDDING_SIZE};
-use contracts::{debug_requires, requires};
+use contracts::requires;
 use ndarray::{array, Array2, Array3};
 use std::ops::Add;
@@ -27,6 +27,8 @@ use std::ops::Add;
 #[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
 #[requires(feed_forward_layer.is_initialized(), "Feed-forward layer is not properly initialized")]
 #[requires(input.shape()[1] > 0, "Sequence length must be greater than zero")]
+#[requires(feed_forward_layer.input_size == input.shape()[2], "Feed-forward layer input size must match embedding size")]
+#[requires(feed_forward_layer.output_size == input.shape()[2], "Feed-forward layer output size must match embedding size")]
 pub fn encoding(
     input: Array3<f32>,   // Input tensor
     gamma: Array2<f32>,   // Scale parameter for layer norm
@@ -37,6 +39,24 @@ pub fn encoding(
     let batch_size = input.shape()[0];
     let seq_length = input.shape()[1];
     let d_model = input.shape()[2];
+    assert_eq!(
+        gamma.shape()[1],
+        d_model,
+        "Gamma dimensions do not match input feature size"
+    );
+    assert_eq!(gamma.shape()[0], 1, "Gamma must have exactly one row");
+    assert_eq!(
+        beta.shape()[1],
+        d_model,
+        "Beta dimensions do not match input feature size"
+    );
+    assert_eq!(beta.shape()[0], 1, "Beta must have exactly one row");
+    assert!(epsilon > 0.0, "Epsilon must be positive and non-zero");
+    assert!(
+        feed_forward_layer.is_initialized(),
+        "Feed-forward layer is not properly initialized"
+    );
+    assert!(seq_length > 0, "Sequence length must be greater than zero");
 
     // Multi-Head Attention
     let dummy_learned_matrices =
         Array2::<f32>::ones((d_model, d_model)); // Replace with actual learned parameters
@@ -52,7 +72,6 @@ pub fn encoding(
         dummy_learned_matrices.clone(), // W_O
     );
 
-    //println!("Attention1 :{}", attention_output);
     // Add & Normalize (Residual Connection + Layer Norm)
     let attention_residual = attention_output.add(&input); // Residual connection
     let reshaped_attention = attention_residual
@@ -71,8 +90,7 @@ pub fn encoding(
 
     // Feed-Forward Network
     let feed_forward_output = feed_forward_layer.forward(attention_norm.clone());
-    //println!("feed_forward_output :{:?}", feed_forward_output);
-    // Add & Normalize (Residual Connection + Layer Norm)
+    // Add & Normalize (Residual Connection + Layer Norm)
     let feed_forward_residual = feed_forward_output.add(&attention_norm); // Residual connection
     let reshaped_ff_attention = feed_forward_residual
         .to_shape((batch_size * seq_length, d_model)) // Flatten to 2D
@@ -87,8 +105,15 @@ pub fn encoding(
         .unwrap()
         .to_owned();
 
+    assert_eq!(
+        output.shape(),
+        input.shape(),
+        "Output tensor must have the same shape as the input tensor"
+    );
+
     output
 }
+
 #[test]
 fn test_encoding() {
     // Dummy input tensor (batch_size = 2, seq_length = 3, d_model = 4)
@@ -114,7 +139,7 @@ fn test_encoding() {
 
     // Call the encoding function
     let epsilon = 1e-6;
-    let output = encoding(input, gamma, beta, epsilon, &feed_forward_layer);
+    let output = encoding(input.clone(), gamma, beta, epsilon, &feed_forward_layer);
 
     // Assert that the output has the correct shape
     assert_eq!(output.shape(), &[2, 3, 4]);
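Patch 04's stored `input_size`/`output_size` let the encoder contracts tie the layer to the tensor it will process. A small sketch of a constructor call that satisfies them (illustrative; assumes d_model equals EMBEDDING_SIZE, as in main.rs):

use Transformer::layers::feedforward_layer::FeedForwardLayer;
use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, EMBEDDING_SIZE};

fn build_encoder_ffn() -> FeedForwardLayer {
    // input_size == output_size == EMBEDDING_SIZE, so both new `requires`
    // clauses on `encoding` hold; a mismatched layer now fails fast at the
    // contract boundary instead of erroring deep inside `forward`.
    FeedForwardLayer::new(BATCH_SIZE, EMBEDDING_SIZE, EMBEDDING_SIZE, DROPOUT_RATE)
}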
From a7e64c3461fbda82eeed6668d6b3504f128360d1 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:24:04 +0100
Subject: [PATCH 05/10] made forwarding for the tensor safer

---
 src/layers/feedforward_layer.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs
index d2f22d1..1e416e9 100644
--- a/src/layers/feedforward_layer.rs
+++ b/src/layers/feedforward_layer.rs
@@ -67,6 +67,8 @@ impl FeedForwardLayer {
     /// # Returns:
     /// - Output tensor of shape (batch_size * seq_length, output_size).
     #[requires(input.shape()[1] == self.input_size, "Input feature size must match layer's input size")]
+    #[requires(input.shape()[0] > 0, "Input tensor must not be empty")]
+    #[requires(input.shape()[1] == self.input_size, "Input tensor's second dimension must match input_size")]
     pub fn forward_t(&self, input: &Array2<f32>, train: bool) -> Array2<f32> {
         // First linear layer
         let first_dot = input.dot(&self.weights1);
@@ -92,7 +94,8 @@ impl FeedForwardLayer {
     /// # Returns:
     /// - Output tensor of shape (batch_size, seq_length, output_size).
     #[requires(x.shape()[2] == self.input_size, "Input feature size must match layer's input size")]
-    #[requires(!x.is_empty(), "Input tensor must not be empty")]
+    #[requires(x.shape()[0] > 0, "Input tensor must not be empty")]
+    #[requires(x.shape()[2] == self.input_size, "Input tensor's third dimension must match input_size")]
     pub fn forward(&self, x: Array3<f32>) -> Array3<f32> {
         let batch_size = x.shape()[0];
         let seq_length = x.shape()[1];

From ead9c4aaa77ea931a42c122eb9846b0355754a66 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:32:08 +0100
Subject: [PATCH 06/10] added new requirements to softmax

---
 src/attention/softmax.rs | 15 +++++++++++++--
 src/model/encoder.rs     |  2 +-
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/attention/softmax.rs b/src/attention/softmax.rs
index aa5137e..93ba10b 100644
--- a/src/attention/softmax.rs
+++ b/src/attention/softmax.rs
@@ -1,20 +1,30 @@
-#![allow(unused_imports)] // {array} import is not recognized as it is used in #[test]
+#![allow(unused_imports)]
+
+use contracts::{ensures, requires};
+// {array} import is not recognized as it is used in #[test]
 use ndarray::{array, s, Array, Array1, Array2, Array3, ArrayView1, Axis};
 
+//noinspection ALL
+#[requires(!vec.is_empty(), "Input vector must not be empty.")]
+#[ensures(ret.len() == vec.len(), "Output vector must have the same length as the input vector.")]
 pub fn softmax_vector(vec: ArrayView1<f32>) -> Array1<f32> {
     let max = vec.fold(f32::NEG_INFINITY, |a, &b| a.max(b)); // Stabilize by subtracting max
     let exp_vec = vec.mapv(|x| (x - max).exp());
     let sum: f32 = exp_vec.sum();
     exp_vec / sum
 }
+#[requires(!vec.is_empty(), "Input vector must not be empty.")]
 pub fn softmax_vec(vec: Vec<f32>) -> Array1<f32> {
     let array = Array1::from(vec); // Convert Vec<f32> to Array1<f32>
     softmax_vector(array.view())
 }
+#[requires(mat.shape().len() == 2, "Input matrix must be 2-dimensional.")]
 pub fn softmax_matrix(mat: &Array2<f32>) -> Array2<f32> {
     convert_to_array2(mat.map_axis(Axis(1), softmax_vector))
 }
+
+#[requires(attention_scores.shape().len() == 3, "Input tensor must be 3-dimensional.")]
 pub fn softmax_3d(attention_scores: &Array3<f32>) -> Array3<f32> {
     let batch_size = attention_scores.shape()[0];
     let mut softmax_result = Array3::<f32>::zeros(attention_scores.raw_dim());
@@ -33,7 +43,8 @@ pub fn softmax_3d(attention_scores: &Array3<f32>) -> Array3<f32> {
 
     softmax_result
 }
-
+#[requires(!array1d.is_empty(), "Input array must not be empty.")]
+#[requires(array1d.iter().all(|row| !row.is_empty()), "All rows must be non-empty.")]
 fn convert_to_array2(array1d: Array<Array1<f32>, ndarray::Ix1>) -> Array2<f32> {
     // Check if the input array is non-empty
     assert!(!array1d.is_empty(), "Input array must not be empty.");

diff --git a/src/model/encoder.rs b/src/model/encoder.rs
index 54da22f..0d036ba 100644
--- a/src/model/encoder.rs
+++ b/src/model/encoder.rs
@@ -3,7 +3,7 @@
 use crate::attention::multihead_attention::multi_head_attention;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::settings::{BATCH_SIZE, EMBEDDING_SIZE};
-use contracts::requires;
+use contracts::{ensures, requires};
 use ndarray::{array, Array2, Array3};
 use std::ops::Add;
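The softmax contracts above lend themselves to a quick property-style check. A sketch of one such test (hypothetical, placed next to the existing tests in src/attention/softmax.rs, where `array` and `softmax_vector` are already in scope):

#[test]
fn softmax_vector_keeps_length_and_sums_to_one() {
    let v = array![1.0_f32, 2.0, 3.0];
    let out = softmax_vector(v.view());
    // Guaranteed by the `ensures` clause on `softmax_vector`.
    assert_eq!(out.len(), v.len());
    // A property of softmax itself, not enforced by the contract.
    assert!((out.sum() - 1.0).abs() < 1e-6);
}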
From 9345b19fef165074a5dd414db0b939313ad45442 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:38:35 +0100
Subject: [PATCH 07/10] requirements for decoder added

---
 src/model/decoder.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/src/model/decoder.rs b/src/model/decoder.rs
index bfd1e23..4356682 100644
--- a/src/model/decoder.rs
+++ b/src/model/decoder.rs
@@ -4,9 +4,20 @@
 use crate::attention::softmax::softmax_3d;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::model::encoder::encoding;
+use contracts::requires;
 use ndarray::{array, Array2, Array3};
 use std::ops::Add;
 
+#[requires(input.shape().len() == 3, "Input tensor must have 3 dimensions (batch_size, seq_length, d_model)")]
+#[requires(encoder_output.shape().len() == 3, "Encoder output tensor must have 3 dimensions (batch_size, seq_length, d_model)")]
+#[requires(input.shape() == encoder_output.shape(), "Input tensor and encoder output tensor must have the same shape")]
+#[requires(input.shape()[2] == gamma.shape()[1], "Gamma dimensions do not match input feature size")]
+#[requires(gamma.shape()[0] == 1, "Gamma must have exactly one row")]
+#[requires(input.shape()[2] == beta.shape()[1], "Beta dimensions do not match input feature size")]
+#[requires(beta.shape()[0] == 1, "Beta must have exactly one row")]
+#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
+#[requires(feed_forward_layer.is_initialized(), "Feed-forward layer is not properly initialized")]
+#[requires(input.shape()[1] > 0, "Sequence length must be greater than zero")]
 pub fn decoding(
     input: Array3<f32>, // Input tensor (usually from the previous decoder layer or initial input)
     encoder_output: Array3<f32>, // Encoder output (for the encoder-decoder attention)
@@ -87,6 +98,7 @@ pub fn decoding(
     ff_norm // decoder output
 }
+
 #[test]
 fn test_decoding() {
     // Dummy input tensor (batch_size = 2, seq_length = 4, d_model = 4)
@@ -121,7 +133,6 @@ fn test_decoding() {
     );
 
     // Call the decoding function
-
     let output = decoding(input, enc_out, gamma, beta, epsilon, &feed_forward_layer);
 
     // Assert that the output has the correct shape
From 4e745112527e50c700173702a475f06edd8094c9 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:50:01 +0100
Subject: [PATCH 08/10] requirements for linear algebra added

---
 src/math/linear_algebra.rs   | 50 ++++++++++++++++++++++++++++--------
 src/model/decoder.rs         |  1 -
 tests/linear_algebra_test.rs | 16 ------------
 3 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/src/math/linear_algebra.rs b/src/math/linear_algebra.rs
index 8a01a01..3764c01 100644
--- a/src/math/linear_algebra.rs
+++ b/src/math/linear_algebra.rs
@@ -1,3 +1,5 @@
+#![allow(warnings)]
+use contracts::{ensures, requires};
 use ndarray::linalg::general_mat_mul;
 use ndarray::{s, Array1, Array2, Array3};
 
@@ -9,13 +11,17 @@
 ///
 /// # Returns
 /// An `Array2<f32>` representing the result of the matrix multiplication.
+#[requires(a.ncols() == b.nrows(), "Matrix dimensions are incompatible for multiplication.")]
+#[ensures(ret.is_ok(), "Matrix multiplication should be successful")]
+#[ensures(ret.as_ref().unwrap().nrows() > 0, "The resulting matrix must have more than 0 rows.")]
+#[ensures(ret.as_ref().unwrap().ncols() > 0, "The resulting matrix must have more than 0 columns.")]
 pub fn matmul(a: &Array2<f32>, b: &Array2<f32>) -> Result<Array2<f32>, &'static str> {
     if a.ncols() != b.nrows() {
         return Err("Matrix dimensions are incompatible for multiplication.");
     }
-    let mut result = Array2::<f32>::zeros((a.nrows(), b.ncols()));
-    general_mat_mul(1.0, a, b, 0.0, &mut result);
-    Ok(result)
+    let mut ret = Array2::<f32>::zeros((a.nrows(), b.ncols()));
+    general_mat_mul(1.0, a, b, 0.0, &mut ret);
+    Ok(ret)
 }
 
 pub fn dotproduct(a: &Array1<f32>, b: &Array1<f32>) -> f32 {
@@ -34,6 +40,12 @@
 /// # Panics:
 /// - If the batch sizes of `a` and `b` don't match.
 /// - If the inner dimensions (`k` in `a` and `b`) don't align for matrix multiplication.
+#[requires(a.shape().len() == 3, "Input tensor a must have 3 dimensions")]
+#[requires(b.shape().len() == 3, "Input tensor b must have 3 dimensions")]
+#[requires(a.shape()[0] == b.shape()[0], "Batch sizes must match")]
+#[requires(a.shape()[2] == b.shape()[1], "Inner dimensions must align for matrix multiplication")]
+#[ensures(ret.shape().len() == 3, "The resulting tensor must have 3 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn tensor_product(a: &Array3<f32>, b: &Array3<f32>) -> Array3<f32> {
     // Check that batch sizes match and if dimension align
     assert_eq!(a.shape()[0], b.shape()[0], "Batch sizes must match");
@@ -45,20 +57,21 @@ pub fn tensor_product(a: &Array3<f32>, b: &Array3<f32>) -> Array3<f32> {
 
     // Initialize a 3D tensor for the result, filled with zeros.
     // Its shape corresponds to (batch_size, m, n).
-    let mut result = Array3::<f32>::zeros((batch_size, m, n));
+    let mut ret = Array3::<f32>::zeros((batch_size, m, n));
 
     for i in 0..batch_size {
         // - `s![i, .., ..]` selects the `i`th matrix (2D slice) in the batch.
         let a_slice = a.slice(s![i, .., ..]);
         let b_slice = b.slice(s![i, .., ..]);
-        let mut result_slice = result.slice_mut(s![i, .., ..]); // Mutable slice of the result matrix for this batch.
+        let mut ret_slice = ret.slice_mut(s![i, .., ..]); // Mutable slice of the result matrix for this batch.
 
-        general_mat_mul(1.0, &a_slice, &b_slice, 0.0, &mut result_slice);
+        general_mat_mul(1.0, &a_slice, &b_slice, 0.0, &mut ret_slice);
     }
 
-    result
+    ret
 }
+
 /// Applies a linear projection to a 3D tensor using a weight matrix.
 ///
 /// # Arguments
 /// - `x`: Input tensor of shape [batch, seq_len, d_model].
 /// - `w`: Weight matrix of shape [d_model, output_dim].
 ///
 /// # Returns
 /// A new 3D tensor with the projection applied (e.g., [batch, seq_len, output_dim]).
+#[requires(x.shape().len() == 3, "Input tensor x must have 3 dimensions")]
+#[requires(w.shape().len() == 2, "Weight matrix w must have 2 dimensions")]
+#[requires(x.shape()[2] == w.shape()[0], "Input feature size must match the weight matrix's rows")]
+#[ensures(ret.shape().len() == 3, "The resulting tensor must have 3 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn apply_projection(x: &Array3<f32>, w: &Array2<f32>) -> Array3<f32> {
     let batch_size = x.shape()[0];
     let seq_len = x.shape()[1];
     let d_model = x.shape()[2];
     assert_eq!(d_model, w.shape()[0]);
     let d_k = w.shape()[1]; // Output dimension (head dimension)
 
-    // Initialize the result tensor with shape (batch_size, seq_len, d_k)
-    let mut result = Array3::<f32>::zeros((batch_size, seq_len, d_k));
+    // Initialize the ret tensor with shape (batch_size, seq_len, d_k)
+    let mut ret = Array3::<f32>::zeros((batch_size, seq_len, d_k));
 
     // Perform matrix multiplication for each batch
     for i in 0..batch_size {
         let x_slice = x.slice(s![i, .., ..]); // Slice the i-th batch (shape: (seq_len, d_model))
         let mul = matmul(&x_slice.to_owned(), w); // Perform matrix multiplication
         if mul.is_ok() {
-            result.slice_mut(s![i, .., ..]).assign(&mul.unwrap());
+            ret.slice_mut(s![i, .., ..]).assign(&mul.unwrap());
         }
     }
 
-    result
+    ret
 }
 
+/// Flattens a 3D array into a 2D array.
+///
+/// # Parameters
+/// - `batch`: A 3D tensor of shape (batch_size, seq_length, embed_size).
+///
+/// # Returns
+/// A 2D tensor of shape (batch_size * seq_length, embed_size).
+#[requires(batch.shape().len() == 3, "Input tensor must have 3 dimensions")]
+#[ensures(ret.shape().len() == 2, "The resulting tensor must have 2 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn flatten_3d_array(batch: Array3<f32>) -> Array2<f32> {
     let (batch_size, seq_length, embed_size) = batch.dim();
     batch

diff --git a/src/model/decoder.rs b/src/model/decoder.rs
index 4356682..0c504da 100644
--- a/src/model/decoder.rs
+++ b/src/model/decoder.rs
@@ -1,6 +1,5 @@
 #![allow(warnings)]
 use crate::attention::multihead_attention::multi_head_attention;
-use crate::attention::softmax::softmax_3d;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::model::encoder::encoding;

diff --git a/tests/linear_algebra_test.rs b/tests/linear_algebra_test.rs
index b02daa0..8eada2a 100644
--- a/tests/linear_algebra_test.rs
+++ b/tests/linear_algebra_test.rs
@@ -21,22 +21,6 @@ fn test_matmul_valid_input() {
     }
 }
 
-#[test]
-fn test_matmul_invalid_input() {
-    // Arrange: Define input matrices with mismatched dimensions
-    let a = array![[1.0, 2.0], [3.0, 4.0]]; // 2x2 matrix
-    let b = array![[5.0, 6.0]]; // 1x2 matrix (mismatched dimensions)
-
-    // Act: Perform the multiplication, expecting an error
-    let result = matmul(&a, &b);
-
-    // Assert: Ensure the result is an error due to incompatible dimensions
-    assert_eq!(
-        result,
-        Err("Matrix dimensions are incompatible for multiplication.")
-    );
-}
-
 #[test]
 fn test_dotproduct() {
     let a: Array1<f32> = array![1.0, 2.0, 3.0];
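Note that `#[requires(a.ncols() == b.nrows(), ...)]` makes `matmul`'s `Err` branch unreachable: the generated pre-condition check panics before the function body runs, which is presumably why `test_matmul_invalid_input` is deleted above rather than updated. A sketch of the test that would replace it under contracts (hypothetical, not part of the patch):

#[test]
#[should_panic]
fn matmul_incompatible_dims_panics() {
    let a = array![[1.0_f32, 2.0], [3.0, 4.0]]; // 2x2 matrix
    let b = array![[5.0_f32, 6.0]]; // 1x2 matrix (mismatched dimensions)
    // The pre-condition check fires before the body's `Err` return is reached.
    let _ = matmul(&a, &b);
}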
From d4e77ff4e9deb6a6d59f89692ce593f372bada1c Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 16:56:36 +0100
Subject: [PATCH 09/10] requirements for norm and pos-enc

---
 src/layers/normalization.rs     | 14 ++++++++------
 src/math/positional_encoding.rs |  7 ++++++-
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/layers/normalization.rs b/src/layers/normalization.rs
index 5c6994d..fc5d70f 100644
--- a/src/layers/normalization.rs
+++ b/src/layers/normalization.rs
@@ -1,3 +1,5 @@
+#![allow(unused_imports)]
+use contracts::{ensures, requires};
 use ndarray::{Array2, Axis};
 
 /// Performs layer normalization on a 2D array (batch size x embedding size).
@@ -10,6 +12,12 @@
 ///
 /// # Returns:
 /// A 2D array of the same shape as `x` after applying Layer Normalization.
+#[requires(x.shape().len() == 2, "Input array must be 2-dimensional")]
+#[requires(gamma.shape().len() == 2 && gamma.shape()[0] == 1, "Gamma must be a 2-dimensional array with a single row")]
+#[requires(beta.shape().len() == 2 && beta.shape()[0] == 1, "Beta must be a 2-dimensional array with a single row")]
+#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
+#[ensures(ret.shape() == x.shape(), "The resulting array must have the same shape as the input array")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting array must be finite")]
 pub fn layer_norm(
     x: &Array2<f32>,
     gamma: &Array2<f32>,
@@ -19,18 +27,12 @@ pub fn layer_norm(
     // Calculate mean and variance across the features (axis=1)
     let mean = x.mean_axis(Axis(1)).unwrap();
     let variance = x.var_axis(Axis(1), 0.0);
-    //println!("Mean: {:?}", mean);
-    // println!("Variance: {:?}", variance);
 
     let expanded_mean = mean.insert_axis(Axis(1)); // Expands [6] to [6, 1]
     let expanded_variance = variance.insert_axis(Axis(1)); // Expands [6] to [6, 1]
-    // println!("EXPMean: {:?}", expanded_mean);
-    //println!("EXPVariance: {:?}", expanded_variance);
 
     // Add epsilon to expanded variance
     let normalized = (x - &expanded_mean) / (expanded_variance + epsilon).mapv(f32::sqrt);
-    // println!("Normalized {}", normalized);
-
     normalized * gamma + beta
 }

diff --git a/src/math/positional_encoding.rs b/src/math/positional_encoding.rs
index 190466d..c33c213 100644
--- a/src/math/positional_encoding.rs
+++ b/src/math/positional_encoding.rs
@@ -1,3 +1,6 @@
+#![allow(warnings)]
+use contracts::{ensures, requires};
+
 /// Computes the sinusoidal positional encoding for a given position and dimension.
 ///
 /// This encoding is used in Transformer models to represent token positions
@@ -10,10 +13,12 @@
 ///
 /// # Returns
 /// The positional encoding value (as `f32`).
+#[requires(embedding_size > 0, "Embedding size must be greater than 0")]
+#[ensures(ret.is_finite(), "The resulting value must be finite")]
 pub fn sinusoidal_pos_encoding(pos: usize, index: usize, embedding_size: usize) -> f32 {
     if pos == 0 {
         return 0.0;
-    };
+    }
     let divisor = 10000f32.powf(2.0 * (index as f32 / embedding_size as f32)); // 10000^(2 * index / embedding_size)
 
     if index % 2 == 0 {
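The `layer_norm` post-conditions can be exercised directly as well. A sketch (hypothetical test, shaped to match the contract's expectation of one-row `gamma`/`beta`; assumes `use ndarray::array;` is available in the test scope):

#[test]
fn layer_norm_preserves_shape_and_stays_finite() {
    let x = array![[1.0_f32, 2.0, 3.0], [4.0, 5.0, 6.0]];
    let gamma = array![[1.0_f32, 1.0, 1.0]]; // single row, as required
    let beta = array![[0.0_f32, 0.0, 0.0]]; // single row, as required
    let out = layer_norm(&x, &gamma, &beta, 1e-6);
    // Both `ensures` clauses: same shape as the input, all values finite.
    assert_eq!(out.shape(), x.shape());
    assert!(out.iter().all(|v| v.is_finite()));
}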
From 02d7144f238a623f0ddab136e272cf66ea607c90 Mon Sep 17 00:00:00 2001
From: Jakub
Date: Fri, 20 Dec 2024 17:01:26 +0100
Subject: [PATCH 10/10] added FF tests

---
 src/layers/feedforward_layer.rs | 47 ++++++++++++++++++++++++++++++++-
 1 file changed, 46 insertions(+), 1 deletion(-)

diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs
index 1e416e9..a97efd8 100644
--- a/src/layers/feedforward_layer.rs
+++ b/src/layers/feedforward_layer.rs
@@ -1,6 +1,7 @@
+#![allow(unused_imports)]
 use crate::activation::activation_functions::gelu;
 use contracts::requires;
-use ndarray::{Array1, Array2, Array3};
+use ndarray::{array, Array1, Array2, Array3};
 use rand::Rng;
 use std::ops::Add;
 
@@ -155,3 +156,47 @@ fn he_initialization(input_size: usize, output_size: usize) -> Array2<f32> {
 fn bias_initialization(size: usize) -> Array1<f32> {
     Array1::zeros(size)
 }
+
+#[test]
+fn test_bias_initialization() {
+    let size = 5;
+
+    let bias = bias_initialization(size);
+
+    // Check that the dimensions are correct (size x 1)
+    assert_eq!(bias.shape(), &[size,]);
+
+    // Check that all values in the bias array are 0.0
+    for &value in bias.iter() {
+        assert_eq!(value, 0.0);
+    }
+}
+
+#[test]
+fn test_feedforward_forward() {
+    // Define a dummy input with shape (batch_size, seq_length, d_model)
+    let input = array![
+        [
+            [0.1, 0.2, 0.3, 0.4],
+            [0.5, 0.6, 0.7, 0.8],
+            [0.9, 1.0, 1.1, 1.2],
+        ],
+        [
+            [1.3, 1.4, 1.5, 1.6],
+            [1.7, 1.8, 1.9, 2.0],
+            [2.1, 2.2, 2.3, 2.4],
+        ]
+    ];
+
+    // Create a FeedForwardLayer instance
+    let feed_forward_layer = FeedForwardLayer::new(2, 4, 4, 0.1);
+
+    // Feed forward through the layer
+    let feed_forward_output = feed_forward_layer.forward(input.clone());
+
+    // Assert the output shape
+    assert_eq!(feed_forward_output.shape(), &[2, 3, 4]);
+
+    // Optionally, check if the output is transformed (e.g., not equal to input)
+    assert!(!feed_forward_output.iter().eq(input.iter())); // Check if output is different from input
+}
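The positional-encoding contracts from patch 09 admit the same treatment. A final sketch (hypothetical tests, not part of the series):

#[test]
fn pos_encoding_is_finite_for_valid_input() {
    // Covered by `#[ensures(ret.is_finite(), ...)]`, but worth pinning down.
    let pe = sinusoidal_pos_encoding(3, 2, 16);
    assert!(pe.is_finite());
}

#[test]
#[should_panic]
fn pos_encoding_rejects_zero_embedding_size() {
    // Violates `#[requires(embedding_size > 0, ...)]`; the generated check panics.
    let _ = sinusoidal_pos_encoding(1, 0, 0);
}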