diff --git a/Cargo.lock b/Cargo.lock
index 7d46c0c..9102f30 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6,6 +6,7 @@ version = 4
 name = "Transformer"
 version = "0.1.0"
 dependencies = [
+ "contracts",
  "ndarray",
  "rand",
  "regex",
@@ -50,6 +51,17 @@ version = "1.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
+[[package]]
+name = "contracts"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1d1429e3bd78171c65aa010eabcdf8f863ba3254728dbfb0ad4b1545beac15c"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "errno"
 version = "0.3.10"
@@ -288,6 +300,17 @@ dependencies = [
  "windows-sys",
 ]
+[[package]]
+name = "syn"
+version = "1.0.109"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "unicode-ident",
+]
+
 [[package]]
 name = "syn"
 version = "2.0.90"
@@ -335,7 +358,7 @@ dependencies = [
  "log",
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
  "wasm-bindgen-shared",
 ]
@@ -357,7 +380,7 @@ checksum = "30d7a95b763d3c45903ed6c81f156801839e5ee968bb07e534c44df0fcd330c2"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
  "wasm-bindgen-backend",
  "wasm-bindgen-shared",
 ]
@@ -477,7 +500,7 @@ checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
 ]
@@ -488,5 +511,5 @@ checksum = "7988d73a4303ca289df03316bc490e934accf371af6bc745393cf3c2c5c4f25d"
 dependencies = [
  "proc-macro2",
  "quote",
- "syn",
+ "syn 2.0.90",
 ]
diff --git a/Cargo.toml b/Cargo.toml
index 096329a..9e0c87b 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -11,3 +11,5 @@ description = "A Rust implementation of a Transformer-based model for NLP tasks."
ndarray = "0.16.1" rand = "0.9.0-beta.1" regex = "1.11.1" +contracts = "0.6.3" + diff --git a/src/attention/softmax.rs b/src/attention/softmax.rs index aa5137e..93ba10b 100644 --- a/src/attention/softmax.rs +++ b/src/attention/softmax.rs @@ -1,20 +1,30 @@ -#![allow(unused_imports)] // {array} import is not recognized as it is used in #[test] +#![allow(unused_imports)] + +use contracts::{ensures, requires}; +// {array} import is not recognized as it is used in #[test] use ndarray::{array, s, Array, Array1, Array2, Array3, ArrayView1, Axis}; +//noinspection ALL +#[requires(!vec.is_empty(), "Input vector must not be empty.")] +#[ensures(ret.len() == vec.len(), "Output vector must have the same length as the input vector.")] pub fn softmax_vector(vec: ArrayView1) -> Array1 { let max = vec.fold(f32::NEG_INFINITY, |a, &b| a.max(b)); // Stabilize by subtracting max let exp_vec = vec.mapv(|x| (x - max).exp()); let sum: f32 = exp_vec.sum(); exp_vec / sum } +#[requires(!vec.is_empty(), "Input vector must not be empty.")] pub fn softmax_vec(vec: Vec) -> Array1 { let array = Array1::from(vec); // Convert Vec to Array1 softmax_vector(array.view()) } +#[requires(mat.shape().len() == 2, "Input matrix must be 2-dimensional.")] pub fn softmax_matrix(mat: &Array2) -> Array2 { convert_to_array2(mat.map_axis(Axis(1), softmax_vector)) } + +#[requires(attention_scores.shape().len() == 3, "Input tensor must be 3-dimensional.")] pub fn softmax_3d(attention_scores: &Array3) -> Array3 { let batch_size = attention_scores.shape()[0]; let mut softmax_result = Array3::::zeros(attention_scores.raw_dim()); @@ -33,7 +43,8 @@ pub fn softmax_3d(attention_scores: &Array3) -> Array3 { softmax_result } - +#[requires(!array1d.is_empty(), "Input array must not be empty.")] +#[requires(array1d.iter().all(|row| !row.is_empty()), "All rows must be non-empty.")] fn convert_to_array2(array1d: Array, ndarray::Ix1>) -> Array2 { // Check if the input array is non-empty assert!(!array1d.is_empty(), "Input array must not be empty."); diff --git a/src/layers/feedforward_layer.rs b/src/layers/feedforward_layer.rs index dab2207..a97efd8 100644 --- a/src/layers/feedforward_layer.rs +++ b/src/layers/feedforward_layer.rs @@ -1,23 +1,32 @@ -#![allow(dead_code)] #![allow(unused_imports)] - use crate::activation::activation_functions::gelu; -use crate::settings::HIDDEN_SIZE; +use contracts::requires; use ndarray::{array, Array1, Array2, Array3}; use rand::Rng; use std::ops::Add; pub struct FeedForwardLayer { weights1: Array2, - bias1: Array1, // weights and biases for first linear layer - + bias1: Array1, // Weights and biases for the first linear layer weights2: Array2, - bias2: Array1, // weights and biases for second linear layer - - dropout_rate: f32, // Dropout rate + bias2: Array1, // Weights and biases for the second linear layer + dropout_rate: f32, // Dropout rate + pub(crate) input_size: usize, // Input feature size + pub(crate) output_size: usize, // Output feature size + initialized: bool, } + impl FeedForwardLayer { - // init with random values + /// Initializes the FeedForwardLayer with random weights and biases. + /// + /// # Parameters: + /// - `_batch_size`: Batch size (not stored, used for verification if needed). + /// - `input_size`: Number of input features (d_model). + /// - `output_size`: Number of output features (d_model). + /// - `dropout_rate`: Probability of dropping a unit in dropout (0.0 to 1.0). 
+    #[requires(input_size > 0, "Input size must be greater than 0")]
+    #[requires(output_size > 0, "Output size must be greater than 0")]
+    #[requires((0.0..=1.0).contains(&dropout_rate), "Dropout rate must be in range [0.0, 1.0]")]
     pub fn new(
         _batch_size: usize,
         input_size: usize,
@@ -32,15 +41,35 @@ impl FeedForwardLayer {
         let weights2 = he_initialization(hidden_size, output_size); // Shape: (hidden_size, output_size)
         let bias2 = bias_initialization(output_size); // Shape: (output_size,)
+
         FeedForwardLayer {
             weights1,
             bias1,
             weights2,
             bias2,
             dropout_rate,
+            input_size,
+            output_size,
+            initialized: true,
         }
     }
+    /// Verifies that the layer is properly initialized.
+    pub fn is_initialized(&self) -> bool {
+        self.initialized
+    }
+
+    /// Performs a forward pass in training mode.
+    ///
+    /// # Parameters:
+    /// - `input`: 2D input tensor of shape (batch_size * seq_length, input_size).
+    /// - `train`: Whether to apply dropout.
+    ///
+    /// # Returns:
+    /// - Output tensor of shape (batch_size * seq_length, output_size).
+    #[requires(input.shape()[1] == self.input_size, "Input feature size must match layer's input size")]
+    #[requires(input.shape()[0] > 0, "Input tensor must not be empty")]
+    #[requires(input.shape()[1] == self.input_size, "Input tensor's second dimension must match input_size")]
     pub fn forward_t(&self, input: &Array2<f32>, train: bool) -> Array2<f32> {
         // First linear layer
         let first_dot = input.dot(&self.weights1);
@@ -57,56 +86,50 @@ impl FeedForwardLayer {
         // Second linear layer
         first_activation.dot(&self.weights2).add(&self.bias2)
     }
-    /// Forward pass through the feed-forward layer.
+
+    /// Performs a forward pass in evaluation mode.
     ///
     /// # Parameters:
-    /// - `x`: Input tensor of shape (batch_size, seq_length, d_model).
+    /// - `x`: Input tensor of shape (batch_size, seq_length, input_size).
     ///
     /// # Returns:
-    /// - Output tensor of shape (batch_size, seq_length, d_model).
+    /// - Output tensor of shape (batch_size, seq_length, output_size).
+    #[requires(x.shape()[2] == self.input_size, "Input feature size must match layer's input size")]
+    #[requires(x.shape()[0] > 0, "Input tensor must not be empty")]
+    #[requires(x.shape()[2] == self.input_size, "Input tensor's third dimension must match input_size")]
     pub fn forward(&self, x: Array3<f32>) -> Array3<f32> {
         let batch_size = x.shape()[0];
         let seq_length = x.shape()[1];
         let d_model = x.shape()[2];
-        // Flatten the input to 2D: (batch_size * seq_length, d_model)
         let reshaped_x = x.to_shape((batch_size * seq_length, d_model));
         match reshaped_x {
             Ok(valid_reshaped_x) => {
                 let dot = valid_reshaped_x.dot(&self.weights1);
-
                 let add = dot + &self.bias1;
-                // First linear layer + gelu
-
+                // First linear layer + GELU activation
                 let hidden = gelu(&add.to_owned());
-
                 let dot2 = hidden.dot(&self.weights2); // Second linear layer
                 let output = dot2 + &self.bias2;
-                // Reshape back to 3D: (batch_size, seq_length, d_model)
+                // Reshape back to 3D
                 output
-                    .to_shape((batch_size, seq_length, d_model))
+                    .to_shape((batch_size, seq_length, self.output_size))
                     .unwrap()
                     .to_owned()
-                // Use the `hidden` result here for further processing.
             }
             Err(ref e) => {
                 eprintln!("Shape error: {}", e);
-                eprintln!(
-                    "Shape of input : {:?} -=- Shape of weights : {:?} ",
-                    reshaped_x.unwrap().shape(),
-                    seq_length
-                );
-                // Or return unchanged?
-                x
+                x // Fallback to the original input on failure
             }
         }
     }
+    /// Applies dropout to the input.
     fn apply_dropout(&self, input: &Array2<f32>) -> Array2<f32> {
         let mut rng = rand::rng();
         input.map(|&x| {
@@ -119,22 +142,22 @@ impl FeedForwardLayer {
     }
 }
+/// He initialization function.
 fn he_initialization(input_size: usize, output_size: usize) -> Array2<f32> {
     let mut rng = rand::rng();
-    // He initialization: scale by sqrt(2 / input_size)
     let scale = (2.0 / input_size as f32).sqrt();
     let values: Vec<f32> = (0..(input_size * output_size))
         .map(|_| rng.random_range(-scale..scale))
         .collect();
-
-    // Create an Array2 from the values vector
     Array2::from_shape_vec((input_size, output_size), values).unwrap()
 }
+/// Initializes bias vectors with zeros.
 fn bias_initialization(size: usize) -> Array1<f32> {
     Array1::zeros(size)
 }
+#[test]
 fn test_bias_initialization() {
     let size = 5;
diff --git a/src/layers/normalization.rs b/src/layers/normalization.rs
index 5c6994d..fc5d70f 100644
--- a/src/layers/normalization.rs
+++ b/src/layers/normalization.rs
@@ -1,3 +1,5 @@
+#![allow(unused_imports)]
+use contracts::{ensures, requires};
 use ndarray::{Array2, Axis};
 
 /// Performs layer normalization on a 2D array (batch size x embedding size).
@@ -10,6 +12,12 @@ use ndarray::{Array2, Axis};
 ///
 /// # Returns:
 /// A 2D array of the same shape as `x` after applying Layer Normalization.
+#[requires(x.shape().len() == 2, "Input array must be 2-dimensional")]
+#[requires(gamma.shape().len() == 2 && gamma.shape()[0] == 1, "Gamma must be a 2-dimensional array with a single row")]
+#[requires(beta.shape().len() == 2 && beta.shape()[0] == 1, "Beta must be a 2-dimensional array with a single row")]
+#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
+#[ensures(ret.shape() == x.shape(), "The resulting array must have the same shape as the input array")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting array must be finite")]
 pub fn layer_norm(
     x: &Array2<f32>,
     gamma: &Array2<f32>,
@@ -19,18 +27,12 @@ pub fn layer_norm(
     // Calculate mean and variance across the features (axis=1)
     let mean = x.mean_axis(Axis(1)).unwrap();
     let variance = x.var_axis(Axis(1), 0.0);
-    //println!("Mean: {:?}", mean);
-    // println!("Variance: {:?}", variance);
     let expanded_mean = mean.insert_axis(Axis(1)); // Expands [6] to [6, 1]
     let expanded_variance = variance.insert_axis(Axis(1)); // Expands [6] to [6, 1]
-    // println!("EXPMean: {:?}", expanded_mean);
-    //println!("EXPVariance: {:?}", expanded_variance);
     // Add epsilon to expanded variance
     let normalized = (x - &expanded_mean) / (expanded_variance + epsilon).mapv(f32::sqrt);
-    // println!("Normalized {}", normalized);
-
     normalized * gamma + beta
 }
diff --git a/src/main.rs b/src/main.rs
index e9e07ea..09603bd 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -8,7 +8,7 @@ use Transformer::model::decoder::decoding;
 use Transformer::model::embedding::Embedding;
 use Transformer::model::encoder::encoding;
 use Transformer::model::transformer_model::transformer_model;
-use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, INPUT_SIZE, OUTPUT_SIZE};
+use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, EMBEDDING_SIZE, INPUT_SIZE, OUTPUT_SIZE};
 
 fn main() {
     println!("runs successfully!");
@@ -40,9 +40,10 @@ fn main() {
     let embeddings = embedding.forward(tokens.clone());
     // Convert embeddings to Array3 (batch_size, seq_length, embed_size)
-    let input_tensor = Array3::from_shape_fn((1, tokens.len(), 12), |(batch, seq, _)| {
-        embeddings[[seq, batch]]
-    });
+    let input_tensor = Array3::from_shape_fn(
+        (BATCH_SIZE, tokens.len(), EMBEDDING_SIZE),
+        |(batch, seq, _)| embeddings[[seq, batch]],
+    );
     println!("INPUT : {}", input_tensor.clone());
     // Initialize gamma and beta for layer normalization
diff --git a/src/math/linear_algebra.rs b/src/math/linear_algebra.rs
index 8a01a01..3764c01 100644
--- a/src/math/linear_algebra.rs
+++ b/src/math/linear_algebra.rs
@@ -1,3 +1,5 @@
+#![allow(warnings)]
+use contracts::{ensures, requires};
 use ndarray::linalg::general_mat_mul;
 use ndarray::{s, Array1, Array2, Array3};
 
@@ -9,13 +11,17 @@ use ndarray::{s, Array1, Array2, Array3};
 ///
 /// # Returns
 /// An `Array2` representing the result of the matrix multiplication.
+#[requires(a.ncols() == b.nrows(), "Matrix dimensions are incompatible for multiplication.")]
+#[ensures(ret.is_ok(), "Matrix multiplication should be successful")]
+#[ensures(ret.as_ref().unwrap().nrows() > 0, "The resulting matrix must have more than 0 rows.")]
+#[ensures(ret.as_ref().unwrap().ncols() > 0, "The resulting matrix must have more than 0 columns.")]
 pub fn matmul(a: &Array2<f32>, b: &Array2<f32>) -> Result<Array2<f32>, &'static str> {
     if a.ncols() != b.nrows() {
         return Err("Matrix dimensions are incompatible for multiplication.");
     }
-    let mut result = Array2::<f32>::zeros((a.nrows(), b.ncols()));
-    general_mat_mul(1.0, a, b, 0.0, &mut result);
-    Ok(result)
+    let mut ret = Array2::<f32>::zeros((a.nrows(), b.ncols()));
+    general_mat_mul(1.0, a, b, 0.0, &mut ret);
+    Ok(ret)
 }
 
 pub fn dotproduct(a: &Array1<f32>, b: &Array1<f32>) -> f32 {
@@ -34,6 +40,12 @@ pub fn dotproduct(a: &Array1<f32>, b: &Array1<f32>) -> f32 {
 /// # Panics:
 /// - If the batch sizes of `a` and `b` don't match.
 /// - If the inner dimensions (`k` in `a` and `b`) don't align for matrix multiplication.
+#[requires(a.shape().len() == 3, "Input tensor a must have 3 dimensions")]
+#[requires(b.shape().len() == 3, "Input tensor b must have 3 dimensions")]
+#[requires(a.shape()[0] == b.shape()[0], "Batch sizes must match")]
+#[requires(a.shape()[2] == b.shape()[1], "Inner dimensions must align for matrix multiplication")]
+#[ensures(ret.shape().len() == 3, "The resulting tensor must have 3 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn tensor_product(a: &Array3<f32>, b: &Array3<f32>) -> Array3<f32> {
     // Check that batch sizes match and if dimension align
     assert_eq!(a.shape()[0], b.shape()[0], "Batch sizes must match");
@@ -45,20 +57,21 @@ pub fn tensor_product(a: &Array3<f32>, b: &Array3<f32>) -> Array3<f32> {
     // Initialize a 3D tensor for the result, filled with zeros.
     // Its shape corresponds to (batch_size, m, n).
-    let mut result = Array3::<f32>::zeros((batch_size, m, n));
+    let mut ret = Array3::<f32>::zeros((batch_size, m, n));
     for i in 0..batch_size {
         // - `s![i, .., ..]` selects the `i`th matrix (2D slice) in the batch.
         let a_slice = a.slice(s![i, .., ..]);
         let b_slice = b.slice(s![i, .., ..]);
-        let mut result_slice = result.slice_mut(s![i, .., ..]); // Mutable slice of the result matrix for this batch.
+        let mut ret_slice = ret.slice_mut(s![i, .., ..]); // Mutable slice of the result matrix for this batch.
-        general_mat_mul(1.0, &a_slice, &b_slice, 0.0, &mut result_slice);
+        general_mat_mul(1.0, &a_slice, &b_slice, 0.0, &mut ret_slice);
     }
-    result
+    ret
 }
+
 /// Applies a linear projection to a 3D tensor using a weight matrix.
 ///
 /// # Arguments
 ///
 /// # Returns
 /// A new 3D tensor with the projection applied (e.g., [batch, seq_len, output_dim]).
+#[requires(x.shape().len() == 3, "Input tensor x must have 3 dimensions")]
+#[requires(w.shape().len() == 2, "Weight matrix w must have 2 dimensions")]
+#[requires(x.shape()[2] == w.shape()[0], "Input feature size must match the weight matrix's rows")]
+#[ensures(ret.shape().len() == 3, "The resulting tensor must have 3 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn apply_projection(x: &Array3<f32>, w: &Array2<f32>) -> Array3<f32> {
     let batch_size = x.shape()[0];
     let seq_len = x.shape()[1];
@@ -74,21 +92,31 @@ pub fn apply_projection(x: &Array3<f32>, w: &Array2<f32>) -> Array3<f32> {
     assert_eq!(d_model, w.shape()[0]);
     let d_k = w.shape()[1]; // Output dimension (head dimension)
-    // Initialize the result tensor with shape (batch_size, seq_len, d_k)
-    let mut result = Array3::<f32>::zeros((batch_size, seq_len, d_k));
+    // Initialize the ret tensor with shape (batch_size, seq_len, d_k)
+    let mut ret = Array3::<f32>::zeros((batch_size, seq_len, d_k));
     // Perform matrix multiplication for each batch
     for i in 0..batch_size {
         let x_slice = x.slice(s![i, .., ..]); // Slice the i-th batch (shape: (seq_len, d_model))
         let mul = matmul(&x_slice.to_owned(), w); // Perform matrix multiplication
         if mul.is_ok() {
-            result.slice_mut(s![i, .., ..]).assign(&mul.unwrap());
+            ret.slice_mut(s![i, .., ..]).assign(&mul.unwrap());
         }
     }
-    result
+    ret
 }
+/// Flattens a 3D array into a 2D array.
+///
+/// # Parameters
+/// - `batch`: A 3D tensor of shape (batch_size, seq_length, embed_size).
+///
+/// # Returns
+/// A 2D tensor of shape (batch_size * seq_length, embed_size).
+#[requires(batch.shape().len() == 3, "Input tensor must have 3 dimensions")]
+#[ensures(ret.shape().len() == 2, "The resulting tensor must have 2 dimensions.")]
+#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting tensor must be finite.")]
 pub fn flatten_3d_array(batch: Array3<f32>) -> Array2<f32> {
     let (batch_size, seq_length, embed_size) = batch.dim();
     batch
diff --git a/src/math/positional_encoding.rs b/src/math/positional_encoding.rs
index 190466d..c33c213 100644
--- a/src/math/positional_encoding.rs
+++ b/src/math/positional_encoding.rs
@@ -1,3 +1,6 @@
+#![allow(warnings)]
+use contracts::{ensures, requires};
+
 /// Computes the sinusoidal positional encoding for a given position and dimension.
 ///
 /// This encoding is used in Transformer models to represent token positions
@@ -10,10 +13,12 @@
 ///
 /// # Returns
 /// The positional encoding value (as `f32`).
+#[requires(embedding_size > 0, "Embedding size must be greater than 0")]
+#[ensures(ret.is_finite(), "The resulting value must be finite")]
 pub fn sinusoidal_pos_encoding(pos: usize, index: usize, embedding_size: usize) -> f32 {
     if pos == 0 {
         return 0.0;
-    };
+    }
     let divisor = 10000f32.powf(2.0 * (index as f32 / embedding_size as f32)); // 100000^(2*i / embedding size)
     if index % 2 == 0 {
diff --git a/src/model/decoder.rs b/src/model/decoder.rs
index bfd1e23..0c504da 100644
--- a/src/model/decoder.rs
+++ b/src/model/decoder.rs
@@ -1,12 +1,22 @@
 #![allow(warnings)]
 use crate::attention::multihead_attention::multi_head_attention;
-use crate::attention::softmax::softmax_3d;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
 use crate::model::encoder::encoding;
+use contracts::requires;
 use ndarray::{array, Array2, Array3};
 use std::ops::Add;
 
+#[requires(input.shape().len() == 3, "Input tensor must have 3 dimensions (batch_size, seq_length, d_model)")]
+#[requires(encoder_output.shape().len() == 3, "Encoder output tensor must have 3 dimensions (batch_size, seq_length, d_model)")]
+#[requires(input.shape() == encoder_output.shape(), "Input tensor and encoder output tensor must have the same shape")]
+#[requires(input.shape()[2] == gamma.shape()[1], "Gamma dimensions do not match input feature size")]
+#[requires(gamma.shape()[0] == 1, "Gamma must have exactly one row")]
+#[requires(input.shape()[2] == beta.shape()[1], "Beta dimensions do not match input feature size")]
+#[requires(beta.shape()[0] == 1, "Beta must have exactly one row")]
+#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
+#[requires(feed_forward_layer.is_initialized(), "Feed-forward layer is not properly initialized")]
+#[requires(input.shape()[1] > 0, "Sequence length must be greater than zero")]
 pub fn decoding(
     input: Array3<f32>, // Input tensor (usually from the previous decoder layer or initial input)
     encoder_output: Array3<f32>, // Encoder output (for the encoder-decoder attention)
@@ -87,6 +97,7 @@ pub fn decoding(
     ff_norm // decoder ouput
 }
+
 #[test]
 fn test_decoding() {
     // Dummy input tensor (batch_size = 2, seq_length = 4, d_model = 4)
@@ -121,7 +132,6 @@ fn test_decoding() {
     );
     // Call the decoding function
-
     let output = decoding(input, enc_out, gamma, beta, epsilon, &feed_forward_layer);
     // Assert that the output has the correct shape
diff --git a/src/model/encoder.rs b/src/model/encoder.rs
index c8ed88f..0d036ba 100644
--- a/src/model/encoder.rs
+++ b/src/model/encoder.rs
@@ -2,6 +2,8 @@
 use crate::attention::multihead_attention::multi_head_attention;
 use crate::layers::feedforward_layer::FeedForwardLayer;
 use crate::layers::normalization::layer_norm;
+use crate::settings::{BATCH_SIZE, EMBEDDING_SIZE};
+use contracts::{ensures, requires};
 use ndarray::{array, Array2, Array3};
 use std::ops::Add;
@@ -16,6 +18,17 @@ use std::ops::Add;
 ///
 /// # Returns:
 /// - Output tensor of shape (batch_size, seq_length, d_model) after passing through the encoder layer.
+
+#[requires(input.shape().len() == 3, "Input tensor must have 3 dimensions (batch_size, seq_length, embed_size)")]
+#[requires(input.shape()[2] == gamma.shape()[1], "Gamma dimensions do not match input feature size")]
+#[requires(gamma.shape()[0] == 1, "Gamma must have exactly one row")]
+#[requires(input.shape()[2] == beta.shape()[1], "Beta dimensions do not match input feature size")]
+#[requires(beta.shape()[0] == 1, "Beta must have exactly one row")]
+#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
+#[requires(feed_forward_layer.is_initialized(), "Feed-forward layer is not properly initialized")]
+#[requires(input.shape()[1] > 0, "Sequence length must be greater than zero")]
+#[requires(feed_forward_layer.input_size == input.shape()[2], "Feed-forward layer input size must match embedding size")]
+#[requires(feed_forward_layer.output_size == input.shape()[2], "Feed-forward layer output size must match embedding size")]
 pub fn encoding(
     input: Array3<f32>, // Input tensor
     gamma: Array2<f32>, // Scale parameter for layer norm
@@ -26,6 +39,24 @@ pub fn encoding(
     let batch_size = input.shape()[0];
     let seq_length = input.shape()[1];
     let d_model = input.shape()[2];
+    assert_eq!(
+        gamma.shape()[1],
+        d_model,
+        "Gamma dimensions do not match input feature size"
+    );
+    assert_eq!(gamma.shape()[0], 1, "Gamma must have exactly one row");
+    assert_eq!(
+        beta.shape()[1],
+        d_model,
+        "Beta dimensions do not match input feature size"
+    );
+    assert_eq!(beta.shape()[0], 1, "Beta must have exactly one row");
+    assert!(epsilon > 0.0, "Epsilon must be positive and non-zero");
+    assert!(
+        feed_forward_layer.is_initialized(),
+        "Feed-forward layer is not properly initialized"
+    );
+    assert!(seq_length > 0, "Sequence length must be greater than zero");
     // Multi-Head Attention
     let dummy_learned_matrices = Array2::<f32>::ones((d_model, d_model)); // Replace with actual learned parameters
@@ -41,7 +72,6 @@ pub fn encoding(
         dummy_learned_matrices.clone(), // W_O
     );
-    //println!("Attention1 :{}", attention_output);
     // Add & Normalize (Residual Connection + Layer Norm)
     let attention_residual = attention_output.add(&input); // Residual connection
     let reshaped_attention = attention_residual
@@ -60,8 +90,7 @@ pub fn encoding(
     // Feed-Forward Network
     let feed_forward_output = feed_forward_layer.forward(attention_norm.clone());
-    //println!("feed_forward_output :{:?}", feed_forward_output);
-    // Add & Normalize (Residual Connection + Layer Norm)
+    // Add & Normalize (Residual Connection + Layer Norm)
     let feed_forward_residual = feed_forward_output.add(&attention_norm); // Residual connection
     let reshaped_ff_attention = feed_forward_residual
         .to_shape((batch_size * seq_length, d_model)) // Flatten to 2D
@@ -76,8 +105,15 @@ pub fn encoding(
         .unwrap()
         .to_owned();
+    assert_eq!(
+        output.shape(),
+        input.shape(),
+        "Output tensor must have the same shape as the input tensor"
+    );
+
     output
 }
+
 #[test]
 fn test_encoding() {
     // Dummy input tensor (batch_size = 2, seq_length = 3, d_model = 4)
@@ -103,7 +139,7 @@ fn test_encoding() {
     // Call the encoding function
     let epsilon = 1e-6;
-    let output = encoding(input, gamma, beta, epsilon, &feed_forward_layer);
+    let output = encoding(input.clone(), gamma, beta, epsilon, &feed_forward_layer);
     // Assert that the output has the correct shape
     assert_eq!(output.shape(), &[2, 3, 4]);
diff --git a/tests/linear_algebra_test.rs b/tests/linear_algebra_test.rs
index b02daa0..8eada2a 100644
--- a/tests/linear_algebra_test.rs
+++ b/tests/linear_algebra_test.rs
@@ -21,22 +21,6 @@ fn test_matmul_valid_input() {
     }
 }
-#[test]
-fn test_matmul_invalid_input() {
-    // Arrange: Define input matrices with mismatched dimensions
-    let a = array![[1.0, 2.0], [3.0, 4.0]]; // 2x2 matrix
-    let b = array![[5.0, 6.0]]; // 1x2 matrix (mismatched dimensions)
-
-    // Act: Perform the multiplication, expecting an error
-    let result = matmul(&a, &b);
-
-    // Assert: Ensure the result is an error due to incompatible dimensions
-    assert_eq!(
-        result,
-        Err("Matrix dimensions are incompatible for multiplication.")
-    );
-}
-
 #[test]
 fn test_dotproduct() {
     let a: Array1<f32> = array![1.0, 2.0, 3.0];