Merge pull request #11 from JakubSchwenkbeck/refactor/design-by-contract
Implement Design by Contract
JakubSchwenkbeck authored Dec 20, 2024
2 parents 23f9866 + 02d7144 commit 55fa876
Showing 11 changed files with 205 additions and 80 deletions.
31 changes: 27 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default.

2 changes: 2 additions & 0 deletions Cargo.toml
@@ -11,3 +11,5 @@ description = "A Rust implementation of a Transformer-based model for NLP tasks.
ndarray = "0.16.1"
rand = "0.9.0-beta.1"
regex = "1.11.1"
contracts = "0.6.3"
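
The new dependency is the `contracts` crate, which provides the `#[requires]` and `#[ensures]` attribute macros used throughout this PR. A quick illustrative sketch (not part of this commit) of how they behave: `#[requires]` checks a precondition before the function body runs, `#[ensures]` checks a postcondition with the return value bound to `ret`, and a violated condition panics with the given message. The `checked_div` function below is hypothetical.

use contracts::{ensures, requires};

#[requires(divisor != 0, "divisor must be non-zero")]
#[ensures(ret * divisor <= dividend, "quotient must not overshoot the dividend")]
fn checked_div(dividend: u32, divisor: u32) -> u32 {
    dividend / divisor
}

fn main() {
    assert_eq!(checked_div(10, 3), 3);
    // checked_div(10, 0) would panic with "divisor must be non-zero".
}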

15 changes: 13 additions & 2 deletions src/attention/softmax.rs
@@ -1,20 +1,30 @@
#![allow(unused_imports)] // {array} import is not recognized as it is used in #[test]
#![allow(unused_imports)]

use contracts::{ensures, requires};
// {array} import is not recognized as it is used in #[test]
use ndarray::{array, s, Array, Array1, Array2, Array3, ArrayView1, Axis};

//noinspection ALL
#[requires(!vec.is_empty(), "Input vector must not be empty.")]
#[ensures(ret.len() == vec.len(), "Output vector must have the same length as the input vector.")]
pub fn softmax_vector(vec: ArrayView1<f32>) -> Array1<f32> {
let max = vec.fold(f32::NEG_INFINITY, |a, &b| a.max(b)); // Stabilize by subtracting max
let exp_vec = vec.mapv(|x| (x - max).exp());
let sum: f32 = exp_vec.sum();
exp_vec / sum
}
#[requires(!vec.is_empty(), "Input vector must not be empty.")]
pub fn softmax_vec(vec: Vec<f32>) -> Array1<f32> {
let array = Array1::from(vec); // Convert Vec<f32> to Array1<f32>
softmax_vector(array.view())
}

#[requires(mat.shape().len() == 2, "Input matrix must be 2-dimensional.")]
pub fn softmax_matrix(mat: &Array2<f32>) -> Array2<f32> {
convert_to_array2(mat.map_axis(Axis(1), softmax_vector))
}

#[requires(attention_scores.shape().len() == 3, "Input tensor must be 3-dimensional.")]
pub fn softmax_3d(attention_scores: &Array3<f32>) -> Array3<f32> {
let batch_size = attention_scores.shape()[0];
let mut softmax_result = Array3::<f32>::zeros(attention_scores.raw_dim());
@@ -33,7 +43,8 @@ pub fn softmax_3d(attention_scores: &Array3<f32>) -> Array3<f32> {

softmax_result
}

#[requires(!array1d.is_empty(), "Input array must not be empty.")]
#[requires(array1d.iter().all(|row| !row.is_empty()), "All rows must be non-empty.")]
fn convert_to_array2(array1d: Array<Array1<f32>, ndarray::Ix1>) -> Array2<f32> {
// Check if the input array is non-empty
assert!(!array1d.is_empty(), "Input array must not be empty.");
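
With these attributes, malformed input fails fast instead of propagating NaNs. A minimal usage sketch, assuming the module is exposed as `Transformer::attention::softmax` (the export path is inferred from the file location and is not shown in this diff):

use ndarray::array;
use Transformer::attention::softmax::{softmax_vec, softmax_vector}; // assumed path

fn main() {
    // Valid input: the #[ensures] clause guarantees output length == input length.
    let probs = softmax_vector(array![1.0_f32, 2.0, 3.0].view());
    assert_eq!(probs.len(), 3);
    assert!((probs.sum() - 1.0).abs() < 1e-6);

    // An empty vector violates the #[requires] clause and panics:
    // softmax_vec(vec![]); // "Input vector must not be empty."
}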
83 changes: 53 additions & 30 deletions src/layers/feedforward_layer.rs
@@ -1,23 +1,32 @@
#![allow(dead_code)]
#![allow(unused_imports)]

use crate::activation::activation_functions::gelu;
use crate::settings::HIDDEN_SIZE;
use contracts::requires;
use ndarray::{array, Array1, Array2, Array3};
use rand::Rng;
use std::ops::Add;

pub struct FeedForwardLayer {
weights1: Array2<f32>,
bias1: Array1<f32>, // weights and biases for first linear layer

bias1: Array1<f32>, // Weights and biases for the first linear layer
weights2: Array2<f32>,
bias2: Array1<f32>, // weights and biases for second linear layer

dropout_rate: f32, // Dropout rate
bias2: Array1<f32>, // Weights and biases for the second linear layer
dropout_rate: f32, // Dropout rate
pub(crate) input_size: usize, // Input feature size
pub(crate) output_size: usize, // Output feature size
initialized: bool,
}

impl FeedForwardLayer {
// init with random values
/// Initializes the FeedForwardLayer with random weights and biases.
///
/// # Parameters:
/// - `_batch_size`: Batch size (not stored, used for verification if needed).
/// - `input_size`: Number of input features (d_model).
/// - `output_size`: Number of output features (d_model).
/// - `dropout_rate`: Probability of dropping a unit in dropout (0.0 to 1.0).
#[requires(input_size > 0, "Input size must be greater than 0")]
#[requires(output_size > 0, "Output size must be greater than 0")]
#[requires((0.0..=1.0).contains(&dropout_rate), "Dropout rate must be in range [0.0, 1.0]")]
pub fn new(
_batch_size: usize,
input_size: usize,
@@ -32,15 +41,35 @@ impl FeedForwardLayer {

let weights2 = he_initialization(hidden_size, output_size); // Shape: (hidden_size, output_size)
let bias2 = bias_initialization(output_size); // Shape: (output_size,)

FeedForwardLayer {
weights1,
bias1,
weights2,
bias2,
dropout_rate,
input_size,
output_size,
initialized: true,
}
}

/// Verifies that the layer is properly initialized.
pub fn is_initialized(&self) -> bool {
self.initialized
}

/// Performs a forward pass in training mode.
///
/// # Parameters:
/// - `input`: 2D input tensor of shape (batch_size * seq_length, input_size).
/// - `train`: Whether to apply dropout.
///
/// # Returns:
/// - Output tensor of shape (batch_size * seq_length, output_size).
#[requires(input.shape()[1] == self.input_size, "Input feature size must match layer's input size")]
#[requires(input.shape()[0] > 0, "Input tensor must not be empty")]
#[requires(input.shape()[1] == self.input_size, "Input tensor's second dimension must match input_size")]
pub fn forward_t(&self, input: &Array2<f32>, train: bool) -> Array2<f32> {
// First linear layer
let first_dot = input.dot(&self.weights1);
@@ -57,56 +86,50 @@ impl FeedForwardLayer {
// Second linear layer
first_activation.dot(&self.weights2).add(&self.bias2)
}
/// Forward pass through the feed-forward layer.

/// Performs a forward pass in evaluation mode.
///
/// # Parameters:
/// - `x`: Input tensor of shape (batch_size, seq_length, d_model).
/// - `x`: Input tensor of shape (batch_size, seq_length, input_size).
///
/// # Returns:
/// - Output tensor of shape (batch_size, seq_length, d_model).
/// - Output tensor of shape (batch_size, seq_length, output_size).
#[requires(x.shape()[2] == self.input_size, "Input feature size must match layer's input size")]
#[requires(x.shape()[0] > 0, "Input tensor must not be empty")]
#[requires(x.shape()[2] == self.input_size, "Input tensor's third dimension must match input_size")]
pub fn forward(&self, x: Array3<f32>) -> Array3<f32> {
let batch_size = x.shape()[0];
let seq_length = x.shape()[1];
let d_model = x.shape()[2];

// Flatten the input to 2D: (batch_size * seq_length, d_model)
let reshaped_x = x.to_shape((batch_size * seq_length, d_model));

match reshaped_x {
Ok(valid_reshaped_x) => {
let dot = valid_reshaped_x.dot(&self.weights1);

let add = dot + &self.bias1;

// First linear layer + gelu

// First linear layer + GELU activation
let hidden = gelu(&add.to_owned());

let dot2 = hidden.dot(&self.weights2);

// Second linear layer
let output = dot2 + &self.bias2;

// Reshape back to 3D: (batch_size, seq_length, d_model)
// Reshape back to 3D
output
.to_shape((batch_size, seq_length, d_model))
.to_shape((batch_size, seq_length, self.output_size))
.unwrap()
.to_owned()
// Use the `hidden` result here for further processing.
}
Err(ref e) => {
eprintln!("Shape error: {}", e);
eprintln!(
"Shape of input : {:?} -=- Shape of weights : {:?} ",
reshaped_x.unwrap().shape(),
seq_length
);
// Or return unchanged?
x
x // Fallback to the original input on failure
}
}
}

/// Applies dropout to the input.
fn apply_dropout(&self, input: &Array2<f32>) -> Array2<f32> {
let mut rng = rand::rng();
input.map(|&x| {
@@ -119,22 +142,22 @@ impl FeedForwardLayer {
}
}

/// He initialization function.
fn he_initialization(input_size: usize, output_size: usize) -> Array2<f32> {
let mut rng = rand::rng();
// He initialization: scale by sqrt(2 / input_size)
let scale = (2.0 / input_size as f32).sqrt();
let values: Vec<f32> = (0..(input_size * output_size))
.map(|_| rng.random_range(-scale..scale))
.collect();

// Create an Array2 from the values vector
Array2::from_shape_vec((input_size, output_size), values).unwrap()
}

/// Initializes bias vectors with zeros.
fn bias_initialization(size: usize) -> Array1<f32> {
Array1::zeros(size)
}

#[test]
fn test_bias_initialization() {
let size = 5;

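
A usage sketch of the new constructor and forward-pass contracts, assuming the layer is exported as `Transformer::layers::feedforward_layer::FeedForwardLayer` (path inferred from the file location) and that `new` takes `(batch_size, input_size, output_size, dropout_rate)` as documented above:

use ndarray::Array2;
use Transformer::layers::feedforward_layer::FeedForwardLayer; // assumed path

fn main() {
    let layer = FeedForwardLayer::new(1, 8, 8, 0.1);
    assert!(layer.is_initialized());

    // Shape (batch_size * seq_length, input_size) satisfies the forward_t preconditions.
    let input = Array2::<f32>::zeros((4, 8));
    let output = layer.forward_t(&input, false);
    assert_eq!(output.shape(), &[4, 8]);

    // These calls would violate #[requires] clauses and panic:
    // FeedForwardLayer::new(1, 0, 8, 0.1); // input_size must be > 0
    // FeedForwardLayer::new(1, 8, 8, 1.5); // dropout rate outside [0.0, 1.0]
}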
14 changes: 8 additions & 6 deletions src/layers/normalization.rs
@@ -1,3 +1,5 @@
#![allow(unused_imports)]
use contracts::{ensures, requires};
use ndarray::{Array2, Axis};

/// Performs layer normalization on a 2D array (batch size x embedding size).
@@ -10,6 +12,12 @@ use ndarray::{Array2, Axis};
///
/// # Returns:
/// A 2D array of the same shape as `x` after applying Layer Normalization.
#[requires(x.shape().len() == 2, "Input array must be 2-dimensional")]
#[requires(gamma.shape().len() == 2 && gamma.shape()[0] == 1, "Gamma must be a 2-dimensional array with a single row")]
#[requires(beta.shape().len() == 2 && beta.shape()[0] == 1, "Beta must be a 2-dimensional array with a single row")]
#[requires(epsilon > 0.0, "Epsilon must be positive and non-zero")]
#[ensures(ret.shape() == x.shape(), "The resulting array must have the same shape as the input array")]
#[ensures(ret.iter().all(|&x| x.is_finite()), "All elements in the resulting array must be finite")]
pub fn layer_norm(
x: &Array2<f32>,
gamma: &Array2<f32>,
@@ -19,18 +27,12 @@ pub fn layer_norm(
// Calculate mean and variance across the features (axis=1)
let mean = x.mean_axis(Axis(1)).unwrap();
let variance = x.var_axis(Axis(1), 0.0);
//println!("Mean: {:?}", mean);
// println!("Variance: {:?}", variance);

let expanded_mean = mean.insert_axis(Axis(1)); // Expands [6] to [6, 1]
let expanded_variance = variance.insert_axis(Axis(1)); // Expands [6] to [6, 1]
// println!("EXPMean: {:?}", expanded_mean);
//println!("EXPVariance: {:?}", expanded_variance);

// Add epsilon to expanded variance
let normalized = (x - &expanded_mean) / (expanded_variance + epsilon).mapv(f32::sqrt);

// println!("Normalized {}", normalized);

normalized * gamma + beta
}
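
A small sketch exercising the strengthened `layer_norm` contract, assuming the function is exported as `Transformer::layers::normalization::layer_norm` (path inferred from the file location) and that the final parameter is the `epsilon: f32` named in the contracts:

use ndarray::{array, Array2};
use Transformer::layers::normalization::layer_norm; // assumed path

fn main() {
    let x: Array2<f32> = array![[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]];
    // Gamma and beta must be single-row 2D arrays per the new #[requires] clauses.
    let gamma = Array2::<f32>::ones((1, 3));
    let beta = Array2::<f32>::zeros((1, 3));

    let normed = layer_norm(&x, &gamma, &beta, 1e-5);

    // The #[ensures] clauses: same shape as the input, all values finite.
    assert_eq!(normed.shape(), x.shape());
    assert!(normed.iter().all(|v| v.is_finite()));
}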
9 changes: 5 additions & 4 deletions src/main.rs
@@ -8,7 +8,7 @@ use Transformer::model::decoder::decoding;
use Transformer::model::embedding::Embedding;
use Transformer::model::encoder::encoding;
use Transformer::model::transformer_model::transformer_model;
use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, INPUT_SIZE, OUTPUT_SIZE};
use Transformer::settings::{BATCH_SIZE, DROPOUT_RATE, EMBEDDING_SIZE, INPUT_SIZE, OUTPUT_SIZE};

fn main() {
println!("runs successfully!");
@@ -40,9 +40,10 @@ fn main() {
let embeddings = embedding.forward(tokens.clone());

// Convert embeddings to Array3 (batch_size, seq_length, embed_size)
let input_tensor = Array3::from_shape_fn((1, tokens.len(), 12), |(batch, seq, _)| {
embeddings[[seq, batch]]
});
let input_tensor = Array3::from_shape_fn(
(BATCH_SIZE, tokens.len(), EMBEDDING_SIZE),
|(batch, seq, _)| embeddings[[seq, batch]],
);

println!("INPUT : {}", input_tensor.clone());
// Initialize gamma and beta for layer normalization
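
The hard-coded shape `(1, tokens.len(), 12)` now comes from the settings constants instead. A standalone sketch of the same `Array3::from_shape_fn` pattern, with made-up stand-ins for `BATCH_SIZE` and `EMBEDDING_SIZE` and per-token embeddings indexed here by sequence position and embedding dimension:

use ndarray::{Array2, Array3};

// Illustrative stand-ins for Transformer::settings::{BATCH_SIZE, EMBEDDING_SIZE}.
const BATCH_SIZE: usize = 1;
const EMBEDDING_SIZE: usize = 12;

fn main() {
    let seq_length = 4;
    // Fake per-token embeddings of shape (seq_length, EMBEDDING_SIZE).
    let embeddings = Array2::from_shape_fn((seq_length, EMBEDDING_SIZE), |(s, e)| {
        (s * EMBEDDING_SIZE + e) as f32
    });

    // Lift into (BATCH_SIZE, seq_length, EMBEDDING_SIZE), mirroring the new main.rs code.
    let input_tensor = Array3::from_shape_fn(
        (BATCH_SIZE, seq_length, EMBEDDING_SIZE),
        |(_batch, seq, emb)| embeddings[[seq, emb]],
    );
    assert_eq!(input_tensor.shape(), &[BATCH_SIZE, seq_length, EMBEDDING_SIZE]);
}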
