-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactored and linted code, still having mismatches regarding the gradient computations
- Loading branch information
1 parent
a1ab397
commit 7127cd9
Showing 8 changed files with 93 additions and 87 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
#![allow(unused)]

//! Model hyperparameter constants, downscaled from real-application values
//! so the transformer trains quickly on modest hardware.
//! NOTE(review): values reconstructed from the "new" side of a rendered diff —
//! confirm against the repository's actual `constants.rs`.

// Numerical constants with downscaled real-application values
pub const EPSILON: f32 = 0.0001; // small value to avoid division by zero (e.g. in layer norm)

// Embedding size
pub const D_MODEL: usize = 32; // Reduced to 32 for a smaller model size

// Attention mechanism dimensions
pub const D_K: usize = 32; // Key/query dimension (same as D_V for simplicity)
pub const D_V: usize = 32; // Value dimension (same as D_K)
pub const NUM_HEADS: usize = 4; // Reduced the number of attention heads for smaller model

// Sequence and batch size
pub const SEQ_LENGTH: usize = 64; // Reduced sequence length
pub const BATCH_SIZE: usize = 1; // Reduced batch size for smaller model training

// Embedding size and dimensions
pub const EMBEDDING_SIZE: usize = D_MODEL; // Matches D_MODEL for consistency

// Input/Output sizes
pub const INPUT_SIZE: usize = D_MODEL; // Typically equals D_MODEL for transformer inputs
pub const OUTPUT_SIZE: usize = D_MODEL; // Consistent with D_MODEL for output

// Number of layers
pub const NUM_LAYERS: usize = 4; // Reduced to 4 layers for a smaller architecture

// Feedforward network dimension (FFN_DIM)
pub const FFN_DIM: usize = 128; // Smaller FFN dimension

// Hidden size (used for biases and other layer parameters)
// Defined in terms of FFN_DIM so the two cannot silently diverge
// (the original comment already promised they stay consistent).
pub const HIDDEN_SIZE: usize = FFN_DIM; // = 128, consistent with FFN_DIM

// Dropout rate and learning rate
pub const DROPOUT_RATE: f32 = 0.1; // Dropout rate for regularization
pub const LEARNING_RATE: f32 = 1e-4; // Optimizer learning rate

// Positional encoding parameters
pub const MAX_SEQ_LENGTH: usize = 128; // Maximum sequence length for positional encoding
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters