/*
 * Needleman-Wunsch global sequence alignment (demo program).
 *
 * Path recorded in the patch header this source was recovered from:
 *   String Algorithms/Needleman-Wunsch Algorithm/Program.c
 */
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MATCH 1        // Score for a match
#define MISMATCH (-1)  // Score for a mismatch
#define GAP (-2)       // Penalty for a gap

// Returns the largest of three values.
int max(int a, int b, int c) {
    int best = a;
    if (b > best) {
        best = b;
    }
    if (c > best) {
        best = c;
    }
    return best;
}

// Reverses the first n characters of s in place.
static void reverseChars(char *s, size_t n) {
    for (size_t k = 0; k < n / 2; k++) {
        char temp = s[k];
        s[k] = s[n - k - 1];
        s[n - k - 1] = temp;
    }
}

/*
 * Globally aligns seq1 and seq2 with the Needleman-Wunsch algorithm and
 * prints the score matrix followed by one optimal alignment to stdout.
 *
 * seq1, seq2: NUL-terminated sequences; they are only read. Empty strings
 *             are accepted and align entirely against gaps.
 *
 * On invalid input (NULL, or a sequence too long for the scores to fit in
 * an int) or on allocation failure, a message is written to stderr and
 * nothing is printed to stdout.
 *
 * When several alignments share the optimal score, the traceback prefers a
 * diagonal step, then a gap in seq2, then a gap in seq1.
 */
void needlemanWunsch(const char *seq1, const char *seq2) {
    if (seq1 == NULL || seq2 == NULL) {
        fprintf(stderr, "needlemanWunsch: sequences must not be NULL\n");
        return;
    }

    const size_t len1 = strlen(seq1);
    const size_t len2 = strlen(seq2);

    // Scores never drop below (len1 + len2) * GAP. Bounding each length by
    // INT_MAX / 4 keeps that product inside int for the current |GAP| of 2.
    const size_t maxLen = (size_t)INT_MAX / 4;
    if (len1 > maxLen || len2 > maxLen) {
        fprintf(stderr, "needlemanWunsch: sequences are too long\n");
        return;
    }

    const size_t rows = len1 + 1;
    const size_t cols = len2 + 1;

    // The matrix lives on the heap as a flat row-major array: a stack VLA of
    // rows * cols ints overflows the stack for moderately long sequences.
    // calloc checks the rows * (cols * sizeof(int)) product for overflow.
    int *scoreMatrix = calloc(rows, cols * sizeof *scoreMatrix);
    // An alignment has at most len1 + len2 columns, plus the terminator.
    char *alignedSeq1 = malloc(len1 + len2 + 1);
    char *alignedSeq2 = malloc(len1 + len2 + 1);
    if (scoreMatrix == NULL || alignedSeq1 == NULL || alignedSeq2 == NULL) {
        fprintf(stderr, "needlemanWunsch: out of memory\n");
        free(scoreMatrix);
        free(alignedSeq1);
        free(alignedSeq2);
        return;
    }

    // First column and first row: aligning a prefix against nothing but gaps.
    for (size_t i = 0; i <= len1; i++) {
        scoreMatrix[i * cols] = (int)i * GAP;
    }
    for (size_t j = 0; j <= len2; j++) {
        scoreMatrix[j] = (int)j * GAP;
    }

    // Fill the score matrix row by row.
    for (size_t i = 1; i <= len1; i++) {
        const int *prevRow = scoreMatrix + (i - 1) * cols;
        int *currRow = scoreMatrix + i * cols;
        for (size_t j = 1; j <= len2; j++) {
            int pairScore = (seq1[i - 1] == seq2[j - 1]) ? MATCH : MISMATCH;
            currRow[j] = max(
                prevRow[j - 1] + pairScore,  // Match/Mismatch
                prevRow[j] + GAP,            // Deletion (gap in seq2)
                currRow[j - 1] + GAP         // Insertion (gap in seq1)
            );
        }
    }

    // Print the final score matrix.
    printf("Score Matrix:\n");
    for (size_t i = 0; i <= len1; i++) {
        for (size_t j = 0; j <= len2; j++) {
            printf("%3d ", scoreMatrix[i * cols + j]);
        }
        printf("\n");
    }

    // Traceback from the bottom-right cell; the alignment is built backwards.
    printf("\nOptimal Alignment:\n");
    size_t i = len1;
    size_t j = len2;
    size_t idx = 0;

    while (i > 0 || j > 0) {
        const int here = scoreMatrix[i * cols + j];

        if (i > 0 && j > 0 &&
            here == scoreMatrix[(i - 1) * cols + (j - 1)] +
                        ((seq1[i - 1] == seq2[j - 1]) ? MATCH : MISMATCH)) {
            alignedSeq1[idx] = seq1[i - 1];
            alignedSeq2[idx] = seq2[j - 1];
            i--;
            j--;
        } else if (i > 0 && here == scoreMatrix[(i - 1) * cols + j] + GAP) {
            alignedSeq1[idx] = seq1[i - 1];
            alignedSeq2[idx] = '-';
            i--;
        } else {
            // Only reachable with j > 0: for j == 0 the first-column
            // initialisation always satisfies the branch above.
            alignedSeq1[idx] = '-';
            alignedSeq2[idx] = seq2[j - 1];
            j--;
        }
        idx++;
    }

    alignedSeq1[idx] = '\0';
    alignedSeq2[idx] = '\0';

    // The traceback produced the columns last-to-first; restore the order.
    reverseChars(alignedSeq1, idx);
    reverseChars(alignedSeq2, idx);

    printf("Seq1: %s\n", alignedSeq1);
    printf("Seq2: %s\n", alignedSeq2);

    free(alignedSeq2);
    free(alignedSeq1);
    free(scoreMatrix);
}

int main(void) {
    const char seq1[] = "GATTACA";
    const char seq2[] = "GCATGCU";

    printf("Sequence 1: %s\n", seq1);
    printf("Sequence 2: %s\n", seq2);

    needlemanWunsch(seq1, seq2);

    return 0;
}

/*
 * Opening of the accompanying README, which shared these lines with the
 * program in the recovered patch
 * (String Algorithms/Needleman-Wunsch Algorithm/README.md):
 *
 * # Needleman-Wunsch Algorithm
 *
 * ## Description
 *
 * The Needleman-Wunsch algorithm is a dynamic programming algorithm used for
 * sequence alignment. It is particularly useful for aligning nucleotide or
 * protein sequences in bioinformatics but can also be applied to string
 * matching in computer science.
 *
 * ### Problem Definition
 *
 * Given:
 * - Two sequences (or strings) **S1** and **S2**
 *
 * Objective:
 * - Align the two sequences by introducing gaps (if necessary) to maximize
 *   the match score while minimizing the penalty for mismatches and gaps.
 *
 * ### Algorithm Overview
 *
 * 1. (the list continues on the following line of this file)
 */
**Initialize Score Matrix**:
   - Create a score matrix with dimensions `(len(S1) + 1)` x `(len(S2) + 1)` and initialize the first row and column with cumulative gap penalties (`i * GAP`).
2. **Matrix Filling**:
   - Fill the matrix by choosing the best score for each cell from its three already-computed neighbours (diagonal, above, left), using:
     - Match/Mismatch score
     - Insertion/Deletion (gap) score
3. **Traceback**:
   - Trace back through the matrix from the bottom-right cell to determine the optimal alignment of the two sequences.

### Key Features

- Global alignment algorithm
- Handles mismatches and gaps with penalties
- Constructs an optimal alignment based on a scoring system
- Suitable for aligning sequences of varying lengths

### Time and Space Complexity

- **Time Complexity**: O(m * n), where `m` is the length of sequence S1 and `n` is the length of sequence S2. This is due to the need to fill in a score matrix of size `(m + 1) x (n + 1)`.
- **Space Complexity**: O(m * n), due to the storage required for the score matrix.

### Scoring

- **Match**: +1 (reward for matching characters)
- **Mismatch**: -1 (penalty for different characters)
- **Gap**: -2 (penalty for inserting gaps)

## Implementation

The implementation in C demonstrates the Needleman-Wunsch algorithm for aligning two sequences. It includes:

1. **Score Matrix Calculation**: Initializes and fills the score matrix based on matches, mismatches, and gaps.
2. **Traceback**: Recovers the optimal alignment by tracing back through the score matrix.
3. **Alignment Output**: Prints the aligned sequences.

## Usage

1. **Compile**: Use a C compiler to compile the program.
   ```bash
   gcc Program.c -o needleman_wunsch
   ```

2. **Run**: Execute the program; the two sequences are predefined in `main`.
   ```bash
   ./needleman_wunsch
   ```

The example in the `main` function aligns the sequences "GATTACA" and "GCATGCU" and prints the score matrix followed by the optimal alignment.