Skip to content

Commit

Permalink
Add more info to verbose and fix last column recovering
Browse files Browse the repository at this point in the history
  • Loading branch information
nicodr97 committed Dec 20, 2024
1 parent 73fd72c commit a5574d2
Showing 1 changed file with 114 additions and 65 deletions.
179 changes: 114 additions & 65 deletions source/Cleaner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,11 @@ Alignment *Cleaner::cleanByCutValueOverpass(
oldNumberKeptResidues++;
}

double percentageColumnsOriginal = ((double) oldNumberKeptResidues / alig->originalNumberOfResidues) * 100.0;
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"before gappyout method", std::to_string(oldNumberKeptResidues), std::to_string(percentageColumnsOriginal)});
percentageColumnsOriginal = ((double) newNumberKeptResidues / alig->originalNumberOfResidues) * 100.0;
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"after gappyout method clean based on gaps", std::to_string(newNumberKeptResidues), std::to_string(percentageColumnsOriginal)});

alig->numberOfResidues = oldNumberKeptResidues;
float minCoverageRatio;

Expand Down Expand Up @@ -195,6 +200,7 @@ Alignment *Cleaner::cleanByCutValueOverpass(
if (gapsInColumn[residueIndex] <= maxGaps) {
newAlig->saveResidues[residueIndex] = residueIndex;
numberColumnsToRecover--;
newNumberKeptResidues++;
} else {
break;
}
Expand All @@ -221,6 +227,7 @@ Alignment *Cleaner::cleanByCutValueOverpass(
if (gapsInColumn[residueIndex] <= maxGaps) {
newAlig->saveResidues[residueIndex] = residueIndex;
numberColumnsToRecover--;
newNumberKeptResidues++;
} else {
break;
}
Expand All @@ -232,6 +239,9 @@ Alignment *Cleaner::cleanByCutValueOverpass(
}
}

percentageColumnsOriginal = ((double) newNumberKeptResidues / alig->originalNumberOfResidues) * 100.0;
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"after gappyout method column recovering based on minimum coverage", std::to_string(newNumberKeptResidues), std::to_string(percentageColumnsOriginal)});

newAlig->Cleaning->removeSmallerBlocks(blockSize, *alig);

// Check for any additional column/sequence to be removed
Expand Down Expand Up @@ -581,68 +591,88 @@ Alignment *Cleaner::cleanStrict(int gapCut, const int *gInCol, float simCut, con

// Special case: Position 0 of newAlig
if (num > 2) {
if (rejectResiduesBuffer[0])
newAlig->saveResidues[positionResidueBuffer[0]] =
// Compare the sum of the booleans to 0. If any of the neighbours is rejected, we don't have 3 non-rejected residues.
(rejectResiduesBuffer[1] +
rejectResiduesBuffer[2]) > 0 ? -1 : positionResidueBuffer[0];
if (rejectResiduesBuffer[0]) {
// If any of the neighbours is rejected, we don't have 3 non-rejected residues.
int numNeighboursRejected = rejectResiduesBuffer[1] + rejectResiduesBuffer[2];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[0]] = positionResidueBuffer[0];
newNumberKeptResidues++;
}
}
}
// Special case: Position 1
{
// Special case: Position 1 of newAlig in case the alignment has more than 3 residues.
if (num > 3) {
if (rejectResiduesBuffer[1])
newAlig->saveResidues[positionResidueBuffer[1]] =
// Compare the sum of the booleans to 0. If any of the neighbours is rejected, we don't have 3 non-rejected residues.
(rejectResiduesBuffer[0] +
rejectResiduesBuffer[2] +
rejectResiduesBuffer[3]) > 0 ? -1 : positionResidueBuffer[1];
if (rejectResiduesBuffer[1]) {
int numNeighboursRejected = rejectResiduesBuffer[0] + rejectResiduesBuffer[2] + rejectResiduesBuffer[3];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[1]] = positionResidueBuffer[1];
newNumberKeptResidues++;
}
}
}
// Special case: Position 1 of newAlig in case the alignment has 3 residues.

// Special case: Position 1 of newAlig in case the alignment has 3 residues.
else if (num > 2) {
if (rejectResiduesBuffer[1])
newAlig->saveResidues[positionResidueBuffer[1]] =
// Compare the sum of the booleans to 0. If any of the neighbours is rejected, we don't have 3 non-rejected residues.
(rejectResiduesBuffer[0] +
rejectResiduesBuffer[2]) > 0 ? -1 : positionResidueBuffer[1];
if (rejectResiduesBuffer[1]) {
int numNeighboursRejected = rejectResiduesBuffer[0] + rejectResiduesBuffer[2];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[1]] = positionResidueBuffer[1];
newNumberKeptResidues++;
}
}
}
}

// Special case: Position 2
{
// Special case: Position 2 of newAlig in case the alignment has more than 4 residues.
if (num > 4) {
if (rejectResiduesBuffer[2])
newAlig->saveResidues[positionResidueBuffer[2]] =
// Compare the sum of the booleans to 0. As we have 4 neighbours to compare and only need 3 of them to be non-rejected,
// we can have 1 rejected residue on the neighbouring.
(rejectResiduesBuffer[0] +
rejectResiduesBuffer[1] +
rejectResiduesBuffer[3] +
rejectResiduesBuffer[4]) > 1 ? -1 : positionResidueBuffer[2];
if (rejectResiduesBuffer[2]) {
// As we have 4 neighbours to compare and only need 3 of them to be non-rejected,
// we can have 1 rejected residue on the neighbouring.
int numNeighboursRejected = rejectResiduesBuffer[0] + rejectResiduesBuffer[1] + rejectResiduesBuffer[3] + rejectResiduesBuffer[4];
bool threeOrMoreNeighboursKept = numNeighboursRejected <= 1;
if (threeOrMoreNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[2]] = positionResidueBuffer[2];
newNumberKeptResidues++;
}
}
}
// Special case: Position 2 of newAlig in case the alignment has 4 residues

// Special case: Position 2 of newAlig in case the alignment has 4 residues
else if (num > 3) {
if (rejectResiduesBuffer[2])
newAlig->saveResidues[positionResidueBuffer[2]] =
// Compare the sum of the booleans to 0. As we have 4 neighbours to compare and only need 3 of them to be non-rejected,
// we can have 1 rejected residue on the neighbouring.
(rejectResiduesBuffer[0] +
rejectResiduesBuffer[1] +
rejectResiduesBuffer[3]) > 0 ? -1 : positionResidueBuffer[2];
if (rejectResiduesBuffer[2]) {
// As we have 4 neighbours to compare and only need 3 of them to be non-rejected,
// we can have 1 rejected residue on the neighbouring.
int numNeighboursRejected = rejectResiduesBuffer[0] + rejectResiduesBuffer[1] + rejectResiduesBuffer[3];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[2]] = positionResidueBuffer[2];
newNumberKeptResidues++;
}
}
}
// Special case: Position 2 of newAlig in case the alignment has 3 residues

// Special case: Position 2 of newAlig in case the alignment has 3 residues
else if (num > 2) {
if (rejectResiduesBuffer[2])
newAlig->saveResidues[positionResidueBuffer[2]] =
// Compare the sum of the booleans to 0. As we have 4 neighbours to compare and only need 3 of them to be non-rejected,
// we can have 1 rejected residue on the neighbouring.
(rejectResiduesBuffer[0] +
rejectResiduesBuffer[1]) > 0 ? -1 : positionResidueBuffer[2];
if (rejectResiduesBuffer[2]) {
int numNeighboursRejected = rejectResiduesBuffer[0] + rejectResiduesBuffer[1];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[2]] = positionResidueBuffer[2];
newNumberKeptResidues++;
}
}
}
}

// Move the window until it arrives to the end of the alignment.
if (num == 5)
if (num == 5) {
for (; i < alig->originalNumberOfResidues; i++) {
if (alig->saveResidues[i] == -1) continue;
// If we find a new newAlig residue...
Expand All @@ -659,45 +689,64 @@ Alignment *Cleaner::cleanStrict(int gapCut, const int *gInCol, float simCut, con
// Take a look at the neighbours of the new middle-point, positionResidueBuffer[2].
// As we stored the rejectResiduesBuffer as booleans, we can add them and compare the result.
// If the result is bigger than 1, means that we have rejected at least 2 neighbouring residues, thus, we cannot rescue this residue.
newAlig->saveResidues[positionResidueBuffer[2]] =
(rejectResiduesBuffer[0] +
rejectResiduesBuffer[1] +
rejectResiduesBuffer[3] +
rejectResiduesBuffer[4]) > 1 ? -1 : positionResidueBuffer[2];
int numNeighboursRejected = rejectResiduesBuffer[0] + rejectResiduesBuffer[1] + rejectResiduesBuffer[3] + rejectResiduesBuffer[4];
bool threeOrMoreNeighboursKept = numNeighboursRejected <= 1;
if (threeOrMoreNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[2]] = positionResidueBuffer[2];
newNumberKeptResidues++;
}
}
}
}
}

// Special case: Position -1 of newAlig
{
if (num > 4) {
if (rejectResiduesBuffer[3])
newAlig->saveResidues[positionResidueBuffer[3]] =
(rejectResiduesBuffer[1] +
rejectResiduesBuffer[2] +
rejectResiduesBuffer[4]) > 0 ? -1 : positionResidueBuffer[3];
if (rejectResiduesBuffer[3]) {
int numNeighboursRejected = rejectResiduesBuffer[1] + rejectResiduesBuffer[2] + rejectResiduesBuffer[4];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[3]] = positionResidueBuffer[3];
newNumberKeptResidues++;
}
}
} else if (num > 3) {
if (rejectResiduesBuffer[2])
newAlig->saveResidues[positionResidueBuffer[2]] =
(rejectResiduesBuffer[0] +
rejectResiduesBuffer[1] +
rejectResiduesBuffer[3]) > 0 ? -1 : positionResidueBuffer[2];
if (rejectResiduesBuffer[2]) {
int numNeighboursRejected = rejectResiduesBuffer[0] + rejectResiduesBuffer[1] + rejectResiduesBuffer[3];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[2]] = positionResidueBuffer[2];
newNumberKeptResidues++;
}
}
}
}

// Special case: Position -2 of newAlig
if (num > 4) {
if (rejectResiduesBuffer[4])
newAlig->saveResidues[positionResidueBuffer[4]] =
(rejectResiduesBuffer[2] +
rejectResiduesBuffer[3]) > 0 ? -1 : positionResidueBuffer[4];
if (rejectResiduesBuffer[4]) {
int numNeighboursRejected = rejectResiduesBuffer[2] + rejectResiduesBuffer[3];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[4]] = positionResidueBuffer[4];
newNumberKeptResidues++;
}
}
} else if (num > 3) {
if (rejectResiduesBuffer[3])
newAlig->saveResidues[positionResidueBuffer[4]] =
(rejectResiduesBuffer[1] +
rejectResiduesBuffer[2]) > 0 ? -1 : positionResidueBuffer[3];
if (rejectResiduesBuffer[3]) {
int numNeighboursRejected = rejectResiduesBuffer[1] + rejectResiduesBuffer[2];
bool allNeighboursKept = numNeighboursRejected == 0;
if (allNeighboursKept) {
newAlig->saveResidues[positionResidueBuffer[3]] = positionResidueBuffer[3];
newNumberKeptResidues++;
}
}
}
}

percentageColumnsOriginal = ((double) newNumberKeptResidues / alig->originalNumberOfResidues) * 100.0;
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"after strict method recovered neighbours", std::to_string(newNumberKeptResidues), std::to_string(percentageColumnsOriginal)});

// Select blocks size based on user input. It can be set either to 5 or to a
// variable number between 3 and 12 depending on alignment's length (1% alig)
Expand Down Expand Up @@ -746,7 +795,7 @@ Alignment *Cleaner::cleanStrict(int gapCut, const int *gInCol, float simCut, con

percentageColumnsOriginal = ((double) newAlig->numberOfResidues / alig->originalNumberOfResidues) * 100.0;
debug.report(InfoCode::BlockSize, new std::string[1]{std::to_string(blockSize)});
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"after strict method recovered neighbours and filtered by block size", std::to_string(newAlig->numberOfResidues), std::to_string(percentageColumnsOriginal)});
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"after strict method filtered by block size", std::to_string(newAlig->numberOfResidues), std::to_string(percentageColumnsOriginal)});

return newAlig;
}
Expand Down

0 comments on commit a5574d2

Please sign in to comment.