Skip to content

Commit

Permalink
Add info of number of columns and block size
Browse files Browse the repository at this point in the history
  • Loading branch information
nicodr97 committed Nov 22, 2024
1 parent 9a598e6 commit 2600bdb
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 12 deletions.
4 changes: 4 additions & 0 deletions include/reportsystem.h
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,10 @@ enum InfoCode {

SimilarityThreshold = 8,

ColumnsToKeep = 9,

BlockSize = 10,


__MAXINFO
};
Expand Down
39 changes: 30 additions & 9 deletions source/Cleaner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,9 +108,9 @@ Alignment *Cleaner::cleanByCutValueOverpass(
int residueIndex, numberColumnsToRecover, *keptResiduesGaps;
int oldNumberKeptResidues = 0, newNumberKeptResidues = 0;
Alignment *newAlig = new Alignment(*alig);
double gapThreshold = maxGaps / alig->originalNumberOfSequences;

debug.report(InfoCode::GapThreshold, new std::string[2]{std::to_string((int) maxGaps), std::to_string(gapThreshold)});

double maxGapThreshold = ((double) maxGaps / alig->originalNumberOfSequences) * 100.0;
debug.report(InfoCode::GapThreshold, new std::string[2]{std::to_string((int) maxGaps), std::to_string(maxGapThreshold)});

// Select the columns with a gaps value
// less or equal than the cut point.
Expand Down Expand Up @@ -238,9 +238,10 @@ Alignment *Cleaner::cleanByCutValueOverpass(
// Compute new sequences and columns numbers
newAlig->Cleaning->removeAllGapsSeqsAndCols();



// Return the new alignment reference
return newAlig;

}

Alignment *Cleaner::cleanByCutValueFallBehind(
Expand Down Expand Up @@ -535,15 +536,31 @@ Alignment *Cleaner::cleanStrict(int gapCut, const int *gInCol, float simCut, con
int i, x, pos, counter, lenBlock;
Alignment *newAlig = new Alignment(*alig);

double gapThreshold = (double) gapCut / alig->originalNumberOfSequences;
debug.report(InfoCode::GapThreshold, new std::string[2]{std::to_string(gapCut), std::to_string(gapThreshold)});
double maxGapThreshold = ((double) gapCut / alig->originalNumberOfSequences) * 100.0;
debug.report(InfoCode::GapThreshold, new std::string[2]{std::to_string(gapCut), std::to_string(maxGapThreshold)});
debug.report(InfoCode::SimilarityThreshold, new std::string[1]{std::to_string(simCut)});

// Reject columns with gaps number greater than the gap threshold.
for (i = 0; i < alig->originalNumberOfResidues; i++) {
if (gInCol[i] > gapCut || MDK_W[i] < simCut)
newAlig->saveResidues[i] = -1;
int residueIndex = 0;
int oldNumberKeptResidues = 0;
int newNumberKeptResidues = 0;
for (residueIndex = 0; residueIndex < alig->originalNumberOfResidues; residueIndex++) {
if (alig->saveResidues[residueIndex] == -1) continue;

if (gInCol[residueIndex] > gapCut || MDK_W[residueIndex] < simCut) {
newAlig->saveResidues[residueIndex] = -1;
} else {
newNumberKeptResidues++;
}

oldNumberKeptResidues++;
}

double percentageColumnsOriginal = ((double) oldNumberKeptResidues / alig->originalNumberOfResidues) * 100.0;
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"before strict method", std::to_string(oldNumberKeptResidues), std::to_string(percentageColumnsOriginal)});
percentageColumnsOriginal = ((double) newNumberKeptResidues / alig->originalNumberOfResidues) * 100.0;
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"after strict method clean based on gaps and similarity", std::to_string(newNumberKeptResidues), std::to_string(percentageColumnsOriginal)});

// Rescue residues based on their neighbouring residues. We are going to rescue those residues that would be rejected but have at least, 3 non-rejected residues.
{
// We're going to store a 5-value window of values that we are goint to move residue by residue.
Expand Down Expand Up @@ -727,6 +744,10 @@ Alignment *Cleaner::cleanStrict(int gapCut, const int *gInCol, float simCut, con
// Compute new sequences and columns numbers
newAlig->Cleaning->removeAllGapsSeqsAndCols();

percentageColumnsOriginal = ((double) newAlig->numberOfResidues / alig->originalNumberOfResidues) * 100.0;
debug.report(InfoCode::BlockSize, new std::string[1]{std::to_string(blockSize)});
debug.report(InfoCode::ColumnsToKeep, new std::string[3]{"after strict method recovered neighbours and filtered by block size", std::to_string(newAlig->numberOfResidues), std::to_string(percentageColumnsOriginal)});

return newAlig;
}

Expand Down
12 changes: 9 additions & 3 deletions source/reportMessages/infoMessages.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,16 @@ const std::map<InfoCode, const char *> reporting::reportManager::InfoMessages =
"No filtering or calculation performed. Output same MSA as input."},

{InfoCode::GapThreshold,
"The maximum amount of gaps is \"[tag]\". The threshold value used is \"[tag]\". Column/sequence trimming may be affected by other parameters (e.g."
" conservation threshold)."},
"The maximum amount of gaps is \"[tag]\", representing a gap in the \"[tag]\" percent or more of the sequences. Column/sequence trimming may be affected by other parameters (e.g."
" conservation threshold, block size)."},

{InfoCode::SimilarityThreshold,
"The similarity threshold value used is \"[tag]\". Column/sequence trimming may be affected by other parameters (e.g."
" conservation threshold)."}
" conservation threshold, block size)."},

{InfoCode::ColumnsToKeep,
"The number of columns to keep [tag] is \"[tag]\", representing \"[tag]\" percent of the original alignment"},

{InfoCode::BlockSize,
"The block size used is \"[tag]\""},
};

0 comments on commit 2600bdb

Please sign in to comment.