Skip to content

Commit

Permalink
alt align
Browse files Browse the repository at this point in the history
  • Loading branch information
tobiasrausch committed Sep 24, 2024
1 parent 8a0d775 commit e0fe4f8
Show file tree
Hide file tree
Showing 3 changed files with 48 additions and 26 deletions.
17 changes: 15 additions & 2 deletions src/gaf.h
Original file line number Diff line number Diff line change
Expand Up @@ -92,9 +92,16 @@ namespace torali
}

inline bool
parseAlignRecord(std::istream& instream, Graph const& g, AlignRecord& ar, std::string& qname) {
parseAlignRecord(std::istream& instream, Graph const& g, AlignRecord& ar, std::string& qname, std::set<std::size_t>& validSR) {
std::string gline;
if(std::getline(instream, gline)) {
if(std::getline(instream, gline)) {
if (!validSR.empty()) {
std::size_t found = gline.find('\t');
if (found != std::string::npos) {
if (validSR.find(hash_lr(gline.substr(0, found))) == validSR.end()) return false;
}
}

typedef boost::tokenizer< boost::char_separator<char> > Tokenizer;
boost::char_separator<char> sep("\t");
Tokenizer tokens(gline, sep);
Expand Down Expand Up @@ -141,6 +148,12 @@ namespace torali
} else return false;
}

inline bool
parseAlignRecord(std::istream& instream, Graph const& g, AlignRecord& ar, std::string& qname) {
std::set<std::size_t> validSR;
return parseAlignRecord(instream, g, ar, qname, validSR);
}

inline bool
parseAlignRecord(std::istream& instream, Graph const& g, AlignRecord& ar) {
std::string qname;
Expand Down
39 changes: 25 additions & 14 deletions src/junction.h
Original file line number Diff line number Diff line change
Expand Up @@ -302,7 +302,7 @@ namespace torali

template<typename TConfig, typename TValidRegion, typename TReadBp>
inline void
findJunctions(TConfig const& c, TValidRegion const& validRegions, TReadBp& readBp) {
findJunctions(TConfig const& c, TValidRegion const& validRegions, TReadBp& readBp, std::set<size_t>& validSR) {
typedef typename TValidRegion::value_type TChrIntervals;

// Open file handles
Expand Down Expand Up @@ -338,6 +338,7 @@ namespace torali
if ((rec->core.qual < c.minMapQual) || (rec->core.tid<0)) continue;

std::size_t seed = hash_lr(rec);
if ((!validSR.empty()) && (validSR.find(seed) == validSR.end())) continue;
//std::cerr << bam_get_qname(rec) << '\t' << seed << std::endl;
uint32_t rp = rec->core.pos; // reference pointer
uint32_t sp = 0; // sequence pointer
Expand Down Expand Up @@ -459,15 +460,21 @@ namespace torali

template<typename TConfig, typename TValidRegions, typename TSvtSRBamRecord>
inline void
_findSRBreakpoints(TConfig const& c, TValidRegions const& validRegions, TSvtSRBamRecord& srBR) {
_findSRBreakpoints(TConfig const& c, TValidRegions const& validRegions, TSvtSRBamRecord& srBR, std::set<size_t>& validSR) {
// Breakpoints
typedef std::vector<Junction> TJunctionVector;
typedef std::map<std::size_t, TJunctionVector> TReadBp;
TReadBp readBp;
findJunctions(c, validRegions, readBp);
findJunctions(c, validRegions, readBp, validSR);
fetchSVs(c, readBp, srBR);
}

template<typename TConfig, typename TValidRegions, typename TSvtSRBamRecord>
inline void
_findSRBreakpoints(TConfig const& c, TValidRegions const& validRegions, TSvtSRBamRecord& srBR) {
std::set<size_t> validSR;
_findSRBreakpoints(c, validRegions, srBR, validSR);
}

template<typename TConfig, typename TValidRegions, typename TSVs, typename TSRStore>
inline void
Expand All @@ -480,7 +487,13 @@ namespace torali

// Alternate alignments
if (c.hasAltFile) {
std::set<std::size_t> validSR; // Set of split-reads in ALL alternate alignments

// Insert all split-reads
std::set<std::size_t> validSR;
for(uint32_t svt = 0; svt < srBR.size(); ++svt) {
for(uint32_t i = 0; i < srBR[svt].size(); ++i) validSR.insert(srBR[svt][i].id);
}

// Parse alternate alignments
std::vector<boost::filesystem::path> align;
std::vector<boost::filesystem::path> genome;
Expand All @@ -501,10 +514,10 @@ namespace torali
_parseExcludeIntervals(altConfig, hdr, vR);
bam_hdr_destroy(hdr);
sam_close(samfile);
_findSRBreakpoints(altConfig, vR, altSR);
_findSRBreakpoints(altConfig, vR, altSR, validSR);

// Debug
//std::cerr << "AltAlign: " << align[i] << std::endl;
//std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] " << "AltLinearAlign: " << align[i] << std::endl;
//outputSRBamRecords(altConfig, altSR, true);
} else {
Graph g;
Expand All @@ -515,28 +528,26 @@ namespace torali
altConfig.maxReadSep = 2000;

// Alternate alignment in GAF format
_findGraphSRBreakpoints(altConfig, g, altSR);
_findGraphSRBreakpoints(altConfig, g, altSR, validSR);

// Debug
//std::cerr << "AltAlign: " << align[i] << std::endl;
//std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] " << "AltGraphAlign: " << align[i] << std::endl;
//outputGraphSRBamRecords(altConfig, g, altSR);
}

// Update valid SR set
std::set<std::size_t> newValidSR;
for(uint32_t svt = 0; svt < altSR.size(); ++svt) {
//std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] " << "SVT Size: " << svt << ',' << altSR[svt].size() << std::endl;
if (altSR[svt].empty()) continue;
for(uint32_t i = 0; i < altSR[svt].size(); ++i) {
if (validSR.empty()) newValidSR.insert(altSR[svt][i].id);
else {
if (validSR.find(altSR[svt][i].id) != validSR.end()) newValidSR.insert(altSR[svt][i].id);
}
if (validSR.find(altSR[svt][i].id) != validSR.end()) newValidSR.insert(altSR[svt][i].id);
}
}
validSR = newValidSR;

// Debug
//std::cerr << validSR.size() << std::endl;
//std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] " << "SR size: " << validSR.size() << std::endl;
}

// Filter primary SR records
Expand All @@ -560,7 +571,7 @@ namespace torali


// Debug
//std::cerr << "MainAlign: " << std::endl;
//std::cerr << '[' << boost::posix_time::to_simple_string(boost::posix_time::second_clock::local_time()) << "] " << "MainAlign: " << std::endl;
//outputSRBamRecords(c, srBR, true);

// Cluster BAM records
Expand Down
18 changes: 8 additions & 10 deletions src/pangenome.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ namespace torali {

template<typename TConfig, typename TReadBp>
inline bool
findGraphJunctions(TConfig& c, Graph const& g, TReadBp& readBp) {
findGraphJunctions(TConfig& c, Graph const& g, TReadBp& readBp, std::set<std::size_t>& validSR) {
boost::posix_time::ptime now = boost::posix_time::second_clock::local_time();
std::cerr << '[' << boost::posix_time::to_simple_string(now) << "] " << "Split-read scanning" << std::endl;

Expand All @@ -67,11 +67,10 @@ namespace torali {

// Parse GAF
std::istream instream(&dataIn);
bool parseAR = true;
while (parseAR) {
while (!instream.eof()) {
AlignRecord ar;
std::string qname;
if (parseAlignRecord(instream, g, ar, qname)) {
if (parseAlignRecord(instream, g, ar, qname, validSR)) {
if (ar.mapq < c.minMapQual) continue;

// Iterate path alignments
Expand Down Expand Up @@ -205,7 +204,7 @@ namespace torali {
}

//std::cerr << ar.seed << ',' << ar.qlen << ',' << ar.qstart << ',' << ar.qend << ',' << ar.strand << ',' << ar.plen << ',' << ar.pstart << ',' << ar.pend << ',' << ar.matches << ',' << ar.alignlen << ',' << ar.mapq << std::endl;
} else parseAR = false;
}
}

// Sort junctions
Expand All @@ -222,12 +221,12 @@ namespace torali {

template<typename TConfig, typename TSvtSRBamRecord>
inline void
_findGraphSRBreakpoints(TConfig const& c, Graph const& g, TSvtSRBamRecord& srBR) {
_findGraphSRBreakpoints(TConfig const& c, Graph const& g, TSvtSRBamRecord& srBR, std::set<std::size_t>& validSR) {
// Breakpoints
typedef std::vector<Junction> TJunctionVector;
typedef std::map<std::size_t, TJunctionVector> TReadBp;
TReadBp readBp;
findGraphJunctions(c, g, readBp);
findGraphJunctions(c, g, readBp, validSR);
fetchSVs(c, readBp, srBR);
}

Expand Down Expand Up @@ -255,8 +254,7 @@ namespace torali {

// Parse GAF
std::istream instream(&dataIn);
bool parseAR = true;
while (parseAR) {
while (!instream.eof()) {
AlignRecord ar;
std::string qname;
if (parseAlignRecord(instream, g, ar, qname)) {
Expand All @@ -266,7 +264,7 @@ namespace torali {
std::cerr << "Warning: Hash collision! " << ar.seed << ',' << hm[ar.seed] << ',' << qname << std::endl;
}
}
} else parseAR = false;
}
}

// Close file
Expand Down

0 comments on commit e0fe4f8

Please sign in to comment.