-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathnucsequences.hpp
157 lines (124 loc) · 4.27 KB
/
nucsequences.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
#ifndef NUCSEQUENCE_HPP
#define NUCSEQUENCE_HPP
#include <vector>
#include <string>
#include <stdexcept>
#include <algorithm>
#include <divsufsort.h>
#include <map>
#include <list>
using namespace std;
#define MYBLOCKSIZE 18
namespace Nuc {
string complementary(const string & seq);
inline map<char,unsigned char> index()
{
map<char,unsigned char> res;
res[ 0 ] = 0; // Needed character (end of sequence)
res['A'] = 1;
res['C'] = 2;
res['G'] = 3;
res['N'] = 4;
res['T'] = 5;
return res;
}
}
// Class for queries
class NucQuery
{
protected:
string _name;
string _sequence;
bool _sense;
vector<saidx_t> _positions;
public:
// Only used when looking for submatches using a linked list
NucQuery * next;
public:
// Constructor
NucQuery() : next(0) {}
// Destructor
~NucQuery();
// Getters
inline const string & name() { return _name; }
inline const string & sequence() { return _sequence; }
inline const bool & sense() { return _sense; }
inline int count() { return _positions.size(); }
// Setters
inline void name ( const string& name ) { _name = name; }
inline void sequence( const string& sequence ) { _sequence = sequence; }
inline void sense ( const bool& sense ) { _sense = sense; }
// Positions handling
inline void addPosition(saidx_t pos) { _positions.push_back(pos); }
inline void removePosition(saidx_t pos) { _positions.erase(_positions.begin()+pos); }
inline saidx_t position(int k) { return _positions[k]; }
};
class NucSequence
{
protected:
string _name;
string _sequence;
protected:
map<char,unsigned char> _nuc;
short _nchar;
saidx_t _seqsize;
vector<saidx_t> _C;
vector<saidx_t> _SA;
vector<saidx_t> _occ; // Used as 2D vector, but cleaner with 1D-vector
short _blocksize;
saidx_t _pidx;
// No default constructor (no empty object)
private:
NucSequence();
// Constructors
public:
NucSequence(string & seqfilename);
NucSequence(string name, string sequence) :
_name(name), _sequence(sequence), _nuc(Nuc::index()), _nchar(_nuc.size()), _C(_nchar),
_blocksize(MYBLOCKSIZE)
{
lowercasename();
if(!check()) throw invalid_argument( "Invalid characters in the sequence." );
}
// Comparison operators (only names are taken into account)
friend bool operator== (const NucSequence &seq1, const NucSequence &seq2) { return seq1._name == seq2._name; }
friend bool operator!= (const NucSequence &seq1, const NucSequence &seq2) { return seq1._name != seq2._name; }
friend bool operator< (const NucSequence &seq1, const NucSequence &seq2) { return seq1._name < seq2._name; }
friend bool operator>= (const NucSequence &seq1, const NucSequence &seq2) { return seq1._name >= seq2._name; }
friend bool operator> (const NucSequence &seq1, const NucSequence &seq2) { return seq1._name > seq2._name; }
friend bool operator<= (const NucSequence &seq1, const NucSequence &seq2) { return seq1._name <= seq2._name; }
// Const Getters
const string & name() { return _name; }
const string & sequence() { return _sequence; }
// Setters
void name(const string & name) { _name = name; }
// BWT
void bwt();
void inverse_bwt();
template <bool SUBMATCHES,bool MISMATCHES, bool BWT>
void search(NucQuery & query, const int & mismatches, const int & submatches);
template <bool MISMATCHES, bool BWT>
void search(NucQuery & query, const int & mismatches);
protected:
// Puts the name in lower case
void lowercasename() { transform(_name.begin(), _name.end(), _name.begin(), (int (*)(int))tolower); }
// Checks the sequence
bool check();
};
class NucSequences : public vector<NucSequence>
{
// FASTA loading constructor
public:
NucSequences() {}
NucSequences(string & fastafilename);
};
struct Candidates
{
int count;
saidx_t low;
saidx_t high;
Candidates(int c, saidx_t l, saidx_t h, Candidates * n) : count(c), low(l), high(h), next(n) {}
Candidates * next;
};
#include "nucsequences.hxx"
#endif