@@ -45,7 +45,7 @@ namespace AlignFormat {
45
45
cur = cur->duplicate ();
46
46
other_segments.push_back (cur);
47
47
}
48
- cur->core . flag = (line[i] == ' >' ) ? 0 : 16 ;
48
+ cur->flag = (line[i] == ' >' ) ? 0 : 16 ;
49
49
++match_count;
50
50
break ;
51
51
case 1 :
@@ -62,16 +62,16 @@ namespace AlignFormat {
62
62
break ;
63
63
case 2 :
64
64
j = line.find (" -" , i);
65
- std::from_chars (line.data () + i, line.data () + j, cur->core . pos );
65
+ std::from_chars (line.data () + i, line.data () + j, cur->pos );
66
66
++match_count;
67
67
len = j - i;
68
68
i += len;
69
69
break ;
70
70
default :
71
71
j = line.find_first_of (" ><" , i);
72
- std::from_chars (line.data () + i, line.data () + j, cur->core . end );
72
+ std::from_chars (line.data () + i, line.data () + j, cur->end );
73
73
if (!have_cigar) {
74
- cur->blocks .emplace_back () = {(uint32_t )cur->core . pos , (uint32_t )cur->core . end };
74
+ cur->blocks .emplace_back () = {(uint32_t )cur->pos , (uint32_t )cur->end };
75
75
}
76
76
if (j == std::string::npos) {
77
77
i = pe;
@@ -89,15 +89,15 @@ namespace AlignFormat {
89
89
cur = g;
90
90
size_t next_segment = 0 ;
91
91
uint32_t num = 0 ;
92
- uint32_t current_pos = cur->core . pos ;
92
+ uint32_t current_pos = cur->pos ;
93
93
uint32_t block_start = current_pos;
94
94
bool in_match = false ;
95
95
96
96
auto flush_block = [&]() {
97
97
if (in_match && cur) {
98
98
// Ensure block doesn't extend beyond segment boundary and check for overflow
99
- uint32_t end_pos = (current_pos < (uint32_t )cur->core . end ) ?
100
- std::min (current_pos, (uint32_t )cur->core . end ) : (uint32_t )cur->core . end ;
99
+ uint32_t end_pos = (current_pos < (uint32_t )cur->end ) ?
100
+ std::min (current_pos, (uint32_t )cur->end ) : (uint32_t )cur->end ;
101
101
if (block_start < end_pos) {
102
102
cur->blocks .emplace_back () = {block_start, end_pos};
103
103
}
@@ -111,7 +111,7 @@ namespace AlignFormat {
111
111
if (next) {
112
112
cur = next;
113
113
next_segment++;
114
- current_pos = cur->core . pos ;
114
+ current_pos = cur->pos ;
115
115
block_start = current_pos;
116
116
in_match = false ;
117
117
return true ;
@@ -136,8 +136,8 @@ namespace AlignFormat {
136
136
uint32_t remaining = num;
137
137
while (remaining > 0 && cur != nullptr ) {
138
138
// Calculate how much of the operation fits in current segment
139
- uint32_t segment_remaining = (current_pos < (uint32_t )cur->core . end ) ?
140
- ((uint32_t )cur->core . end - current_pos) : 0 ;
139
+ uint32_t segment_remaining = (current_pos < (uint32_t )cur->end ) ?
140
+ ((uint32_t )cur->end - current_pos) : 0 ;
141
141
142
142
// Protect against overflow in advance calculation
143
143
uint32_t max_advance = UINT32_MAX - current_pos;
@@ -152,7 +152,7 @@ namespace AlignFormat {
152
152
remaining -= advance;
153
153
154
154
// If we've reached the end of the segment
155
- if (current_pos >= (uint32_t )cur->core . end ) {
155
+ if (current_pos >= (uint32_t )cur->end ) {
156
156
flush_block ();
157
157
if (!advance_to_next_segment ()) {
158
158
break ; // No more segments
@@ -167,16 +167,16 @@ namespace AlignFormat {
167
167
flush_block ();
168
168
uint32_t remaining = num;
169
169
while (remaining > 0 && cur != nullptr ) {
170
- uint32_t segment_remaining = (current_pos < (uint32_t )cur->core . end ) ?
171
- ((uint32_t )cur->core . end - current_pos) : 0 ;
170
+ uint32_t segment_remaining = (current_pos < (uint32_t )cur->end ) ?
171
+ ((uint32_t )cur->end - current_pos) : 0 ;
172
172
173
173
uint32_t max_advance = UINT32_MAX - current_pos;
174
174
uint32_t advance = std::min ({remaining, segment_remaining, max_advance});
175
175
176
176
current_pos += advance;
177
177
remaining -= advance;
178
178
179
- if (current_pos >= (uint32_t )cur->core . end ) {
179
+ if (current_pos >= (uint32_t )cur->end ) {
180
180
if (!advance_to_next_segment ()) {
181
181
break ; // No more segments
182
182
}
@@ -199,12 +199,12 @@ namespace AlignFormat {
199
199
flush_block ();
200
200
}
201
201
202
- // std::cout << "g is " << g->core. pos << " - " << g->core. end << std::endl;
202
+ // std::cout << "g is " << g->pos << " - " << g->end << std::endl;
203
203
// for (auto item : g->blocks) {
204
204
// std::cout << item.start << "-" << item.end << " ";
205
205
// } std::cout << std::endl;
206
206
// for (auto gg : other_segments) {
207
- // std::cout << "gg is " << gg->core. pos << " - " << gg->core. end << std::endl;
207
+ // std::cout << "gg is " << gg->pos << " - " << gg->end << std::endl;
208
208
// for (auto item : gg->blocks) {
209
209
// std::cout << item.start << "-" << item.end << " ";
210
210
// } std::cout << std::endl;
@@ -231,8 +231,7 @@ namespace AlignFormat {
231
231
// ds:Z::36*ct:129*tg*tc:2+[ag]gcgcag:1*ag:228+[ag]:19+ca:4*ta:11*ga-[caggcgcagaga]ggcgcgccgcgcc[ggcg]:21*cg:4*tc:7*ta:29*ag:3*ct:2*ct:1*gc:32+[g]:33*ac:14*gt:1*tg:15*ga:28+[g]:11*ag:3*tg+[g]:33*tc:20*ag:19*ac:12*ca:223*ta:17*ag:47*cg:50*ag:33*at:22*gt:206*ga:400-[g]:24*gc:184*ga:11+[g]:388*ga:211-[c]:120*tg:1*ag:183*ct:24*gc:329-ag:237*ca:61-[c]:21*tc:365-[c]:109+[aaga]g:217*gc:25*cg:42+[g]:165*ag:22*ag:187*ag:92*tg:62*at:542*gt:2*gt:85*ac+[g]:15*ag:33*ga:111*tc:34*tg:194*ct:79*tc:155*ct:36*ga:74*ct:160*gc:1*cg:46*ag:1307*tc:684*cg:322*ag:68*ct:534*ag:351*ga:92*gc:5*tc:796*ct:89-[c]:465*ct:577*ag:23*tc:586+[g]:302*tc:83*ga:18*gt:25*ac:102+[g]:56*cg:35*ag:312*at:1*gc:170*ca:93*ga:156*ct:136*ct:453*ag:108*ag:67*cg:11*ga:3*gc:17*gt:4*gc:1*ca:15*cg:347+[g]:8*ag:10*cg*cg:21*ac:93+[a]:292*ag:142*ct:107*tc:281*ct:444*ga:284*ag:57*ga:77*ga:309*ga:166+[t]:267*ta:24
232
232
233
233
void gafParser (std::string& line,
234
- ankerl::unordered_dense::map< std::string, SuperIntervals<int , GAF_t *>>& cached_alignments,
235
- ankerl::unordered_dense::map< std::string, uint32_t >& rids) {
234
+ ankerl::unordered_dense::map< std::string, SuperIntervals<int , GAF_t *>>& cached_alignments) {
236
235
GAF_t *g = new GAF_t ();
237
236
238
237
size_t pos = 0 ;
@@ -247,7 +246,8 @@ namespace AlignFormat {
247
246
};
248
247
249
248
next = line.find (' \t ' , pos);
250
- g->qname = line.substr (0 , next);
249
+ std::string qname = line.substr (0 , next);
250
+
251
251
pos = next + 1 ;
252
252
253
253
parse_int (g->qlen );
@@ -287,7 +287,7 @@ namespace AlignFormat {
287
287
pos = next + 1 ;
288
288
289
289
// 12 int Mapping quality (0-255; 255 for missing)
290
- parse_int (g->core . qual );
290
+ parse_int (g->qual );
291
291
292
292
size_t cigar_start = line.find (" cg:Z:" );
293
293
size_t cigar_end = std::string::npos;
@@ -299,13 +299,13 @@ namespace AlignFormat {
299
299
std::vector<GAF_t*> other_segments;
300
300
extractAlignmentPath (line, g, ps, pe, other_segments, cigar_start, cigar_end);
301
301
302
- cached_alignments[g->chrom ].add (g->core . pos , g->core . end , g);
302
+ cached_alignments[g->chrom ].add (g->pos , g->end , g);
303
303
304
- // std::cout << g->core. flag << " " << g->chrom << " " << g->core. pos << " " << g->core. end << std::endl;
305
- for (auto & v : other_segments) {
306
- cached_alignments[v->chrom ].add (v->core . pos , v->core . end , g);
307
- // std::cout << v->core. flag << " " << v->chrom << " " << v->core. pos << " " << v->core. end << std::endl;
308
- }
304
+ // std::cout << g->flag << " " << g->chrom << " " << g->pos << " " << g->end << std::endl;
305
+ // for (auto & v : other_segments) {
306
+ // cached_alignments[v->chrom].add(v->pos, v->end, g);
307
+ // std::cout << v->flag << " " << v->chrom << " " << v->pos << " " << v->end << std::endl;
308
+ // }
309
309
// std::exit(0);
310
310
311
311
}
@@ -314,26 +314,26 @@ namespace AlignFormat {
314
314
int start, end;
315
315
};
316
316
317
- void gafFindY (std::vector<GAF_t *>& gafAlignments) {
317
+ void gafFindY (std::vector<AlignFormat:: GAF_t *>& gafAlignments) {
318
318
std::vector<TrackRange> trackLevels;
319
319
for (const auto &b : gafAlignments) {
320
320
size_t memLen = trackLevels.size ();
321
321
size_t i = 0 ;
322
322
for (; i < memLen; ++i) {
323
- if (b->core . pos > trackLevels[i].end ) {
324
- trackLevels[i].end = b->core . end + 2 ;
323
+ if (b->pos > trackLevels[i].end ) {
324
+ trackLevels[i].end = b->end ;
325
325
b->y = (int )i;
326
326
break ;
327
327
}
328
328
}
329
329
if (i == memLen) {
330
- trackLevels.emplace_back () = {b->core . pos , b->core . end + 2 };
330
+ trackLevels.emplace_back () = {b->pos , b->end };
331
331
b->y = (int )memLen;
332
332
}
333
333
}
334
334
}
335
335
336
- void GwAlignment::open (const std::string& file_path, const std::string& reference, int threads, faidx_t * fai ) {
336
+ void AlignFormat:: GwAlignment::open (const std::string& file_path, const std::string& reference, int threads) {
337
337
path = file_path;
338
338
if (Utils::endsWith (path, " bam" ) || Utils::endsWith (path, " cram" )) {
339
339
type = AlignmentType::HTSLIB_t;
@@ -371,24 +371,16 @@ namespace AlignFormat {
371
371
throw std::exception ();
372
372
}
373
373
#endif
374
- // Reference id's
375
- ankerl::unordered_dense::map< std::string, uint32_t > rids;
376
- int num_sequences = faidx_nseq (fai);
377
- for (int i = 0 ; i < num_sequences; ++i) {
378
- const char * seq_name = faidx_iseq (fai, i);
379
- rids[seq_name] = i;
380
- }
381
374
382
375
std::string tp;
383
376
while (true ) {
384
- auto got_line = (bool )getline (*fpu, tp);
385
- if (!got_line) {
377
+ if (!(bool )getline (*fpu, tp)) {
386
378
break ;
387
379
}
388
380
if (tp[0 ] == ' #' ) {
389
381
continue ;
390
382
}
391
- gafParser (tp, this ->cached_alignments , rids );
383
+ gafParser (tp, this ->cached_alignments );
392
384
}
393
385
for (auto & item : cached_alignments) {
394
386
item.second .index ();
@@ -397,10 +389,48 @@ namespace AlignFormat {
397
389
}
398
390
}
399
391
400
- GwAlignment::~GwAlignment () {
392
+ AlignFormat:: GwAlignment::~GwAlignment () {
401
393
if (index ) hts_idx_destroy (index );
402
394
if (header) sam_hdr_destroy (header);
403
395
if (bam) sam_close (bam);
396
+ if (!cached_alignments.empty ()) {
397
+ for (auto & item : cached_alignments) {
398
+ for (auto &g : item.second .data ) {
399
+ delete g;
400
+ }
401
+ }
402
+ }
403
+ }
404
+
405
+ void gafToAlign (AlignFormat::GAF_t* gaf, AlignFormat::Align* align) {
406
+
407
+ align->cov_start = gaf->pos ;
408
+ align->cov_end = gaf->end ;
409
+ align->orient_pattern = AlignFormat::Pattern::NORMAL;
410
+ align->left_soft_clip = 0 ;
411
+ align->right_soft_clip = 0 ;
412
+ align->y = gaf->y ;
413
+ align->edge_type = 1 ;
414
+ align->pos = gaf->pos ;
415
+ align->reference_end = gaf->end ;
416
+ align->has_SA = false ;
417
+ align->blocks = gaf->blocks ;
418
+ std::cout << gaf->pos << " " << gaf->end << " " << gaf->y << std::endl;
419
+ align->delegate = bam_init1 ();
420
+ const char * qname = gaf->qname .c_str ();
421
+ size_t l_qname = gaf->qname .size ();
422
+ // int bam_set1(bam1_t *bam,
423
+ // size_t l_qname, const char *qname,
424
+ // uint16_t flag, int32_t tid, hts_pos_t pos, uint8_t mapq,
425
+ // size_t n_cigar, const uint32_t *cigar,
426
+ // int32_t mtid, hts_pos_t mpos, hts_pos_t isize,
427
+ // size_t l_seq, const char *seq, const char *qual,
428
+ // size_t l_aux);
429
+ int res = bam_set1 (align->delegate , l_qname, qname, (uint64_t )gaf->flag , 0 ,
430
+ (hts_pos_t )align->pos , (uint8_t )gaf->qual , 0 , nullptr , 0 , 0 , 0 , 0 , nullptr , nullptr , 0 );
431
+ if (res < 0 ) {
432
+ return ;
433
+ }
404
434
}
405
435
406
436
}
0 commit comments