@@ -5,7 +5,7 @@ use indicatif::ProgressBar;
5
5
6
6
use rayon:: prelude:: * ;
7
7
use std:: error:: Error ;
8
- use std:: fs:: File ;
8
+ use std:: fs:: { remove_file , File } ;
9
9
use std:: io:: { BufRead , BufReader , BufWriter , Write } ;
10
10
11
11
use crate :: uniwig:: counting:: {
@@ -191,8 +191,8 @@ pub fn run_uniwig(matches: &ArgMatches) {
191
191
}
192
192
193
193
/// Ensures that the start position is at a minimum equal to `1`.
///
/// Computes `start - smoothsize + wig_shift` and clamps the result so it is
/// never smaller than `1`.
///
/// * `start` - the raw start coordinate.
/// * `smoothsize` - the smoothing window size subtracted from `start`.
/// * `wig_shift` - extra offset added after smoothing; the call sites visible
///   in this file pass `1` when writing wiggle starts/core output and `0` for
///   ends.
///
/// The arithmetic is carried out in `i64` so that extreme inputs cannot
/// overflow `i32` (which would panic in debug builds and silently wrap in
/// release builds, potentially bypassing the lower clamp).
fn clamped_start_position(start: i32, smoothsize: i32, wig_shift: i32) -> i32 {
    let shifted = i64::from(start) - i64::from(smoothsize) + i64::from(wig_shift);
    // After clamping into [1, i32::MAX] the narrowing cast is lossless.
    shifted.clamp(1, i64::from(i32::MAX)) as i32
}
197
197
/// Ensure that the start position is at a minimum equal to `0`
198
198
fn clamped_start_position_zero_pos ( start : i32 , smoothsize : i32 ) -> i32 {
@@ -222,8 +222,6 @@ pub fn uniwig_main(
222
222
. build ( )
223
223
. unwrap ( ) ;
224
224
225
- let mut wig_shift: i32 = 0 ; // This will be set to 1 when writing to wiggle files, else always set to 0
226
-
227
225
// Determine Input File Type
228
226
let input_filetype = FileType :: from_str ( filetype. to_lowercase ( ) . as_str ( ) ) ;
229
227
// Set up output file names
@@ -238,6 +236,8 @@ pub fn uniwig_main(
238
236
meta_data_file_names[ 1 ] = format ! ( "{}{}.{}" , bwfileheader, "end" , "meta" ) ;
239
237
meta_data_file_names[ 2 ] = format ! ( "{}{}.{}" , bwfileheader, "core" , "meta" ) ;
240
238
239
+ let mut npy_meta_data_map: HashMap < String , HashMap < String , i32 > > = HashMap :: new ( ) ;
240
+
241
241
let chrom_sizes = match read_chromosome_sizes ( chromsizerefpath) {
242
242
// original program gets chromosome size from a .sizes file, e.g. chr1 248956422
243
243
// the original program simply pushes 0's until the end of the chromosome length and writes these to file.
@@ -252,19 +252,16 @@ pub fn uniwig_main(
252
252
match input_filetype {
253
253
//BED AND NARROWPEAK WORKFLOW
254
254
Ok ( FileType :: BED ) | Ok ( FileType :: NARROWPEAK ) => {
255
+ // Pare down chromosomes if necessary
256
+ let mut final_chromosomes =
257
+ get_final_chromosomes ( & input_filetype, filepath, & chrom_sizes, score) ;
258
+
255
259
// Some housekeeping depending on output type
256
260
let og_output_type = output_type; // need this later for conversion
257
261
let mut output_type = output_type;
258
262
if output_type == "bedgraph" || output_type == "bw" || output_type == "bigwig" {
259
263
output_type = "bedGraph" // we must create bedgraphs first before creating bigwig files
260
264
}
261
- if output_type == "wig" {
262
- wig_shift = 1 ;
263
- }
264
-
265
- // Pare down chromosomes if necessary
266
- let mut final_chromosomes =
267
- get_final_chromosomes ( & input_filetype, filepath, & chrom_sizes, score) ;
268
265
269
266
let bar = ProgressBar :: new ( final_chromosomes. len ( ) as u64 ) ;
270
267
@@ -299,7 +296,6 @@ pub fn uniwig_main(
299
296
current_chrom_size,
300
297
smoothsize,
301
298
stepsize,
302
- wig_shift,
303
299
) ;
304
300
305
301
match output_type {
@@ -322,6 +318,7 @@ pub fn uniwig_main(
322
318
clamped_start_position (
323
319
primary_start. 0 ,
324
320
smoothsize,
321
+ 1 , //must shift wiggle starts and core by 1 since it is 1 based
325
322
) ,
326
323
stepsize,
327
324
current_chrom_size,
@@ -390,7 +387,6 @@ pub fn uniwig_main(
390
387
current_chrom_size,
391
388
smoothsize,
392
389
stepsize,
393
- wig_shift,
394
390
) ;
395
391
match output_type {
396
392
"file" => {
@@ -411,6 +407,7 @@ pub fn uniwig_main(
411
407
clamped_start_position (
412
408
primary_end. 0 ,
413
409
smoothsize,
410
+ 0 ,
414
411
) ,
415
412
) ;
416
413
write_to_bed_graph_file (
@@ -432,6 +429,7 @@ pub fn uniwig_main(
432
429
clamped_start_position (
433
430
primary_end. 0 ,
434
431
smoothsize,
432
+ 0 , // ends already 1 based, do not shift further
435
433
) ,
436
434
stepsize,
437
435
current_chrom_size,
@@ -450,6 +448,7 @@ pub fn uniwig_main(
450
448
clamped_start_position (
451
449
primary_end. 0 ,
452
450
smoothsize,
451
+ 0 ,
453
452
) ,
454
453
stepsize,
455
454
meta_data_file_names[ 1 ] . clone ( ) ,
@@ -468,6 +467,7 @@ pub fn uniwig_main(
468
467
clamped_start_position (
469
468
primary_end. 0 ,
470
469
smoothsize,
470
+ 0 ,
471
471
) ,
472
472
stepsize,
473
473
meta_data_file_names[ 1 ] . clone ( ) ,
@@ -481,7 +481,6 @@ pub fn uniwig_main(
481
481
& chromosome. ends ,
482
482
current_chrom_size,
483
483
stepsize,
484
- wig_shift,
485
484
) ;
486
485
match output_type {
487
486
"file" => {
@@ -499,7 +498,10 @@ pub fn uniwig_main(
499
498
let count_info: ( Vec < u32 > , Vec < u32 > , Vec < u32 > ) =
500
499
compress_counts (
501
500
& mut core_results,
502
- primary_start. 0 ,
501
+ clamped_start_position_zero_pos (
502
+ primary_start. 0 ,
503
+ 0 ,
504
+ ) ,
503
505
) ;
504
506
write_to_bed_graph_file (
505
507
& count_info,
@@ -517,7 +519,7 @@ pub fn uniwig_main(
517
519
& core_results. 0 ,
518
520
file_name. clone ( ) ,
519
521
chrom_name. clone ( ) ,
520
- clamped_start_position ( primary_start. 0 , 0 ) ,
522
+ clamped_start_position ( primary_start. 0 , 0 , 1 ) , //starts are 1 based must be shifted by 1
521
523
stepsize,
522
524
current_chrom_size,
523
525
) ;
@@ -531,7 +533,10 @@ pub fn uniwig_main(
531
533
& core_results. 0 ,
532
534
file_name. clone ( ) ,
533
535
chrom_name. clone ( ) ,
534
- primary_start. 0 ,
536
+ clamped_start_position_zero_pos (
537
+ primary_start. 0 ,
538
+ 0 ,
539
+ ) ,
535
540
stepsize,
536
541
meta_data_file_names[ 2 ] . clone ( ) ,
537
542
) ;
@@ -546,7 +551,10 @@ pub fn uniwig_main(
546
551
& core_results. 0 ,
547
552
file_name. clone ( ) ,
548
553
chrom_name. clone ( ) ,
549
- primary_start. 0 ,
554
+ clamped_start_position_zero_pos (
555
+ primary_start. 0 ,
556
+ 0 ,
557
+ ) ,
550
558
stepsize,
551
559
meta_data_file_names[ 2 ] . clone ( ) ,
552
560
) ;
@@ -580,6 +588,63 @@ pub fn uniwig_main(
580
588
) ;
581
589
}
582
590
}
591
+ "npy" => {
592
+ // populate hashmap for the npy meta data
593
+ for chromosome in final_chromosomes. iter ( ) {
594
+ let chr_name = chromosome. chrom . clone ( ) ;
595
+ let current_chrom_size =
596
+ * chrom_sizes. get ( & chromosome. chrom ) . unwrap ( ) as i32 ;
597
+ npy_meta_data_map. insert (
598
+ chr_name,
599
+ HashMap :: from ( [
600
+ ( "stepsize" . to_string ( ) , stepsize) ,
601
+ ( "reported_chrom_size" . to_string ( ) , current_chrom_size) ,
602
+ ] ) ,
603
+ ) ;
604
+ }
605
+
606
+ for location in vec_count_type. iter ( ) {
607
+ let temp_meta_file_name =
608
+ format ! ( "{}{}.{}" , bwfileheader, * location, "meta" ) ;
609
+
610
+ if let Ok ( file) = File :: open ( & temp_meta_file_name) {
611
+ let reader = BufReader :: new ( file) ;
612
+
613
+ for line in reader. lines ( ) {
614
+ let line = line. unwrap ( ) ;
615
+ let parts: Vec < & str > = line. split_whitespace ( ) . collect ( ) ;
616
+ if parts. len ( ) >= 3 {
617
+ let chrom = parts[ 1 ] . split ( '=' ) . nth ( 1 ) . expect (
618
+ "Processing npy metadata file: Missing chromosome in line" ,
619
+ ) ;
620
+ let start_str = parts[ 2 ] . split ( '=' )
621
+ . nth ( 1 )
622
+ . expect ( "Processing npy metadata file: Missing start position in line" ) ;
623
+ let starting_position: i32 = start_str. parse ( ) . expect (
624
+ "Processing npy metadata file: Invalid start position" ,
625
+ ) ;
626
+
627
+ if let Some ( current_chr_data) = npy_meta_data_map. get_mut ( chrom)
628
+ {
629
+ current_chr_data. insert (
630
+ ( * location. to_string ( ) ) . parse ( ) . unwrap ( ) ,
631
+ starting_position,
632
+ ) ;
633
+ }
634
+ }
635
+ }
636
+ // Remove the file after it is used.
637
+ let path = std:: path:: Path :: new ( & temp_meta_file_name) ;
638
+ let _ = remove_file ( path) . unwrap ( ) ;
639
+ }
640
+ }
641
+ //write combined metadata as json
642
+ let json_string = serde_json:: to_string_pretty ( & npy_meta_data_map) . unwrap ( ) ;
643
+ let combined_npy_meta_file_path =
644
+ format ! ( "{}{}.{}" , bwfileheader, "npy_meta" , "json" ) ;
645
+ let mut file = File :: create ( combined_npy_meta_file_path) . unwrap ( ) ;
646
+ file. write_all ( json_string. as_bytes ( ) ) . unwrap ( ) ;
647
+ }
583
648
_ => { }
584
649
}
585
650
bar. finish ( ) ;
0 commit comments