@@ -545,7 +545,8 @@ seqVCF2GDS <- function(vcf.fn, out.fn, header=NULL,
545
545
storage.option = " LZMA_RA" , info.import = NULL , fmt.import = NULL ,
546
546
genotype.var.name = " GT" , ignore.chr.prefix = " chr" ,
547
547
scenario = c(" general" , " imputation" ), reference = NULL , start = 1L , count = - 1L ,
548
- optimize = TRUE , raise.error = TRUE , digest = TRUE , parallel = FALSE , verbose = TRUE )
548
+ variant_count = NA_integer_ , optimize = TRUE , raise.error = TRUE , digest = TRUE ,
549
+ parallel = FALSE , verbose = TRUE )
549
550
{
550
551
# check
551
552
if (! inherits(vcf.fn , " connection" ))
@@ -592,20 +593,15 @@ seqVCF2GDS <- function(vcf.fn, out.fn, header=NULL,
592
593
{
593
594
if (pnum > 1L )
594
595
stop(" No parallel support when the input is a connection object." )
595
- }
596
-
597
- if (is.character(vcf.fn ))
598
- variant_count <- attr(vcf.fn , " variant_count" )
599
- else
600
- variant_count <- NULL
601
- if (! is.null(variant_count ))
596
+ if (length(variant_count )!= 1L || ! is.na(variant_count ))
597
+ warning(" 'variant_count' is not used in seqVCF2GDS() when 'vcf.fn' is a connection object." )
598
+ } else if (! identical(variant_count , NA_integer_ ))
602
599
{
603
600
if (! is.numeric(variant_count ))
604
- stop(" the attribute 'variant_count' of 'vcf.fn ' should be a numeric vector." )
601
+ stop(" 'variant_count' should be a numeric vector." )
605
602
if (length(variant_count ) != length(vcf.fn ))
606
- stop(" the attribute 'variant_count' of 'vcf.fn' should be as the same length as 'vcf.fn' ." )
603
+ stop(" 'variant_count' and 'vcf.fn' should have the same length." )
607
604
}
608
-
609
605
if (verbose ) cat(date(), " \n " , sep = " " )
610
606
611
607
genotype.storage <- " bit2"
@@ -785,12 +781,18 @@ seqVCF2GDS <- function(vcf.fn, out.fn, header=NULL,
785
781
}
786
782
787
783
# get the number of variants in each VCF file
788
- num_array <- vapply (vcf.fn , function ( fn )
784
+ for ( i in seq_along (vcf.fn ) )
789
785
{
790
- v <- seqVCF_Header(fn , getnum = TRUE , parallel = parallel , verbose = FALSE )
791
- v $ num.variant
792
- }, 0L )
793
- num_var <- sum(num_array )
786
+ v <- variant_count [i ]
787
+ if (is.na(v ) || (v < 0L ))
788
+ {
789
+ fn <- vcf.fn [i ]
790
+ variant_count [i ] <- seqVCF_Header(fn , getnum = TRUE ,
791
+ parallel = parallel , verbose = FALSE )$ num.variant
792
+ }
793
+ }
794
+ num_var <- sum(variant_count )
795
+ if (anyNA(num_var )) stop(" Getting invalid # of variants." )
794
796
795
797
if (start < 1L )
796
798
stop(" 'start' should be a positive integer if conversion in parallel." )
@@ -822,31 +824,25 @@ seqVCF2GDS <- function(vcf.fn, out.fn, header=NULL,
822
824
seqParallel(parallel , NULL , FUN = function (
823
825
vcf.fn , header , storage.option , info.import , fmt.import ,
824
826
genotype.var.name , ignore.chr.prefix , scenario , optim ,
825
- raise.err , ptmpfn , psplit , num_array )
827
+ raise.err , ptmpfn , psplit , variant_count )
826
828
{
827
- # load package
828
- library(SeqArray , quietly = TRUE , verbose = FALSE )
829
-
830
- attr(vcf.fn , " variant_count" ) <- num_array
831
829
i <- process_index # the process id, starting from one
832
-
833
- seqVCF2GDS(vcf.fn , ptmpfn [i ], header = oldheader ,
830
+ SeqArray :: seqVCF2GDS(vcf.fn , ptmpfn [i ], header = oldheader ,
834
831
storage.option = storage.option , info.import = info.import ,
835
832
fmt.import = fmt.import , genotype.var.name = genotype.var.name ,
836
833
ignore.chr.prefix = ignore.chr.prefix ,
837
834
start = psplit [[1L ]][i ], count = psplit [[2L ]][i ],
835
+ variant_count = variant_count ,
838
836
optimize = optim , scenario = scenario , raise.error = raise.err ,
839
837
digest = FALSE , parallel = FALSE , verbose = FALSE )
840
-
841
- invisible ()
842
-
838
+ invisible () # return
843
839
}, split = " none" ,
844
840
vcf.fn = vcf.fn , header = header , storage.option = storage.option ,
845
841
info.import = info.import , fmt.import = fmt.import ,
846
842
genotype.var.name = genotype.var.name ,
847
843
ignore.chr.prefix = ignore.chr.prefix , scenario = scenario ,
848
844
optim = optimize , raise.err = raise.error ,
849
- ptmpfn = ptmpfn , psplit = psplit , num_array = num_array )
845
+ ptmpfn = ptmpfn , psplit = psplit , variant_count = variant_count )
850
846
851
847
if (verbose )
852
848
cat(" >>> Done (" , date(), " ) <<<\n " , sep = " " )
0 commit comments