diff --git a/scripts/vcf_parser b/scripts/vcf_parser index 5bb554a..52a5b11 100755 --- a/scripts/vcf_parser +++ b/scripts/vcf_parser @@ -59,7 +59,11 @@ def print_version(ctx, param, value): type=click.Path(exists=False), help='Path to a outfile.' ) -@click.option('-v', '--verbose', +@click.option("--allele_symbol", + default='0', + help="The symbol that should be used when representing "\ + "unobserved alleles. Default is '0'") +@click.option('-v', '--verbose', is_flag=True, help='Increase output verbosity.' ) @@ -88,7 +92,7 @@ def print_version(ctx, param, value): help="Set the level of log output." ) def cli(variant_file, vep, split, outfile, verbose, silent, skip_info_check, - logfile, loglevel): + allele_symbol, logfile, loglevel): """ Tool for parsing vcf files. @@ -107,11 +111,11 @@ def cli(variant_file, vep, split, outfile, verbose, silent, skip_info_check, if variant_file == '-': logger.info("Start parsing variants from stdin") my_parser = vcf_parser.VCFParser(fsock=sys.stdin, split_variants=split, - skip_info_check=skip_info_check) + skip_info_check=skip_info_check, allele_symbol=allele_symbol) else: logger.info("Start parsing variants from file {0}".format(variant_file)) my_parser = vcf_parser.VCFParser(infile = variant_file, - split_variants=split, skip_info_check=skip_info_check) + split_variants=split, skip_info_check=skip_info_check, allele_symbol=allele_symbol) start = datetime.now() nr_of_variants = 0 diff --git a/vcf_parser/parser.py b/vcf_parser/parser.py index 2227b0c..5755e26 100755 --- a/vcf_parser/parser.py +++ b/vcf_parser/parser.py @@ -86,7 +86,8 @@ class VCFParser(object): """docstring for VCFParser""" - def __init__(self, infile=None, fsock=None, split_variants=False, skip_info_check=False): + def __init__(self, infile=None, fsock=None, split_variants=False, + skip_info_check=False, allele_symbol='0'): super(VCFParser, self).__init__() self.logger = logging.getLogger(__name__) @@ -99,6 +100,9 @@ def __init__(self, infile=None, fsock=None, split_variants=False, skip_info_chec self.skip_info_check = skip_info_check self.logger.info("Skip info check = {0}".format(self.skip_info_check)) + + self.allele_symbol = allele_symbol + self.logger.info("Allele symbol = {0}".format(self.allele_symbol)) self.logger.info("Initializing HeaderParser") self.metadata = HeaderParser() @@ -166,7 +170,8 @@ def add_variant(self, chrom, pos, rs_id, ref, alt, qual, filt, info, form=None, # If multiple alternative and split_variants we must split the variant else: - for splitted_variant in split_variants(variant, self.metadata): + for splitted_variant in split_variants(variant_dict=variant, + header_parser=self.metadata, allele_symbol=self.allele_symbol): self.variants.append(splitted_variant) @@ -185,7 +190,8 @@ def __iter__(self): if not (self.split_variants and len(first_variant['ALT'].split(',')) > 1): variants.append(first_variant) else: - for splitted_variant in split_variants(first_variant, self.metadata): + for splitted_variant in split_variants(variant_dict=first_variant, + header_parser=self.metadata, allele_symbol=self.allele_symbol): variants.append(splitted_variant) for variant in variants: @@ -208,7 +214,10 @@ def __iter__(self): variants.append(variant) else: - for splitted_variant in split_variants(variant, self.metadata): + for splitted_variant in split_variants( + variant_dict=variant, + header_parser=self.metadata, + allele_symbol=self.allele_symbol): variants.append(splitted_variant) for variant in variants: diff --git a/vcf_parser/utils/split_genotype.py b/vcf_parser/utils/split_genotype.py index 3baffb8..b13c5dd 100644 --- a/vcf_parser/utils/split_genotype.py +++ b/vcf_parser/utils/split_genotype.py @@ -17,6 +17,8 @@ def split_genotype(genotype, gt_format, alternative_number, allele_symbol = '0') new_genotype (str): A string that represents the new genotype """ logger = getLogger(__name__) + logger.info("Allele symbol {0}".format(allele_symbol)) + splitted_genotype = genotype.split(':') logger.debug("Parsing genotype {0}".format(splitted_genotype)) splitted_gt_format = gt_format.split(':') diff --git a/vcf_parser/utils/split_variants.py b/vcf_parser/utils/split_variants.py index ac40a22..eaa231e 100644 --- a/vcf_parser/utils/split_variants.py +++ b/vcf_parser/utils/split_variants.py @@ -12,7 +12,7 @@ from vcf_parser import Genotype from vcf_parser.utils import (build_vep_string, split_genotype, build_info_string) -def split_variants(variant_dict, header_parser): +def split_variants(variant_dict, header_parser, allele_symbol='0'): """ Checks if there are multiple alternative alleles and splitts the variant. @@ -26,7 +26,8 @@ def split_variants(variant_dict, header_parser): variant: A variant dictionary with the splitted information for each alternative """ - + logger = getLogger(__name__) + logger.info("Allele symbol {0}".format(allele_symbol)) alternatives = variant_dict['ALT'].split(',') reference = variant_dict['REF'] number_of_values = 1 @@ -102,7 +103,8 @@ def split_variants(variant_dict, header_parser): new_genotype = split_genotype( variant_dict[individual], variant['FORMAT'], - alternative_number + alternative_number, + allele_symbol ) variant[individual] = new_genotype