diff --git a/gwaspy/phasing/phase.py b/gwaspy/phasing/phase.py
index e386f35..72a01f8 100644
--- a/gwaspy/phasing/phase.py
+++ b/gwaspy/phasing/phase.py
@@ -12,6 +12,7 @@ def run_phase(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,
               fam_file: str = None,
               data_type: str = 'array',
               software: str = 'shapeit',
+              fill_tags: bool = False,
               output_filename: str = None,
               out_dir: str = None):
 
@@ -44,6 +45,7 @@ def run_phase(backend: Union[hb.ServiceBackend, hb.LocalBackend] = None,
                         reference_path=ref_path,
                         fam_file=pedigree,
                         data_type=data_type,
+                        fill_tags=fill_tags,
                         output_filename=output_filename,
                         output_path=out_dir)
     # else: To add BEAGLE
@@ -57,6 +59,7 @@ def main():
     parser.add_argument('--local', action='store_true')
     parser.add_argument('--billing-project', required=True)
     parser.add_argument('--data-type', type=str, default='array', choices=['array', 'wgs'])
+    parser.add_argument('--fill-tags', action='store_true')
     parser.add_argument('--software', type=str, default='shapeit', choices=['beagle', 'shapeit'])
     parser.add_argument('--output-filename', type=str, required=True)
     parser.add_argument('--out-dir', type=str, required=True)
@@ -75,5 +78,6 @@ def main():
               fam_file=args.pedigree,
               data_type=args.data_type,
               software=args.software,
+              fill_tags=args.fill_tags,
               output_filename=args.output_filename,
               out_dir=args.out_dir)
diff --git a/gwaspy/phasing/shapeit5_phase.py b/gwaspy/phasing/shapeit5_phase.py
index 41f8966..9fbcae3 100644
--- a/gwaspy/phasing/shapeit5_phase.py
+++ b/gwaspy/phasing/shapeit5_phase.py
@@ -20,12 +20,63 @@ def size(file: str):
     return size_gigs
 
 
+def annotate_vcf(
+        b: hb.batch.Batch = None,
+        vcf: hb.ResourceGroup = None,
+        region: str = None,
+        ncpu: int = 8,
+        memory: str = 'standard',
+        storage: int = None,
+        img: str = 'docker.io/lindonkambule/gwaspy_phase_impute:latest',
+) -> Job:
+    """
+    Add a job to the batch that writes AN and AC tags into a VCF/BCF.
+
+    :param b: batch the job is added to
+    :param vcf: resource group whose 'vcf' entry is the file to annotate
+    :param region: label used in the job name
+    :param ncpu: CPUs requested for the job and threads used for indexing
+    :param memory: memory setting passed to the job
+    :param storage: storage to request, in Gi; nothing is requested if None
+    :param img: Docker image the job runs in
+    :return: the job; its `annotated_vcf` resource group holds the annotated
+        BCF ('vcf') and its index ('index')
+    """
+    j = b.new_job(name=f'Add AC, AN tags to input: {region}')
+
+    j.image(img)
+    j.cpu(ncpu)
+    j.memory(memory)
+    j.regions(['us-central1'])
+    # Without a size, f'{storage}Gi' would render as the invalid 'NoneGi'
+    if storage is not None:
+        j.storage(f'{storage}Gi')
+
+    j.declare_resource_group(
+        annotated_vcf={
+            'vcf': '{root}.bcf',
+            'index': '{root}.bcf.csi'
+        }
+    )
+
+    # bcftools' own output options must come before `--`; everything after it
+    # is passed to the fill-tags plugin, which only takes the tag list here.
+    # Output is compressed BCF (-Ob) because `bcftools index` needs BGZF input.
+    j.command(f"""
+        bcftools +fill-tags {vcf['vcf']} -Ob -o {j.annotated_vcf['vcf']} -- -t AN,AC
+        bcftools index {j.annotated_vcf['vcf']} --output {j.annotated_vcf['index']} --threads {ncpu}
+        """)
+
+    return j
+
+
 def shapeit_phasing(
         batch: hb.Batch = None,
         input_path: str = None,
         reference_path: Optional[str] = None,
         fam_file: Optional[hb.ResourceFile] = None,
         data_type: str = 'array',
+        fill_tags: bool = False,
         output_filename: str = None,
         output_path: str = None):
 
@@ -244,6 +295,14 @@ def concatenate_rare_chunks(
             ref_vcf = None
             ref_size = 0
 
+        if fill_tags:
+            chrom_vcf = annotate_vcf(
+                b=batch,
+                vcf=chrom_vcf,
+                region=f'chr{i}',
+                storage=round(vcf_size*1.5 + ref_size + 2)
+            ).annotated_vcf
+
         if data_type == 'array':
             phase_common(
                 b=batch,