diff --git a/modules/data_download/Snakefile b/modules/data_download/Snakefile
index 7fe6869..fc20242 100644
--- a/modules/data_download/Snakefile
+++ b/modules/data_download/Snakefile
@@ -30,7 +30,7 @@ def get_gtf_file(wildcards):
 rule get_gtf_file:
     output: "data/{genus_species}.{build}.{ensembl_release}.chr.gtf"
     params: get_species
-    shell: "rsync -av rsync://ftp.ensembl.org/ensembl/pub/release-{config[ensembl_release]}/gtf/{params}/{wildcards.genus_species}.{wildcards.build}.{config[ensembl_release]}.chr.gtf.gz data/ && gunzip data/{wildcards.genus_species}.{wildcards.build}.{config[ensembl_release]}.chr.gtf.gz && sed 's/^chr//g' data/{wildcards.genus_species}.{wildcards.build}.{config[ensembl_release]}.chr.gtf > tmp && mv tmp data/{wildcards.genus_species}.{wildcards.build}.{config[ensembl_release]}.chr.gtf"
+    shell: "rsync -av rsync://ftp.ensembl.org/pub/release-{config[ensembl_release]}/gtf/{params}/{wildcards.genus_species}.{wildcards.build}.{config[ensembl_release]}.chr.gtf.gz data/ && gunzip data/{wildcards.genus_species}.{wildcards.build}.{config[ensembl_release]}.chr.gtf.gz && sed 's/^chr//g' data/{wildcards.genus_species}.{wildcards.build}.{config[ensembl_release]}.chr.gtf > tmp && mv tmp data/{wildcards.genus_species}.{wildcards.build}.{config[ensembl_release]}.chr.gtf"
 
 rule modify_gtf_file: # remove 'chr' prefix if it exists in the gtf file - remove contigs and mitochondrial records
     input: "data/{species_prefix}.{ensembl_release}.chr.gtf"
@@ -48,18 +48,18 @@ rule get_transcript_ids:
 rule download_toplevel_dna:
     output: "data/{genus_species}.{build}.dna.toplevel.fa.gz"
     params: get_species
-    shell: "rsync -av rsync://ftp.ensembl.org/ensembl/pub/release-{config[ensembl_release]}/fasta/{params}/dna/{wildcards.genus_species}.{wildcards.build}.dna.toplevel.fa.gz data/"
+    shell: "rsync -av rsync://ftp.ensembl.org/pub/release-{config[ensembl_release]}/fasta/{params}/dna/{wildcards.genus_species}.{wildcards.build}.dna.toplevel.fa.gz data/"
 
 rule download_primary_assembly_dna:
     output: "data/{genus_species}.{build}.dna.primary_assembly.fa.gz"
     params: get_species
-    shell: "rsync -av rsync://ftp.ensembl.org/ensembl/pub/release-{config[ensembl_release]}/fasta/{params}/dna/{wildcards.genus_species}.{wildcards.build}.dna.primary_assembly.fa.gz data/"
+    shell: "rsync -av rsync://ftp.ensembl.org/pub/release-{config[ensembl_release]}/fasta/{params}/dna/{wildcards.genus_species}.{wildcards.build}.dna.primary_assembly.fa.gz data/"
 
 rule download_chromsomal_dna:
     input:
     output: "data/{genus_species}.{build}.dna.chromosome.{chrom}.fa.gz"
     params: get_species
-    shell: "rsync -av rsync://ftp.ensembl.org/ensembl/pub/release-{config[ensembl_release]}/fasta/{params}/dna/{wildcards.genus_species}.{wildcards.build}.dna.chromosome.{wildcards.chrom}.fa.gz data/"
+    shell: "rsync -av rsync://ftp.ensembl.org/pub/release-{config[ensembl_release]}/fasta/{params}/dna/{wildcards.genus_species}.{wildcards.build}.dna.chromosome.{wildcards.chrom}.fa.gz data/"
 
 rule decompress_dna_file:
     input: "data/{species_prefix}.dna.{assembly_level}.fa.gz"
@@ -85,7 +85,7 @@ rule download_maf_files_mouse:
 rule download_cdna_file:
     output: "data/{genus_species}.{build}.cdna.all.fa.gz"
     params: get_species
-    shell: "rsync -av rsync://ftp.ensembl.org/ensembl/pub/release-{config[ensembl_release]}/fasta/{params}/cdna/{wildcards.genus_species}.{wildcards.build}.cdna.all.fa.gz data"
+    shell: "rsync -av rsync://ftp.ensembl.org/pub/release-{config[ensembl_release]}/fasta/{params}/cdna/{wildcards.genus_species}.{wildcards.build}.cdna.all.fa.gz data"
 
 rule decompress_cdna_file:
     input: "data/{species_prefix}.cdna.all.fa.gz"