From 84ff6ab670e7b1ba7cd152ed6254e6f6fdfdc3a8 Mon Sep 17 00:00:00 2001 From: TBradley27 Date: Wed, 3 Jul 2019 20:31:24 +0100 Subject: [PATCH] Set splice-unware genome building as the default for a new config variable [ci skip] --- .travis.yml | 3 +- config/basic.yaml | 2 + .../map_reads/hisat2/Snakefile | 61 +++++++++++-------- 3 files changed, 41 insertions(+), 25 deletions(-) diff --git a/.travis.yml b/.travis.yml index c4730e67..d21afca0 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,6 +4,7 @@ python: "3.6" branches: only: - master + - beta3 os: - linux @@ -13,7 +14,7 @@ before_install: - sudo ln -s /bin/tar /bin/gtar # this is because devtools requires gtar which I think is just an alias for 'tar' install: - - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh; + - wget https://repo.continuum.io/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh; - bash miniconda.sh -b - p $HOME/miniconda - export PATH="$HOME/miniconda/bin:$PATH" - hash -r diff --git a/config/basic.yaml b/config/basic.yaml index 57ac48fc..cfeed396 100644 --- a/config/basic.yaml +++ b/config/basic.yaml @@ -17,3 +17,5 @@ prediction_algorithm: TargetScan7 # select from 'TargetScan7' or 'miRanda reannotation: True # Whether to reannotate 3'UTRs or not conservation: False # Where possible, whether to determine conservation of target sites or not using MSAs sequence_data_source: ENA # select from 'ENA','SRA' or 'User' + +genome_index_type: standard # 'standard' option requires ~8GB of memory. 
Otherwise, the 'splice-aware' option requires ~200GB of memory diff --git a/modules/with_reannotation/map_reads/hisat2/Snakefile b/modules/with_reannotation/map_reads/hisat2/Snakefile index c443cfd1..86be8277 100644 --- a/modules/with_reannotation/map_reads/hisat2/Snakefile +++ b/modules/with_reannotation/map_reads/hisat2/Snakefile @@ -64,30 +64,43 @@ def get_read_files (wildcards): raise Exception("\n\nPlease indicate whether the accession: '{}' corresponds to a single-end or a paired-end RNA-seq library\n".format(wildcards.accession)) return -rule get_splice_sites_for_hisat2: - input: - gtf=get_gtf_file, - py_script='scripts/hisat2_extract_splice_sites.py' - output: "results/{species}_hisat2_splice_sites.txt" - shell: "python {input.py_script} {input.gtf} > {output}" - -rule get_exons_for_hisat2: - input: - gtf=get_gtf_file, - py_script='scripts/hisat2_extract_exons.py' - output: "results/{species}_hisat2_exons.txt" - shell: "python {input.py_script} {input.gtf} > {output}" - -rule create_index: - input: - assembly=get_assembly, - splice_sites="results/{species}_hisat2_splice_sites.txt", - exons="results/{species}_hisat2_exons.txt" - conda: - "envs/hisat2.yaml" - output: "data/{species}.1.ht2" - threads: 16 - shell: "hisat2-build -f -p {threads} --ss {input.splice_sites} --exon {input.exons} {input.assembly} data/{wildcards.species}" +if config['genome_index_type'] == 'splice-aware': + rule get_splice_sites_for_hisat2: + input: + gtf=get_gtf_file, + py_script='scripts/hisat2_extract_splice_sites.py' + output: "results/{species}_hisat2_splice_sites.txt" + shell: "python {input.py_script} {input.gtf} > {output}" + + rule get_exons_for_hisat2: + input: + gtf=get_gtf_file, + py_script='scripts/hisat2_extract_exons.py' + output: "results/{species}_hisat2_exons.txt" + shell: "python {input.py_script} {input.gtf} > {output}" + + rule create_index: + input: + assembly=get_assembly, + splice_sites="results/{species}_hisat2_splice_sites.txt", 
exons="results/{species}_hisat2_exons.txt" + conda: "envs/hisat2.yaml" + output: "data/{species}.1.ht2" + threads: 16 + shell: "hisat2-build -f -p {threads} --ss {input.splice_sites} --exon {input.exons} {input.assembly} data/{wildcards.species}" +elif config['genome_index_type'] == 'standard': + + rule create_index: + input: + assembly=get_assembly + conda: "envs/hisat2.yaml" + output: "data/{species}.1.ht2" + threads: 16 + shell: "hisat2-build -f -p {threads} {input.assembly} data/{wildcards.species}" + +else: + raise Exception("\nPlease use a valid value for the configuration option 'genome_index_type' - use either 'standard' or 'splice-aware'\n") + rule map_reads: input: