From 84ff6ab670e7b1ba7cd152ed6254e6f6fdfdc3a8 Mon Sep 17 00:00:00 2001
From: TBradley27 <thomas.bradley435@gmail.com>
Date: Wed, 3 Jul 2019 20:31:24 +0100
Subject: [PATCH] Set splice-unaware genome building as the default for a new
 config variable [ci skip]

---
 .travis.yml                                   |  3 +-
 config/basic.yaml                             |  2 +
 .../map_reads/hisat2/Snakefile                | 61 +++++++++++--------
 3 files changed, 41 insertions(+), 25 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index c4730e67..d21afca0 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -4,6 +4,7 @@ python: "3.6"
 branches:
     only:
         - master
+        - beta3
 os:
  - linux
 
@@ -13,7 +14,7 @@ before_install:
    - sudo ln -s /bin/tar /bin/gtar # this is because devtools requires gtar which I think is just an alias for 'tar'
 
 install:
-   - wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
+   - wget https://repo.continuum.io/miniconda/Miniconda3-4.6.14-Linux-x86_64.sh -O miniconda.sh;
    - bash miniconda.sh -b - p $HOME/miniconda
    - export PATH="$HOME/miniconda/bin:$PATH"
    - hash -r
diff --git a/config/basic.yaml b/config/basic.yaml
index 57ac48fc..cfeed396 100644
--- a/config/basic.yaml
+++ b/config/basic.yaml
@@ -17,3 +17,5 @@ prediction_algorithm: TargetScan7        # select from 'TargetScan7' or 'miRanda
 reannotation: True                       # Whether to reannotate 3'UTRs or not
 conservation: False                      # Where possible, whether to determine conservation of target sites or not using MSAs
 sequence_data_source: ENA                # select from 'ENA','SRA' or 'User'
+
+genome_index_type: standard              # 'standard' option requires ~8GB of memory. Otherwise, the 'splice-aware' option requires ~200GB of memory
diff --git a/modules/with_reannotation/map_reads/hisat2/Snakefile b/modules/with_reannotation/map_reads/hisat2/Snakefile
index c443cfd1..86be8277 100644
--- a/modules/with_reannotation/map_reads/hisat2/Snakefile
+++ b/modules/with_reannotation/map_reads/hisat2/Snakefile
@@ -64,30 +64,43 @@ def get_read_files (wildcards):
 		raise Exception("\n\nPlease indicate whether the accession: '{}' corresponds to a single-end or a paired-end RNA-seq library\n".format(wildcards.accession))
 		return
 
-rule get_splice_sites_for_hisat2:
-        input:
-          gtf=get_gtf_file,
-          py_script='scripts/hisat2_extract_splice_sites.py'
-        output: "results/{species}_hisat2_splice_sites.txt"
-        shell: "python {input.py_script} {input.gtf} > {output}"
-
-rule get_exons_for_hisat2:
-        input:
-          gtf=get_gtf_file,
-          py_script='scripts/hisat2_extract_exons.py'
-        output: "results/{species}_hisat2_exons.txt"
-        shell: "python {input.py_script} {input.gtf} > {output}"
-
-rule create_index:
-        input:
-           assembly=get_assembly,
-           splice_sites="results/{species}_hisat2_splice_sites.txt",
-           exons="results/{species}_hisat2_exons.txt"
-        conda:
-           "envs/hisat2.yaml"
-        output: "data/{species}.1.ht2"
-        threads: 16
-        shell: "hisat2-build -f -p {threads} --ss {input.splice_sites} --exon {input.exons} {input.assembly} data/{wildcards.species}"
+# HISAT2 index flavour: 'standard' or 'splice-aware' (memory needs are noted in config/basic.yaml); configs without the key fall back to 'standard'.
+genome_index_type = config.get('genome_index_type', 'standard')
+if genome_index_type == 'splice-aware':
+	rule get_splice_sites_for_hisat2:  # writes the splice-site table consumed by hisat2-build --ss
+		input:
+			gtf=get_gtf_file,
+			py_script='scripts/hisat2_extract_splice_sites.py'
+		output: "results/{species}_hisat2_splice_sites.txt"
+		shell: "python {input.py_script} {input.gtf} > {output}"
+
+	rule get_exons_for_hisat2:  # writes the exon table consumed by hisat2-build --exon
+		input:
+			gtf=get_gtf_file,
+			py_script='scripts/hisat2_extract_exons.py'
+		output: "results/{species}_hisat2_exons.txt"
+		shell: "python {input.py_script} {input.gtf} > {output}"
+
+	rule create_index:  # splice-aware build: assembly plus the splice-site and exon tables
+		input:
+			assembly=get_assembly,
+			splice_sites="results/{species}_hisat2_splice_sites.txt",
+			exons="results/{species}_hisat2_exons.txt"
+		conda: "envs/hisat2.yaml"
+		output: "data/{species}.1.ht2"
+		threads: 16
+		shell: "hisat2-build -f -p {threads} --ss {input.splice_sites} --exon {input.exons} {input.assembly} data/{wildcards.species}"
+elif genome_index_type == 'standard':
+	rule create_index:  # standard build: assembly only, same output path as the splice-aware variant
+		input:
+			assembly=get_assembly
+		conda: "envs/hisat2.yaml"
+		output: "data/{species}.1.ht2"
+		threads: 16
+		shell: "hisat2-build -f -p {threads} {input.assembly} data/{wildcards.species}"
+else:  # any other value is a configuration mistake; stop while the Snakefile is being parsed
+	raise Exception("\nPlease use a valid value for the configuration option 'genome_index_type' - use either 'standard' or 'splice-aware'\n")
+
 
 rule map_reads:
 	input: