Merge pull request #1 from moka-guys/v1.0.0

first release (#1)
moka-guys · Sep 14, 2023 · 4d15a55 · 4d15a55
2 parents 3850c41 + 8e03805
commit 4d15a55
Show file tree

Hide file tree

Showing 5 changed files with 203 additions and 2 deletions.
diff --git a/README.md b/README.md
@@ -1,2 +1,28 @@
-# dnanexus_normals_readcount_analysis
-The app creates a panel of normals for Exomedepth
+dnanexus_normals_readcount_analysis_v1.0.0
+ExomeDepth is run in two stages. Firstly, read counts are calculated; the second step then filters out the CNVs of interest. Read counts are calculated over the entire genome and the CNVs are filtered out using a subpanel BED file.
+
+CNV calling can be performed by providing a readcount file for a set of known normals. 
+
+dnanexus_normals_readcount_analysis_v1.0.0 calculates read counts for a panel of normal samples, intended for use as an input to https://github.com/moka-guys/dnanexus_ED_readcount_analysis
+
+What does the app do?
+This app runs the read count calculation stage for a set of known normals.
+
+Using the provided DNAnexus project and the list of Pan numbers, the app downloads the matching BAM and BAI files.
+
+A Docker image containing ExomeDepth is downloaded from 001. The ExomeDepth image is taken from https://github.com/moka-guys/seglh-cnv/releases/tag/v2.0.0
+
+The readCount.R script is then called, producing a readcount file (normals.RData) 
+
+Inputs
+DNAnexus project name where the BAMs and indexes are saved in a folder called '/output'
+NOTE: BAM/BAI files need to have a "NORMAL" prefix for the app to recognise it as an input.
+Reference_genome (*.fa.gz or *.fa) in build 37
+List of comma separated pan numbers
+Bedfile covering the capture region
+Optional: panel of normals
+Output
+normals.RData - Read count data for panel of normals
+
+Created by
+This app was created within the Synnovis Genome Informatics section
diff --git a/dxapp.json b/dxapp.json
@@ -0,0 +1,72 @@
+{
+  "name": "ED_panel_of_normals_v1.0.0",
+  "title": "ED_panel_of_normals_v1.0.0",
+  "summary": "v1.0.0 - Create panel of normals for cnv calling",
+  "dxapi": "1.0.0",
+  "inputSpec": [
+    {
+      "name": "project_name",
+      "label": "project_name",
+      "help": "The project containing the bamfiles.",
+      "class": "string"
+    },
+    {
+      "name": "reference_genome",
+      "label": "fasta file",
+      "help": "reference_genome",
+      "class": "file",
+      "patterns": ["*"],
+      "optional": false
+    },
+    {
+      "name": "bamfile_pannumbers",
+      "label": "bamfile_pannumbers",
+      "help": "comma separated string of pan numbers found within the BAM file name",
+      "class": "string"
+    },
+    {
+      "name": "bedfile",
+      "label": "Read count BED",
+      "help": "BED file used to count reads",
+      "class": "file",
+      "patterns": ["*.bed"],
+      "optional": false
+    }
+  ],
+  "outputSpec": [
+    {
+      "name": "exomedepth_output",
+      "label": "exomedepth output",
+      "help": "readcount file for panel of normals.",
+      "class": "array:file"
+    }
+  ],
+  "runSpec": {
+    "interpreter": "bash",
+    "timeoutPolicy": {
+      "*": {
+        "hours": 48
+      }
+    },
+    "distribution": "Ubuntu",
+    "release": "20.04",
+    "version": "0",
+    "file": "src/code.sh"
+  },
+  "access": {
+    "network": [
+      "*"
+    ],
+    "allProjects": "VIEW"
+  },
+  "ignoreReuse": false,
+  "regionalOptions": {
+    "aws:us-east-1": {
+      "systemRequirements": {
+        "*": {
+          "instanceType": "mem1_ssd1_v2_x4"
+        }
+      }
+    }
+  }
+}
diff --git a/resources/usr/bin/mark-section b/resources/usr/bin/mark-section
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+echo '{"error": {"type": "AppError", "message": "Error while '"$@"'; please refer to the job log for more details."}}' > ~/job_error.json
diff --git a/resources/usr/bin/mark-success b/resources/usr/bin/mark-success
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+rm -f ~/job_error.json
diff --git a/src/code.sh b/src/code.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# ED_panel_of_normals_v1.0.0
+#
+# Builds the read count file (normals.RData) for a panel of normals:
+#   1. download the app inputs
+#   2. resolve the reference FASTA (gunzip it if required)
+#   3. download the BAM/BAI files matching each pan number from
+#      <project_name>:output/
+#   4. load the ExomeDepth docker image and run readCount.R
+#   5. upload the outputs
+#
+# Variables read but not assigned here: project_name, bamfile_pannumbers,
+# bedfile, bedfile_prefix, bedfile_path, reference_genome_name,
+# reference_genome_path, DX_SECURITY_CONTEXT.
+# NOTE(review): presumably exported by the DNAnexus job environment - confirm.
+
+# The following line causes bash to exit at any point if there is any error
+# and to output each line as it is executed -- useful for debugging
+set -e -x -o pipefail
+
+### Set up parameters
+home_dir=/home/dnanexus
+bam_dir="${home_dir}/to_test"
+output_dir="${home_dir}/out/exomedepth_output/exomedepth_output/${bedfile_prefix}"
+
+# Read the DNAnexus auth token from the job's security context.
+# xtrace is switched off while the token is handled so that it is never
+# written to the job log; the token itself is deliberately not echoed.
+{ set +x; } 2>/dev/null
+API_KEY=$(jq -r '.auth_token' <<< "$DX_SECURITY_CONTEXT")
+set -x
+
+# Run a dx subcommand with "--auth <token>" appended while keeping the token
+# out of the xtrace output.
+# Arguments: the dx subcommand followed by its arguments.
+# Returns:   the exit status of dx.
+dx_with_auth() {
+	local status=0
+	{ set +x; } 2>/dev/null
+	dx "$@" --auth "$API_KEY" || status=$?
+	set -x
+	return "$status"
+}
+
+# make output dir and folder to hold downloaded files
+mkdir -p "$output_dir" "$bam_dir"
+
+mark-section "Downloading inputs"
+# download all inputs
+dx-download-all-inputs --parallel
+
+mark-section "Determining reference genome"
+case "$reference_genome_name" in
+	*.tar*)
+		echo "reference is tarball" 1>&2
+		exit 1
+		;;
+	*.gz)
+		gunzip "$reference_genome_path"
+		# gunzip drops the trailing .gz; strip only that suffix from the path
+		reference_fasta=${reference_genome_path%.gz}
+		;;
+	*.fa)
+		reference_fasta=$reference_genome_path
+		;;
+	*)
+		# stop here rather than calling readCount.R with an empty reference
+		echo "reference genome name must end in .fa or .gz: ${reference_genome_name}" 1>&2
+		exit 1
+		;;
+esac
+
+mark-section "determine run specific variables"
+echo "read_depth_bed=${bedfile}"
+echo "reference_genome=${reference_fasta}"
+echo "panel=${bamfile_pannumbers}"
+echo "bedfile_prefix=${bedfile_prefix}"
+output_RData_file="${output_dir}/normals.RData"
+
+mark-section "Download all relevant BAMs"
+# work inside the BAM download dir
+cd "$bam_dir"
+# $bamfile_pannumbers is a comma separated list of pan numbers that should be analysed together.
+# split this into an array and loop through to download BAM and BAI files
+IFS=',' read -ra pannum_array <<< "$bamfile_pannumbers"
+for panel in "${pannum_array[@]}"
+do
+	# tolerate whitespace around the commas and skip empty entries
+	panel=${panel//[[:space:]]/}
+	if [[ -z "$panel" ]]; then
+		continue
+	fi
+	# check there is at least one file with that pan number to download otherwise the dx download command will fail.
+	# grep -c exits non-zero on a zero count, so "|| true" stops pipefail/set -e from aborting the job here.
+	# The patterns are quoted so that dx, not the local shell, expands the wildcards.
+	matches=$(dx_with_auth ls "${project_name}:output/*001.ba*" | grep -c -F -- "$panel" || true)
+	if (( ${matches:-0} > 0 ))
+	then
+		# download all the BAM and BAI files for this project/pan number
+		dx_with_auth download -f "${project_name}:output/*${panel}*001.ba*"
+	fi
+done
+
+# Get list of all BAMs in the download dir
+# NB (include full filepath to ensure the output are absolute paths (needed for docker run))
+# nullglob makes the array empty, instead of holding the literal pattern, when nothing matches
+shopt -s nullglob
+bam_list=("${bam_dir}"/*bam)
+shopt -u nullglob
+
+# count the BAM files. make sure there are at least 3 samples for this pan number, else stop
+filecount="${#bam_list[@]}"
+if (( filecount < 3 )); then
+	echo "LESS THAN THREE BAM FILES FOUND FOR THIS ANALYSIS" 1>&2
+	exit 1
+fi
+
+# return to the home dir
+cd "$home_dir"
+
+mark-section "setting up Exomedepth docker image"
+# Location of the ExomeDepth docker file
+docker_file_id=project-ByfFPz00jy1fk6PjpZ95F27J:file-GYzKz400jy1yx101F34p8qj2
+# download the docker file from 001_Tools...
+dx_with_auth download "$docker_file_id"
+docker_file=$(dx describe "$docker_file_id" --name)
+# the first RepoTags entry in the image tarball's manifest.json is the image name
+DOCKERIMAGENAME=$(tar xfO "$docker_file" manifest.json | jq -r '.[0].RepoTags[0]')
+if [[ -z "$DOCKERIMAGENAME" || "$DOCKERIMAGENAME" == "null" ]]; then
+	echo "could not determine docker image name from ${docker_file}" 1>&2
+	exit 1
+fi
+docker load < "${home_dir}/${docker_file}"
+echo "Using image:${DOCKERIMAGENAME}"
+mark-section "Calculate read depths using docker image"
+# docker run - mount the home directory as a share
+# call the readCount.R script
+# supply following arguments
+#  	- output_RData_file path
+#  	- reference_fasta_path
+#  	- bedfile_path
+#	- bam_list
+
+# Run ReadCount script in docker container
+docker run -v "${home_dir}:${home_dir}" "${DOCKERIMAGENAME}" readCount.R "$output_RData_file" "$reference_fasta" "$bedfile_path" "${bam_list[@]}"
+
+# Upload results
+dx-upload-all-outputs
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		#!/bin/bash
		#
		# mark-section: record which stage of the job is currently running.
		# Usage: mark-section "description of the stage"
		#
		# Overwrites ~/job_error.json with an AppError whose message names the
		# stage given on the command line.

		# Join all arguments into a single string (space separated).
		section="$*"

		# Escape backslashes first, then double quotes, then newlines, so the
		# message remains a valid JSON string.
		section=${section//\\/\\\\}
		section=${section//\"/\\\"}
		section=${section//$'\n'/\\n}

		printf '{"error": {"type": "AppError", "message": "Error while %s; please refer to the job log for more details."}}\n' "$section" > ~/job_error.json