-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from moka-guys/v1.0.0
first release (#1)
- Loading branch information
Showing
5 changed files
with
203 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,28 @@ | ||
# dnanexus_normals_readcount_analysis | ||
The app creates a panel of normals for Exomedepth | ||
dnanexus_normals_readcount_analysis_v1.0.0 | ||
ExomeDepth is run in two stages. First, read counts are calculated; the second step then filters out the CNVs of interest. Read counts are calculated over the entire genome and the CNVs are filtered out using a subpanel BED file. | ||
|
||
CNV calling can be performed by providing a readcount file for a set of known normals. | ||
|
||
dnanexus_normals_readcount_analysis_v1.0.0 calculates read counts for a panel of normal samples, intended for use as an input to https://github.com/moka-guys/dnanexus_ED_readcount_analysis | ||
|
||
What does the app do? | ||
This app runs the read count calculation stage for a set of known normals. | ||
|
||
Using the provided DNAnexus project and the list of Pan numbers, the app downloads the matching BAM and BAI files. | ||
|
||
A Docker image containing ExomeDepth is downloaded from 001. The ExomeDepth image is taken from https://github.com/moka-guys/seglh-cnv/releases/tag/v2.0.0 | ||
|
||
The readCount.R script is then called, producing a readcount file (normals.RData) | ||
|
||
Inputs | ||
DNAnexus project name where the BAMs and indexes are saved in a folder called '/output' | ||
NOTE: BAM/BAI files need to have a "NORMAL" prefix for the app to recognise them as inputs. | ||
Reference_genome (*.fa.gz or *.fa) in build 37 | ||
List of comma separated pan numbers | ||
Bedfile covering the capture region | ||
Optional: panel of normals | ||
Output | ||
normals.RData - Read count data for panel of normals | ||
|
||
Created by | ||
This app was created within the Synnovis Genome Informatics section |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
{ | ||
"name": "ED_panel_of_normals_v1.0.0", | ||
"title": "ED_panel_of_normals_v1.0.0", | ||
"summary": "v1.0.0 - Create panel of normals for cnv calling", | ||
"dxapi": "1.0.0", | ||
"inputSpec": [ | ||
{ | ||
"name": "project_name", | ||
"label": "project_name", | ||
"help": "The project containing the bamfiles.", | ||
"class": "string" | ||
}, | ||
{ | ||
"name": "reference_genome", | ||
"label": "fasta file", | ||
"help": "reference_genome", | ||
"class": "file", | ||
"patterns": ["*"], | ||
"optional": false | ||
}, | ||
{ | ||
"name": "bamfile_pannumbers", | ||
"label": "bamfile_pannumbers", | ||
"help": "comma separated string on pan numbers found within the BAM file name", | ||
"class": "string" | ||
}, | ||
{ | ||
"name": "bedfile", | ||
"label": "Read count BED", | ||
"help": "BED file used to count reads", | ||
"class": "file", | ||
"patterns": ["*.bed"], | ||
"optional": false | ||
} | ||
], | ||
"outputSpec": [ | ||
{ | ||
"name": "exomedepth_output", | ||
"label": "exomedepth output", | ||
"help": "readcount file for panel of normals.", | ||
"class": "array:file" | ||
} | ||
], | ||
"runSpec": { | ||
"interpreter": "bash", | ||
"timeoutPolicy": { | ||
"*": { | ||
"hours": 48 | ||
} | ||
}, | ||
"distribution": "Ubuntu", | ||
"release": "20.04", | ||
"version": "0", | ||
"file": "src/code.sh" | ||
}, | ||
"access": { | ||
"network": [ | ||
"*" | ||
], | ||
"allProjects": "VIEW" | ||
}, | ||
"ignoreReuse": false, | ||
"regionalOptions": { | ||
"aws:us-east-1": { | ||
"systemRequirements": { | ||
"*": { | ||
"instanceType": "mem1_ssd1_v2_x4" | ||
} | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Write an AppError record to ~/job_error.json.
#
# Arguments: one or more words describing what was being done when the error
#            occurred; they are joined with single spaces into the message.
# Outputs:   overwrites ~/job_error.json with a one-line JSON document.

# "$*" joins every argument into ONE word. Splicing "$@" into the middle of a
# string would instead split the JSON text across several words.
activity="$*"

# Escape backslashes first, then double quotes, so the message stays valid
# JSON whatever the arguments contain. The quoted helper variables keep the
# replacement text literal.
backslash='\'
dquote='"'
activity=${activity//"$backslash"/"$backslash$backslash"}
activity=${activity//"$dquote"/"$backslash$dquote"}

printf '%s\n' '{"error": {"type": "AppError", "message": "Error while '"${activity}"'; please refer to the job log for more details."}}' > ~/job_error.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/bash

# Remove any error record left in the home directory.
# -f keeps this silent (and successful) when the file does not exist;
# -- stops option parsing before the path.
rm -f -- "${HOME}/job_error.json"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
#!/bin/bash
# ED_panel_of_normals_v1.0.0
#
# Builds an ExomeDepth read count file (normals.RData) for a panel of normals:
#   1. downloads the app inputs
#   2. downloads every BAM/BAI in <project_name>:output/ whose name contains
#      one of the requested pan numbers
#   3. loads the ExomeDepth docker image and runs readCount.R over those BAMs
#   4. uploads the resulting RData file
#
# Variables read but never set in this script (presumably provided by the
# platform from the app inputs -- confirm against dxapp.json):
#   project_name, bamfile_pannumbers, bedfile, bedfile_path, bedfile_prefix,
#   reference_genome_name, reference_genome_path, DX_SECURITY_CONTEXT
# mark-section is likewise not defined here and is assumed to be available
# in the execution environment.

# Exit at any point if there is an error (including inside a pipeline)
# and trace each line as it is executed -- useful for debugging
set -e -x -o pipefail

#######################################
# Run a dx subcommand with the job's auth token appended.
# xtrace is paused around the call so the token is never written to the log.
# Globals:   API_KEY (read)
# Arguments: the dx subcommand and its arguments
# Returns:   the exit status of dx
#######################################
dx_auth() {
  local status=0
  { set +x; } 2>/dev/null
  dx "$@" --auth "$API_KEY" || status=$?
  set -x
  return "$status"
}

### Set up parameters
# Read the auth token from the security context. jq -r emits the raw string
# (no surrounding quotes). Tracing is paused and the token is deliberately
# NOT echoed, so it does not end up in the job log.
{ set +x; } 2>/dev/null
API_KEY=$(jq -r '.auth_token' <<< "$DX_SECURITY_CONTEXT")
set -x

# make output dir and folder to hold downloaded files
output_dir="/home/dnanexus/out/exomedepth_output/exomedepth_output/${bedfile_prefix}"
bam_dir=/home/dnanexus/to_test
mkdir -p "${output_dir}/" "$bam_dir"

mark-section "Downloading inputs"
# download all inputs
dx-download-all-inputs --parallel

mark-section "Determining reference genome"
if [[ "$reference_genome_name" == *.tar* ]]; then
  echo "reference is tarball" >&2
  exit 1
elif [[ "$reference_genome_name" == *.gz ]]; then
  gunzip "$reference_genome_path"
  # gunzip drops the trailing .gz; strip only that suffix from the path
  reference_fasta=${reference_genome_path%.gz}
elif [[ "$reference_genome_name" == *.fa ]]; then
  reference_fasta=$reference_genome_path
else
  # Without this branch reference_fasta would stay unset and readCount.R
  # would silently be called with its arguments shifted.
  echo "unsupported reference genome file: ${reference_genome_name}" >&2
  exit 1
fi

mark-section "determine run specific variables"
echo "read_depth_bed=${bedfile}"
echo "reference_genome=${reference_fasta}"
echo "panel=${bamfile_pannumbers}"
echo "bedfile_prefix=${bedfile_prefix}"
output_RData_file="${output_dir}/normals.RData"

mark-section "Download all relevant BAMs"
# work inside the download dir so dx download saves the files there
cd "$bam_dir"

# List the candidate BAM/BAI files once; every pan number is checked against
# this listing. The pattern is quoted so it reaches dx unexpanded. A failed
# listing is treated as "no files", so nothing is downloaded and the BAM
# count check below stops the job.
remote_listing=$(dx_auth ls "${project_name}:output/*001.ba*" || true)

# $bamfile_pannumbers is a comma separated list of pan numbers that should be
# analysed together. Split it into an array and loop through to download the
# BAM and BAI files.
IFS=',' read -ra pannum_array <<< "$bamfile_pannumbers"
for panel in "${pannum_array[@]}"; do
  # ignore stray whitespace and empty entries (e.g. "Pan1, Pan2," )
  panel=${panel//[[:space:]]/}
  [[ -n "$panel" ]] || continue

  # check there is at least one file with that pan number to download,
  # otherwise the dx download command will fail.
  # grep -c exits non-zero on a count of 0, hence the || true.
  match_count=$(grep -c -- "$panel" <<< "$remote_listing" || true)
  if (( match_count > 0 )); then
    # download all the BAM and BAI files for this project/pan number
    dx_auth download -f "${project_name}:output/*${panel}*001.ba*"
  fi
done

# Get list of all BAMs in the download dir.
# NB absolute paths are used because they are passed to docker run.
# nullglob makes the array empty (rather than holding the literal pattern)
# when nothing was downloaded; it is switched off again straight away.
shopt -s nullglob
bam_list=("${bam_dir}"/*bam)
shopt -u nullglob

# make sure there are at least 3 samples for this analysis, else stop
if (( ${#bam_list[@]} < 3 )); then
  echo "LESS THAN THREE BAM FILES FOUND FOR THIS ANALYSIS" >&2
  exit 1
fi

# return to the home dir
cd /home/dnanexus

mark-section "setting up Exomedepth docker image"
# Location of the ExomeDepth docker file
docker_file_id=project-ByfFPz00jy1fk6PjpZ95F27J:file-GYzKz400jy1yx101F34p8qj2
# download the docker file
dx_auth download "$docker_file_id"
docker_file=$(dx describe "$docker_file_id" --name)
# pull the image tag out of the manifest.json stored inside the image tarball
DOCKERIMAGENAME=$(tar xfO "$docker_file" manifest.json \
  | sed -E 's/.*"RepoTags":\["?([^"]*)"?.*/\1/')
docker load < "/home/dnanexus/${docker_file}"
echo "Using image:${DOCKERIMAGENAME}"

mark-section "Calculate read depths using docker image"
# docker run - mount the home directory as a share
# call the readCount.R script
# supply following arguments
#   - output_RData_file path
#   - reference_fasta path
#   - bedfile_path
#   - bam_list (one argument per BAM)
docker run -v /home/dnanexus:/home/dnanexus "$DOCKERIMAGENAME" readCount.R \
  "$output_RData_file" "$reference_fasta" "$bedfile_path" "${bam_list[@]}"

# Upload results
dx-upload-all-outputs