Skip to content

Commit

Permalink
Merge pull request #124 from ENCODE-DCC/dev
Browse files Browse the repository at this point in the history
v1.3.6
  • Loading branch information
leepc12 authored Jan 28, 2020
2 parents a7fd35c + 01eb970 commit 209a71d
Show file tree
Hide file tree
Showing 18 changed files with 393 additions and 62 deletions.
3 changes: 2 additions & 1 deletion .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ make_tag: &make_tag
name: make docker image tag
command: |
echo "export TAG=quay.io/encode-dcc/chip-seq-pipeline:${CIRCLE_BRANCH}_${CIRCLE_WORKFLOW_ID}" > ${BASH_ENV}
echo "export TAG_DOCKERHUB=encodedcc/chip-seq-pipeline:${CIRCLE_BRANCH}_${CIRCLE_WORKFLOW_ID}" >> ${BASH_ENV}
install_singularity: &install_singularity
name: install singularity
Expand Down Expand Up @@ -57,7 +58,7 @@ jobs:
docker login -u=${QUAY_ROBOT_USER} -p=${QUAY_ROBOT_USER_TOKEN} quay.io
docker build --cache-from quay.io/encode-dcc/chip-seq-pipeline:${DOCKER_CACHE_TAG} --build-arg GIT_COMMIT_HASH=${CIRCLE_SHA1} --build-arg BRANCH=${CIRCLE_BRANCH} --build-arg BUILD_TAG=${TAG} -t $TAG -f dev/docker_image/Dockerfile .
docker push ${TAG}
# docker push quay.io/encode-dcc/chip-seq-pipeline:template
#docker push ${TAG_DOCKERHUB}
docker logout
test_tasks:
<<: *machine_defaults
Expand Down
67 changes: 52 additions & 15 deletions chip.wdl
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
# ENCODE TF/Histone ChIP-Seq pipeline
# Author: Jin Lee (leepc12@gmail.com)
#CAPER docker quay.io/encode-dcc/chip-seq-pipeline:v1.3.5.1
#CAPER singularity docker://quay.io/encode-dcc/chip-seq-pipeline:v1.3.5.1
#CAPER docker quay.io/encode-dcc/chip-seq-pipeline:v1.3.6
#CAPER singularity docker://quay.io/encode-dcc/chip-seq-pipeline:v1.3.6
#CROO out_def https://storage.googleapis.com/encode-pipeline-output-definition/chip.croo.v3.json
workflow chip {
String pipeline_ver = 'v1.3.5.1'
String pipeline_ver = 'v1.3.6'
### sample name, description
String title = 'Untitled'
String description = 'No description'
Expand Down Expand Up @@ -63,7 +63,8 @@ workflow chip {
# parameters for aligner and filter
Boolean use_bwa_mem_for_pe = false # THIS IS EXPERIMENTAL and BWA ONLY (use bwa mem instead of bwa aln/sam)
# available only for PE dataset with READ_LEN>=70bp
Int xcor_pe_trim_bp = 50 # for cross-correlation analysis only (R1 of paired-end fastqs)
Int crop_length = 0 # crop reads in FASTQs with Trimmomatic (0 by default, i.e. disabled)
Int xcor_trim_bp = 50 # for cross-correlation analysis only (R1 of paired-end fastqs)
Boolean use_filt_pe_ta_for_xcor = false # PE only. use filtered PE BAM for cross-corr.
String dup_marker = 'picard' # picard, sambamba
Boolean no_dup_removal = false # keep all dups in final BAM
Expand All @@ -87,7 +88,8 @@ workflow chip {
Int? cap_num_peak
Int cap_num_peak_spp = 300000 # cap number of raw peaks called from SPP
Int cap_num_peak_macs2 = 500000 # cap number of raw peaks called from MACS2
Float pval_thresh = 0.01 # p.value threshold
Float pval_thresh = 0.01 # p.value threshold (for MACS2 peak caller only)
Float fdr_thresh = 0.01 # FDR threshold (for SPP peak caller only: Rscript run_spp.R -fdr)
Float idr_thresh = 0.05 # IDR threshold
### resources
Expand Down Expand Up @@ -127,6 +129,7 @@ workflow chip {
Int call_peak_time_hr = 72
String call_peak_disks = 'local-disk 200 HDD'

String? align_trimmomatic_java_heap
String? filter_picard_java_heap
String? gc_bias_picard_java_heap

Expand Down Expand Up @@ -375,7 +378,7 @@ workflow chip {
msg = 'No genome database found in your input JSON. Did you define "chip.genome_tsv" correctly?'
}
}
if ( peak_caller_ == 'spp' && num_ctl == 0 ) {
if ( !align_only && peak_caller_ == 'spp' && num_ctl == 0 ) {
call raise_exception as error_control_required { input:
msg = 'SPP requires control inputs. Define control input files ("chip.ctl_*") in an input JSON file.'
}
Expand All @@ -394,6 +397,7 @@ workflow chip {
call align { input :
fastqs_R1 = fastqs_R1[i],
fastqs_R2 = fastqs_R2[i],
crop_length = crop_length,
aligner = aligner_,
mito_chr_name = mito_chr_name_,
Expand All @@ -403,6 +407,8 @@ workflow chip {
else custom_aligner_idx_tar_,
paired_end = paired_end_,
use_bwa_mem_for_pe = use_bwa_mem_for_pe,
trimmomatic_java_heap = align_trimmomatic_java_heap,
cpu = align_cpu,
mem_mb = align_mem_mb,
time_hr = align_time_hr,
Expand Down Expand Up @@ -482,7 +488,8 @@ workflow chip {
call align as align_R1 { input :
fastqs_R1 = fastqs_R1[i],
fastqs_R2 = [],
trim_bp = xcor_pe_trim_bp,
trim_bp = xcor_trim_bp,
crop_length = 0,
aligner = aligner_,
mito_chr_name = mito_chr_name_,
Expand All @@ -492,6 +499,7 @@ workflow chip {
else custom_aligner_idx_tar_,
paired_end = false,
use_bwa_mem_for_pe = use_bwa_mem_for_pe,
cpu = align_cpu,
mem_mb = align_mem_mb,
time_hr = align_time_hr,
Expand Down Expand Up @@ -607,6 +615,7 @@ workflow chip {
call align as align_ctl { input :
fastqs_R1 = ctl_fastqs_R1[i],
fastqs_R2 = ctl_fastqs_R2[i],
crop_length = crop_length,
aligner = aligner_,
mito_chr_name = mito_chr_name_,
Expand All @@ -616,6 +625,8 @@ workflow chip {
else custom_aligner_idx_tar_,
paired_end = ctl_paired_end_,
use_bwa_mem_for_pe = use_bwa_mem_for_pe,
trimmomatic_java_heap = align_trimmomatic_java_heap,
cpu = align_cpu,
mem_mb = align_mem_mb,
time_hr = align_time_hr,
Expand Down Expand Up @@ -731,7 +742,7 @@ workflow chip {
Boolean has_all_input_of_choose_ctl = length(select_all(ta_))==num_rep
&& length(select_all(ctl_ta_))==num_ctl && num_ctl > 0
if ( has_all_input_of_choose_ctl ) {
if ( has_all_input_of_choose_ctl && !align_only ) {
# choose appropriate control for each exp IP replicate
# outputs:
# choose_ctl.idx : control replicate index for each exp replicate
Expand Down Expand Up @@ -768,6 +779,7 @@ workflow chip {
chrsz = chrsz_,
cap_num_peak = cap_num_peak_,
pval_thresh = pval_thresh,
fdr_thresh = fdr_thresh,
fraglen = fraglen_tmp[i],
blacklist = blacklist_,
regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
Expand Down Expand Up @@ -809,6 +821,7 @@ workflow chip {
chrsz = chrsz_,
cap_num_peak = cap_num_peak_,
pval_thresh = pval_thresh,
fdr_thresh = fdr_thresh,
fraglen = fraglen_tmp[i],
blacklist = blacklist_,
regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
Expand All @@ -835,6 +848,7 @@ workflow chip {
chrsz = chrsz_,
cap_num_peak = cap_num_peak_,
pval_thresh = pval_thresh,
fdr_thresh = fdr_thresh,
fraglen = fraglen_tmp[i],
blacklist = blacklist_,
regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
Expand Down Expand Up @@ -877,6 +891,7 @@ workflow chip {
chrsz = chrsz_,
cap_num_peak = cap_num_peak_,
pval_thresh = pval_thresh,
fdr_thresh = fdr_thresh,
fraglen = fraglen_mean.rounded_mean,
blacklist = blacklist_,
regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
Expand Down Expand Up @@ -918,6 +933,7 @@ workflow chip {
chrsz = chrsz_,
cap_num_peak = cap_num_peak_,
pval_thresh = pval_thresh,
fdr_thresh = fdr_thresh,
fraglen = fraglen_mean.rounded_mean,
blacklist = blacklist_,
regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
Expand All @@ -944,6 +960,7 @@ workflow chip {
chrsz = chrsz_,
cap_num_peak = cap_num_peak_,
pval_thresh = pval_thresh,
fdr_thresh = fdr_thresh,
fraglen = fraglen_mean.rounded_mean,
blacklist = blacklist_,
regex_bfilt_peak_chr_name = regex_bfilt_peak_chr_name_,
Expand Down Expand Up @@ -1119,7 +1136,7 @@ workflow chip {
cap_num_peak = cap_num_peak_,
idr_thresh = idr_thresh,
pval_thresh = pval_thresh,
xcor_pe_trim_bp = xcor_pe_trim_bp,
xcor_trim_bp = xcor_trim_bp,
xcor_subsample_reads = xcor_subsample_reads,
samstat_qcs = align.samstat_qc,
Expand Down Expand Up @@ -1182,7 +1199,7 @@ task align {
Array[File] fastqs_R1 # [merge_id]
Array[File] fastqs_R2
Int? trim_bp # this is for R1 only
Int crop_length
String aligner
String mito_chr_name
Int? multimapping
Expand All @@ -1191,6 +1208,7 @@ task align {
Boolean paired_end
Boolean use_bwa_mem_for_pe

String? trimmomatic_java_heap
Int cpu
Int mem_mb
Int time_hr
Expand All @@ -1215,7 +1233,7 @@ task align {
python3 $(which encode_task_merge_fastq.py) \
${write_tsv(tmp_fastqs)} \
${if paired_end then '--paired-end' else ''} \
${'--nth ' + 1}
${'--nth ' + cpu}

if [ -z '${trim_bp}' ]; then
SUFFIX=
Expand All @@ -1232,6 +1250,21 @@ task align {
--out-dir R2$SUFFIX
fi
fi
if [ '${crop_length}' == '0' ]; then
SUFFIX=$SUFFIX
else
NEW_SUFFIX="$SUFFIX"_cropped
python3 $(which encode_task_trimmomatic.py) \
--fastq1 R1$SUFFIX/*.fastq.gz \
${if paired_end then '--fastq2 R2$SUFFIX/*.fastq.gz' else ''} \
${if paired_end then '--paired-end' else ''} \
--crop-length ${crop_length} \
--out-dir-R1 R1$NEW_SUFFIX \
${if paired_end then '--out-dir-R2 R2$NEW_SUFFIX' else ''} \
${'--trimmomatic-java-heap ' + if defined(trimmomatic_java_heap) then trimmomatic_java_heap else (mem_mb + 'M')} \
${'--nth ' + cpu}
SUFFIX=$NEW_SUFFIX
fi

if [ '${aligner}' == 'bwa' ]; then
python3 $(which encode_task_bwa.py) \
Expand Down Expand Up @@ -1535,7 +1568,9 @@ task call_peak {
# chr. sizes file, or hs for human, ms for mouse)
File chrsz # 2-col chromosome sizes file
Int cap_num_peak # cap number of raw peaks called from MACS2
Float pval_thresh # p.value threshold
Float pval_thresh # p.value threshold for MACS2
Float? fdr_thresh # FDR threshold for SPP
File? blacklist # blacklist BED to filter raw peaks
String? regex_bfilt_peak_chr_name

Expand All @@ -1561,6 +1596,7 @@ task call_peak {
${sep=' ' tas} \
${'--fraglen ' + fraglen} \
${'--cap-num-peak ' + cap_num_peak} \
${'--fdr-thresh '+ fdr_thresh} \
${'--nth ' + cpu}

else
Expand All @@ -1571,6 +1607,7 @@ task call_peak {
${'--fraglen ' + fraglen} \
${'--cap-num-peak ' + cap_num_peak} \
${'--pval-thresh '+ pval_thresh}
${'--fdr-thresh '+ fdr_thresh}
${'--nth ' + cpu}
fi

Expand Down Expand Up @@ -1792,7 +1829,7 @@ task gc_bias {
runtime {
cpu : 1
memory : '10000 MB'
time : 1
time : 6
disks : 'local-disk 100 HDD'
}
}
Expand All @@ -1816,7 +1853,7 @@ task qc_report {
Int cap_num_peak
Float idr_thresh
Float pval_thresh
Int xcor_pe_trim_bp
Int xcor_trim_bp
Int xcor_subsample_reads
# QCs
Array[File?] samstat_qcs
Expand Down Expand Up @@ -1880,7 +1917,7 @@ task qc_report {
${'--cap-num-peak ' + cap_num_peak} \
--idr-thresh ${idr_thresh} \
--pval-thresh ${pval_thresh} \
--xcor-pe-trim-bp ${xcor_pe_trim_bp} \
--xcor-trim-bp ${xcor_trim_bp} \
--xcor-subsample-reads ${xcor_subsample_reads} \
--samstat-qcs ${sep='_:_' samstat_qcs} \
--nodup-samstat-qcs ${sep='_:_' nodup_samstat_qcs} \
Expand Down
58 changes: 58 additions & 0 deletions dev/build_on_dx_dockerhub.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
#!/bin/bash
# Compile chip.wdl into DNAnexus workflows (using the Dockerhub image) for
# every reference genome and test sample, on both the AWS and Azure DX projects.
set -e

# Pipeline version is parsed from the "#CAPER docker" header line in chip.wdl,
# e.g. "#CAPER docker quay.io/encode-dcc/chip-seq-pipeline:v1.3.6" -> "v1.3.6".
VER=$(grep '#CAPER docker' chip.wdl | awk 'BEGIN{FS=":"} {print $2}')
DOCKER="encodedcc/chip-seq-pipeline:${VER}"
EXTRAS="{\"default_runtime_attributes\":{\"docker\":\"${DOCKER}\"}}"

PROJ_AWS='ENCODE Uniform Processing Pipelines'
PROJ_AZURE='ENCODE Uniform Processing Pipelines Azure'

# compile PROJECT SUBFOLDER DEFAULTS_JSON
#   Compiles chip.wdl into the given DX project under
#   /ChIP-seq2/workflows/$VER-dockerhub/SUBFOLDER, pinning the Dockerhub
#   image via -extras and using DEFAULTS_JSON as the input defaults.
compile() {
    local project=$1
    local subfolder=$2
    local defaults=$3
    java -jar ~/dxWDL-0.79.1.jar compile chip.wdl \
        -project "${project}" \
        -extras <(echo "${EXTRAS}") \
        -f -folder "/ChIP-seq2/workflows/${VER}-dockerhub/${subfolder}" \
        -defaults "${defaults}"
}

## DX (AWS)

# genome templates
compile "${PROJ_AWS}" general example_input_json/dx/template_general.json
compile "${PROJ_AWS}" hg38 example_input_json/dx/template_hg38.json
compile "${PROJ_AWS}" hg19 example_input_json/dx/template_hg19.json
compile "${PROJ_AWS}" mm10 example_input_json/dx/template_mm10.json
compile "${PROJ_AWS}" mm9 example_input_json/dx/template_mm9.json

# test sample PE ENCSR936XTK (full)
compile "${PROJ_AWS}" test_ENCSR936XTK example_input_json/dx/ENCSR936XTK_dx.json

# test sample SE ENCSR000DYI (full)
compile "${PROJ_AWS}" test_ENCSR000DYI example_input_json/dx/ENCSR000DYI_dx.json

# test sample SE ENCSR000DYI (subsampled, chr19/chrM only)
compile "${PROJ_AWS}" test_ENCSR000DYI_subsampled_chr19_only example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_dx.json

# test sample SE ENCSR000DYI (subsampled, chr19/chrM only, rep1)
compile "${PROJ_AWS}" test_ENCSR000DYI_subsampled_chr19_only_rep1 example_input_json/dx/ENCSR000DYI_subsampled_chr19_only_rep1_dx.json

## DX Azure

# genome templates
compile "${PROJ_AZURE}" general example_input_json/dx_azure/template_general.json
compile "${PROJ_AZURE}" hg38 example_input_json/dx_azure/template_hg38.json
compile "${PROJ_AZURE}" hg19 example_input_json/dx_azure/template_hg19.json
compile "${PROJ_AZURE}" mm10 example_input_json/dx_azure/template_mm10.json
compile "${PROJ_AZURE}" mm9 example_input_json/dx_azure/template_mm9.json

# test sample PE ENCSR936XTK (full)
compile "${PROJ_AZURE}" test_ENCSR936XTK example_input_json/dx_azure/ENCSR936XTK_dx_azure.json

# test sample SE ENCSR000DYI (full)
compile "${PROJ_AZURE}" test_ENCSR000DYI example_input_json/dx_azure/ENCSR000DYI_dx_azure.json

# test sample SE ENCSR000DYI (subsampled, chr19/chrM only)
compile "${PROJ_AZURE}" test_ENCSR000DYI_subsampled_chr19_only example_input_json/dx_azure/ENCSR000DYI_subsampled_chr19_only_dx_azure.json
3 changes: 3 additions & 0 deletions dev/docker_image/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,9 @@ RUN git clone https://github.com/ENCODE-DCC/kentUtils_bin_v377
ENV PATH=${PATH}:/software/kentUtils_bin_v377/bin
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/software/kentUtils_bin_v377/lib

# Install Trimmomatic JAR
RUN wget http://www.usadellab.org/cms/uploads/supplementary/Trimmomatic/Trimmomatic-0.39.zip && unzip Trimmomatic-0.39.zip && mv Trimmomatic-0.39/trimmomatic-0.39.jar trimmomatic.jar && chmod +rx trimmomatic.jar && rm -rf Trimmomatic-0.39.zip Trimmomatic-0.39/

# Prevent conflict with locally installed python outside of singularity container
ENV PYTHONNOUSERSITE=True

Expand Down
6 changes: 5 additions & 1 deletion dev/test/test_task/test_bowtie2.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,11 @@
"test_bowtie2.se_fastqs" : [
"chip-seq-pipeline-test-data/input/se/fastqs/rep1/rep1.subsampled.25.fastq.gz"
],
"test_bowtie2.pe_crop_length" : 50,
"test_bowtie2.se_crop_length" : 30,

"test_bowtie2.ref_pe_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/pe/rep1-R1.subsampled.67.samstats.qc",
"test_bowtie2.ref_se_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/se/rep1.subsampled.25.samstats.qc"
"test_bowtie2.ref_se_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/se/rep1.subsampled.25.samstats.qc",
"test_bowtie2.ref_pe_cropped_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/pe/rep1-R1.subsampled.67.merged.crop_50bp.samstats.qc",
"test_bowtie2.ref_se_cropped_flagstat" : "chip-seq-pipeline-test-data/ref_output/test_bowtie2/se/rep1.subsampled.25.merged.crop_30bp.samstats.qc"
}
Loading

0 comments on commit 209a71d

Please sign in to comment.