From 884fe089bdd8fe1e06cbd10339a7b2320ddeaee3 Mon Sep 17 00:00:00 2001 From: wbaopaul Date: Mon, 2 Aug 2021 13:10:55 -0400 Subject: [PATCH 1/7] v1.4.1 planned --- scripts/bam2qc.sh | 4 ++-- scripts/mapping.sh | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/scripts/bam2qc.sh b/scripts/bam2qc.sh index b2b10b9..29c998d 100755 --- a/scripts/bam2qc.sh +++ b/scripts/bam2qc.sh @@ -24,14 +24,14 @@ if [[ "$isort" != "SO:coordinate" ]]; then echo "Sorting bam file" mkdir -p ${mapRes_dir}/tmp - ${SAMTOOLS_PATH}/samtools sort -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam $bam_file + ${SAMTOOLS_PATH}/samtools sort -m 2G -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam $bam_file ## to mark duplicates ${SAMTOOLS_PATH}/samtools fixmate -@ $ncore -m ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam rm ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam # Markdup needs position order - ${SAMTOOLS_PATH}/samtools sort -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam + ${SAMTOOLS_PATH}/samtools sort -m 2G -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam rm ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam ## mark duplicates diff --git a/scripts/mapping.sh b/scripts/mapping.sh index b481e9a..1cb80ea 100755 --- a/scripts/mapping.sh +++ b/scripts/mapping.sh @@ -28,7 +28,7 @@ echo "Sorting bam file" ncore=$(nproc --all) ncore=$(($ncore - 1)) mkdir -p ${mapRes_dir}/tmp -${SAMTOOLS_PATH}/samtools sort -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.bam +${SAMTOOLS_PATH}/samtools sort -m 2G -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.bam rm ${mapRes_dir}/${OUTPUT_PREFIX}.bam ## to mark duplicates @@ -36,7 +36,7 @@ ${SAMTOOLS_PATH}/samtools fixmate -@ $ncore -m ${mapRes_dir}/${OUTPUT_PREFIX}.so rm ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam # Markdup needs position order -${SAMTOOLS_PATH}/samtools sort -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam +${SAMTOOLS_PATH}/samtools sort -m 2G -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam rm ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam ## mark duplicates From 6a1959a276a3abac9880119e8189c04bfa5c12f7 Mon Sep 17 00:00:00 2001 From: wbaopaul Date: Fri, 5 Nov 2021 12:13:00 -0400 Subject: [PATCH 2/7] update tutorial --- README.md | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index fabc077..1b58303 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Installation ------------ - Note: It is not necessary to install scATAC-pro from scratch. You can use the docker or singularity version if you prefer (see [Run scATAC-pro through docker or singularity](#run-scATAC-pro-through-docker-or-singularity) ) -- Run the following command in your terminal, scATAC-pro will be installed in YOUR\_INSTALL\_PATH/scATAC-pro\_1.4.0 +- Run the following command in your terminal, scATAC-pro will be installed in YOUR\_INSTALL\_PATH/scATAC-pro\_1.4.1 @@ -49,7 +49,7 @@ Installation Updates ------------ - Now provide [scATAC-pro tutorial in R](https://scatacpro-in-r.netlify.app/index.html) for access QC metrics and perform downstream analysis -- Current version: 1.4.0 +- Current version: 1.4.1 - Recent updates * *labelTransfer*: new module added, to do label trasfer (for cell annotation) from cell annotation of scRNA-seq data. First construct a gene by cell activity matrix, then use *FindTransferAnchors* and *TransferData* function from Seurat R package to predicted cell type annotation from the cell annotaiton in scRNA-seq data. * *rmDoublets*: new module added, to remove potential doublets using [DoubletFinder](https://github.com/chris-mcginnis-ucsf/DoubletFinder) algorithm. @@ -297,11 +297,13 @@ cello('output/downstream_analysis/PEAK_CALLER/CELL_CALLER/VisCello_obj') ## laun Detailed Usage -------------- +See [here](https://scatacpro-in-r.netlify.app/note_module) or in your terminal: + $ scATAC-pro --help usage : scATAC-pro -s STEP -i INPUT -c CONFIG [-o] [-h] [-v] Use option -h|--help for more information - scATAC-pro 1.4.0 + scATAC-pro 1.4.1 --------------- OPTIONS @@ -445,7 +447,7 @@ Detailed Usage output: a updated seurat object for atac with the Predicted_Cell_Type as a metadata variable and an umap plot colored by Predicted_Cell_Type, saved in the same directory as the input atac seurat object. - *Note*: the cell annotation should be given as a metadata of the seurat object of + NOTE: the cell annotation should be given as a metadata of the seurat object of scRNA-seq. Both seurat objects should have pca and umap dimemsion reduction done. From c0f50bd75b2ad71c9bf22ade03ef701aa6e79914 Mon Sep 17 00:00:00 2001 From: wbaopaul Date: Mon, 8 Nov 2021 09:51:05 -0500 Subject: [PATCH 3/7] tf-idf renormization using all features --- scripts/src/dsAnalysis_utilities.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/scripts/src/dsAnalysis_utilities.R b/scripts/src/dsAnalysis_utilities.R index 8e0af88..cf40479 100644 --- a/scripts/src/dsAnalysis_utilities.R +++ b/scripts/src/dsAnalysis_utilities.R @@ -175,6 +175,11 @@ runSeurat_Atac <- function(mtx, npc = 50, top_variable_features = 0.2, verbose = FALSE, seed.use = 10, npc = npc) if(length(reg.var) > 0 ) seurat.obj = regress_on_pca(seurat.obj, reg.var) + if(norm_by == 'tf-idf'){ + ## redo normalization using all features + mtx.norm = TF_IDF(mtx) + seurat.obj[[assay]]@data = mtx.norm + } return(seurat.obj) } From 7cf6aa3a62475c73bb97129588c771997b759be5 Mon Sep 17 00:00:00 2001 From: wbaopaul Date: Wed, 10 Nov 2021 16:37:28 -0500 Subject: [PATCH 4/7] correct a bug in report module --- configure_system.txt | 6 +++--- scATAC-pro | 2 +- scripts/report.sh | 9 +++++++-- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/configure_system.txt b/configure_system.txt index 673119c..1531194 100644 --- a/configure_system.txt +++ b/configure_system.txt @@ -7,9 +7,9 @@ ## using the 'which' command ####################################################################### INSTALL_PREFIX = /mnt/isilon/tan_lab/yuw1/local_tools/bin -PYTHON_PATH = /cm/shared/easybuild/software/Python/3.8.2-GCCcore-9.3.0/bin +PYTHON_PATH = /cm/local/apps/python37/bin R_PATH = /cm/shared/apps_chop/R/4.0.5/lib64/R/bin -SAMTOOLS_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin/samtools-1.9 +SAMTOOLS_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin DEEPTOOLS_PATH = /home/yuw1/.local/bin BEDTOOLS_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin/bedtools2/bin TABIX_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin/tabix @@ -17,7 +17,7 @@ BWA_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin/bwa-0.7.17 MACS2_PATH = /home/yuw1/.local/bin PERL_PATH = /usr/bin CUTADAPT_PATH = /home/yuw1/.local/bin -TRIM_GALORE_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin/TrimGalore-0.6.3 +TRIM_GALORE_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin HINT_PATH = /home/yuw1/.local/bin TRIMMOMATIC_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin/Trimmomatic-0.39 GEM_PATH = /mnt/isilon/tan_lab/yuw1/local_tools/bin/gem diff --git a/scATAC-pro b/scATAC-pro index 0175a62..4c1546c 100755 --- a/scATAC-pro +++ b/scATAC-pro @@ -9,7 +9,7 @@ ######################### SOFT="scATAC-pro" -VERSION="1.4.0" +VERSION="1.4.1" function usage { echo -e "usage : $SOFT -s STEP -i INPUT -c CONFIG [-o] [-h] [-v] [-b]" diff --git a/scripts/report.sh b/scripts/report.sh index 35db874..c9e7b04 100755 --- a/scripts/report.sh +++ b/scripts/report.sh @@ -18,9 +18,14 @@ abs_out_dir=`cd ${OUTPUT_DIR}; pwd` curr_dir=`dirname $0` work_dir=`pwd` +configure_dir=`dirname ${2}` +configure_file=`basename ${2}` +abs_configure_dir=`cd ${configure_dir}; pwd` +abs_configure_file=${abs_configure_dir}/${configure_file} - +#${R_PATH}/Rscript --vanilla ${curr_dir}/src/render2report.R \ +# ${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir ${work_dir}/${2} ${R_PATH}/Rscript --vanilla ${curr_dir}/src/render2report.R \ - ${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir ${work_dir}/${2} + ${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir $abs_configure_file echo "Report generation Done!" From ed1ee2b3204f23fb272b4a4c032f8b17387e4878 Mon Sep 17 00:00:00 2001 From: wbaopaul Date: Tue, 23 Nov 2021 16:17:47 -0500 Subject: [PATCH 5/7] choose between bam and bampe in macs2 --- complete_update_history.md | 4 ++++ configure_user.txt | 2 +- scripts/call_peak.sh | 8 ++++++-- scripts/iter_peak.sh | 7 ++++++- scripts/src/integrate_mtx.R | 5 +++++ scripts/src/scATAC-pro_utils.R | 5 +++++ 6 files changed, 27 insertions(+), 4 deletions(-) diff --git a/complete_update_history.md b/complete_update_history.md index 2119089..977833d 100644 --- a/complete_update_history.md +++ b/complete_update_history.md @@ -1,4 +1,8 @@ ## Complete Update History +- Version 1.4.1 released + * update tutorial + * correst a bug relate to input filepath for report module + * record input file paths for the *integration* module - Version 1.4.0 released * new module added: labelTransfer (for cell annotation) from scRNA-seq - Version 1.3.1 released diff --git a/configure_user.txt b/configure_user.txt index a5cbe84..94a89f0 100644 --- a/configure_user.txt +++ b/configure_user.txt @@ -64,7 +64,7 @@ PEAK_CALLER = MACS2 ## one of MACS2, BIN, and COMBINED ## provided extra options for macs2 (NO NEED TO SPECIFY -t, -n, -f here), like -#MACS2_OPTS = -q 0.05 -g hs +#MACS2_OPTS = -q 0.05 -g hs ## (change to -g mm if you are using mouse data) ## or something like this to ignore building shifting model diff --git a/scripts/call_peak.sh b/scripts/call_peak.sh index dfa6134..2f7c5e1 100755 --- a/scripts/call_peak.sh +++ b/scripts/call_peak.sh @@ -25,8 +25,12 @@ if [ "${PEAK_CALLER}" = 'MACS2' ];then unset PYTHONPATH work_dir=${peaks_dir}/MACS2 mkdir -p $work_dir - ${MACS2_PATH}/macs2 callpeak -t $input_bam --outdir $work_dir -n $out_prefix -f BAM $MACS2_OPTS - #${MACS2_PATH}/macs2 callpeak -t $input_bam --outdir $peaks_dir -f BAM $MACS2_OPTS --nomodel --extsize 147 + + bamPE="BAM" + if [[ "$isSingleEnd"="FALSE" ]]; then + bamPE="BAMPE" + fi + ${MACS2_PATH}/macs2 callpeak -t $input_bam --outdir $work_dir -n $out_prefix -f $bamPE $MACS2_OPTS ## remove peaks whose chromosome is not list in the chrom_size file ${R_PATH}/Rscript --vanilla ${curr_dir}/src/rmRedundantPeaks.R ${work_dir}/${out_prefix}_peaks.narrowPeak \ diff --git a/scripts/iter_peak.sh b/scripts/iter_peak.sh index 3846398..a7a5e50 100755 --- a/scripts/iter_peak.sh +++ b/scripts/iter_peak.sh @@ -40,10 +40,15 @@ ${PERL_PATH}/perl ${curr_dir}/src/split_bam2clusters0.pl --cluster_file ${output ## call peaks per cluster unset PYTHONHOME unset PYTHONPATH +samPE="SAM" +if [[ "$isSingleEnd"="FALSE" ]]; then + samPE="SAMPE" +fi + for input_sam0 in $(find $output_dir -name *.sam); do pre=$(basename $input_sam0) pre=${pre/.sam/} - ${MACS2_PATH}/macs2 callpeak -t $input_sam0 --outdir $output_dir -n $pre -f SAM $MACS2_OPTS & + ${MACS2_PATH}/macs2 callpeak -t $input_sam0 --outdir $output_dir -n $pre -f $SAMPE $MACS2_OPTS & done wait diff --git a/scripts/src/integrate_mtx.R b/scripts/src/integrate_mtx.R index cda6b8c..738dbbd 100644 --- a/scripts/src/integrate_mtx.R +++ b/scripts/src/integrate_mtx.R @@ -53,6 +53,11 @@ seurat.obj <- run_integration(mtx_list, integrate_by = integrate_by, max_depth = 50000, reg.var = 'nCount_ATAC', resolution = 0.6) +# record input sample path as metadata to the seurat object +dpath = data.table('sample' = paste0('sample', 1:len), + 'sample_path' = mtx_files) +setkey(dpath, sample) +seurat.obj$sample_path = dpath[J(seurat.obj$sample)]$sample_path saveRDS(seurat.obj, file = paste0(output_dir, '/seurat_obj_', integrate_by, '.rds')) diff --git a/scripts/src/scATAC-pro_utils.R b/scripts/src/scATAC-pro_utils.R index 8e0af88..cf40479 100644 --- a/scripts/src/scATAC-pro_utils.R +++ b/scripts/src/scATAC-pro_utils.R @@ -175,6 +175,11 @@ runSeurat_Atac <- function(mtx, npc = 50, top_variable_features = 0.2, verbose = FALSE, seed.use = 10, npc = npc) if(length(reg.var) > 0 ) seurat.obj = regress_on_pca(seurat.obj, reg.var) + if(norm_by == 'tf-idf'){ + ## redo normalization using all features + mtx.norm = TF_IDF(mtx) + seurat.obj[[assay]]@data = mtx.norm + } return(seurat.obj) } From 60f648f03e91767fed568a2daac07ceb76d0a97b Mon Sep 17 00:00:00 2001 From: wbaopaul Date: Tue, 23 Nov 2021 16:33:08 -0500 Subject: [PATCH 6/7] typo --- scripts/iter_peak.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/scripts/iter_peak.sh b/scripts/iter_peak.sh index a7a5e50..67094c3 100755 --- a/scripts/iter_peak.sh +++ b/scripts/iter_peak.sh @@ -48,7 +48,7 @@ fi for input_sam0 in $(find $output_dir -name *.sam); do pre=$(basename $input_sam0) pre=${pre/.sam/} - ${MACS2_PATH}/macs2 callpeak -t $input_sam0 --outdir $output_dir -n $pre -f $SAMPE $MACS2_OPTS & + ${MACS2_PATH}/macs2 callpeak -t $input_sam0 --outdir $output_dir -n $pre -f $samPE $MACS2_OPTS & done wait From 49a46a3a508944daaec364c94bb61b87bdbe3429 Mon Sep 17 00:00:00 2001 From: wbaopaul Date: Tue, 30 Nov 2021 12:39:54 -0500 Subject: [PATCH 7/7] update readme --- complete_update_history.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/complete_update_history.md b/complete_update_history.md index 977833d..5f38742 100644 --- a/complete_update_history.md +++ b/complete_update_history.md @@ -1,8 +1,9 @@ ## Complete Update History - Version 1.4.1 released * update tutorial - * correst a bug relate to input filepath for report module + * correct a bug relate to input filepath for report module * record input file paths for the *integration* module + * using BAMPE for macs2 for paired-end sequencing - Version 1.4.0 released * new module added: labelTransfer (for cell annotation) from scRNA-seq - Version 1.3.1 released