diff --git a/README.md b/README.md index fabc077..1b58303 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Installation ------------ - Note: It is not necessary to install scATAC-pro from scratch. You can use the docker or singularity version if you prefer (see [Run scATAC-pro through docker or singularity](#run-scATAC-pro-through-docker-or-singularity) ) -- Run the following command in your terminal, scATAC-pro will be installed in YOUR\_INSTALL\_PATH/scATAC-pro\_1.4.0 +- Run the following command in your terminal, scATAC-pro will be installed in YOUR\_INSTALL\_PATH/scATAC-pro\_1.4.1 @@ -49,7 +49,7 @@ Installation Updates ------------ - Now provide [scATAC-pro tutorial in R](https://scatacpro-in-r.netlify.app/index.html) for access QC metrics and perform downstream analysis -- Current version: 1.4.0 +- Current version: 1.4.1 - Recent updates * *labelTransfer*: new module added, to do label trasfer (for cell annotation) from cell annotation of scRNA-seq data. First construct a gene by cell activity matrix, then use *FindTransferAnchors* and *TransferData* function from Seurat R package to predicted cell type annotation from the cell annotaiton in scRNA-seq data. * *rmDoublets*: new module added, to remove potential doublets using [DoubletFinder](https://github.com/chris-mcginnis-ucsf/DoubletFinder) algorithm. @@ -297,11 +297,13 @@ cello('output/downstream_analysis/PEAK_CALLER/CELL_CALLER/VisCello_obj') ## laun Detailed Usage -------------- +See [here](https://scatacpro-in-r.netlify.app/note_module) or in your terminal: + $ scATAC-pro --help usage : scATAC-pro -s STEP -i INPUT -c CONFIG [-o] [-h] [-v] Use option -h|--help for more information - scATAC-pro 1.4.0 + scATAC-pro 1.4.1 --------------- OPTIONS @@ -445,7 +447,7 @@ Detailed Usage output: a updated seurat object for atac with the Predicted_Cell_Type as a metadata variable and an umap plot colored by Predicted_Cell_Type, saved in the same directory as the input atac seurat object. - *Note*: the cell annotation should be given as a metadata of the seurat object of + NOTE: the cell annotation should be given as a metadata of the seurat object of scRNA-seq. Both seurat objects should have pca and umap dimemsion reduction done. diff --git a/complete_update_history.md b/complete_update_history.md index 2119089..5f38742 100644 --- a/complete_update_history.md +++ b/complete_update_history.md @@ -1,4 +1,9 @@ ## Complete Update History +- Version 1.4.1 released + * update tutorial + * correct a bug relate to input filepath for report module + * record input file paths for the *integration* module + * using BAMPE for macs2 for paired-end sequencing - Version 1.4.0 released * new module added: labelTransfer (for cell annotation) from scRNA-seq - Version 1.3.1 released diff --git a/configure_user.txt b/configure_user.txt index a5cbe84..94a89f0 100644 --- a/configure_user.txt +++ b/configure_user.txt @@ -64,7 +64,7 @@ PEAK_CALLER = MACS2 ## one of MACS2, BIN, and COMBINED ## provided extra options for macs2 (NO NEED TO SPECIFY -t, -n, -f here), like -#MACS2_OPTS = -q 0.05 -g hs +#MACS2_OPTS = -q 0.05 -g hs ## (change to -g mm if you are using mouse data) ## or something like this to ignore building shifting model diff --git a/scATAC-pro b/scATAC-pro index 0175a62..4c1546c 100755 --- a/scATAC-pro +++ b/scATAC-pro @@ -9,7 +9,7 @@ ######################### SOFT="scATAC-pro" -VERSION="1.4.0" +VERSION="1.4.1" function usage { echo -e "usage : $SOFT -s STEP -i INPUT -c CONFIG [-o] [-h] [-v] [-b]" diff --git a/scripts/bam2qc.sh b/scripts/bam2qc.sh index b2b10b9..29c998d 100755 --- a/scripts/bam2qc.sh +++ b/scripts/bam2qc.sh @@ -24,14 +24,14 @@ if [[ "$isort" != "SO:coordinate" ]]; then echo "Sorting bam file" mkdir -p ${mapRes_dir}/tmp - ${SAMTOOLS_PATH}/samtools sort -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam $bam_file + ${SAMTOOLS_PATH}/samtools sort -m 2G -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam $bam_file ## to mark duplicates ${SAMTOOLS_PATH}/samtools fixmate -@ $ncore -m ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam rm ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam # Markdup needs position order - ${SAMTOOLS_PATH}/samtools sort -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam + ${SAMTOOLS_PATH}/samtools sort -m 2G -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam rm ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam ## mark duplicates diff --git a/scripts/call_peak.sh b/scripts/call_peak.sh index 800e923..4e9dde0 100755 --- a/scripts/call_peak.sh +++ b/scripts/call_peak.sh @@ -33,8 +33,6 @@ if [ "${PEAK_CALLER}" = 'MACS2' ];then ${MACS2_PATH}/macs2 callpeak -t $input_bam --outdir $work_dir -n $out_prefix -f BAMPE $MACS2_OPTS fi - #${MACS2_PATH}/macs2 callpeak -t $input_bam --outdir $peaks_dir -f BAM $MACS2_OPTS --nomodel --extsize 147 - ## remove peaks whose chromosome is not list in the chrom_size file ${R_PATH}/Rscript --vanilla ${curr_dir}/src/rmRedundantPeaks.R ${work_dir}/${out_prefix}_peaks.narrowPeak \ $CHROM_SIZE_FILE diff --git a/scripts/iter_peak.sh b/scripts/iter_peak.sh index 3846398..67094c3 100755 --- a/scripts/iter_peak.sh +++ b/scripts/iter_peak.sh @@ -40,10 +40,15 @@ ${PERL_PATH}/perl ${curr_dir}/src/split_bam2clusters0.pl --cluster_file ${output ## call peaks per cluster unset PYTHONHOME unset PYTHONPATH +samPE="SAM" +if [[ "$isSingleEnd"="FALSE" ]]; then + samPE="SAMPE" +fi + for input_sam0 in $(find $output_dir -name *.sam); do pre=$(basename $input_sam0) pre=${pre/.sam/} - ${MACS2_PATH}/macs2 callpeak -t $input_sam0 --outdir $output_dir -n $pre -f SAM $MACS2_OPTS & + ${MACS2_PATH}/macs2 callpeak -t $input_sam0 --outdir $output_dir -n $pre -f $samPE $MACS2_OPTS & done wait diff --git a/scripts/mapping.sh b/scripts/mapping.sh index b481e9a..1cb80ea 100755 --- a/scripts/mapping.sh +++ b/scripts/mapping.sh @@ -28,7 +28,7 @@ echo "Sorting bam file" ncore=$(nproc --all) ncore=$(($ncore - 1)) mkdir -p ${mapRes_dir}/tmp -${SAMTOOLS_PATH}/samtools sort -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.bam +${SAMTOOLS_PATH}/samtools sort -m 2G -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.bam rm ${mapRes_dir}/${OUTPUT_PREFIX}.bam ## to mark duplicates @@ -36,7 +36,7 @@ ${SAMTOOLS_PATH}/samtools fixmate -@ $ncore -m ${mapRes_dir}/${OUTPUT_PREFIX}.so rm ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam # Markdup needs position order -${SAMTOOLS_PATH}/samtools sort -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam +${SAMTOOLS_PATH}/samtools sort -m 2G -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam rm ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam ## mark duplicates diff --git a/scripts/report.sh b/scripts/report.sh index 35db874..c9e7b04 100755 --- a/scripts/report.sh +++ b/scripts/report.sh @@ -18,9 +18,14 @@ abs_out_dir=`cd ${OUTPUT_DIR}; pwd` curr_dir=`dirname $0` work_dir=`pwd` +configure_dir=`dirname ${2}` +configure_file=`basename ${2}` +abs_configure_dir=`cd ${configure_dir}; pwd` +abs_configure_file=${abs_configure_dir}/${configure_file} - +#${R_PATH}/Rscript --vanilla ${curr_dir}/src/render2report.R \ +# ${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir ${work_dir}/${2} ${R_PATH}/Rscript --vanilla ${curr_dir}/src/render2report.R \ - ${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir ${work_dir}/${2} + ${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir $abs_configure_file echo "Report generation Done!" diff --git a/scripts/src/dsAnalysis_utilities.R b/scripts/src/dsAnalysis_utilities.R index 8e0af88..cf40479 100644 --- a/scripts/src/dsAnalysis_utilities.R +++ b/scripts/src/dsAnalysis_utilities.R @@ -175,6 +175,11 @@ runSeurat_Atac <- function(mtx, npc = 50, top_variable_features = 0.2, verbose = FALSE, seed.use = 10, npc = npc) if(length(reg.var) > 0 ) seurat.obj = regress_on_pca(seurat.obj, reg.var) + if(norm_by == 'tf-idf'){ + ## redo normalization using all features + mtx.norm = TF_IDF(mtx) + seurat.obj[[assay]]@data = mtx.norm + } return(seurat.obj) } diff --git a/scripts/src/integrate_mtx.R b/scripts/src/integrate_mtx.R index cda6b8c..738dbbd 100644 --- a/scripts/src/integrate_mtx.R +++ b/scripts/src/integrate_mtx.R @@ -53,6 +53,11 @@ seurat.obj <- run_integration(mtx_list, integrate_by = integrate_by, max_depth = 50000, reg.var = 'nCount_ATAC', resolution = 0.6) +# record input sample path as metadata to the seurat object +dpath = data.table('sample' = paste0('sample', 1:len), + 'sample_path' = mtx_files) +setkey(dpath, sample) +seurat.obj$sample_path = dpath[J(seurat.obj$sample)]$sample_path saveRDS(seurat.obj, file = paste0(output_dir, '/seurat_obj_', integrate_by, '.rds')) diff --git a/scripts/src/scATAC-pro_utils.R b/scripts/src/scATAC-pro_utils.R index 8e0af88..cf40479 100644 --- a/scripts/src/scATAC-pro_utils.R +++ b/scripts/src/scATAC-pro_utils.R @@ -175,6 +175,11 @@ runSeurat_Atac <- function(mtx, npc = 50, top_variable_features = 0.2, verbose = FALSE, seed.use = 10, npc = npc) if(length(reg.var) > 0 ) seurat.obj = regress_on_pca(seurat.obj, reg.var) + if(norm_by == 'tf-idf'){ + ## redo normalization using all features + mtx.norm = TF_IDF(mtx) + seurat.obj[[assay]]@data = mtx.norm + } return(seurat.obj) }