Skip to content

Commit

Permalink
merge v1.4.1
Browse files Browse the repository at this point in the history
  • Loading branch information
wbaopaul committed Dec 1, 2021
2 parents d265693 + 49a46a3 commit 227fd7a
Show file tree
Hide file tree
Showing 12 changed files with 45 additions and 15 deletions.
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ Installation
------------

- Note: It is not necessary to install scATAC-pro from scratch. You can use the docker or singularity version if you prefer (see [Run scATAC-pro through docker or singularity](#run-scATAC-pro-through-docker-or-singularity) )
- Run the following command in your terminal, scATAC-pro will be installed in YOUR\_INSTALL\_PATH/scATAC-pro\_1.4.0
- Run the following command in your terminal, scATAC-pro will be installed in YOUR\_INSTALL\_PATH/scATAC-pro\_1.4.1

<!-- -->

Expand All @@ -49,7 +49,7 @@ Installation
Updates
------------
- Now provide [scATAC-pro tutorial in R](https://scatacpro-in-r.netlify.app/index.html) for access QC metrics and perform downstream analysis
- Current version: 1.4.0
- Current version: 1.4.1
- Recent updates
* *labelTransfer*: new module added, to do label trasfer (for cell annotation) from cell annotation of scRNA-seq data. First construct a gene by cell activity matrix, then use *FindTransferAnchors* and *TransferData* function from Seurat R package to predicted cell type annotation from the cell annotaiton in scRNA-seq data.
* *rmDoublets*: new module added, to remove potential doublets using [DoubletFinder](https://github.com/chris-mcginnis-ucsf/DoubletFinder) algorithm.
Expand Down Expand Up @@ -297,11 +297,13 @@ cello('output/downstream_analysis/PEAK_CALLER/CELL_CALLER/VisCello_obj') ## laun
Detailed Usage
--------------

See [here](https://scatacpro-in-r.netlify.app/note_module) or in your terminal:

$ scATAC-pro --help
usage : scATAC-pro -s STEP -i INPUT -c CONFIG [-o] [-h] [-v]
Use option -h|--help for more information

scATAC-pro 1.4.0
scATAC-pro 1.4.1
---------------
OPTIONS

Expand Down Expand Up @@ -445,7 +447,7 @@ Detailed Usage
output: a updated seurat object for atac with the Predicted_Cell_Type as a metadata variable and
an umap plot colored by Predicted_Cell_Type, saved in the same directory as the input atac
seurat object.
*Note*: the cell annotation should be given as a metadata of the seurat object of
NOTE: the cell annotation should be given as a metadata of the seurat object of
scRNA-seq. Both seurat objects should have pca and umap dimemsion reduction
done.
Expand Down
5 changes: 5 additions & 0 deletions complete_update_history.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,9 @@
## Complete Update History
- Version 1.4.1 released
* update tutorial
* correct a bug relate to input filepath for report module
* record input file paths for the *integration* module
* using BAMPE for macs2 for paired-end sequencing
- Version 1.4.0 released
* new module added: labelTransfer (for cell annotation) from scRNA-seq
- Version 1.3.1 released
Expand Down
2 changes: 1 addition & 1 deletion configure_user.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ PEAK_CALLER = MACS2 ## one of MACS2, BIN, and COMBINED

## provided extra options for macs2 (NO NEED TO SPECIFY -t, -n, -f here), like

#MACS2_OPTS = -q 0.05 -g hs
#MACS2_OPTS = -q 0.05 -g hs
## (change to -g mm if you are using mouse data)

## or something like this to ignore building shifting model
Expand Down
2 changes: 1 addition & 1 deletion scATAC-pro
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
#########################

SOFT="scATAC-pro"
VERSION="1.4.0"
VERSION="1.4.1"

function usage {
echo -e "usage : $SOFT -s STEP -i INPUT -c CONFIG [-o] [-h] [-v] [-b]"
Expand Down
4 changes: 2 additions & 2 deletions scripts/bam2qc.sh
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ if [[ "$isort" != "SO:coordinate" ]]; then
echo "Sorting bam file"

mkdir -p ${mapRes_dir}/tmp
${SAMTOOLS_PATH}/samtools sort -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam $bam_file
${SAMTOOLS_PATH}/samtools sort -m 2G -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam $bam_file

## to mark duplicates
${SAMTOOLS_PATH}/samtools fixmate -@ $ncore -m ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam
rm ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam

# Markdup needs position order
${SAMTOOLS_PATH}/samtools sort -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam
${SAMTOOLS_PATH}/samtools sort -m 2G -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam
rm ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam

## mark duplicates
Expand Down
2 changes: 0 additions & 2 deletions scripts/call_peak.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@ if [ "${PEAK_CALLER}" = 'MACS2' ];then
${MACS2_PATH}/macs2 callpeak -t $input_bam --outdir $work_dir -n $out_prefix -f BAMPE $MACS2_OPTS
fi

#${MACS2_PATH}/macs2 callpeak -t $input_bam --outdir $peaks_dir -f BAM $MACS2_OPTS --nomodel --extsize 147

## remove peaks whose chromosome is not list in the chrom_size file
${R_PATH}/Rscript --vanilla ${curr_dir}/src/rmRedundantPeaks.R ${work_dir}/${out_prefix}_peaks.narrowPeak \
$CHROM_SIZE_FILE
Expand Down
7 changes: 6 additions & 1 deletion scripts/iter_peak.sh
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,15 @@ ${PERL_PATH}/perl ${curr_dir}/src/split_bam2clusters0.pl --cluster_file ${output
## call peaks per cluster
unset PYTHONHOME
unset PYTHONPATH
samPE="SAM"
if [[ "$isSingleEnd"="FALSE" ]]; then
samPE="SAMPE"
fi

for input_sam0 in $(find $output_dir -name *.sam); do
pre=$(basename $input_sam0)
pre=${pre/.sam/}
${MACS2_PATH}/macs2 callpeak -t $input_sam0 --outdir $output_dir -n $pre -f SAM $MACS2_OPTS &
${MACS2_PATH}/macs2 callpeak -t $input_sam0 --outdir $output_dir -n $pre -f $samPE $MACS2_OPTS &
done
wait

Expand Down
4 changes: 2 additions & 2 deletions scripts/mapping.sh
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,15 @@ echo "Sorting bam file"
ncore=$(nproc --all)
ncore=$(($ncore - 1))
mkdir -p ${mapRes_dir}/tmp
${SAMTOOLS_PATH}/samtools sort -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.bam
${SAMTOOLS_PATH}/samtools sort -m 2G -T ${mapRes_dir}/tmp/ -@ $ncore -n -o ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.bam
rm ${mapRes_dir}/${OUTPUT_PREFIX}.bam

## to mark duplicates
${SAMTOOLS_PATH}/samtools fixmate -@ $ncore -m ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam
rm ${mapRes_dir}/${OUTPUT_PREFIX}.sorted.bam

# Markdup needs position order
${SAMTOOLS_PATH}/samtools sort -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam
${SAMTOOLS_PATH}/samtools sort -m 2G -@ $ncore -T ${mapRes_dir}/tmp/ -o ${mapRes_dir}/${OUTPUT_PREFIX}.positionsort0.bam ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam
rm ${mapRes_dir}/${OUTPUT_PREFIX}.fixmate.bam

## mark duplicates
Expand Down
9 changes: 7 additions & 2 deletions scripts/report.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,14 @@ abs_out_dir=`cd ${OUTPUT_DIR}; pwd`

curr_dir=`dirname $0`
work_dir=`pwd`
configure_dir=`dirname ${2}`
configure_file=`basename ${2}`
abs_configure_dir=`cd ${configure_dir}; pwd`
abs_configure_file=${abs_configure_dir}/${configure_file}


#${R_PATH}/Rscript --vanilla ${curr_dir}/src/render2report.R \
# ${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir ${work_dir}/${2}

${R_PATH}/Rscript --vanilla ${curr_dir}/src/render2report.R \
${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir ${work_dir}/${2}
${abs_report_dir}/scATAC-pro_report_${OUTPUT_PREFIX}.html $abs_out_dir $abs_configure_file
echo "Report generation Done!"
5 changes: 5 additions & 0 deletions scripts/src/dsAnalysis_utilities.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ runSeurat_Atac <- function(mtx, npc = 50, top_variable_features = 0.2,
verbose = FALSE, seed.use = 10, npc = npc)
if(length(reg.var) > 0 ) seurat.obj = regress_on_pca(seurat.obj, reg.var)

if(norm_by == 'tf-idf'){
## redo normalization using all features
mtx.norm = TF_IDF(mtx)
seurat.obj[[assay]]@data = mtx.norm
}

return(seurat.obj)
}
Expand Down
5 changes: 5 additions & 0 deletions scripts/src/integrate_mtx.R
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,11 @@ seurat.obj <- run_integration(mtx_list, integrate_by = integrate_by,
max_depth = 50000, reg.var = 'nCount_ATAC',
resolution = 0.6)

# record input sample path as metadata to the seurat object
dpath = data.table('sample' = paste0('sample', 1:len),
'sample_path' = mtx_files)
setkey(dpath, sample)
seurat.obj$sample_path = dpath[J(seurat.obj$sample)]$sample_path

saveRDS(seurat.obj, file = paste0(output_dir, '/seurat_obj_', integrate_by, '.rds'))

Expand Down
5 changes: 5 additions & 0 deletions scripts/src/scATAC-pro_utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,11 @@ runSeurat_Atac <- function(mtx, npc = 50, top_variable_features = 0.2,
verbose = FALSE, seed.use = 10, npc = npc)
if(length(reg.var) > 0 ) seurat.obj = regress_on_pca(seurat.obj, reg.var)

if(norm_by == 'tf-idf'){
## redo normalization using all features
mtx.norm = TF_IDF(mtx)
seurat.obj[[assay]]@data = mtx.norm
}

return(seurat.obj)
}
Expand Down

0 comments on commit 227fd7a

Please sign in to comment.