Skip to content

Commit

Permalink
update pipeline scripts
Browse files Browse the repository at this point in the history
  • Loading branch information
hiruna72 committed Mar 5, 2024
1 parent b05d5be commit d2f07fd
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -32,13 +32,14 @@ SAMTOOLS=samtools
SLOW5TOOLS=slow5tools
SAMTOOLS=samtools
MINIMAP2=minimap2
SIGFISH=sigfish
F5C=f5c
BGZIP=bgzip
TABIX=tabix

GUPPY="none"
BUTTERY_EEL_ENV_PATH="none"
REFERENCE=""
REFERENCE="/genome/hg38noAlt.fa"

MODEL_TO_USE=${R10_MODEL_SUP}
CHUNK_SIZE="--chunk_size 500"
Expand All @@ -54,6 +55,7 @@ SIGNAL_FILE="reads.blow5"
READ_ID="35142bde-548d-4f55-bf50-21c4cdd254da"
READ_REGION=${READ_ID}:1-500
REF_REGION="chr1:92,783,745-92,783,946"
# REF_REGION="chr1:92,783,725-92,783,840"
SIM_REGION="sim_ref:1-190"
SIG_SCALE="--sig_scale znorm"

Expand Down Expand Up @@ -221,6 +223,33 @@ simulate_ref_signal() {

}

simulate_ref_signal_given_seq() {
info "simulating using ${SIMULATING_PROFILE}"

test -d "$SIMULATE_REF_DIR" && rm -r "$SIMULATE_REF_DIR"
mkdir "$SIMULATE_REF_DIR" || die "Failed creating $SIMULATE_REF_DIR"

# echo ">sim_ref" > ${SIMULATE_REF_DIR}/ref.fasta
# echo "TTTTTCTTTTTCTTTTCTTTTCTTTTTTTTTTTTTTTTTTGACAGGGAGTCTCGCTCTGTCGCCAGACTGGAGTGCAGTGGCGACGTTGGTTCACTGCAACCTC" >> ${SIMULATE_REF_DIR}/ref.fasta

# ${SQUIGULATOR} --seed 1 --full-contigs --ideal-time --amp-noise 0 -x ${SIMULATING_PROFILE} ${SIMULATE_REF_DIR}/ref.fasta -o ${SIMULATE_REF_DIR}/sim.slow5 -c ${SIMULATE_REF_DIR}/sim.paf -a ${SIMULATE_REF_DIR}/sim.sam -q ${SIMULATE_REF_DIR}/sim.fasta || die "squigulator failed"
# ${SAMTOOLS} sort ${SIMULATE_REF_DIR}/sim.sam -o ${SIMULATE_REF_DIR}/sim.bam || die "samtools sort failed"
# ${SAMTOOLS} index ${SIMULATE_REF_DIR}/sim.bam || die "samtools index failed"

REGION_FILE="${SIMULATE_REF_DIR}/region.file"
echo ${REF_REGION} > ${REGION_FILE}
cat ${REGION_FILE}
${SAMTOOLS} faidx ${REFERENCE} --region-file ${REGION_FILE} -o ${SIMULATE_REF_DIR}/ref.fasta || die "samtools faidx failed"
sed -i "1s/.*/>sim_ref/" ${SIMULATE_REF_DIR}/ref.fasta

${SQUIGULATOR} --seed 10 -n 2 --ideal-time --amp-noise 0 -x ${SIMULATING_PROFILE} ${SIMULATE_REF_DIR}/ref.fasta -o ${SIMULATE_REF_DIR}/sim.slow5 -c ${SIMULATE_REF_DIR}/sim.paf -a ${SIMULATE_REF_DIR}/sim.sam -q ${SIMULATE_REF_DIR}/sim.fasta || die "squigulator failed"
${SAMTOOLS} sort ${SIMULATE_REF_DIR}/sim.sam -o ${SIMULATE_REF_DIR}/sim.bam || die "samtools sort failed"
${SAMTOOLS} index ${SIMULATE_REF_DIR}/sim.bam || die "samtools index failed"


}


plot_realign_and_sim() {
mkdir -p "${SQUIG_PLOT_DIR}" || die "Failed creating ${SQUIG_PLOT_DIR}"
info "plotting realigned and simulated signals"
Expand Down Expand Up @@ -331,6 +360,32 @@ plot_nanopolish_projected_signal(){

}

plot_sigfish() {
# set -x
# REGION_FILE="${SIMULATE_REF_DIR}/region.file"
# echo "chr1:92,767,585-92,786,990" > ${REGION_FILE}
# cat ${REGION_FILE}
# ${SAMTOOLS} faidx ${REFERENCE} --region-file ${REGION_FILE} -o ${OUTPUT_DIR}/ref.fasta || die "samtools faidx failed"
# sed -i "1s/.*/>sim_ref/" ${OUTPUT_DIR}/ref.fasta

# ${SIGFISH} dtw -t 8 ${OUTPUT_DIR}/ref.fasta ${SIGNAL_FILE} -a -q 100000| samtools sort - > ${OUTPUT_DIR}/sorted_sigfish.bam || die "sigfish failed"
# samtools index ${OUTPUT_DIR}/sorted_sigfish.bam || die "samtools index failed"

TRACK_COMMAND_FILE=${SQUIG_PLOT_DIR}/track_commands_${FUNCNAME[0]}.txt
rm -f ${TRACK_COMMAND_FILE}
echo "num_commands=2" > ${TRACK_COMMAND_FILE}
echo "plot_heights=*" >> ${TRACK_COMMAND_FILE}
echo "squigualiser plot_pileup --read_list readlist -f ${OUTPUT_DIR}/ref.fasta -s ${SIGNAL_FILE} -a ${OUTPUT_DIR}/sorted_sigfish.bam --region sim_ref:16161-16260 --tag_name Method:sigfish_dtw --plot_limit 6 ${SIG_SCALE} --profile ${PROFILE_TO_DETERMINE_BASE_SHIFT} " >> ${TRACK_COMMAND_FILE}
echo "squigualiser plot_pileup -f ${SIMULATE_REF_DIR}/ref.fasta -s ${SIMULATE_REF_DIR}/sim.slow5 -a ${SIMULATE_REF_DIR}/sim.bam --region ${SIM_REGION} --tag_name Squigualator_simulated_read --no_overlap --profile ${PROFILE_TO_DETERMINE_BASE_SHIFT}" >> ${TRACK_COMMAND_FILE}

cat ${TRACK_COMMAND_FILE}

TESTCASE="${MODEL_TO_USE}_sigfish_dtw_vs_sim"
OUTPUT="${OUTPUT_DIR}/testcase_${TESTCASE}"
${PLOT_TRACK_TOOL} --shared_x -f ${TRACK_COMMAND_FILE} -o ${SQUIG_PLOT_DIR}/${TESTCASE} --tag_name ${TESTCASE} || die "testcase:$TESTCASE failed"

}

## stage 1

# create_output_dir
Expand All @@ -352,11 +407,14 @@ plot_nanopolish_projected_signal(){
# minimap2_align
# realign
# simulate_ref_signal
# simulate_ref_signal_given_seq
# plot_realign_and_sim
# f5c_eventalign
# plot_eventalign_and_sim
# plot_eventalign_forward_reverse
# plot_realign_forward_reverse
# plot_nanopolish_projected_signal
# set -x
# plot_sigfish

info "success"
9 changes: 5 additions & 4 deletions test/data/raw/pipelines/pipeline_1/real_variant/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ F5C=f5c
BGZIP=bgzip
TABIX=tabix

GUPPY=
BUTTERY_EEL_ENV_PATH=
REFERENCE=
GUPPY="none"
BUTTERY_EEL_ENV_PATH="none"
REFERENCE="/genome/hg38noAlt.fa"

MODEL_TO_USE=${R10_MODEL_SUP}
CHUNK_SIZE="--chunk_size 500"
Expand Down Expand Up @@ -342,7 +342,8 @@ plot_realign_reverse() {
# simulate_ref_signal
# plot_realign_and_sim
# f5c_eventalign
# plot_eventalign_and_sim
set -x
plot_eventalign_and_sim
#plot_eventalign_reverse
#plot_realign_reverse

Expand Down
24 changes: 13 additions & 11 deletions test/data/raw/pipelines/pipeline_2/dna_r10.4.1_e8.2_400bps/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ TABIX=tabix

GUPPY="none"
BUTTERY_EEL_ENV_PATH="none"
REFERENCE=""
REFERENCE="/genome/hg38noAlt.fa"

MODEL_TO_USE=${R10_MODEL_SUP}
CHUNK_SIZE="--chunk_size 500"
Expand All @@ -53,7 +53,9 @@ SIGNAL_FILE="reads.blow5"

READ_ID="251256d6-1c5d-4756-91bf-8fa5dc6fd1c8"
READ_REGION=${READ_ID}:1-500
REF_REGION="chr1:92778190-92778210"
# REF_REGION="chr1:92778190-92778210"
# REF_REGION="chr1:92790650-92790700"
REF_REGION="chr1:92790650-92790820"
SIM_REGION="sim_ref:1-20"
SIG_SCALE="--sig_scale znorm"

Expand Down Expand Up @@ -254,10 +256,10 @@ f5c_eventalign() {
info "f5c eventalign..."

f5c index ${SEQUENCE_FILE} --slow5 ${SIGNAL_FILE} || die "f5c index failed"
# f5c eventalign -b ${MAPPED_BAM} -r ${SEQUENCE_FILE} -g ${REFERENCE} --slow5 ${SIGNAL_FILE} --sam | head -n -1 | ${SAMTOOLS} sort -o ${EVENTALIGN_BAM}
# ${SAMTOOLS} index ${EVENTALIGN_BAM} || die "samtools index failed"
f5c eventalign -b ${MAPPED_BAM} -r ${SEQUENCE_FILE} -g ${REFERENCE} --slow5 ${SIGNAL_FILE} --sam | ${SAMTOOLS} sort -o ${EVENTALIGN_BAM}
${SAMTOOLS} index ${EVENTALIGN_BAM} || die "samtools index failed"

f5c eventalign -b ${MAPPED_BAM} -r ${SEQUENCE_FILE} -g ${REFERENCE} --slow5 ${SIGNAL_FILE} -c -o ${EVENTALIGN_PAF}
# f5c eventalign -b ${MAPPED_BAM} -r ${SEQUENCE_FILE} -g ${REFERENCE} --slow5 ${SIGNAL_FILE} -c -o ${EVENTALIGN_PAF}

}

Expand All @@ -276,17 +278,17 @@ plot_eventalign_and_sim() {
mkdir -p "${SQUIG_PLOT_DIR}" || die "Failed creating ${SQUIG_PLOT_DIR}"
info "plotting eventalign and simulated signals"

sort -k6,6 -k8,8n ${EVENTALIGN_PAF} -o ${SORTED_EVENTALIGN_PAF}
cat ${SORTED_EVENTALIGN_PAF} | grep -f ${READ_ID_LIST} > ${EVENTALIGN_PAF_FILTERED}
bgzip -f ${EVENTALIGN_PAF_FILTERED}
tabix -0 -b 8 -e 9 -s 6 ${EVENTALIGN_PAF_FILTERED_GZ}
# sort -k6,6 -k8,8n ${EVENTALIGN_PAF} -o ${SORTED_EVENTALIGN_PAF}
# cat ${SORTED_EVENTALIGN_PAF} | grep -f ${READ_ID_LIST} > ${EVENTALIGN_PAF_FILTERED}
# bgzip -f ${EVENTALIGN_PAF_FILTERED}
# tabix -0 -b 8 -e 9 -s 6 ${EVENTALIGN_PAF_FILTERED_GZ}

TRACK_COMMAND_FILE="${SQUIG_PLOT_DIR}/track_commands_${FUNCNAME[0]}.txt"
rm -f ${TRACK_COMMAND_FILE}
echo "num_commands=2" > ${TRACK_COMMAND_FILE}
echo "plot_heights=*" >> ${TRACK_COMMAND_FILE}
# echo "squigualiser plot_pileup --bed ${METH_BED} -f ${REFERENCE} -s ${SIGNAL_FILE} -a ${EVENTALIGN_PAF_FILTERED_GZ} --region ${REF_REGION} --tag_name ${MODEL_TO_USE}_eventalign --plot_limit 20 --base_shift -5 ${SIG_SCALE}" >> ${TRACK_COMMAND_FILE}
echo "squigualiser plot_pileup --bed ${METH_BED} -f ${REFERENCE} -s ${SIGNAL_FILE} -a ${EVENTALIGN_PAF_FILTERED_GZ} --region ${REF_REGION} --tag_name ${MODEL_TO_USE}_eventalign --plot_limit 20 --profile ${PROFILE_TO_DETERMINE_BASE_SHIFT} ${SIG_SCALE}" >> ${TRACK_COMMAND_FILE}
echo "squigualiser plot_pileup -l readlist --bed ${METH_BED} -f ${REFERENCE} -s ${SIGNAL_FILE} -a ${EVENTALIGN_BAM} --region ${REF_REGION} --tag_name supplementary_file --plot_limit 20 --profile ${PROFILE_TO_DETERMINE_BASE_SHIFT} ${SIG_SCALE}" >> ${TRACK_COMMAND_FILE}
echo "squigualiser plot_pileup -f ${SIMULATE_REF_DIR}/ref.fasta -s ${SIMULATE_REF_DIR}/sim.slow5 -a ${SIMULATE_REF_DIR}/sim.bam --region ${SIM_REGION} --tag_name ${MODEL_TO_USE}_sim_read --no_overlap --profile ${PROFILE_TO_DETERMINE_BASE_SHIFT}" >> ${TRACK_COMMAND_FILE}

cat ${TRACK_COMMAND_FILE}
Expand Down Expand Up @@ -321,6 +323,6 @@ plot_eventalign_and_sim() {
# plot_realign_and_sim
# f5c_eventalign
# f5c_methylation
#plot_eventalign_and_sim
plot_eventalign_and_sim

info "success"
2 changes: 1 addition & 1 deletion test/test_metric.sh
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ RAW_DIR="${REL_PATH}/data/raw/pipelines/pipeline_0/dna_r10.4.1_e8.2_400bps"
[ "${HUMAN_GENOME}" ] || die "Please set the env variable to the human genome path. export HUMAN_GENOME=path/to/file"
GENOME="${HUMAN_GENOME}"
REGION="--region chr1:92,783,745-92,783,946"
PROFILE="--profile guppy_dna_r10.4.1_e8.2_400bps_sup"
PROFILE="--profile guppy_dna_r10.4.1_e8.2_400bps_sup" #should have used different profiles for different alignments
TESTCASE=50.1
info "testcase:$TESTCASE - help"
ex && die "testcase:$TESTCASE failed"
Expand Down

0 comments on commit d2f07fd

Please sign in to comment.