Skip to content

Commit

Permalink
tests
Browse files Browse the repository at this point in the history
  • Loading branch information
aerijman committed Dec 20, 2024
1 parent 326b571 commit d13f6f0
Showing 1 changed file with 70 additions and 21 deletions.
91 changes: 70 additions & 21 deletions run_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,44 +2,93 @@ pwd=$(pwd)
tmp="${pwd}/test_data/tmp"
mkdir ${tmp}

# make 151 nt-long bam and fastq files #
# -------------
# make 151 nt-long bam and fastq files WITH simulated conversions #
# --------------------------------------------------------------- #

ln -s ${pwd}/test_data/emseq-test_R*.fastq ${tmp}/

cat <(echo -e "@HD\tVN:1.6\tSO:unsorted\n@RG\tID:test1\tSM:testsample1") <(samtools sort ${tmp}/emseq-test_R1.fastq | samtools view | awk 'BEGIN{OFS="\t"}{$2=77; print $0"\tBC:Z:CGTCAAGA-GGGTTGTT\tRG:Z:NS500.4"}') <(samtools sort ${tmp}/emseq-test_R1.fastq | samtools view | awk 'BEGIN{OFS="\t"}{$2=141; print $0"\tBC:Z:CGTCAAGA-GGGTTGTT\tRG:Z:NS500.4"}') | samtools view -ho ${tmp}/emseq-test.u.bam
mkfifo tmp_fq
paste -d "\n" <(samtools sort ${tmp}/emseq-test_R1.fastq | samtools view | awk 'BEGIN{OFS="\t"}{$2=77; print $0"\tBC:Z:CGTCAAGA-GGGTTGTT\tRG:Z:NS500.4"}') <(samtools sort ${tmp}/emseq-test_R2.fastq | samtools view | awk 'BEGIN{OFS="\t"}{$2=141; print $0"\tBC:Z:CGTCAAGA-GGGTTGTT\tRG:Z:NS500.4"}') > tmp_fq &
tmp_fq_pid=$!

cat <(echo -e "@HD\tVN:1.6\tSO:unsorted\n@RG\tID:test1\tSM:testsample1") <(cat tmp_fq | \
awk '
function modify_base(base, next_base) {
if (base == "C") {
if (next_base == "G") {
return (rand() < 0.2) ? "T" : "C";
} else {
return "T";
}
} return base;}
BEGIN { srand() } {
modified_seq =""
for (i = 1; i <= length($10); i++) {
base = substr($10, i, 1);
next_base = (i < length($10)) ? substr($10, i + 1, 1) : "";
modified_seq = modified_seq modify_base(base, next_base);
}
$10 = modified_seq
print $0; }' | tr " " "\t") | \
samtools view -h -o ${tmp}/emseq-test.u.bam

wait $tmp_fq_pid
rm tmp_fq


# make genome and index #
# ---------------------

echo ">chr_Human_autosome_chr1" > ${tmp}/reference.fasta
samtools view ${tmp}/emseq-test.u.bam | shuf | awk 'BEGIN{ srand() } {
if ($10 ~ /GC/) {
while ($10 ~ /GC/) {
if (rand() > 0.5) {
gsub(/GC/, "GT")
}
}
}
seq = seq""$10
}END{ print seq }' | fold -w60 >> ${tmp}/reference.fasta
gen_rand_seq() {
length=$1
tr -dc 'ACGT' < /dev/urandom | head -c ${length}
}
export -f gen_rand_seq
revcomp() {
echo $1 | rev | tr "[ATCGNatcgn]" "[TAGCNtagcn]"
}
export -f revcomp


######################## bwa
echo ">chr_Human_autosome_chr1" > ${tmp}/reference.fa

samtools view ${tmp}/emseq-test.u.bam | \
cut -f10 | paste - - | \
awk 'BEGIN{srand()}{
min=20;
max=700;
random_number = int(min + rand() * (max - min + 1));
r=system("gen_rand_seq " random_number);
read2=system("revcomp " $2);
# read2=$2
print $1""r""read2
}' | tr -d "\n" | fold -w60 >> ${tmp}/reference.fa
#| shuf | tr -d "\n" | fold -w60 >> ${tmp}/reference.fa

# make 76 nt-long fastq and bam files #
# -------------
for rn in 1 2; do cat ${tmp}/emseq-test_R${rn}.fastq | awk '{if (NR%4==2 || NR%4==0) {print substr($0,1,76)} else {print $0} }' > ${tmp}/emseq-test_76.R${rn}.fastq; done

cat <(echo -e "@HD\tVN:1.6\tSO:unsorted\n@RG\tID:test1\tSM:testsample1") <(samtools sort ${tmp}/emseq-test_76.R1.fastq | samtools view | awk 'BEGIN{OFS="\t"}{$2=77; print $0"\tBC:Z:CGTCAAGA-GGGTTGTT\tRG:Z:NS500.4"}') <(samtools sort ${tmp}/emseq-test_76.R1.fastq | samtools view | awk 'BEGIN{OFS="\t"}{$2=141; print $0"\tBC:Z:CGTCAAGA-GGGTTGTT\tRG:Z:NS500.4"}') | samtools view -ho ${tmp}/emseq-test.76.u.bam

# make 76 nt-long fastq and bam files INCLUDING simulated conversions #
# ------------------------------------------------------------------- #

cat <(samtools view -H ${tmp}/emseq-test.u.bam) <(samtools view ${tmp}/emseq-test.u.bam | awk 'BEGIN{OFS="\t"}{$10=substr($10,1,76); $11=substr($11,1,76); print $0}') > ${tmp}/emseq-test_76.u.bam

BARCODE=$(samtools view -f 77 ${tmp}/emseq-test.u.bam | grep -o "BC:Z:[A-Z,+,-]*" | sort | uniq -c | head | awk '{gsub("BC:Z","N:0",$2); print $2}' )
samtools view -f 77 ${tmp}/emseq-test_76.u.bam | samtools fastq | sed "s/\/1$/ 1:$BARCODE/" > ${tmp}/emseq-test_76.R1.fastq
samtools view -f 141 ${tmp}/emseq-test_76.u.bam | samtools fastq | sed "s/\/2$/ 2:$BARCODE/" > ${tmp}/emseq-test_76.R2.fastq


# make gzip fastq files. #
# -------------
# ---------------------- #

gzip -c ${tmp}/emseq-test_76.R1.fastq > ${tmp}/emseq-test_76.R1.fastq.gz
gzip -c ${tmp}/emseq-test_76.R2.fastq > ${tmp}/emseq-test_76.R2.fastq.gz



genome_path=${tmp}/reference.fasta
# run tests #
# --------- #

genome_path=${tmp}/reference.fa

#. /mnt/home/aerijman/miniconda3/bin/activate
. ~/miniconda3/bin/activate
Expand Down

0 comments on commit d13f6f0

Please sign in to comment.