-
Notifications
You must be signed in to change notification settings - Fork 0
/
genome_assembly_size_cluster.sh
72 lines (65 loc) · 2.51 KB
/
genome_assembly_size_cluster.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Helpers for genome size estimation from multiple sequencing read sets: build and write the #SBATCH cluster header.
def __init__(self, name, queue, threads, core, memory, user, change, mail,filename):
    """Store the job settings used to build the ``#SBATCH`` header.

    Args:
        name: job name (emitted after ``#SBATCH -J``).
        queue: partition/queue name (emitted after ``#SBATCH -p``).
        threads: value emitted after ``#SBATCH -n``.
        core: value emitted after ``#SBATCH -c``.
        memory: value emitted after ``#SBATCH --mem=``.
        user: user name; stored as given.
        change: working directory the job should run in.
        mail: address for job notifications.
        filename: path of the file that is read and written by the
            other helpers.
    """
    self.name = name
    self.queue = queue
    self.threads = threads
    self.core = core
    self.memory = memory
    # Directory the object was created from.
    self.dir = os.getcwd()
    # Keep the path itself. The previous ``os.chroot(change)`` returned
    # None (so the directory was lost) and tried to re-root the whole
    # process, which requires root privileges and breaks later path lookups.
    self.change = change
    self.mail = mail
    self.user = user
    self.filename = filename
def read_file(self):
    """Check that ``self.filename`` does not already carry ``#SBATCH`` lines.

    The file is read as text; the lines are kept in ``self.filecontents``.
    For every line that does not start with ``#SBATCH`` a fixed marker
    string is printed.

    Raises:
        FileExistsError: if any line starts with ``#SBATCH``.
    """
    # Text mode: the previous binary mode yielded bytes, so comparing with
    # the str prefix '#SBATCH' raised TypeError on the first line. The
    # ``with`` block also guarantees the handle is closed.
    with open(self.filename, 'r') as handle:
        self.filecontents = handle.readlines()
    for line in self.filecontents:
        if line.startswith('#SBATCH'):
            raise FileExistsError('Already a configuration file')
        else:
            # NOTE(review): this literal has no placeholder; presumably the
            # file name was meant to be printed - confirm before changing.
            print(f'the_name_of_the_filename')
def getCluster(self):
    """Build, print and return the ``#SBATCH`` header for this job.

    Returns:
        str: one directive per line, without a trailing newline.
    """
    directives = [
        f'#SBATCH -J {self.name}',
        # Node-feature constraint; the previous text used ``-p`` and had an
        # unbalanced quote.
        '#SBATCH --constraint="snb|hsw"',
        f'#SBATCH -p {self.queue}',
        f'#SBATCH -n {self.threads}',
        f'#SBATCH -c {self.core}',
        f'#SBATCH --mem={self.memory}',
    ]
    # ``-D`` is the short form of the working-directory option. Skip it when
    # no directory is known so no "None" path is emitted.
    workdir = getattr(self, 'change', None)
    if workdir is not None:
        directives.append(f'#SBATCH -D {workdir}')
    directives.append(f'#SBATCH --mail-user={self.mail}')
    directives.append('#SBATCH --mail-type=END')
    header = '\n'.join(directives)
    # Still echo the header (the previous version only printed it), but also
    # return the text so callers can write it to a file.
    print(header)
    return header
def writeCluster(self):
    """Write the ``#SBATCH`` header into ``self.filename``.

    Existing file content is kept: a leading ``#!`` line stays first, then
    the header, then the directory stored in ``self.dir``, then the rest of
    the previous content. A missing file is created.
    """
    header = self.getCluster()
    try:
        with open(self.filename, 'r') as handle:
            existing = handle.read()
    except FileNotFoundError:
        existing = ''
    shebang = ''
    if existing.startswith('#!'):
        # The interpreter line must remain the first line of the file.
        shebang, _, existing = existing.partition('\n')
        shebang += '\n'
    # Opened for writing in text mode; the previous read-only binary mode
    # could not be written to, and the handle was stored under a misspelled
    # attribute.
    with open(self.filename, 'w') as handle:
        handle.write(shebang)
        handle.write(header + '\n')
        # NOTE(review): the original wrote the bare working directory here
        # (via a non-existent ``self.getcwd()``); ``self.dir`` holds that
        # value. Confirm whether a ``cd <dir>`` line was intended instead.
        handle.write(f'{self.dir}\n')
        handle.write(existing)
    print(f'the_cluster_file_for_the_configuration_run:{self.filename}')
# ---------------------------------------------------------------------------
# Genome-size estimation pipeline: jellyfish k-mer counts, k-mer histograms,
# and k-mer distribution plots, one run per line of the tag list.
#
# Inputs (read line by line and joined with `paste`):
#   tags.txt        one output tag per line
#   $fastq_files1   one input path per line, aligned with tags.txt
#   $fastq_files2   one input path per line, aligned with tags.txt
#
# By default every tool invocation is only printed, as before. Set
# dry_run=0 in the environment to execute the commands as well.
# ---------------------------------------------------------------------------
set -euo pipefail

# Placeholders: replace with real values before running.
# (No spaces around '='; with spaces the shell runs the name as a command.)
# NOTE(review): the original declared file_1.txt/file_2.txt here but pasted
# file1.txt/file2.txt below; the declared names are now used everywhere -
# confirm which spelling is correct.
fastq_files1=file_1.txt
fastq_files2=file_2.txt
tags_file=tags.txt
threads=NUM_THREADS
overlap=SET_THE_OVERLAP
memory=SET_MEMORY
jellyfish_dir=/PATH/TO/JELLYFISH
biopython_version="${biopython_version:-}"
dry_run="${dry_run:-1}"

# Print a command line; execute it too unless dry_run is 1.
run_cmd() {
  printf '%s\n' "$*"
  if [[ "$dry_run" != 1 ]]; then
    # stdin is detached so the tool cannot swallow the loop's input.
    "$@" </dev/null
  fi
}

mkdir -p counts
# NOTE(review): the original followed this with `mv counts`, which always
# fails (missing operand). Presumably `cd counts` was intended, but the list
# files are referenced relative to the launch directory, so the working
# directory is left unchanged - confirm.

# `pip install biopython=version` is not a valid requirement specifier;
# pin with '==' only when a version is supplied.
pip install "biopython${biopython_version:+==${biopython_version}}"

# Prepend instead of replacing PATH; otherwise `paste`, `mkdir`, ... are no
# longer found after this line.
export PATH="${jellyfish_dir}:${PATH}"

paste "$tags_file" "$fastq_files1" "$fastq_files2" \
  | while IFS=$'\t' read -r tag reads1 reads2; do
      [[ -n "$tag" ]] || continue
      run_cmd jellyfish count -m "$overlap" \
        -s "$memory" -t "$threads" -o "${tag}.jf" \
        -C "$reads1" "$reads2"
    done
echo "jellyfish_count_finished"

# Build the list of count files in tag order and overwrite any previous
# list. The old glob + '>>' version sorted by file name and accumulated
# across runs, so the lines did not stay aligned with tags.txt.
while IFS= read -r tag; do
  [[ -n "$tag" ]] || continue
  printf '%s\n' "${PWD}/${tag}.jf"
done < "$tags_file" > count_file.txt

paste count_file.txt "$tags_file" \
  | while IFS=$'\t' read -r jf_path tag; do
      [[ -n "$tag" ]] || continue
      run_cmd jellyfish histo -o "${tag}.histo" \
        -t "$threads" -l 1 \
        -h 100000000 -i 1 \
        "$jf_path"
    done
echo "histogram_files_prepared"

paste "$tags_file" "$fastq_files1" "$fastq_files2" \
  | while IFS=$'\t' read -r tag reads1 reads2; do
      [[ -n "$tag" ]] || continue
      run_cmd draw_kmer_distribution_from_fastq.py \
        -m 23 -t "$threads" -b -s "$memory" -e png \
        -o "$tag" -i "${reads1},${reads2}" \
        -w 10 -g 100 -d
    done
echo "cluster_genome_finished"