16
16
import argparse
17
17
import time
18
18
from Bio import SeqIO
19
- import HCGB
20
- from HCGB .functions .aesthetics_functions import debug_message
21
- import HCGB .functions .time_functions as time_functions
22
19
from termcolor import colored
23
20
import pandas as pd
24
21
22
+ ## import HCGB
23
+ from HCGB .functions .aesthetics_functions import debug_message
24
+ import HCGB .functions .time_functions as HCGB_time
25
+ import HCGB .functions .files_functions as HCGB_files
26
+ import HCGB .functions .aesthetics_functions as HCGB_aes
27
+ import HCGB .functions .main_functions as HCGB_main
28
+
25
29
## my modules
26
30
import BacDup
27
31
import BacDup .scripts .gbf_parser as gbf_parser
@@ -47,9 +51,9 @@ def run_input(arg_dict):
47
51
exit ()
48
52
49
53
BacDup_functions .pipeline_header ('BacDup' )
50
- HCGB . functions . aesthetics_functions .boxymcboxface ("Preparing input files" )
54
+ HCGB_aes .boxymcboxface ("Preparing input files" )
51
55
print ("--------- Starting Process ---------" )
52
- time_functions .print_time ()
56
+ HCGB_time .print_time ()
53
57
54
58
## init time
55
59
start_time_total = time .time ()
@@ -60,7 +64,7 @@ def run_input(arg_dict):
60
64
61
65
## output folder
62
66
print ("\n + Create output folder(s):" )
63
- HCGB . functions . files_functions .create_folder (outdir )
67
+ HCGB_files .create_folder (outdir )
64
68
65
69
## set defaults
66
70
if not (arg_dict .assembly_level ):
@@ -76,7 +80,7 @@ def run_input(arg_dict):
76
80
else :
77
81
arg_dict .project = True
78
82
print ("+ Generate a directory containing information within the project folder provided" )
79
- final_dir = HCGB . functions . files_functions .create_subfolder ("info" , outdir )
83
+ final_dir = HCGB_files .create_subfolder ("info" , outdir )
80
84
81
85
## debug messages
82
86
if (arg_dict .debug ):
@@ -90,7 +94,7 @@ def run_input(arg_dict):
90
94
91
95
## get files
92
96
print ()
93
- HCGB . functions . aesthetics_functions .print_sepLine ("-" ,50 , False )
97
+ HCGB_aes .print_sepLine ("-" ,50 , False )
94
98
print ('+ Getting input information provided... ' )
95
99
print ('+ Several options available:' )
96
100
print ('\t * Single/Multiple Annotation file:' )
@@ -104,7 +108,7 @@ def run_input(arg_dict):
104
108
time .sleep (1 )
105
109
106
110
## time stamp
107
- start_time_partial = time_functions .timestamp (start_time_total )
111
+ start_time_partial = HCGB_time .timestamp (start_time_total )
108
112
109
113
#################################################
110
114
## Parse and obtain the type of input information provided
@@ -116,24 +120,24 @@ def run_input(arg_dict):
116
120
## 'plasmids_number','plasmids_ID'))
117
121
118
122
## time stamp
119
- start_time_partial = time_functions .timestamp (start_time_partial )
123
+ start_time_partial = HCGB_time .timestamp (start_time_partial )
120
124
121
125
## parse information accordingly
122
126
parse_information (arg_dict , df_accID , outdir )
123
127
124
128
### report generation
125
- HCGB . functions . aesthetics_functions .boxymcboxface ("Summarizing input files" )
126
- outdir_report = HCGB . functions . files_functions .create_subfolder ("report" , outdir )
129
+ HCGB_aes .boxymcboxface ("Summarizing input files" )
130
+ outdir_report = HCGB_files .create_subfolder ("report" , outdir )
127
131
128
- input_report = HCGB . functions . files_functions .create_subfolder ("input" , outdir_report )
132
+ input_report = HCGB_files .create_subfolder ("input" , outdir_report )
129
133
130
134
## add df_accID.loc[sample,] information as csv into input folder
131
135
df_accID .to_csv (os .path .join (input_report , 'info.csv' ), index = True , header = True )
132
136
133
137
## maybe add a summary of the files?
134
138
135
139
print ("\n *************** Finish *******************" )
136
- start_time_partial = time_functions .timestamp (start_time_total )
140
+ start_time_partial = HCGB_time .timestamp (start_time_total )
137
141
138
142
print ("+ Exiting Input module." )
139
143
return ()
@@ -142,8 +146,8 @@ def run_input(arg_dict):
142
146
def parse_information (arg_dict , df_accID , outdir ):
143
147
144
148
### Parse df_accID
145
- dict_input_folders = HCGB . functions . files_functions .outdir_project (outdir , arg_dict .project , df_accID , "input" , arg_dict .debug )
146
- dict_parse_folders = HCGB . functions . files_functions .outdir_project (outdir , arg_dict .project , df_accID , "parse" , arg_dict .debug )
149
+ dict_input_folders = HCGB_files .outdir_project (outdir , arg_dict .project , df_accID , "input" , arg_dict .debug )
150
+ dict_parse_folders = HCGB_files .outdir_project (outdir , arg_dict .project , df_accID , "parse" , arg_dict .debug )
147
151
148
152
## debug messages
149
153
if (arg_dict .debug ):
@@ -170,34 +174,34 @@ def parse_information(arg_dict, df_accID, outdir):
170
174
print ()
171
175
print ("\t + Parsing sample: " + sample )
172
176
173
- if (not HCGB . functions . files_functions . is_non_zero_file (parse_timestamp ) and not HCGB . functions . files_functions .is_non_zero_file (input_timestamp )):
177
+ if (not HCGB_files . is_non_zero_file (parse_timestamp ) and not HCGB_files .is_non_zero_file (input_timestamp )):
174
178
175
179
## TODO: Set threads to use in parallel
176
180
process_OK = parse_annot_file (sample , folder_input , df_accID .loc [sample , 'annot_file' ], dict_parse_folders [sample ], arg_dict .debug , df_accID .loc [sample , 'genome' ])
177
181
178
182
if (process_OK ):
179
183
180
184
## link or copy annotation file into folder_input
181
- HCGB . functions . files_functions .get_symbolic_link_file (df_accID .loc [sample , 'annot_file' ], folder_input )
185
+ HCGB_files .get_symbolic_link_file (df_accID .loc [sample , 'annot_file' ], folder_input )
182
186
183
187
## add df_accID.loc[sample,] information as csv into input folder
184
188
df_accID .loc [sample ,].to_csv (os .path .join (folder_input , 'info.csv' ), index = True , header = True )
185
189
186
190
## print time stamp
187
- time_functions .print_time_stamp (input_timestamp )
191
+ HCGB_time .print_time_stamp (input_timestamp )
188
192
189
193
## print time stamp
190
- time_functions .print_time_stamp (parse_timestamp )
194
+ HCGB_time .print_time_stamp (parse_timestamp )
191
195
else :
192
196
print (colored ("\t + Some error occurred for sample %s while parsing input options" % sample , 'red' ))
193
197
194
198
## print time stamp
195
- time_functions .print_time_stamp (os .path .join (folder_input , '.fail' ))
199
+ HCGB_time .print_time_stamp (os .path .join (folder_input , '.fail' ))
196
200
197
201
## print time stamp
198
- time_functions .print_time_stamp (os .path .join (dict_parse_folders [sample ], '.fail' ))
202
+ HCGB_time .print_time_stamp (os .path .join (dict_parse_folders [sample ], '.fail' ))
199
203
else :
200
- read_time = time_functions .read_time_stamp (parse_timestamp )
204
+ read_time = HCGB_time .read_time_stamp (parse_timestamp )
201
205
print (colored ("\t + Input parsing already available for sample %s [%s]" % (sample , read_time ), 'green' ))
202
206
print ()
203
207
@@ -250,7 +254,7 @@ def parse_annot_file(name, folder_out_input, annot_file, output_path, Debug, ref
250
254
251
255
elif (format == 'gff' ):
252
256
print (colored ('\t * GFF format file:.......[OK]' , 'green' ))
253
- if (HCGB . functions . files_functions .is_non_zero_file (ref_file )):
257
+ if (HCGB_files .is_non_zero_file (ref_file )):
254
258
return (gff_parser .gff_parser_caller (annot_file , ref_file , output_path , Debug ))
255
259
else :
256
260
print (colored ("ERROR: No genome reference file provided for this GFF annotation. Check input options provided." ,"red" ))
@@ -293,7 +297,7 @@ def parse_options(arg_dict):
293
297
BacDup_functions .file_readable_check (arg_dict .annot_file )
294
298
295
299
print (colored ('\t * Multiple annotation files provided .......[OK]' , 'green' ))
296
- dict_entries = HCGB . functions . main_functions .file2dictionary (arg_dict .annot_file , ',' )
300
+ dict_entries = HCGB_main .file2dictionary (arg_dict .annot_file , ',' )
297
301
298
302
## debug messages
299
303
if (arg_dict .debug ):
@@ -361,7 +365,7 @@ def parse_options(arg_dict):
361
365
BacDup_functions .file_readable_check (arg_dict .ref_file )
362
366
363
367
if (arg_dict .batch ):
364
- ref_entries = HCGB . functions . main_functions .file2dictionary (arg_dict .ref_file , ',' )
368
+ ref_entries = HCGB_main .file2dictionary (arg_dict .ref_file , ',' )
365
369
genome = ref_entries [name ]
366
370
else :
367
371
genome = arg_dict .ref_file
@@ -382,9 +386,9 @@ def parse_options(arg_dict):
382
386
elif (arg_dict .GenBank_id ):
383
387
## get database path
384
388
if (arg_dict .db_folder ):
385
- db_folder = HCGB . functions . files_functions .create_folder (os .path .abspath (arg_dict .db_folder ))
389
+ db_folder = HCGB_files .create_folder (os .path .abspath (arg_dict .db_folder ))
386
390
else :
387
- db_folder = HCGB . functions . files_functions .create_subfolder ("db" , os .path .abspath (arg_dict .output_folder ))
391
+ db_folder = HCGB_files .create_subfolder ("db" , os .path .abspath (arg_dict .output_folder ))
388
392
389
393
## debug messages
390
394
if (arg_dict .debug ):
@@ -410,7 +414,7 @@ def parse_options(arg_dict):
410
414
print ()
411
415
412
416
## call IDs into a list and create tmp folder
413
- strains2get = HCGB . functions . main_functions .readList_fromFile (arg_dict .GenBank_id )
417
+ strains2get = HCGB_main .readList_fromFile (arg_dict .GenBank_id )
414
418
strains2get = list (filter (None , strains2get ))
415
419
416
420
## debug messages
@@ -435,7 +439,7 @@ def parse_options(arg_dict):
435
439
## download
436
440
print (colored ('\t * A NCBI GenBank ID:.......[OK]' , 'green' ))
437
441
print ()
438
- HCGB . functions . aesthetics_functions .print_sepLine ("+" , 75 , False )
442
+ HCGB_aes .print_sepLine ("+" , 75 , False )
439
443
df_accID = BacDup .scripts .NCBI_downloader .NCBIdownload (arg_dict .GenBank_id , db_folder , arg_dict .debug )
440
444
441
445
## --------------------------------------- ##
@@ -457,7 +461,7 @@ def parse_options(arg_dict):
457
461
BacDup_functions .file_readable_check (arg_dict .tax_id )
458
462
459
463
## get IDs into a list
460
- taxIDs2get = HCGB . functions . main_functions .readList_fromFile (arg_dict .tax_id )
464
+ taxIDs2get = HCGB_main .readList_fromFile (arg_dict .tax_id )
461
465
462
466
else :
463
467
print (colored ('\t * A NCBI Taxonomy ID:.......[OK]' , 'green' ))
@@ -496,9 +500,9 @@ def parse_options(arg_dict):
496
500
## get database path
497
501
#################
498
502
if (arg_dict .db_folder ):
499
- db_folder = HCGB . functions . files_functions .create_folder (os .path .abspath (arg_dict .db_folder ))
503
+ db_folder = HCGB_files .create_folder (os .path .abspath (arg_dict .db_folder ))
500
504
else :
501
- db_folder = HCGB . functions . files_functions .create_subfolder ("db" , outdir )
505
+ db_folder = HCGB_files .create_subfolder ("db" , outdir )
502
506
503
507
## debug messages
504
508
if arg_dict .debug :
@@ -518,20 +522,25 @@ def parse_options(arg_dict):
518
522
519
523
## print list and dictionary of possible and selected taxIDs
520
524
outdir = os .path .abspath (arg_dict .output_folder )
521
- final_dir = HCGB . functions . files_functions .create_subfolder ("info" , outdir )
522
- input_info_dir = HCGB . functions . files_functions . create_subfolder ("input" , outdir )
523
- HCGB . functions . main_functions .printList2file (os .path .join (input_info_dir , 'Downloaded.txt' ), strains2get )
524
- HCGB . functions . main_functions .printList2file (os .path .join (input_info_dir , 'all_entries.txt' ), allstrains_available )
525
+ info_dir = HCGB_files .create_subfolder ("info" , outdir )
526
+ input_info_dir = HCGB_files . create_subfolder ("input" , info_dir )
527
+ HCGB_main .printList2file (os .path .join (input_info_dir , 'Downloaded.txt' ), strains2get )
528
+ HCGB_main .printList2file (os .path .join (input_info_dir , 'all_entries.txt' ), allstrains_available )
525
529
526
530
## save into file
527
531
file_info = os .path .join (input_info_dir , 'info.txt' )
528
532
529
533
## stop here if dry_run
530
534
if arg_dict .dry_run :
535
+ print ()
536
+ HCGB_aes .print_sepLine ("*" , 75 , False )
531
537
print ("ATTENTION: Dry run mode selected. Stopping the process here." )
532
- print ("All available entries listed and printed in file: " + os .path .join (input_info_dir , 'all_entries.txt' ))
533
- print ("Subset of entries generated and printed in file: " + os .path .join (input_info_dir , 'Downloaded.txt' ))
534
- print ("\n \n If random numbers selected, take into account re-running this process might produce different results." )
538
+ HCGB_aes .print_sepLine ("*" , 75 , False )
539
+ print ("+ All available entries listed and printed in file:\n \t " + os .path .join (input_info_dir , 'all_entries.txt' ))
540
+ print ("+ Subset of entries generated and printed in file:\n \t " + os .path .join (input_info_dir , 'Downloaded.txt' ))
541
+ print ("\n \n If random numbers selected, take into account re-running this process might produce different results.\n " )
542
+ HCGB_aes .print_sepLine ("*" , 75 , False )
543
+ print ()
535
544
exit ()
536
545
537
546
#################
0 commit comments