-
Notifications
You must be signed in to change notification settings - Fork 5
/
config.py
72 lines (66 loc) · 2.63 KB
/
config.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# Paths
# Directory names end with '/'. Names ending in '_path' (and the other empty strings)
# are empty placeholders in this module; nothing in this section fills them in.

# Script locations
scripts_dir = ""
R_script = ""

# Input directories
input_target_dir = ""  # Should be given by the user
input_ref_dir = ""  # Should be given by the user

# Main output directory
output_dir = "Syntracker_output/"  # Default output dir
main_output_path = ""

# Configuration files (current and old)
conf_file = "config.txt"
conf_file_path = ""
old_conf_file = "config_old.txt"
old_conf_file_path = ""

# Log file
logfile = "SynTracker_log.txt"
logfile_path = ""

# Central regions
central_regions_dir = "central_regions/"

# Combined targets and the combined renamed genomes file
combined_output_dir = "combined_targets/"
combined_output_path = ""
combined_renamed_genomes = "combined_renamed_genomes.fasta"
combined_renamed_genomes_file_path = ""

# Names dictionary tables
dictionary_table_full = "names_dictionary_full.tab"
dictionary_table_full_path = ""
sample_dictionary_table = "sample_names_dictionary.tab"
sample_dictionary_table_path = ""

# BLAST database and BLAST outputs
blast_db_dir = "blastDB/"
blast_db_path = ""
blast_db_file = "GroupsDB"
blast_db_file_path = ""
blast_out_dir = "blast_output/"
blastdbcmd_out_dir = "blastdbcmd_output/"

# R working directories
r_temp_dir = "R_temp/"
r_intermediate_objects_dir = "R_intermediate_objects/"

# Summary and final outputs
summary_output_dir = "summary_output/"
summary_output_path = ""
output_summary_file = "synteny_scores_per_region.csv"
output_summary_file_path = ""
final_output_dir = "final_output/"
# Central regions related parameters
region_length = 1000
flanking_length = 2000
# The full length is the region length plus two flanking lengths
full_length = 2 * flanking_length + region_length
# The minimal lengths are 90% of the corresponding lengths (float values)
minimal_flanking_length = 0.9 * flanking_length
minimal_full_length = 0.9 * full_length
# BLAST related parameters
# Minimal thresholds (coverage and identity look like percentages - TODO confirm)
minimal_coverage = 70
minimal_identity = 97
minimal_hits_num = 2
# Number of threads (presumably per BLAST run - TODO confirm against the caller)
blast_num_threads = 2
# Job-related parameters
cpu_num = 8
# R - related parameters
is_set_seed = True  # Whether to set a seed for the subsampling process (to have same results between different runs)
seed_num = 1
subsampling_lengths = [40, 60, 80, 100, 200]
# One output file name per subsampling length, in the same order as subsampling_lengths.
# Built with a comprehension so that no loop index is left behind in the module namespace.
subsampled_regions_file_names = [
    "avg_synteny_scores_" + str(length) + "_regions.csv"
    for length in subsampling_lengths
]
avg_all = False  # Whether to add non-subsampled output (average all the regions per pair of samples)
avg_all_file_name = "avg_synteny_scores_all_regions.csv"
# Run related parameters
running_mode = "new" # Mode can be 'new' or 'continue'
# NOTE(review): presumably records whether the merge of the targets has completed - confirm against its users
complete_target_merge = False
# A dictionary holding, for each ref-genome name, its input file path and a flag telling whether
# its processing has finished (0 - not processed yet, 1 - finished being processed).
# For example: genomes_dict['E_coli_K-12_MG1655']['input_file'] = 'full_path/E_coli_K-12_MG1655.fasta'
#              genomes_dict['E_coli_K-12_MG1655']['processed'] = 1
# It is empty when this module is loaded.
genomes_dict = {}
run_genomes_list = [] # A list of the genomes that should be processed in the current run