-
Notifications
You must be signed in to change notification settings - Fork 0
/
pdists.go
149 lines (129 loc) · 6.08 KB
/
pdists.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
// Parallel Distances
//
// A utility for basic calculation of distances on allele profiles (in parallel), fast-matching, clustering
// and dendrogram generation
//
// Note: this utility is still under development.
package main
import (
"fmt"
"log"
"os"
"github.com/integrii/flaggy"
)
// Package-level settings. The values given here are the defaults; cli() binds
// most of them to command line flags, so they may be overwritten when the
// arguments are parsed.
var (
	// BUCKET_SCALE backs the "load-factor" flag of the distances subcommand.
	BUCKET_SCALE int = 3
	// FM_THREAD_LIMIT backs the "goroutine-limit" flag of the fast-match subcommand.
	FM_THREAD_LIMIT int64 = 100
	// COLUMN_DELIMITER backs the "column-delimiter" flag (a tab by default).
	COLUMN_DELIMITER = "\t"
	// NEWLINE_CHARACTER is not bound to any flag in cli().
	NEWLINE_CHARACTER = "\n"
	// MISSING_ALLELE_STRING backs the "missing-allele-character" flag.
	MISSING_ALLELE_STRING = "0"
	// INPUT_PROFILE backs the "input" flag shared by every subcommand.
	INPUT_PROFILE string = ""
	// OUTPUT_FILE backs the "output" flag shared by every subcommand.
	OUTPUT_FILE string = ""
	// REFERENCE_PROFILES backs the "reference" flag of the fast-match subcommand.
	REFERENCE_PROFILES string = ""
	// MATCH_THRESHOLD backs the "threshold" flag of the fast-match subcommand.
	MATCH_THRESHOLD float64 = 10
	// BUFFER_SIZE backs the "buffer-size" flag; the default is 4 times 4096.
	BUFFER_SIZE int = 16384
	// LINKAGE_METHOD backs the "linkage-method" flag of the tree subcommand.
	LINKAGE_METHOD int = 0

	// Subcommand handles. They are created in cli() and inspected in main()
	// to find out which subcommand the user invoked.
	distance_matrix, convert_matrix, fast_match, tree *flaggy.Subcommand
)

const (
	// version is the string reported through flaggy.SetVersion.
	version string = "0.0.2"

	// integer_required_distance_functions_threshold is compared in main()
	// against the selected distance function's assignment: for assignments
	// below this value, a match threshold smaller than 1 is rejected.
	integer_required_distance_functions_threshold = 2
)
func cli() {
flaggy.SetName("Parallel Distances")
flaggy.SetDescription("A program for getting distances between allelic profiles and creating distance matrices.")
flaggy.SetVersion(version)
flaggy.DefaultParser.ShowHelpOnUnexpected = true
distance_matrix = flaggy.NewSubcommand("distances")
distance_matrix.Description = "Compute all pairwise distances between the specified input profile."
distance_func_help := fmt.Sprintf(`Enter an integer denoting the distance function you would like to use:
%s: %d
%s: %d
%s: %d
%s: %d`,
ham.help, ham.assignment,
ham_missing.help, ham_missing.assignment,
scaled.help, scaled.assignment,
scaled_missing.help, scaled_missing.assignment)
buffer_help := fmt.Sprintf("The default buffer size is: %d. Larger buffers may increase performance.", BUFFER_SIZE)
load_factor_help := fmt.Sprintf("This value is used to compute how many profile calculations are assigned to thread, a larger value will result in fewer threads being used. Default: %d", BUCKET_SCALE)
distance_matrix.String(&INPUT_PROFILE, "i", "input", "File path to your alleles profiles.")
distance_matrix.Int(&BUCKET_SCALE, "l", "load-factor", load_factor_help)
distance_matrix.Int(&DIST_FUNC, "d", "distance", distance_func_help)
distance_matrix.String(&OUTPUT_FILE, "o", "output", "Name of output file. If nothing is specified results will be sent to stdout.")
distance_matrix.Int(&BUFFER_SIZE, "b", "buffer-size", buffer_help)
distance_matrix.String(&COLUMN_DELIMITER, "c", "column-delimiter", "Column delimiter, default value is a tab character")
distance_matrix.String(&MISSING_ALLELE_STRING, "m", "missing-allele-character", "String denoting missing alleles.")
convert_matrix = flaggy.NewSubcommand("convert")
convert_matrix.Description = "Convert the pairwise distance generated by the program into a distance matrix."
convert_matrix.String(&INPUT_PROFILE, "i", "input", "File path to a previously generated output for conversion into a distance matrix.")
convert_matrix.String(&OUTPUT_FILE, "o", "output", "Name of output file. If nothing is specified results will be sent to stdout.")
fast_match = flaggy.NewSubcommand("fast-match")
fast_match.Description = "Tabulate distances between a query profile and reference profiles. Only distances exceeding a threshold will be kept."
thread_limit_help := fmt.Sprintf("Limit the number of goroutines run at one time. Default: %d", FM_THREAD_LIMIT)
fast_match.String(&INPUT_PROFILE, "i", "input", "File path to profiles for querying.")
fast_match.String(&REFERENCE_PROFILES, "r", "reference", "File path to reference profiles to query against.")
fast_match.String(&COLUMN_DELIMITER, "c", "column-delimiter", "Column delimiter, default value is a tab character")
fast_match.String(&MISSING_ALLELE_STRING, "m", "missing-allele-character", "String denoting missing alleles.")
fast_match.Int(&DIST_FUNC, "d", "distance", distance_func_help)
fast_match.Float64(&MATCH_THRESHOLD, "t", "threshold", "Threshold for matching alleles.")
fast_match.String(&OUTPUT_FILE, "o", "output", "Name of output file. If nothing is specified results will be sent to stdout.")
fast_match.Int64(&FM_THREAD_LIMIT, "l", "goroutine-limit", thread_limit_help)
tree = flaggy.NewSubcommand("tree")
tree.Description = "Create a dendrogram from a supplied distance matrix."
tree.String(&INPUT_PROFILE, "i", "input", "File path to previously generate distance matrix.")
tree.String(&OUTPUT_FILE, "o", "output", "Name of output file.")
tree.Int(&LINKAGE_METHOD, "l", "linkage-method", linkage_methods_help)
flaggy.AttachSubcommand(distance_matrix, 1)
flaggy.AttachSubcommand(convert_matrix, 1)
flaggy.AttachSubcommand(tree, 1)
flaggy.AttachSubcommand(fast_match, 1)
flaggy.Parse()
}
// main parses the command line via cli() and runs whichever subcommand was
// selected: "distances", "convert", "fast-match" or "tree".
//
// The output buffer is created from OUTPUT_FILE before dispatching. Branches
// that write through the buffer flush it themselves once they are done.
// Invalid invocations are reported with flaggy.ShowHelpAndExit, which prints
// the help text and terminates the process.
func main() {
	cli()
	// Quit if not enough args passed
	if len(os.Args) <= 1 {
		flaggy.ShowHelpAndExit("No inputs passed")
	}

	output_buffer, file_out := CreateOutputBuffer(OUTPUT_FILE)
	defer file_out.Close()

	// requireArgs rejects an invocation that names a subcommand but passes
	// nothing else; every subcommand applies the same rule.
	requireArgs := func() {
		if len(os.Args) <= 2 {
			flaggy.ShowHelpAndExit("No commands selected.")
		}
	}

	switch {
	case distance_matrix.Used:
		requireArgs()
		data := LoadProfile(INPUT_PROFILE)
		RunData(data, output_buffer)
		log.Println("All threads depleted.")
		output_buffer.Flush()

	case convert_matrix.Used:
		requireArgs()
		// PairwiseToMatrix is given the output path directly, so the shared
		// buffer is not flushed in this branch.
		PairwiseToMatrix(INPUT_PROFILE, OUTPUT_FILE)

	case fast_match.Used:
		requireArgs()
		// DIST_FUNC comes straight from the command line and is used as an
		// index below; reject out-of-range values here so a bad "-d" value
		// produces a help message instead of an index-out-of-range panic.
		if DIST_FUNC < 0 || DIST_FUNC >= len(distance_functions) {
			flaggy.ShowHelpAndExit("Invalid distance function selected.")
		}
		if distance_functions[DIST_FUNC].assignment < integer_required_distance_functions_threshold && MATCH_THRESHOLD < 1 {
			flaggy.ShowHelpAndExit("Distance function selected requires a value >1 for selection.")
		}
		IdentifyMatches(REFERENCE_PROFILES, INPUT_PROFILE, MATCH_THRESHOLD, output_buffer)
		output_buffer.Flush()

	case tree.Used:
		requireArgs()
		if INPUT_PROFILE == "" {
			flaggy.ShowHelpAndExit("No input file selected")
		}
		// The upper bound is the match_value of the last registered linkage method.
		if LINKAGE_METHOD > LINKAGE_METHODS[len(LINKAGE_METHODS)-1].match_value || LINKAGE_METHOD < 0 {
			flaggy.ShowHelpAndExit("Invalid linkage method selected.")
		}
		Cluster(INPUT_PROFILE, LINKAGE_METHOD, output_buffer)
		output_buffer.Flush()

	default:
		flaggy.ShowHelpAndExit("Could not identify command used.")
	}

	log.Println("Done")
}