-
Notifications
You must be signed in to change notification settings - Fork 1
/
CandidateSearchGPU.cs
135 lines (116 loc) · 5.99 KB
/
CandidateSearchGPU.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
using CandidateSearch.util;
using System.Diagnostics;
namespace CandidateSearch
{
/// <summary>
/// Searching for candidates on the GPU.
/// </summary>
public static class CandidateSearchGPU
{
    /// <summary>
    /// Number of peptides/peptidoforms above which a warning about matrix allocation is printed.
    /// </summary>
    private const int DatabaseSizeWarningThreshold = 12500000;

    /// <summary>
    /// Value passed as the <c>batchSize</c> argument of the GPU search.
    /// </summary>
    private const int SearchBatchSize = 100;

    /// <summary>
    /// Value passed as the <c>verbose</c> argument of the GPU search.
    /// </summary>
    private const int SearchVerbosity = 1000;

    /// <summary>
    /// Searches the given spectra for peptide candidates of the given database using the given settings on the GPU.
    /// Progress and status messages are written to the console and the processed result is exported to
    /// <c>spectraFile + "_results.csv"</c>.
    /// </summary>
    /// <param name="spectraFile">Filename of the mgf file containing the MS2 spectra.</param>
    /// <param name="databaseFile">Filename of the fasta file containing the proteins that should be considered for search.</param>
    /// <param name="settings">Settings containing parameters for digestion, ion calculation and search.</param>
    /// <exception cref="ArgumentNullException">Thrown if any of the arguments is <c>null</c>.</exception>
    /// <exception cref="OverflowException">
    /// Thrown if the total number of encoded peptide or spectrum values does not fit into an <see cref="int"/>.
    /// </exception>
    public static void Search(string spectraFile, string databaseFile, Settings settings)
    {
        // Fail fast before any file is read instead of with a NullReferenceException half-way through.
        ArgumentNullException.ThrowIfNull(spectraFile);
        ArgumentNullException.ThrowIfNull(databaseFile);
        ArgumentNullException.ThrowIfNull(settings);

        var spectra = MGFReader.readMGF(spectraFile);
        Console.WriteLine($"Read {spectra.Count} spectra.");

        var peptides = DatabaseReader.readFASTA(databaseFile, settings, generateDecoys: settings.DECOY_SEARCH);
        Console.WriteLine($"Generated {peptides.Count} peptides/peptidoforms from fasta file.");

        if (peptides.Count > DatabaseSizeWarningThreshold)
        {
            Console.WriteLine("Database size exceeds 12 500 000, might not be able to allocate a matrix of that size!");
            Console.WriteLine("Please see 'Limitations' for more info and possible work arounds!");
        }

        // Sort the database by the string representation of each peptide.
        // Ordinal comparison keeps the resulting order (and thus the candidate indices)
        // independent of the culture of the machine the search runs on.
        Console.WriteLine("Sorting peptides/peptidoforms...");
        var sortTime = Stopwatch.StartNew();
        peptides.Sort((x, y) => string.CompareOrdinal(x.ToString(), y.ToString()));
        sortTime.Stop();
        Console.WriteLine($"Sorted peptides/peptidoforms for search in {sortTime.Elapsed.TotalSeconds} seconds.");

        // Generate the csrColIdx and csrRowoffsets arrays.
        // First pass: determine the total number of encoded values so csrColIdx can be allocated once.
        // NOTE(review): getEnconding() is invoked twice per peptide (sizing pass and fill pass);
        // if it recomputes the encoding on every call this doubles that work - confirm.
        int csrColIdxLength = 0;
        foreach (var peptide in peptides)
        {
            var encoding = peptide.getEnconding();
            // checked: a wrapped total would otherwise allocate an undersized array and
            // only fail later with an index error while filling it.
            checked
            {
                csrColIdxLength += encoding.Length;
            }
        }

        var csrColIdx = new int[csrColIdxLength];
        var csrRowoffsets = new int[peptides.Count + 1];

        // Second pass: csrRowoffsets[i] is the position in csrColIdx where the encoding of
        // peptide i starts; the encodings themselves are written back to back into csrColIdx.
        int colPosition = 0;
        int rowPosition = 0;
        foreach (var peptide in peptides)
        {
            csrRowoffsets[rowPosition] = colPosition;
            rowPosition++;

            var encoding = peptide.getEnconding();
            foreach (var value in encoding)
            {
                csrColIdx[colPosition] = value;
                colPosition++;
            }
        }

        // The final row offset holds the total number of column indices.
        csrRowoffsets[peptides.Count] = csrColIdxLength;

        // Generate the spectraValues and spectraIdx arrays.
        // First pass: total number of encoded spectrum values.
        int spectraValuesLength = 0;
        foreach (var spectrum in spectra)
        {
            var encoding = spectrum.getEncoding();
            checked
            {
                spectraValuesLength += encoding.Length;
            }
        }

        var spectraValues = new int[spectraValuesLength];
        var spectraIdx = new int[spectra.Count];

        // Second pass: spectraIdx[i] is the position in spectraValues where the encoding of
        // spectrum i starts. Unlike csrRowoffsets there is no trailing end-offset entry.
        int valuePosition = 0;
        int spectrumPosition = 0;
        foreach (var spectrum in spectra)
        {
            spectraIdx[spectrumPosition] = valuePosition;
            spectrumPosition++;

            var encoding = spectrum.getEncoding();
            foreach (var value in encoding)
            {
                spectraValues[valuePosition] = value;
                valuePosition++;
            }
        }

        // Map the configured mode onto the GPU method; "GPU_DVf32" and any
        // unrecognised mode both select f32GPU_DV.
        var method = settings.MODE switch
        {
            "GPU_DMf32" => VectorSearchInterface.VectorSearchAPI.GPU_METHODS.f32GPU_DM,
            "GPU_SMf32" => VectorSearchInterface.VectorSearchAPI.GPU_METHODS.f32GPU_SM,
            _ => VectorSearchInterface.VectorSearchAPI.GPU_METHODS.f32GPU_DV,
        };

        var searchTime = Stopwatch.StartNew();
        var result = VectorSearchInterface.VectorSearchAPI.searchGPU(ref csrRowoffsets,
                                                                     ref csrColIdx,
                                                                     ref spectraValues,
                                                                     ref spectraIdx,
                                                                     topN: settings.TOP_N,
                                                                     tolerance: settings.TOLERANCE,
                                                                     normalize: settings.NORMALIZE,
                                                                     useGaussianTol: settings.USE_GAUSSIAN,
                                                                     batchSize: SearchBatchSize,
                                                                     method: method,
                                                                     verbose: SearchVerbosity,
                                                                     memStat: out int memStat);
        searchTime.Stop();
        Console.WriteLine($"GPU search finished with code {memStat}. Search took {searchTime.Elapsed.TotalSeconds} seconds.");

        // Combine the raw search result with the peptides and spectra and write it next to the spectra file.
        var processedResult = new Result(ref result, ref peptides, ref spectra, TopN: settings.TOP_N);
        var csvStat = processedResult.export(spectraFile + "_results.csv");
        Console.WriteLine($"Result file written to disk with code {csvStat}. Search finished!");
    }
}
}