Skip to content

Commit

Permalink
restore peptide index support (#71)
Browse files Browse the repository at this point in the history
* add CometPeptideIndex files

* use a proper mutex to protect g_pvDBIndex and fix capturing the correct protein file position for peptide index search

* update VS project with CometPeptideIndex.cpp and .h files

* Add CreatePeptideIndex and CreateFragmentIndex to CometWrapper.

* Get peptide index working for the DoSingleSpectrumSearchMultiResults call through CometWrapper.

* address memory leak in GetPrevNextAA that showed up with peptide index RTS

* Skip some peptide index parsing on subsequent calls to DoSingleSpectrumSearchMultiResults().
  • Loading branch information
jke000 authored Jan 2, 2025
1 parent c884d4e commit 292818c
Show file tree
Hide file tree
Showing 30 changed files with 1,739 additions and 366 deletions.
13 changes: 11 additions & 2 deletions Comet.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ void Usage(char *pszCmd)
logout(" -F<num> to specify the first/start scan to search, overriding entry in parameters file\n");
logout(" -L<num> to specify the last/end scan to search, overriding entry in parameters file\n");
logout(" (-L option is required if -F option is used)\n");
logout(" -i create peptide index file only (specify .idx file as database for index search)\n");
logout(" -i create .idx file for fragment ion indexing\n");
logout(" -j create .idx file for peptide indexing\n");
logout("\n");
sprintf(szTmp, " example: %s file1.mzXML file2.mzXML\n", pszCmd);
logout(szTmp);
Expand Down Expand Up @@ -197,7 +198,15 @@ void SetOptions(char *arg,
break;
case 'i':
sprintf(szParamStringVal, "1");
pSearchMgr->SetParam("create_index", szParamStringVal, 1);
pSearchMgr->SetParam("create_fragment_index", szParamStringVal, 1);
sprintf(szParamStringVal, "0");
pSearchMgr->SetParam("create_peptide_index", szParamStringVal, 0);
break;
case 'j':
sprintf(szParamStringVal, "0");
pSearchMgr->SetParam("create_fragment_index", szParamStringVal, 0);
sprintf(szParamStringVal, "1");
pSearchMgr->SetParam("create_peptide_index", szParamStringVal, 1);
break;
default:
break;
Expand Down
2 changes: 1 addition & 1 deletion CometSearch/CometCheckForUpdates.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#define _COMETCHECKFORUPDATES_H_

#include "Common.h"
#include "CometDataInternal.h"
//#include "CometDataInternal.h"

#include <errno.h>
#include <string.h>
Expand Down
18 changes: 12 additions & 6 deletions CometSearch/CometDataInternal.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,8 @@ struct Options
int bSkipAlreadyDone; // 0=search everything; 1=don't re-search if .out exists
int bMango; // 0=normal; 1=Mango x-link ms2 input
int bScaleFragmentNL; // 0=no; 1=scale fragment NL for each modified residue contained in fragment
int bCreateIndex; // 0=normal search; 1=create peptide index file
int bCreateFragmentIndex; // 0=normal search; 1=create fragment ion index file
int bCreatePeptideIndex; // 0=normal search; 1=create peptide index file; only one of bCreateFragmentIndex and bCreatePeptideIndex can be 1
int bVerboseOutput;
int bShowFragmentIons;
int bExplicitDeltaCn; // if set to 1, do not use sequence similarity logic
Expand Down Expand Up @@ -194,7 +195,8 @@ struct Options
bSkipAlreadyDone = a.bSkipAlreadyDone;
bMango = a.bMango;
bScaleFragmentNL = a.bScaleFragmentNL;
bCreateIndex = a.bCreateIndex;
bCreatePeptideIndex = a.bCreatePeptideIndex;
bCreateFragmentIndex = a.bCreateFragmentIndex;
bVerboseOutput = a.bVerboseOutput;
bShowFragmentIons = a.bShowFragmentIons;
bExplicitDeltaCn = a.bExplicitDeltaCn;
Expand Down Expand Up @@ -712,12 +714,13 @@ struct StaticParams
double dOneMinusBinOffset; // this is used in BIN() many times so calculate once
IonInfo ionInformation;
int iXcorrProcessingOffset;
int bIndexDb; // 0 = normal fasta; 1 = indexed database
int iIndexDb; // 0 = normal fasta; 1 = fragment ion indexed; 2 = peptide index
vector<double> vectorMassOffsets;
vector<double> precursorNLIons;
int iPrecursorNLSize;
int iOldModsEncoding;
bool bSkipToStartScan;
std::chrono::high_resolution_clock::time_point tRealTimeStart; // track run time of real-time index search

StaticParams()
{
Expand Down Expand Up @@ -767,7 +770,7 @@ struct StaticParams
szMod[0] = '\0';

iXcorrProcessingOffset = 75;
bIndexDb = 0;
iIndexDb = 0;

databaseInfo.szDatabase[0] = '\0';

Expand Down Expand Up @@ -882,7 +885,8 @@ struct StaticParams
options.bSkipAlreadyDone = 1;
options.bMango = 0;
options.bScaleFragmentNL = 0;
options.bCreateIndex = 0;
options.bCreatePeptideIndex = 0;
options.bCreateFragmentIndex = 0;
options.bVerboseOutput = 0;
options.iDecoySearch = 0;
options.iNumThreads = 4;
Expand Down Expand Up @@ -949,7 +953,7 @@ extern StaticParams g_staticParams;

extern string g_psGITHUB_SHA; // grab the GITHUB_SHA environment variable and trim to 7 chars; null if environment variable not present

extern vector<DBIndex> g_pvDBIndex;
extern vector<DBIndex> g_pvDBIndex; // used in both peptide index and fragment ion index; latter to store plain peptides

extern vector<vector<comet_fileoffset_t>> g_pvProteinsList;

Expand All @@ -972,6 +976,8 @@ extern int* PEPTIDE_MOD_SEQ_IDXS;

extern int MOD_NUM;
extern bool g_bPlainPeptideIndexRead; // set to true if plain peptide index file is read (and fragment index generated)
// poor choice of name for the fragment index .idx given peptide index is back
extern bool g_bPeptideIndexRead; // set to true if peptide index file is read

// Query stores information for peptide scoring and results
// This struct is allocated for each spectrum/charge combination
Expand Down
19 changes: 9 additions & 10 deletions CometSearch/CometFragmentIndex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,10 @@
// limitations under the License.


#include "Common.h"
#include "CometFragmentIndex.h"
#include "CometSearch.h"
#include "ThreadPool.h"
#include "CometStatus.h"
//#include "CometPostAnalysis.h"
#include "CometMassSpecUtils.h"
#include "ModificationsPermuter.h"

Expand All @@ -37,7 +35,7 @@ int MOD_NUM = 0;

Mutex CometFragmentIndex::_vFragmentPeptidesMutex;

//comet_fileoffset_t clSizeCometFileOffset;

#ifdef _WIN32
#ifdef _WIN64
comet_fileoffset_t clSizeCometFileOffset = sizeof(comet_fileoffset_t); //win64
Expand All @@ -48,6 +46,7 @@ comet_fileoffset_t clSizeCometFileOffset = (long long)sizeof(comet_fileoffset_t)
comet_fileoffset_t clSizeCometFileOffset = sizeof(comet_fileoffset_t); //linux
#endif


// Default constructor; performs no initialization work of its own.
CometFragmentIndex::CometFragmentIndex() = default;
Expand Down Expand Up @@ -652,7 +651,7 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
exit(1);
}

strOut = " Creating plain peptide/protein index file:\n";
strOut = " Creating plain peptide/protein index file for fragment ion indexing:\n";
logout(strOut.c_str());
fflush(stdout);
strOut = " - parse peptides from database ... ";
Expand All @@ -671,15 +670,15 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)

if (bSucceeded)
{
g_staticParams.options.bCreateIndex = true;
g_staticParams.bIndexDb = false;
g_staticParams.options.bCreateFragmentIndex = true;
g_staticParams.iIndexDb = 0;

// this step calls RunSearch just to pull out all peptides
// to write into the .idx peptides/proteins file
bSucceeded = CometSearch::RunSearch(0, 0, tp);

g_staticParams.options.bCreateIndex = false;
g_staticParams.bIndexDb = true;
g_staticParams.options.bCreateFragmentIndex = false;
g_staticParams.iIndexDb = 1;
}

if (bSwapIdxExtension)
Expand Down Expand Up @@ -767,7 +766,7 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp)
cout << " - write peptides/proteins to file" << endl;

// write out index header
fprintf(fp, "Comet peptide index. Comet version %s\n", g_sCometVersion.c_str());
fprintf(fp, "Comet fragment ion index plain peptides. Comet version %s\n", g_sCometVersion.c_str());
fprintf(fp, "InputDB: %s\n", g_staticParams.databaseInfo.szDatabase);
fprintf(fp, "MassRange: %lf %lf\n", g_staticParams.options.dPeptideMassLow, g_staticParams.options.dPeptideMassHigh);
fprintf(fp, "LengthRange: %d %d\n", g_staticParams.options.peptideLengthRange.iStart, g_staticParams.options.peptideLengthRange.iEnd);
Expand Down Expand Up @@ -893,7 +892,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void)
if (g_bPlainPeptideIndexRead)
return 1;

if (g_staticParams.options.bCreateIndex && !strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
if (g_staticParams.options.bCreateFragmentIndex && !strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx"))
strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx");
else // database already is .idx
strIndexFile = g_staticParams.databaseInfo.szDatabase;
Expand Down
13 changes: 6 additions & 7 deletions CometSearch/CometFragmentIndex.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
#define _COMETFRAGMENTINDEX_H_

#include "Common.h"
#include "CometDataInternal.h"
#include "CometSearch.h"
#include <functional>

Expand All @@ -32,6 +31,10 @@ class CometFragmentIndex
static bool CreateFragmentIndex(ThreadPool *tp);
static string ElapsedTime(std::chrono::time_point<std::chrono::steady_clock> tStartTime);
static int WhichPrecursorBin(double dMass);
static bool CompareByPeptide(const DBIndex &lhs,
const DBIndex &rhs);
static bool CompareByMass(const DBIndex &lhs,
const DBIndex &rhs);

private:

Expand All @@ -52,16 +55,12 @@ class CometFragmentIndex
unsigned int y);
static void SortFragmentThreadProc(int iWhichThread,
ThreadPool* tp);
static bool CompareByPeptide(const DBIndex &lhs,
const DBIndex &rhs);
static bool CompareByMass(const DBIndex &lhs,
const DBIndex &rhs);

/*
unsigned int _uiBinnedIonMasses[MAX_FRAGMENT_CHARGE + 1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][VMODS + 1];
unsigned int _uiBinnedIonMassesDecoy[MAX_FRAGMENT_CHARGE + 1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][VMODS + 1];
unsigned int _uiBinnedPrecursorNL[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE];
unsigned int _uiBinnedPrecursorNLDecoy[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE];

*/
static bool *_pbSearchMemoryPool; // Pool of memory to be shared by search threads
static bool **_ppbDuplFragmentArr; // Number of arrays equals number of threads

Expand Down
21 changes: 11 additions & 10 deletions CometSearch/CometInterfaces.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,8 @@ namespace CometInterfaces
{
public:
virtual ~ICometSearchManager() {}
virtual bool CreateIndex() = 0;
virtual bool CreateFragmentIndex() = 0;
virtual bool CreatePeptideIndex() = 0;
virtual bool DoSearch() = 0;
virtual bool InitializeSingleSpectrumSearch() = 0;
virtual void FinalizeSingleSpectrumSearch() = 0;
Expand All @@ -43,15 +44,15 @@ namespace CometInterfaces
vector<Fragment> & matchedFragments,
Scores & scores) = 0;
virtual bool DoSingleSpectrumSearchMultiResults(const int topN,
const int iPrecursorCharge,
const double dMZ,
double* dMass,
double* dInten,
const int iNumPeaks,
vector<string>& strReturnPeptide,
vector<string>& strReturnProtein,
vector<vector<Fragment>>& matchedFragments,
vector<Scores>& scores) = 0;
const int iPrecursorCharge,
const double dMZ,
double* dMass,
double* dInten,
const int iNumPeaks,
vector<string>& strReturnPeptide,
vector<string>& strReturnProtein,
vector<vector<Fragment>>& matchedFragments,
vector<Scores>& scores) = 0;
virtual void AddInputFiles(vector<InputFileInfo*> &pvInputFiles) = 0;
virtual void SetOutputFileBaseName(const char *pszBaseName) = 0;
virtual void SetParam(const string &name, const string &strValue, const string &value) = 0;
Expand Down
Loading

0 comments on commit 292818c

Please sign in to comment.