From 292818c7ec5303203b23ed35def13367b2e3495d Mon Sep 17 00:00:00 2001 From: Jimmy Eng Date: Thu, 2 Jan 2025 12:13:47 -0800 Subject: [PATCH] restore peptide index support (#71) * add CometPeptideIndex files * use proper mutex to protect g_pvDBIndex and fix capturing proper protein file position for peptide index search * update VS project with CometPeptideIndex.cpp and .h files * Add CreatePeptideIndex and CreateFragmentIndex to CometWrapper. * Get peptide index working for DoSingleSpectrumSearchMultiResults call thru CometWrapper. * address memory leak in GetPrevNextAA that showed up with peptide index RTS * Skip some peptide index parsing on subsequent calls to DoSingleSpectrumSearchMultiResults(). --- Comet.cpp | 13 +- CometSearch/CometCheckForUpdates.h | 2 +- CometSearch/CometDataInternal.h | 18 +- CometSearch/CometFragmentIndex.cpp | 19 +- CometSearch/CometFragmentIndex.h | 13 +- CometSearch/CometInterfaces.h | 21 +- CometSearch/CometMassSpecUtils.cpp | 95 ++- CometSearch/CometPeptideIndex.cpp | 325 ++++++++ CometSearch/CometPeptideIndex.h | 53 ++ CometSearch/CometPostAnalysis.cpp | 23 +- CometSearch/CometPostAnalysis.h | 1 + CometSearch/CometPreprocess.cpp | 15 +- CometSearch/CometPreprocess.h | 1 - CometSearch/CometSearch.cpp | 958 ++++++++++++++++++++++-- CometSearch/CometSearch.h | 19 +- CometSearch/CometSearch.vcxproj | 2 + CometSearch/CometSearch.vcxproj.filters | 6 + CometSearch/CometSearchManager.cpp | 423 ++++++----- CometSearch/CometSearchManager.h | 3 +- CometSearch/CometWriteMzIdentML.cpp | 2 +- CometSearch/CometWriteOut.cpp | 2 +- CometSearch/Common.h | 3 +- CometSearch/Makefile | 10 +- CometSearch/ModificationsPermuter.cpp | 5 +- CometSearch/ModificationsPermuter.h | 3 - CometSearch/ThreadPool.h | 1 - CometWrapper/CometWrapper.cpp | 13 +- CometWrapper/CometWrapper.h | 3 +- Makefile | 3 +- RealtimeSearch/Search.cs | 50 +- 30 files changed, 1739 insertions(+), 366 deletions(-) create mode 100644 CometSearch/CometPeptideIndex.cpp create mode 100644 CometSearch/CometPeptideIndex.h diff --git a/Comet.cpp b/Comet.cpp index 1b769b9d..acb66055 100644 --- a/Comet.cpp +++ b/Comet.cpp @@ -100,7 +100,8 @@ void Usage(char *pszCmd) logout(" -F to specify the first/start scan to search, overriding entry in parameters file\n"); logout(" -L to specify the last/end scan to search, overriding entry in parameters file\n"); logout(" (-L option is required if -F option is used)\n"); - logout(" -i create peptide index file only (specify .idx file as database for index search)\n"); + logout(" -i create .idx file for fragment ion indexing\n"); + logout(" -j create .idx file for peptide indexing\n"); logout("\n"); sprintf(szTmp, " example: %s file1.mzXML file2.mzXML\n", pszCmd); logout(szTmp); @@ -197,7 +198,15 @@ void SetOptions(char *arg, break; case 'i': sprintf(szParamStringVal, "1"); - pSearchMgr->SetParam("create_index", szParamStringVal, 1); + pSearchMgr->SetParam("create_fragment_index", szParamStringVal, 1); + sprintf(szParamStringVal, "0"); + pSearchMgr->SetParam("create_peptide_index", szParamStringVal, 0); + break; + case 'j': + sprintf(szParamStringVal, "0"); + pSearchMgr->SetParam("create_fragment_index", szParamStringVal, 0); + sprintf(szParamStringVal, "1"); + pSearchMgr->SetParam("create_peptide_index", szParamStringVal, 1); break; default: break; diff --git a/CometSearch/CometCheckForUpdates.h b/CometSearch/CometCheckForUpdates.h index 5fa2941c..349e6a2f 100644 --- a/CometSearch/CometCheckForUpdates.h +++ b/CometSearch/CometCheckForUpdates.h @@ -17,7 +17,7 @@ #define _COMETCHECKFORUPDATES_H_ #include "Common.h" -#include "CometDataInternal.h" +//#include "CometDataInternal.h" #include #include diff --git a/CometSearch/CometDataInternal.h b/CometSearch/CometDataInternal.h index d6212171..f1d9d325 100644 --- a/CometSearch/CometDataInternal.h +++ b/CometSearch/CometDataInternal.h @@ -134,7 +134,8 @@ struct Options int bSkipAlreadyDone; // 0=search everything; 1=don't re-search if .out exists int bMango; // 0=normal; 1=Mango x-link ms2 input int bScaleFragmentNL; // 0=no; 1=scale fragment NL for each modified residue contained in fragment - int bCreateIndex; // 0=normal search; 1=create peptide index file + int bCreateFragmentIndex; // 0=normal search; 1=create fragment ion index file + int bCreatePeptideIndex; // 0=normal search; 1=create peptide index file; only one of bCreateFragmentIndex and bCreatePeptideIndex can be 1 int bVerboseOutput; int bShowFragmentIons; int bExplicitDeltaCn; // if set to 1, do not use sequence similarity logic @@ -194,7 +195,8 @@ struct Options bSkipAlreadyDone = a.bSkipAlreadyDone; bMango = a.bMango; bScaleFragmentNL = a.bScaleFragmentNL; - bCreateIndex = a.bCreateIndex; + bCreatePeptideIndex = a.bCreatePeptideIndex; + bCreateFragmentIndex = a.bCreateFragmentIndex; bVerboseOutput = a.bVerboseOutput; bShowFragmentIons = a.bShowFragmentIons; bExplicitDeltaCn = a.bExplicitDeltaCn; @@ -712,12 +714,13 @@ struct StaticParams double dOneMinusBinOffset; // this is used in BIN() many times so calculate once IonInfo ionInformation; int iXcorrProcessingOffset; - int bIndexDb; // 0 = normal fasta; 1 = indexed database + int iIndexDb; // 0 = normal fasta; 1 = fragment ion indexed; 2 = peptide index vector vectorMassOffsets; vector precursorNLIons; int iPrecursorNLSize; int iOldModsEncoding; bool bSkipToStartScan; + std::chrono::high_resolution_clock::time_point tRealTimeStart; // track run time of real-time index search StaticParams() { @@ -767,7 +770,7 @@ struct StaticParams szMod[0] = '\0'; iXcorrProcessingOffset = 75; - bIndexDb = 0; + iIndexDb = 0; databaseInfo.szDatabase[0] = '\0'; @@ -882,7 +885,8 @@ struct StaticParams options.bSkipAlreadyDone = 1; options.bMango = 0; options.bScaleFragmentNL = 0; - options.bCreateIndex = 0; + options.bCreatePeptideIndex = 0; + options.bCreateFragmentIndex = 0; options.bVerboseOutput = 0; options.iDecoySearch = 0; options.iNumThreads = 4; @@ -949,7 +953,7 @@ extern StaticParams g_staticParams; extern string g_psGITHUB_SHA; // grab the GITHUB_SHA environment variable and trim to 7 chars; null if environment variable not present -extern vector g_pvDBIndex; +extern vector g_pvDBIndex; // used in both peptide index and fragment ion index; latter to store plain peptides extern vector> g_pvProteinsList; @@ -972,6 +976,8 @@ extern int* PEPTIDE_MOD_SEQ_IDXS; extern int MOD_NUM; extern bool g_bPlainPeptideIndexRead; // set to true if plain peptide index file is read (and fragment index generated) + // poor choice of name for the fragment index .idx given peptide index is back +extern bool g_bPeptideIndexRead; // set to true if peptide index file is read // Query stores information for peptide scoring and results // This struct is allocated for each spectrum/charge combination diff --git a/CometSearch/CometFragmentIndex.cpp b/CometSearch/CometFragmentIndex.cpp index 9a87d4a5..6809f364 100644 --- a/CometSearch/CometFragmentIndex.cpp +++ b/CometSearch/CometFragmentIndex.cpp @@ -13,12 +13,10 @@ // limitations under the License. -#include "Common.h" #include "CometFragmentIndex.h" #include "CometSearch.h" #include "ThreadPool.h" #include "CometStatus.h" -//#include "CometPostAnalysis.h" #include "CometMassSpecUtils.h" #include "ModificationsPermuter.h" @@ -37,7 +35,7 @@ int MOD_NUM = 0; Mutex CometFragmentIndex::_vFragmentPeptidesMutex; -//comet_fileoffset_t clSizeCometFileOffset; + #ifdef _WIN32 #ifdef _WIN64 comet_fileoffset_t clSizeCometFileOffset = sizeof(comet_fileoffset_t); //win64 @@ -48,6 +46,7 @@ comet_fileoffset_t clSizeCometFileOffset = (long long)sizeof(comet_fileoffset_t) comet_fileoffset_t clSizeCometFileOffset = sizeof(comet_fileoffset_t); //linux #endif + CometFragmentIndex::CometFragmentIndex() { } @@ -652,7 +651,7 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) exit(1); } - strOut = " Creating plain peptide/protein index file:\n"; + strOut = " Creating plain peptide/protein index file for fragment ion indexing:\n"; logout(strOut.c_str()); fflush(stdout); strOut = " - parse peptides from database ... "; @@ -671,15 +670,15 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) if (bSucceeded) { - g_staticParams.options.bCreateIndex = true; - g_staticParams.bIndexDb = false; + g_staticParams.options.bCreateFragmentIndex = true; + g_staticParams.iIndexDb = 0; // this step calls RunSearch just to pull out all peptides // to write into the .idx pepties/proteins file bSucceeded = CometSearch::RunSearch(0, 0, tp); - g_staticParams.options.bCreateIndex = false; - g_staticParams.bIndexDb = true; + g_staticParams.options.bCreateFragmentIndex = false; + g_staticParams.iIndexDb = 1; } if (bSwapIdxExtension) @@ -767,7 +766,7 @@ bool CometFragmentIndex::WritePlainPeptideIndex(ThreadPool *tp) cout << " - write peptides/proteins to file" << endl; // write out index header - fprintf(fp, "Comet peptide index. Comet version %s\n", g_sCometVersion.c_str()); + fprintf(fp, "Comet fragment ion index plain peptides. Comet version %s\n", g_sCometVersion.c_str()); fprintf(fp, "InputDB: %s\n", g_staticParams.databaseInfo.szDatabase); fprintf(fp, "MassRange: %lf %lf\n", g_staticParams.options.dPeptideMassLow, g_staticParams.options.dPeptideMassHigh); fprintf(fp, "LengthRange: %d %d\n", g_staticParams.options.peptideLengthRange.iStart, g_staticParams.options.peptideLengthRange.iEnd); @@ -893,7 +892,7 @@ bool CometFragmentIndex::ReadPlainPeptideIndex(void) if (g_bPlainPeptideIndexRead) return 1; - if (g_staticParams.options.bCreateIndex && !strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx")) + if (g_staticParams.options.bCreateFragmentIndex && !strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx")) strIndexFile = g_staticParams.databaseInfo.szDatabase + string(".idx"); else // database already is .idx strIndexFile = g_staticParams.databaseInfo.szDatabase; diff --git a/CometSearch/CometFragmentIndex.h b/CometSearch/CometFragmentIndex.h index eb94de7f..810047c1 100644 --- a/CometSearch/CometFragmentIndex.h +++ b/CometSearch/CometFragmentIndex.h @@ -17,7 +17,6 @@ #define _COMETFRAGMENTINDEX_H_ #include "Common.h" -#include "CometDataInternal.h" #include "CometSearch.h" #include @@ -32,6 +31,10 @@ class CometFragmentIndex static bool CreateFragmentIndex(ThreadPool *tp); static string ElapsedTime(std::chrono::time_point tStartTime); static int WhichPrecursorBin(double dMass); + static bool CompareByPeptide(const DBIndex &lhs, + const DBIndex &rhs); + static bool CompareByMass(const DBIndex &lhs, + const DBIndex &rhs); private: @@ -52,16 +55,12 @@ class CometFragmentIndex unsigned int y); static void SortFragmentThreadProc(int iWhichThread, ThreadPool* tp); - static bool CompareByPeptide(const DBIndex &lhs, - const DBIndex &rhs); - static bool CompareByMass(const DBIndex &lhs, - const DBIndex &rhs); - +/* unsigned int _uiBinnedIonMasses[MAX_FRAGMENT_CHARGE + 1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][VMODS + 1]; unsigned int _uiBinnedIonMassesDecoy[MAX_FRAGMENT_CHARGE + 1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][VMODS + 1]; unsigned int _uiBinnedPrecursorNL[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE]; unsigned int _uiBinnedPrecursorNLDecoy[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE]; - +*/ static bool *_pbSearchMemoryPool; // Pool of memory to be shared by search threads static bool **_ppbDuplFragmentArr; // Number of arrays equals number of threads diff --git a/CometSearch/CometInterfaces.h b/CometSearch/CometInterfaces.h index c827c06d..473d63af 100644 --- a/CometSearch/CometInterfaces.h +++ b/CometSearch/CometInterfaces.h @@ -29,7 +29,8 @@ namespace CometInterfaces { public: virtual ~ICometSearchManager() {} - virtual bool CreateIndex() = 0; + virtual bool CreateFragmentIndex() = 0; + virtual bool CreatePeptideIndex() = 0; virtual bool DoSearch() = 0; virtual bool InitializeSingleSpectrumSearch() = 0; virtual void FinalizeSingleSpectrumSearch() = 0; @@ -43,15 +44,15 @@ namespace CometInterfaces vector & matchedFragments, Scores & scores) = 0; virtual bool DoSingleSpectrumSearchMultiResults(const int topN, - const int iPrecursorCharge, - const double dMZ, - double* dMass, - double* dInten, - const int iNumPeaks, - vector& strReturnPeptide, - vector& strReturnProtein, - vector>& matchedFragments, - vector& scores) = 0; + const int iPrecursorCharge, + const double dMZ, + double* dMass, + double* dInten, + const int iNumPeaks, + vector& strReturnPeptide, + vector& strReturnProtein, + vector>& matchedFragments, + vector& scores) = 0; virtual void AddInputFiles(vector &pvInputFiles) = 0; virtual void SetOutputFileBaseName(const char *pszBaseName) = 0; virtual void SetParam(const string &name, const string &strValue, const string &value) = 0; diff --git a/CometSearch/CometMassSpecUtils.cpp b/CometSearch/CometMassSpecUtils.cpp index f9ce6f27..952be7cb 100644 --- a/CometSearch/CometMassSpecUtils.cpp +++ b/CometSearch/CometMassSpecUtils.cpp @@ -129,61 +129,61 @@ void CometMassSpecUtils::AssignMass(double *pdAAMass, // return a single protein name as a C char string -void CometMassSpecUtils::GetProteinName(FILE *fpdb, +void CometMassSpecUtils::GetProteinName(FILE *fpfasta, comet_fileoffset_t lFilePosition, char *szProteinName) { size_t tTmp; - comet_fseek(fpdb, lFilePosition, SEEK_SET); + comet_fseek(fpfasta, lFilePosition, SEEK_SET); - if (g_staticParams.bIndexDb) //index database + if (g_staticParams.iIndexDb) //fragment ion or peptide index { long lSize; - tTmp = fread(&lSize, sizeof(long), 1, fpdb); + tTmp = fread(&lSize, sizeof(long), 1, fpfasta); vector vOffsets; for (long x = 0; x < lSize; ++x) // read file offsets { comet_fileoffset_t tmpoffset; - tTmp = fread(&tmpoffset, sizeof(comet_fileoffset_t), 1, fpdb); + tTmp = fread(&tmpoffset, sizeof(comet_fileoffset_t), 1, fpfasta); vOffsets.push_back(tmpoffset); } for (long x = 0; x < lSize; ++x) // read name from fasta { char szTmp[WIDTH_REFERENCE]; - comet_fseek(fpdb, vOffsets.at(x), SEEK_SET); - tTmp = fread(szTmp, sizeof(char)*WIDTH_REFERENCE, 1, fpdb); + comet_fseek(fpfasta, vOffsets.at(x), SEEK_SET); + tTmp = fread(szTmp, sizeof(char)*WIDTH_REFERENCE, 1, fpfasta); sscanf(szTmp, "%511s", szProteinName); // WIDTH_REFERENCE-1 break; //break here to only get first protein reference (out of lSize) } } else //regular fasta database { - fscanf(fpdb, "%511s", szProteinName); // WIDTH_REFERENCE-1 + fscanf(fpfasta, "%511s", szProteinName); // WIDTH_REFERENCE-1 szProteinName[511] = '\0'; } } // return a single protein sequence as C++ string -void CometMassSpecUtils::GetProteinSequence(FILE *fpdb, +void CometMassSpecUtils::GetProteinSequence(FILE *fpfasta, comet_fileoffset_t lFilePosition, string &strSeq) { strSeq.clear(); - if (!g_staticParams.bIndexDb) // works only for regular FASTA + if (!g_staticParams.iIndexDb) // works only for regular FASTA { int iTmpCh; - comet_fseek(fpdb, lFilePosition, SEEK_SET); + comet_fseek(fpfasta, lFilePosition, SEEK_SET); // skip to end of description line - while (((iTmpCh = getc(fpdb)) != '\n') && (iTmpCh != '\r') && (iTmpCh != EOF)); + while (((iTmpCh = getc(fpfasta)) != '\n') && (iTmpCh != '\r') && (iTmpCh != EOF)); // load sequence - while (((iTmpCh=getc(fpdb)) != '>') && (iTmpCh != EOF)) + while (((iTmpCh=getc(fpfasta)) != '>') && (iTmpCh != EOF)) { if ('a'<=iTmpCh && iTmpCh<='z') { @@ -205,7 +205,7 @@ void CometMassSpecUtils::GetProteinSequence(FILE *fpdb, // return all matched protein names in a vector of strings -void CometMassSpecUtils::GetProteinNameString(FILE *fpdb, +void CometMassSpecUtils::GetProteinNameString(FILE *fpfasta, int iWhichQuery, // which search int iWhichResult, // which peptide within the search int iPrintTargetDecoy, // 0 = target+decoys, 1=target only, 2=decoy only @@ -220,7 +220,7 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpdb, int iLenDecoyPrefix = (int)strlen(g_staticParams.szDecoyPrefix); - if (g_staticParams.bIndexDb) //index database + if (g_staticParams.iIndexDb) //fragment ion or peptide index { Results *pOutput; @@ -241,8 +241,8 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpdb, comet_fileoffset_t lEntry = pOutput[iWhichResult].lProteinFilePosition; for (auto it = g_pvProteinsList.at(lEntry).begin(); it != g_pvProteinsList.at(lEntry).end(); ++it) { - comet_fseek(fpdb, *it, SEEK_SET); - fscanf(fpdb, "%511s", szProteinName); // WIDTH_REFERENCE-1 + comet_fseek(fpfasta, *it, SEEK_SET); + fscanf(fpfasta, "%511s", szProteinName); // WIDTH_REFERENCE-1 szProteinName[511] = '\0'; if (!strncmp(szProteinName, g_staticParams.szDecoyPrefix, iLenDecoyPrefix)) @@ -286,8 +286,8 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpdb, { for (it=pOutput[iWhichResult].pWhichProtein.begin(); it!=pOutput[iWhichResult].pWhichProtein.end(); ++it) { - comet_fseek(fpdb, (*it).lWhichProtein, SEEK_SET); - fscanf(fpdb, "%511s", szProteinName); // WIDTH_REFERENCE-1 + comet_fseek(fpfasta, (*it).lWhichProtein, SEEK_SET); + fscanf(fpfasta, "%511s", szProteinName); // WIDTH_REFERENCE-1 szProteinName[511] = '\0'; vProteinTargets.push_back(szProteinName); @@ -309,8 +309,8 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpdb, if (iPrintDuplicateProteinCt >= g_staticParams.options.iMaxDuplicateProteins) break; - comet_fseek(fpdb, (*it).lWhichProtein, SEEK_SET); - fscanf(fpdb, "%511s", szProteinName); // WIDTH_REFERENCE-1 + comet_fseek(fpfasta, (*it).lWhichProtein, SEEK_SET); + fscanf(fpfasta, "%511s", szProteinName); // WIDTH_REFERENCE-1 szProteinName[511] = '\0'; if (strlen(szProteinName) + iLenDecoyPrefix >= WIDTH_REFERENCE) @@ -328,13 +328,13 @@ void CometMassSpecUtils::GetProteinNameString(FILE *fpdb, // find prev, next AA from first matched protein // this is only valid if searching indexed db with peptide/protein .idx file -void CometMassSpecUtils::GetPrevNextAA(FILE *fpdb, +void CometMassSpecUtils::GetPrevNextAA(FILE *fpfasta, int iWhichQuery, // which search int iWhichResult, // which peptide within the search int iPrintTargetDecoy, // 0 = target+decoys, 1=target only, 2=decoy only int iWhichTerm) // 0=no term constraint, 1=protein N-term, 2=protein C-term { - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb) // fragment ion or peptide index { Results *pOutput; int iTmpCh = 0; @@ -356,13 +356,13 @@ void CometMassSpecUtils::GetPrevNextAA(FILE *fpdb, { string strSeq; - comet_fseek(fpdb, *it, SEEK_SET); + comet_fseek(fpfasta, *it, SEEK_SET); // skip through protein name string to first carriage return - while (((iTmpCh = getc(fpdb)) != '\n') && (iTmpCh != '\r') && (iTmpCh != EOF)); + while (((iTmpCh = getc(fpfasta)) != '\n') && (iTmpCh != '\r') && (iTmpCh != EOF)); // Load sequence - while (((iTmpCh=getc(fpdb)) != '>') && (iTmpCh != EOF)) + while (((iTmpCh=getc(fpfasta)) != '>') && (iTmpCh != EOF)) { if ('a' <= iTmpCh && iTmpCh <= 'z') { @@ -385,11 +385,18 @@ void CometMassSpecUtils::GetPrevNextAA(FILE *fpdb, } } - CometSearch cs; + if (strSeq.size() < 1) + { + printf("Error - parsed sequence in GetPrevNextAA() is empty. File pointer %ld, query %d, result %d.\n", *it, iWhichQuery, iWhichResult); + pOutput[iWhichResult].cPrevAA = pOutput[iWhichResult].cNextAA = '-'; + return; + } char* szSequence = (char*)malloc(strSeq.size() + 1); strcpy(szSequence, strSeq.c_str()); int iLenSequence = (int)strlen(szSequence); + + CometSearch cs; cs._proteinInfo.iTmpProteinSeqLength = iLenSequence; // used in CheckEnzymeTermini if (iWhichTerm == 0) @@ -414,8 +421,9 @@ void CometMassSpecUtils::GetPrevNextAA(FILE *fpdb, else pOutput[iWhichResult].cNextAA = szSequence[iEndPos + 1]; - bFound = true; - break; + free(szSequence); + strSeq.clear(); + return; } else if (g_staticParams.options.bClipNtermMet && iStartPos == 1 && szSequence[0] == 'M' && cs.CheckEnzymeEndTermini(szSequence, iEndPos)) { @@ -426,15 +434,13 @@ void CometMassSpecUtils::GetPrevNextAA(FILE *fpdb, else pOutput[iWhichResult].cNextAA = szSequence[iEndPos + 1]; - bFound = true; - break; + free(szSequence); + strSeq.clear(); + return; } ++iStartPos; } - - if (bFound) - break; } else if (iWhichTerm == 1) { @@ -450,8 +456,9 @@ void CometMassSpecUtils::GetPrevNextAA(FILE *fpdb, else pOutput[iWhichResult].cNextAA = '-'; - bFound = true; - break; + free(szSequence); + strSeq.clear(); + return; } else if (g_staticParams.options.bClipNtermMet && szSequence[0] == 'M' @@ -461,7 +468,10 @@ void CometMassSpecUtils::GetPrevNextAA(FILE *fpdb, pOutput[iWhichResult].cPrevAA = 'M'; pOutput[iWhichResult].cNextAA = szSequence[iEndPos + 1]; bFound = true; - break; + + free(szSequence); + strSeq.clear(); + return; } } else if (iWhichTerm == 2) @@ -479,20 +489,25 @@ void CometMassSpecUtils::GetPrevNextAA(FILE *fpdb, pOutput[iWhichResult].cNextAA = '-'; - bFound = true; - break; + free(szSequence); + strSeq.clear(); + return; } else if (g_staticParams.options.bClipNtermMet && iStartPos == 1 && szSequence[0] == 'M') { pOutput[iWhichResult].cPrevAA = 'M'; pOutput[iWhichResult].cNextAA = '-'; bFound = true; - break; + + free(szSequence); + strSeq.clear(); + return; } } } free(szSequence); + strSeq.clear(); } if (!bFound) diff --git a/CometSearch/CometPeptideIndex.cpp b/CometSearch/CometPeptideIndex.cpp new file mode 100644 index 00000000..0057416d --- /dev/null +++ b/CometSearch/CometPeptideIndex.cpp @@ -0,0 +1,325 @@ +// Copyright 2023 Jimmy Eng +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include "CometPeptideIndex.h" + +extern comet_fileoffset_t clSizeCometFileOffset; + + +CometPeptideIndex::CometPeptideIndex() +{ +} + +CometPeptideIndex::~CometPeptideIndex() +{ +} + + +bool CometPeptideIndex::WritePeptideIndex(ThreadPool *tp) +{ + bool bSucceeded; + char szOut[256]; + FILE *fptr; + + const int iIndex_SIZE_FILE=SIZE_FILE+4; + char szIndexFile[iIndex_SIZE_FILE]; + sprintf(szIndexFile, "%s.idx", g_staticParams.databaseInfo.szDatabase); + + if ((fptr = fopen(szIndexFile, "wb")) == NULL) + { + printf(" Error - cannot open index file %s to write\n", szIndexFile); + exit(1); + } + + sprintf(szOut, " Creating peptide index file: "); + logout(szOut); + fflush(stdout); + + bSucceeded = CometSearch::AllocateMemory(g_staticParams.options.iNumThreads); + + g_massRange.dMinMass = g_staticParams.options.dPeptideMassLow; + g_massRange.dMaxMass = g_staticParams.options.dPeptideMassHigh; + + tp->fillPool( g_staticParams.options.iNumThreads < 0 ? 0 : g_staticParams.options.iNumThreads-1); + if (g_massRange.dMaxMass - g_massRange.dMinMass > g_massRange.dMinMass) + g_massRange.bNarrowMassRange = true; + else + g_massRange.bNarrowMassRange = false; + + if (bSucceeded) + { + bSucceeded = CometSearch::RunSearch(0, 0, tp); + } + + if (!bSucceeded) + { + char szErrorMsg[SIZE_ERROR]; + sprintf(szErrorMsg, " Error in RunSearch() for peptide index creation.\n"); + logerr(szErrorMsg); + CometSearch::DeallocateMemory(g_staticParams.options.iNumThreads); + return false; + } + + // sanity check + if (g_pvDBIndex.size() == 0) + { + char szErrorMsg[SIZE_ERROR]; + sprintf(szErrorMsg, " Error: no peptides in index; check the input database file or search parameters.\n"); + logerr(szErrorMsg); + CometSearch::DeallocateMemory(g_staticParams.options.iNumThreads); + return false; + } + + // remove duplicates + sprintf(szOut, " - removing duplicates\n"); + logout(szOut); + fflush(stdout); + + // keep unique entries only; sort by peptide/modification state and protein + // first sort by peptide, then mod state, then protein file position + sort(g_pvDBIndex.begin(), g_pvDBIndex.end(), CometFragmentIndex::CompareByPeptide); + + // At this point, need to create g_pvProteinsList protein file position vector of vectors to map each peptide + // to every protein. g_pvDBIndex.at().lProteinFilePosition is now reference to protein vector entry + vector> g_pvProteinsList; + vector temp; // stores list of duplicate proteins which gets pushed to g_pvProteinsList + + // Create g_pvProteinsList. This is a vector of vectors. Each element is vector list + // of duplicate proteins (generated as "temp") ... these are generated by looping + // through g_pvDBIndex and looking for consecutive, same peptides. Once the "temp" + // vector is assigned the lIndexProteinFilePosition offset, the g_pvDBIndex entry is + // is assigned lProtCount to lIndexProteinFilePosition. This is used later to look up + // the right vector element of duplicate proteins later. + long lProtCount = 0; + for (size_t i = 0; i < g_pvDBIndex.size(); i++) + { + if (i == 0) + { + temp.push_back(g_pvDBIndex.at(i).lIndexProteinFilePosition); + g_pvDBIndex.at(i).lIndexProteinFilePosition = lProtCount; + } + else + { + // each unique peptide, irregardless of mod state, will have the same list + // of matched proteins + if (!strcmp(g_pvDBIndex.at(i).szPeptide, g_pvDBIndex.at(i-1).szPeptide)) + { + temp.push_back(g_pvDBIndex.at(i).lIndexProteinFilePosition); + g_pvDBIndex.at(i).lIndexProteinFilePosition = lProtCount; + } + else + { + // different peptide + mod state so go ahead and push temp onto g_pvProteinsList + // and store current protein reference into new temp + // temp can have duplicates due to mod forms of peptide so make unique here + sort(temp.begin(), temp.end()); + temp.erase(unique(temp.begin(), temp.end()), temp.end() ); + g_pvProteinsList.push_back(temp); + + lProtCount++; // start new row in g_pvProteinsList + temp.clear(); + temp.push_back(g_pvDBIndex.at(i).lIndexProteinFilePosition); + g_pvDBIndex.at(i).lIndexProteinFilePosition = lProtCount; + } + } + } + // now at end of loop, push last temp onto g_pvProteinsList + sort(temp.begin(), temp.end()); + temp.erase(unique(temp.begin(), temp.end()), temp.end() ); + g_pvProteinsList.push_back(temp); + + g_pvDBIndex.erase(unique(g_pvDBIndex.begin(), g_pvDBIndex.end()), g_pvDBIndex.end()); + + // sort by mass; + sort(g_pvDBIndex.begin(), g_pvDBIndex.end(), CometFragmentIndex::CompareByMass); + +/* + for (std::vector::iterator it = g_pvDBIndex.begin(); it != g_pvDBIndex.end(); ++it) + { + printf("OK after unique "); + if ((*it).pcVarModSites[strlen((*it).szPeptide)] != 0) + printf("n*"); + for (unsigned int x = 0; x < strlen((*it).szPeptide); x++) + { + printf("%c", (*it).szPeptide[x]); + if ((*it).pcVarModSites[x] != 0) + printf("*"); + } + if ((*it).pcVarModSites[strlen((*it).szPeptide) + 1] != 0) + printf("c*"); + printf(" %f %lld\n", (*it).dPepMass, (*it).lIndexProteinFilePosition); + } + printf("\n"); +*/ + + sprintf(szOut, " - writing file\n"); + logout(szOut); + fflush(stdout); + + // write out index header + fprintf(fptr, "Comet peptide index database. Comet version %s\n", g_sCometVersion.c_str()); + fprintf(fptr, "InputDB: %s\n", g_staticParams.databaseInfo.szDatabase); + fprintf(fptr, "MassRange: %lf %lf\n", g_staticParams.options.dPeptideMassLow, g_staticParams.options.dPeptideMassHigh); + fprintf(fptr, "LengthRange: %d %d\n", g_staticParams.options.peptideLengthRange.iStart, g_staticParams.options.peptideLengthRange.iEnd); + fprintf(fptr, "MassType: %d %d\n", g_staticParams.massUtility.bMonoMassesParent, g_staticParams.massUtility.bMonoMassesFragment); + fprintf(fptr, "Enzyme: %s [%d %s %s]\n", g_staticParams.enzymeInformation.szSearchEnzymeName, + g_staticParams.enzymeInformation.iSearchEnzymeOffSet, + g_staticParams.enzymeInformation.szSearchEnzymeBreakAA, + g_staticParams.enzymeInformation.szSearchEnzymeNoBreakAA); + fprintf(fptr, "Enzyme2: %s [%d %s %s]\n", g_staticParams.enzymeInformation.szSearchEnzyme2Name, + g_staticParams.enzymeInformation.iSearchEnzyme2OffSet, + g_staticParams.enzymeInformation.szSearchEnzyme2BreakAA, + g_staticParams.enzymeInformation.szSearchEnzyme2NoBreakAA); + fprintf(fptr, "NumPeptides: %ld\n", (long)g_pvDBIndex.size()); + + // write out static mod params A to Z is ascii 65 to 90 then terminal mods + fprintf(fptr, "StaticMod:"); + for (int x = 65; x <= 90; x++) + fprintf(fptr, " %lf", g_staticParams.staticModifications.pdStaticMods[x]); + fprintf(fptr, " %lf", g_staticParams.staticModifications.dAddNterminusPeptide); + fprintf(fptr, " %lf", g_staticParams.staticModifications.dAddCterminusPeptide); + fprintf(fptr, " %lf", g_staticParams.staticModifications.dAddNterminusProtein); + fprintf(fptr, " %lf\n", g_staticParams.staticModifications.dAddCterminusProtein); + + // write out variable mod params + fprintf(fptr, "VariableMod:"); + for (int x = 0; x < VMODS; x++) + { + fprintf(fptr, " %s %lf:%lf", g_staticParams.variableModParameters.varModList[x].szVarModChar, + g_staticParams.variableModParameters.varModList[x].dVarModMass, + g_staticParams.variableModParameters.varModList[x].dNeutralLoss); + } + fprintf(fptr, "\n\n"); + + // Now write out: vector> g_pvProteinsList + comet_fileoffset_t clProteinsFilePos = comet_ftell(fptr); + size_t tTmp = g_pvProteinsList.size(); + fwrite(&tTmp, clSizeCometFileOffset, 1, fptr); + for (auto it = g_pvProteinsList.begin(); it != g_pvProteinsList.end(); ++it) + { + tTmp = (*it).size(); + fwrite(&tTmp, sizeof(size_t), 1, fptr); + for (size_t it2 = 0; it2 < tTmp; ++it2) + { + fwrite(&((*it).at(it2)), clSizeCometFileOffset, 1, fptr); + } + } + + // next write out the peptides and track peptide mass index + int iMaxPeptideMass = (int)(g_staticParams.options.dPeptideMassHigh); + int iMaxPeptideMass10 = iMaxPeptideMass * 10; // make mass index at resolution of 0.1 Da + comet_fileoffset_t *lIndex = new comet_fileoffset_t[iMaxPeptideMass10 + 1]; + for (int x = 0; x <= iMaxPeptideMass10; x++) + lIndex[x] = -1; + + // write out peptide entry here + int iPrevMass10 = 0; + for (std::vector::iterator it = g_pvDBIndex.begin(); it != g_pvDBIndex.end(); ++it) + { + if ((int)((*it).dPepMass * 10.0) > iPrevMass10) + { + iPrevMass10 = (int)((*it).dPepMass * 10.0); + if (iPrevMass10 < iMaxPeptideMass10) + lIndex[iPrevMass10] = comet_ftell(fptr); + } + + int iLen = (int)strlen((*it).szPeptide); + fwrite(&iLen, sizeof(int), 1, fptr); + fwrite((*it).szPeptide, sizeof(char), iLen, fptr); +// fwrite((*it).szPrevNextAA, sizeof(char), 2, fptr); + + // write out for char 0=no mod, N=mod. If N, write out var mods as N pairs (pos,whichmod) + int iLen2 = iLen + 2; + unsigned char cNumMods = 0; + for (unsigned char x=0; x 0) + { + for (unsigned char x=0; xszPeptide, sizeof(char), iLen, fp); + sDBI->szPeptide[iLen] = '\0'; + + unsigned char cNumMods; // number of var mods encoded as position:residue pairs + tTmp = fread(&cNumMods, sizeof(unsigned char), 1, fp); // read how many var mods are stored + + memset(sDBI->pcVarModSites, 0, sizeof(unsigned char)*iLen+2); + if (cNumMods > 0) + { + for (unsigned char x=0; xpcVarModSites[(int)cPosition] = cResidue; + } + } + // done reading mod sites + + tTmp = fread(&(sDBI->dPepMass), sizeof(double), 1, fp); + tTmp = fread(&(sDBI->lIndexProteinFilePosition), sizeof(comet_fileoffset_t), 1, fp); +} diff --git a/CometSearch/CometPeptideIndex.h b/CometSearch/CometPeptideIndex.h new file mode 100644 index 00000000..a9630b9e --- /dev/null +++ b/CometSearch/CometPeptideIndex.h @@ -0,0 +1,53 @@ +// Copyright 2023 Jimmy Eng +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#ifndef _COMETPEPTIDEINDEX_H_ +#define _COMETPEPTIDEINDEX_H_ + +#include "Common.h" +//#include "CometDataInternal.h" +#include "CometPostAnalysis.h" +#include "CometSearch.h" +#include "CometStatus.h" +#include "CometMassSpecUtils.h" +#include "CometFragmentIndex.h" +#include "ThreadPool.h" + + + +class CometPeptideIndex +{ +public: + CometPeptideIndex(); + ~CometPeptideIndex(); + + static bool WritePeptideIndex(ThreadPool *tp); + static void ReadPeptideIndexEntry(struct DBIndex *sDBI, FILE *fp); + +private: + +/* + unsigned int _uiBinnedIonMasses[MAX_FRAGMENT_CHARGE + 1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][VMODS + 1]; + unsigned int _uiBinnedIonMassesDecoy[MAX_FRAGMENT_CHARGE + 1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][VMODS + 1]; + unsigned int _uiBinnedPrecursorNL[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE]; + unsigned int _uiBinnedPrecursorNLDecoy[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE]; +*/ + static bool *_pbSearchMemoryPool; // Pool of memory to be shared by search threads + static bool **_ppbDuplFragmentArr; // Number of arrays equals number of threads + + static Mutex _vFragmentPeptidesMutex; +}; + +#endif // _COMETPEPTIDEINDEX_H_ diff --git a/CometSearch/CometPostAnalysis.cpp b/CometSearch/CometPostAnalysis.cpp index e9bd5839..b05f79a9 100644 --- a/CometSearch/CometPostAnalysis.cpp +++ b/CometSearch/CometPostAnalysis.cpp @@ -14,13 +14,11 @@ #include "Common.h" -#include "CometDataInternal.h" #include "ThreadPool.h" #include "CometPostAnalysis.h" #include "CometMassSpecUtils.h" #include "CometStatus.h" - #include "CometDecoys.h" // this is where decoyIons[EXPECT_DECOY_SIZE] is initialized @@ -43,15 +41,18 @@ bool CometPostAnalysis::PostAnalysis(ThreadPool* tp) for (int i=0; i<(int)g_pvQuery.size(); ++i) { - PostAnalysisThreadData *pThreadData = new PostAnalysisThreadData(i); + if (g_pvQuery.at(i)->iMatchPeptideCount > 0 || g_pvQuery.at(i)->iDecoyMatchPeptideCount > 0) + { + PostAnalysisThreadData* pThreadData = new PostAnalysisThreadData(i); - pPostAnalysisThreadPool->doJob(std::bind(PostAnalysisThreadProc, pThreadData, pPostAnalysisThreadPool)); + pPostAnalysisThreadPool->doJob(std::bind(PostAnalysisThreadProc, pThreadData, pPostAnalysisThreadPool)); - pThreadData = NULL; - bSucceeded = !g_cometStatus.IsError() && !g_cometStatus.IsCancel(); - if (!bSucceeded) - { - break; + pThreadData = NULL; + bSucceeded = !g_cometStatus.IsError() && !g_cometStatus.IsCancel(); + if (!bSucceeded) + { + break; + } } } @@ -330,7 +331,7 @@ void CometPostAnalysis::CalculateSP(Results *pOutput, for (i = 0; i < iSize; ++i) { - if (!g_staticParams.bIndexDb) + if (!g_staticParams.iIndexDb) { // hijack here to make protein vector unique if (pOutput[i].pWhichProtein.size() > 1) @@ -369,7 +370,7 @@ void CometPostAnalysis::CalculateSP(Results *pOutput, } } - if (pOutput[i].iLenPeptide > 0 && pOutput[i].fXcorr > XCORR_CUTOFF) // take care of possible edge case + if (pOutput[i].iLenPeptide > 0 && pOutput[i].fXcorr > g_staticParams.options.dMinimumXcorr) // take care of possible edge case { int ii; int ctCharge; diff --git a/CometSearch/CometPostAnalysis.h b/CometSearch/CometPostAnalysis.h index a54b934f..f2126b94 100644 --- a/CometSearch/CometPostAnalysis.h +++ b/CometSearch/CometPostAnalysis.h @@ -16,6 +16,7 @@ #ifndef _COMETPOSTANALYSIS_H_ #define _COMETPOSTANALYSIS_H_ +#include "CometDataInternal.h" struct PostAnalysisThreadData { diff --git a/CometSearch/CometPreprocess.cpp b/CometSearch/CometPreprocess.cpp index a0919a6b..79ab7011 100644 --- a/CometSearch/CometPreprocess.cpp +++ b/CometSearch/CometPreprocess.cpp @@ -1611,7 +1611,7 @@ bool CometPreprocess::LoadIons(struct Query *pScoring, int iNumFragmentPeaks = 0; - if (g_staticParams.bIndexDb && mstSpectrum.size() > FRAGINDEX_MAX_NUMPEAKS) + if (g_staticParams.iIndexDb && mstSpectrum.size() > FRAGINDEX_MAX_NUMPEAKS) { // sorts spectrum in ascending order by intensity mstSpectrum.sortIntensity(); @@ -1628,7 +1628,7 @@ bool CometPreprocess::LoadIons(struct Query *pScoring, if (dIntensity >= dIntensityCutoff && dIntensity > 0.0) { - if (g_staticParams.bIndexDb && iNumFragmentPeaks < FRAGINDEX_MAX_NUMPEAKS) + if (g_staticParams.iIndexDb && iNumFragmentPeaks < FRAGINDEX_MAX_NUMPEAKS) { // Store list of fragment masses for fragment index search // Intensities don't matter here @@ -1960,10 +1960,6 @@ bool CometPreprocess::PreprocessSingleSpectrum(int iPrecursorCharge, pScoring->_spectrumInfoInternal.iChargeState = iPrecursorCharge; - g_massRange.dMinMass = pScoring->_pepMassInfo.dExpPepMass; - g_massRange.dMaxMass = pScoring->_pepMassInfo.dExpPepMass; - g_massRange.iMaxFragmentCharge = pScoring->_spectrumInfoInternal.iMaxFragCharge; - if (iPrecursorCharge == 1) pScoring->_spectrumInfoInternal.iMaxFragCharge = 1; else @@ -1974,6 +1970,10 @@ bool CometPreprocess::PreprocessSingleSpectrum(int iPrecursorCharge, pScoring->_spectrumInfoInternal.iMaxFragCharge = g_staticParams.options.iMaxFragmentCharge; } + g_massRange.dMinMass = pScoring->_pepMassInfo.dExpPepMass; + g_massRange.dMaxMass = pScoring->_pepMassInfo.dExpPepMass; + g_massRange.iMaxFragmentCharge = pScoring->_spectrumInfoInternal.iMaxFragCharge; + //preprocess here int i; int x; @@ -2012,7 +2012,6 @@ bool CometPreprocess::PreprocessSingleSpectrum(int iPrecursorCharge, pScoring->_spectrumInfoInternal.iArraySize = (int)((pScoring->_pepMassInfo.dExpPepMass + dCushion + 2.0) * g_staticParams.dInverseBinWidth); - // initialize these temporary arrays before re-using size_t iTmp= (size_t)(pScoring->_spectrumInfoInternal.iArraySize)*sizeof(double); @@ -2060,7 +2059,7 @@ bool CometPreprocess::PreprocessSingleSpectrum(int iPrecursorCharge, if (bPass) { - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb) pScoring->vdRawFragmentPeakMass.push_back(dIon); if (dIon < (pScoring->_pepMassInfo.dExpPepMass + 50.0)) diff --git a/CometSearch/CometPreprocess.h b/CometSearch/CometPreprocess.h index aeef823f..2f30afca 100644 --- a/CometSearch/CometPreprocess.h +++ b/CometSearch/CometPreprocess.h @@ -16,7 +16,6 @@ #ifndef _COMETPREPROCESS_H_ #define _COMETPREPROCESS_H_ -#include "Common.h" #include "ThreadPool.h" struct PreprocessThreadData diff --git a/CometSearch/CometSearch.cpp b/CometSearch/CometSearch.cpp index ce27e179..035a72f2 100644 --- a/CometSearch/CometSearch.cpp +++ b/CometSearch/CometSearch.cpp @@ -12,24 +12,30 @@ // See the License for the specific language governing permissions and // limitations under the License. - #include "Common.h" #include "CometSearch.h" +#include "CometDataInternal.h" #include "ThreadPool.h" #include "CometStatus.h" #include "CometPostAnalysis.h" #include "CometMassSpecUtils.h" #include "CometFragmentIndex.h" +#include "CometPeptideIndex.h" #include "ModificationsPermuter.h" #include #include #include #include +#include + bool *CometSearch::_pbSearchMemoryPool; bool **CometSearch::_ppbDuplFragmentArr; +extern comet_fileoffset_t clSizeCometFileOffset; + + CometSearch::CometSearch() { // Initialize the header modification string - won't change. @@ -98,22 +104,36 @@ bool CometSearch::DeallocateMemory(int maxNumThreads) } - // called by DoSingleSpectrumSearch bool CometSearch::RunSearch(ThreadPool *tp) { CometFragmentIndex sqFI; CometSearch sqSearch; + size_t iWhichQuery = 0; - if (!g_bPlainPeptideIndexRead) + if (g_staticParams.iIndexDb == 1) // fragment index { - sqFI.ReadPlainPeptideIndex(); - sqFI.CreateFragmentIndex(tp); - } - - size_t iWhichQuery = 0; + if (!g_bPlainPeptideIndexRead) + { + sqFI.ReadPlainPeptideIndex(); + sqFI.CreateFragmentIndex(tp); + } - sqSearch.SearchFragmentIndex(iWhichQuery, tp); + sqSearch.SearchFragmentIndex(iWhichQuery, tp); + } + else if (g_staticParams.iIndexDb == 2) // peptide index + { + sqSearch.SearchPeptideIndex(); + } + else + { + char szErrorMsg[SIZE_ERROR]; + sprintf(szErrorMsg, " Error - index search but iIndexDb=%d\n", g_staticParams.iIndexDb); + string strErrorMsg(szErrorMsg); + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(szErrorMsg); + return false; + } return true; } @@ -125,7 +145,7 @@ bool CometSearch::RunSearch(int iPercentStart, { bool bSucceeded = true; - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb == 1) { CometFragmentIndex sqFI; CometSearch sqSearch; @@ -161,6 +181,11 @@ bool CometSearch::RunSearch(int iPercentStart, return bSucceeded; } + else if (g_staticParams.iIndexDb == 2) + { + CometSearch sqSearch; + sqSearch.SearchPeptideIndex(); + } else { sDBEntry dbe; @@ -241,7 +266,7 @@ bool CometSearch::RunSearch(int iPercentStart, } } - if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.options.bCreateIndex) + if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.options.bCreatePeptideIndex && !g_staticParams.options.bCreateFragmentIndex) { logout(" - Search progress: "); fflush(stdout); @@ -332,7 +357,7 @@ bool CometSearch::RunSearch(int iPercentStart, if (iNumBadChars > 20) { logerr(" Too many non-printing characters in database header lines; wrong file type/format?\n"); - fclose(fp); + std::fclose(fp); return false; } } @@ -360,7 +385,7 @@ bool CometSearch::RunSearch(int iPercentStart, string strErrorMsg = " Error realloc(szPeffLine[" + to_string(iLenSzLine) + "])\n"; g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); logerr(strErrorMsg.c_str()); - fclose(fp); + std::fclose(fp); return false; } szPeffLine = pTmp; @@ -403,7 +428,7 @@ bool CometSearch::RunSearch(int iPercentStart, string strErrorMsg = " Error realloc(szMods[" + to_string(iLenAllocMods) + "])\n"; g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); logerr(strErrorMsg.c_str()); - fclose(fp); + std::fclose(fp); return false; } szMods = pTmp; @@ -422,7 +447,7 @@ bool CometSearch::RunSearch(int iPercentStart, string strErrorMsg = " Error: PEFF entry '" + dbe.strName + "' missing mod closing parenthesis\n"; g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); logerr(strErrorMsg.c_str()); - fclose(fp); + std::fclose(fp); return false; } @@ -541,7 +566,7 @@ bool CometSearch::RunSearch(int iPercentStart, string strErrorMsg = " Error realloc(szMods[" + to_string(iLenAllocMods) + "])\n"; g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); logerr(strErrorMsg.c_str()); - fclose(fp); + std::fclose(fp); return false; } szMods = pTmp; @@ -560,7 +585,7 @@ bool CometSearch::RunSearch(int iPercentStart, string strErrorMsg = " Error: PEFF entry '" + dbe.strName + "' missing variant closing parenthesis\n"; g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); logerr(strErrorMsg.c_str()); - fclose(fp); + std::fclose(fp); return false; } @@ -654,7 +679,7 @@ bool CometSearch::RunSearch(int iPercentStart, string strErrorMsg = " Error realloc(szMods[" + to_string(iLenAllocMods) + "])\n"; g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); logerr(strErrorMsg.c_str()); - fclose(fp); + std::fclose(fp); return false; } szMods = pTmp; @@ -673,7 +698,7 @@ bool CometSearch::RunSearch(int iPercentStart, string strErrorMsg = " Error: PEFF entry '" + dbe.strName + "' missing variant closing parenthesis\n"; g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); logerr(strErrorMsg.c_str()); - fclose(fp); + std::fclose(fp); return false; } @@ -800,7 +825,7 @@ bool CometSearch::RunSearch(int iPercentStart, { char szTmp[128]; lCurrPos = ftell(fp); - if (g_staticParams.options.bCreateIndex) + if (g_staticParams.options.bCreatePeptideIndex || g_staticParams.options.bCreateFragmentIndex) sprintf(szTmp, "%3d%%", (int)(100.0*(0.005 + (double)lCurrPos/(double)lEndPos))); else // go from iPercentStart to iPercentEnd, scaled by lCurrPos/iEndPos sprintf(szTmp, "%3d%%", (int)(((iPercentStart + ((double)iPercentEnd-iPercentStart)*(double)lCurrPos/(double)lEndPos) ))); @@ -831,12 +856,12 @@ bool CometSearch::RunSearch(int iPercentStart, bSucceeded = !g_cometStatus.IsError() && !g_cometStatus.IsCancel(); } - fclose(fp); + std::fclose(fp); if (!g_staticParams.options.bOutputSqtStream) { char szTmp[128]; - if (g_staticParams.options.bCreateIndex) + if (g_staticParams.options.bCreatePeptideIndex || g_staticParams.options.bCreateFragmentIndex) sprintf(szTmp, "100%%\n"); else sprintf(szTmp, "%3d%%\n", iPercentEnd); @@ -921,7 +946,7 @@ void CometSearch::ReadOBO(char *szOBO, } } - fclose(fp); + std::fclose(fp); } else { @@ -941,7 +966,7 @@ bool CometSearch::MapOBO(string strMod, vector *vectorPeffOBO, struct // find match of strMod in vectorPeffOBO and store diff masses in pData - iPos=BinarySearchPeffStrMod(0, (int)(*vectorPeffOBO).size(), strMod, *vectorPeffOBO); + iPos = BinarySearchPeffStrMod(0, (int)(*vectorPeffOBO).size(), strMod, *vectorPeffOBO); if (iPos != -1 && iPos< (int)(*vectorPeffOBO).size() ) { @@ -1482,7 +1507,7 @@ void CometSearch::SearchFragmentIndex(size_t iWhichQuery, // iLenPeptide-1 to complete set of internal fragment ions. for (ctLen = 0; ctLen < iLenMinus1; ++ctLen) { - double dFragMass = GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, pdAAforward, pdAAreverse); + double dFragMass = CometMassSpecUtils::GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, pdAAforward, pdAAreverse); int iVal = BIN(dFragMass); if (pbDuplFragment[iVal] == false) @@ -1544,6 +1569,802 @@ void CometSearch::SearchFragmentIndex(size_t iWhichQuery, } +bool CometSearch::SearchPeptideIndex(void) +{ + comet_fileoffset_t lEndOfStruct; + char szBuf[SIZE_BUF]; + FILE *fp; + size_t tTmp; + + CometPostAnalysis cpa; + + if ((fp = fopen(g_staticParams.databaseInfo.szDatabase, "rb")) == NULL) + { + char szErrorMsg[SIZE_ERROR]; + sprintf(szErrorMsg, " Error - cannot read indexed database file \"%s\" %s.\n", g_staticParams.databaseInfo.szDatabase, strerror(errno)); + string strErrorMsg(szErrorMsg); + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(szErrorMsg); + return false; + } + + if (!g_bPeptideIndexRead) // save some repeated parsing when being called from DLL + { + // ignore any static masses in params file; only valid ones + // are those in database index + memset(g_staticParams.staticModifications.pdStaticMods, 0, sizeof(g_staticParams.staticModifications.pdStaticMods)); + + bool bFoundStatic = false; + bool bFoundVariable = false; + + // read in static and variable mods + while (fgets(szBuf, sizeof(szBuf), fp)) + { + if (!strncmp(szBuf, "MassType:", 9)) + { + sscanf(szBuf, "%d %d", &g_staticParams.massUtility.bMonoMassesParent, &g_staticParams.massUtility.bMonoMassesFragment); + } + else if (!strncmp(szBuf, "StaticMod:", 10)) + { + char* tok; + char delims[] = " "; + int x = 65; + + // FIX: hack here for setting static mods; need to reset masses ... fix later + CometMassSpecUtils::AssignMass(g_staticParams.massUtility.pdAAMassFragment, + g_staticParams.massUtility.bMonoMassesFragment, + &g_staticParams.massUtility.dOH2fragment); + + bFoundStatic = true; + tok = strtok(szBuf + 11, delims); + while (tok != NULL) + { + sscanf(tok, "%lf", &(g_staticParams.staticModifications.pdStaticMods[x])); + g_staticParams.massUtility.pdAAMassFragment[x] += g_staticParams.staticModifications.pdStaticMods[x]; + tok = strtok(NULL, delims); + x++; + if (x == 95) // 65-90 stores A-Z then next 4 (ascii 91-94) are n/c term peptide, n/c term protein + break; + } + + g_staticParams.staticModifications.dAddNterminusPeptide = g_staticParams.staticModifications.pdStaticMods[91]; + g_staticParams.staticModifications.dAddCterminusPeptide = g_staticParams.staticModifications.pdStaticMods[92]; + g_staticParams.staticModifications.dAddNterminusProtein = g_staticParams.staticModifications.pdStaticMods[93]; + g_staticParams.staticModifications.dAddCterminusProtein = g_staticParams.staticModifications.pdStaticMods[94]; + + // have to set these here again once static mods are read + g_staticParams.precalcMasses.dNtermProton = g_staticParams.staticModifications.dAddNterminusPeptide + + PROTON_MASS; + + g_staticParams.precalcMasses.dCtermOH2Proton = g_staticParams.staticModifications.dAddCterminusPeptide + + g_staticParams.massUtility.dOH2fragment + + PROTON_MASS; + + g_staticParams.precalcMasses.dOH2ProtonCtermNterm = g_staticParams.massUtility.dOH2parent + + PROTON_MASS + + g_staticParams.staticModifications.dAddCterminusPeptide + + g_staticParams.staticModifications.dAddNterminusPeptide; + } + else if (!strncmp(szBuf, "Enzyme:", 7)) + { + sscanf(szBuf, "Enzyme: %s [%d %s %s]", g_staticParams.enzymeInformation.szSearchEnzymeName, + &(g_staticParams.enzymeInformation.iSearchEnzymeOffSet), + g_staticParams.enzymeInformation.szSearchEnzymeBreakAA, + g_staticParams.enzymeInformation.szSearchEnzymeNoBreakAA); + } + else if (!strncmp(szBuf, "Enzyme2:", 8)) + { + sscanf(szBuf, "Enzyme2: %s [%d %s %s]", g_staticParams.enzymeInformation.szSearchEnzyme2Name, + &(g_staticParams.enzymeInformation.iSearchEnzyme2OffSet), + g_staticParams.enzymeInformation.szSearchEnzyme2BreakAA, + g_staticParams.enzymeInformation.szSearchEnzyme2NoBreakAA); + } + else if (!strncmp(szBuf, "VariableMod:", 12)) + { + char* tok; + char delims[] = " "; + int x = 0; + + bFoundVariable = true; + + tok = strtok(szBuf + 13, delims); + while (tok != NULL) + { + tok = strtok(NULL, delims); // skip list of var mod residues + + // for index search, storing variable mods 0-9 in pdStaticMods array 0-9 + sscanf(tok, "%lf:%lf", &(g_staticParams.variableModParameters.varModList[x].dVarModMass), + &(g_staticParams.variableModParameters.varModList[x].dNeutralLoss)); + + if (g_staticParams.variableModParameters.varModList[x].dNeutralLoss != 0.0) + g_staticParams.variableModParameters.bUseFragmentNeutralLoss = true; + + tok = strtok(NULL, delims); + + x++; + if (x == VMODS) + break; + } + break; + } + } + + if (!(bFoundStatic && bFoundVariable)) + { + char szErr[256]; + sprintf(szErr, " Error with index database format. Mods not parsed (%d %d).", bFoundStatic, bFoundVariable); + logerr(szErr); + std::fclose(fp); + return false; + } + + // indexed searches will always set this to true + g_staticParams.variableModParameters.bVarModSearch = true; + } + + // read fp of index + comet_fileoffset_t clTmp; + comet_fileoffset_t clProteinsFilePos; + + comet_fseek(fp, -clSizeCometFileOffset * 2, SEEK_END); + tTmp = fread(&lEndOfStruct, clSizeCometFileOffset, 1, fp); + tTmp = fread(&clProteinsFilePos, clSizeCometFileOffset, 1, fp); + + if (!g_bPeptideIndexRead) + { + // now read in: vector> g_pvProteinsList + comet_fseek(fp, clProteinsFilePos, SEEK_SET); + size_t tSize; + tTmp = fread(&tSize, clSizeCometFileOffset, 1, fp); + vector vTmp; + + g_pvProteinsList.clear(); + g_pvProteinsList.reserve(tSize); + for (size_t it = 0; it < tSize; ++it) + { + size_t tNumProteinOffsets; + tTmp = fread(&tNumProteinOffsets, clSizeCometFileOffset, 1, fp); + + vTmp.clear(); + for (size_t it2 = 0; it2 < tNumProteinOffsets; ++it2) + { + tTmp = fread(&clTmp, clSizeCometFileOffset, 1, fp); + vTmp.push_back(clTmp); + } + g_pvProteinsList.push_back(vTmp); + } + g_bPeptideIndexRead = true; + } + + // read index + int iMinMass = 0; + int iMaxMass = 0; + uint64_t tNumPeptides = 0; + + // seek to index + comet_fseek(fp, lEndOfStruct, SEEK_SET); + + tTmp = fread(&iMinMass, sizeof(int), 1, fp); + tTmp = fread(&iMaxMass, sizeof(int), 1, fp); + tTmp = fread(&tNumPeptides, sizeof(uint64_t), 1, fp); + + // sanity checks + if (iMinMass < 0 || iMinMass > 20000 || iMaxMass < 0 || iMaxMass > 20000) + { + char szErr[256]; + sprintf(szErr, " Error reading .idx database: min mass %d, max mass %d, num peptides %zu\n", iMinMass, iMaxMass, tNumPeptides); + logerr(szErr); + std::fclose(fp); + return false; + } + + int iMaxPeptideMass10 = iMaxMass * 10; + comet_fileoffset_t* lReadIndex = new comet_fileoffset_t[iMaxPeptideMass10]; + for (int i = 0; i < iMaxPeptideMass10; ++i) + lReadIndex[i] = -1; + + tTmp = fread(lReadIndex, sizeof(comet_fileoffset_t), iMaxPeptideMass10, fp); + + int iStart = (int)(g_massRange.dMinMass - 0.5); // smallest mass/index start + int iEnd = (int)(g_massRange.dMaxMass + 0.5); // largest mass/index end + + if (iStart > iMaxMass) // smallest input mass is greater than what's stored in index + { + delete[] lReadIndex; + std::fclose(fp); + return true; + } + + if (iStart < iMinMass) + iStart = iMinMass; + if (iEnd > iMaxMass) + iEnd = iMaxMass; + + int iStart10 = (int)(g_massRange.dMinMass*10.0 - 0.5); // lReadIndex is at 0.1 resolution for index value so scale iStart/iEnd to be same + int iEnd10 = (int)(g_massRange.dMaxMass*10.0 + 0.5); + + if (iStart10 < iMinMass*10) + iStart10 = iMinMass*10; + if (iEnd10 > iMaxMass*10) + iEnd10 = iMaxMass*10; + + struct DBIndex sDBI; + sDBEntry dbe; + + while (lReadIndex[iStart10] == -1 && iStart10 < iEnd10) + iStart10++; + + if (lReadIndex[iStart10] == -1) // no match found within tolerance + { + delete[] lReadIndex; + std::fclose(fp); + return true; + } + + comet_fseek(fp, lReadIndex[iStart10], SEEK_SET); + CometPeptideIndex::ReadPeptideIndexEntry(&sDBI, fp); + + // only use of dbe here is to store the protein position; used for backwards + // compatibility with standard search in StorePeptide + dbe.lProteinFilePosition = sDBI.lIndexProteinFilePosition; + + while ((int)(sDBI.dPepMass * 10) <= iEnd10) + { +/* + printf("OK index pep "); + for (unsigned int x=0; x g_massRange.dMaxMass) + break; + + int iWhichQuery = BinarySearchMass(0, (int)g_pvQuery.size(), sDBI.dPepMass); + + while (iWhichQuery > 0 && g_pvQuery.at(iWhichQuery)->_pepMassInfo.dPeptideMassTolerancePlus >= sDBI.dPepMass) + iWhichQuery--; + + // Do the search + if (iWhichQuery != -1) + AnalyzePeptideIndex(iWhichQuery, sDBI, _ppbDuplFragmentArr[0], &dbe); + + if (comet_ftell(fp) >= lEndOfStruct || sDBI.dPepMass > g_massRange.dMaxMass) + break; + + CometPeptideIndex::ReadPeptideIndexEntry(&sDBI, fp); + dbe.lProteinFilePosition = sDBI.lIndexProteinFilePosition; + + // read past last entry in indexed db, need to break out of loop + if (feof(fp)) + break; + + if (g_staticParams.options.iMaxIndexRunTime > 0) + { + // now check search run time + std::chrono::high_resolution_clock::time_point tNow = std::chrono::high_resolution_clock::now(); + auto tElapsedTime = std::chrono::duration_cast(tNow - g_staticParams.tRealTimeStart).count(); + if (tElapsedTime >= g_staticParams.options.iMaxIndexRunTime) + break; + } + } + + for (vector::iterator it = g_pvQuery.begin(); it != g_pvQuery.end(); ++it) + { + int iNumMatchedPeptides; + + iNumMatchedPeptides = (*it)->iMatchPeptideCount; + if (iNumMatchedPeptides > g_staticParams.options.iNumStored) + iNumMatchedPeptides = g_staticParams.options.iNumStored; + + if (iNumMatchedPeptides > 0) // will retrieve protein names here if there is one or more matched peptides + { + // sort and report results by xcorr + std::sort((*it)->_pResults, (*it)->_pResults + iNumMatchedPeptides, cpa.SortFnXcorr); + + for (int ii = 0; ii < iNumMatchedPeptides; ii++) // loop through all hits to this one spectrum query + { + if (ii > 0 && (*it)->_pResults[ii].fXcorr < (*it)->_pResults[0].fXcorr) // do this only for peptides that have same top xcorr, could be more than 1 + break; + + std::vector::iterator itProt; + bool bPrintDecoyPrefix = false; + + // Note peptides can be from target or internal decoy. If peptide is from a target protein, + // Comet will only report target protein matches and not internal decoy protein matches. + // Decoy proteins only reported for peptides that are exclusively decoy matches. + if ((*it)->_pResults[ii].pWhichProtein.size() > 0) + itProt = (*it)->_pResults[ii].pWhichProtein.begin(); // list of target proteins + else + { + itProt = (*it)->_pResults[ii].pWhichDecoyProtein.begin(); // list of decoy proteins + bPrintDecoyPrefix = true; + } + } +/* + for (int x = 0; x < iNumMatchedPeptides; x++) + { + printf("OK %d scan %d, pep %s, xcorr %f, mass %f, matchcount %d, prot %s\n", x, + (*it)->_spectrumInfoInternal.iScanNumber, + (*it)->_pResults[x].szPeptide, + (*it)->_pResults[x].fXcorr, + (*it)->_pResults[x].dPepMass, + (*it)->iMatchPeptideCount, + (*it)->_pResults[x].strSingleSearchProtein.c_str()); fflush(stdout); + } +*/ + } + } + + delete [] lReadIndex; + std::fclose(fp); + return true; +} + + +void CometSearch::AnalyzePeptideIndex(int iWhichQuery, + DBIndex sDBI, + bool *pbDuplFragment, + struct sDBEntry *dbe) +{ + int iWhichIonSeries; + int ctIonSeries; + int ctLen; + int ctCharge; + int iLenPeptide = (int)strlen(sDBI.szPeptide); + int iStartPos = 0; + int iEndPos = iLenPeptide - 1; + int iUnused = 0; + bool bFirstTimeThroughLoopForPeptide = true; + + int iFoundVariableMod = 0; // 1 = variable mod, 2 = with fragment NL + int iFoundVariableModDecoy = 0; + + int iPositionNLB[VMODS]; + int iPositionNLY[VMODS]; + + double _pdAAforward[MAX_PEPTIDE_LEN]; // Stores fragment ion fragment ladder calc.; sum AA masses including mods + double _pdAAreverse[MAX_PEPTIDE_LEN]; // Stores n-term fragment ion fragment ladder calc.; sum AA masses including mods + double _pdAAforwardDecoy[MAX_PEPTIDE_LEN]; // Stores fragment ion fragment ladder calc.; sum AA masses including mods + double _pdAAreverseDecoy[MAX_PEPTIDE_LEN]; // Stores n-term fragment ion fragment ladder calc.; sum AA masses including mods + + if (g_staticParams.variableModParameters.bUseFragmentNeutralLoss) + { + int i; + + for (i=0; i 0) + { + if (g_staticParams.variableModParameters.varModList[sDBI.pcVarModSites[i]-1].dNeutralLoss != 0.0) + { + iPositionNLB[sDBI.pcVarModSites[i] -1] = i; + break; + } + } + } + + for (i=iEndPos; i>=0; i--) + { + if (sDBI.pcVarModSites[i] > 0) + { + if (g_staticParams.variableModParameters.varModList[sDBI.pcVarModSites[i]-1].dNeutralLoss != 0.0) + { + iPositionNLY[sDBI.pcVarModSites[i] -1] = i; + break; + } + } + } + } + + // Compare calculated fragment ions against all matching query spectra. + while (iWhichQuery < (int)g_pvQuery.size()) + { + if (sDBI.dPepMass < g_pvQuery.at(iWhichQuery)->_pepMassInfo.dPeptideMassToleranceMinus) + { + // If calculated mass is smaller than low mass range. + break; + } + + // Mass tolerance check for particular query against this candidate peptide mass. + if (CheckMassMatch(iWhichQuery, sDBI.dPepMass)) + { + char szDecoyPeptide[MAX_PEPTIDE_LEN]; + int piVarModSites[MAX_PEPTIDE_LEN_P2]; // forward mods, generated from sDBI.sVarModSites + int piVarModSitesDecoy[MAX_PEPTIDE_LEN_P2]; + + int iLen2 = iLenPeptide + 2; + for (int x = 0; x < iLen2; x++) + piVarModSites[x] = sDBI.pcVarModSites[x]; + + // Calculate ion series just once to compare against all relevant query spectra. + if (bFirstTimeThroughLoopForPeptide) + { + int iLenMinus1 = iEndPos - iStartPos; // Equals iLenPeptide minus 1. + int i; + + bFirstTimeThroughLoopForPeptide = false; + + double dBion = g_staticParams.precalcMasses.dNtermProton; + double dYion = g_staticParams.precalcMasses.dCtermOH2Proton; + +/* n/c-term protein mods not supported + if (iStartPos == 0) + dBion += g_staticParams.staticModifications.dAddNterminusProtein; + if (iEndPos == iLenProteinMinus1) + dYion += g_staticParams.staticModifications.dAddCterminusProtein; +*/ + + // variable N-term peptide mod + if (piVarModSites[iLenPeptide] > 0) + { + dBion += g_staticParams.variableModParameters.varModList[piVarModSites[iLenPeptide] - 1].dVarModMass; + iFoundVariableMod = 1; + } + + // variable C-term peptide mod + if (piVarModSites[iLenPeptide + 1] > 0) + { + dYion += g_staticParams.variableModParameters.varModList[piVarModSites[iLenPeptide + 1] - 1].dVarModMass; + iFoundVariableMod = 1; + } + + // Generate pdAAforward for sDBI.szPeptide + for (int i=iStartPos; i 0) + { + dBion += g_staticParams.variableModParameters.varModList[piVarModSites[iPos]-1].dVarModMass; + iFoundVariableMod = 1; + } + + dYion += g_staticParams.massUtility.pdAAMassFragment[(int)sDBI.szPeptide[iPos2]]; + if (piVarModSites[iPos2] > 0) + { + dYion += g_staticParams.variableModParameters.varModList[piVarModSites[iPos2]-1].dVarModMass; + iFoundVariableMod = 1; + } + + _pdAAforward[iPos] = dBion; + _pdAAreverse[iPos] = dYion; + } + + // Now get the set of binned fragment ions once to compare this peptide against all matching spectra. + // First initialize pbDuplFragment and _uiBinnedIonMasses + for (ctCharge=1; ctCharge<=g_massRange.iMaxFragmentCharge; ctCharge++) + { + for (ctIonSeries=0; ctIonSeries= iPositionNLB[x]) // 0/1/2 is a/b/c ions + || (iWhichIonSeries >= 3 && iWhichIonSeries <= 5 && iLenMinus1-ctLen <= iPositionNLY[x])) // 3/4/5 is x/y/z ions + { + double dNewMass = dFragMass - g_staticParams.variableModParameters.varModList[x].dNeutralLoss/ctCharge; + + if (dNewMass >= 0.0) + { + pbDuplFragment[BIN(dNewMass)] = false; + iFoundVariableMod = 2; + } + + _uiBinnedIonMasses[ctCharge][ctIonSeries][ctLen][x+1] = 0; + } + } + } + } + } + } + + for (int ctNL=0; ctNL_spectrumInfoInternal.iChargeState; ctCharge>=1; ctCharge--) + { + double dNLMass = (sDBI.dPepMass - PROTON_MASS - g_staticParams.precursorNLIons[ctNL] + ctCharge*PROTON_MASS)/ctCharge; + int iVal = BIN(dNLMass); + + if (iVal > 0) + { + pbDuplFragment[iVal] = false; + _uiBinnedPrecursorNL[ctNL][ctCharge] = 0; + } + } + } + + for (ctCharge=1; ctCharge<=g_massRange.iMaxFragmentCharge; ctCharge++) + { + for (ctIonSeries=0; ctIonSeries= iPositionNLB[x]) // 0/1/2 is a/b/c ions + || (iWhichIonSeries >= 3 && iWhichIonSeries <= 5 && iLenMinus1-ctLen <= iPositionNLY[x])) // 3/4/5 is x/y/z ions + { + double dNewMass = dFragMass - g_staticParams.variableModParameters.varModList[x].dNeutralLoss/ctCharge; + + iVal = BIN(dNewMass); + + if (iVal > 0 && pbDuplFragment[iVal] == false) + { + _uiBinnedIonMasses[ctCharge][ctIonSeries][ctLen][x+1] = iVal; + pbDuplFragment[iVal] = true; + } + } + } + } + } + } + } + + // Precursor NL peaks added here + for (int ctNL=0; ctNL_spectrumInfoInternal.iChargeState; ctCharge>=1; ctCharge--) + { + double dNLMass = (sDBI.dPepMass - PROTON_MASS - g_staticParams.precursorNLIons[ctNL] + ctCharge*PROTON_MASS)/ctCharge; + int iVal = BIN(dNLMass); + + if (iVal > 0 && pbDuplFragment[iVal] == false) + { + _uiBinnedPrecursorNL[ctNL][ctCharge] = iVal; + pbDuplFragment[iVal] = true; + } + } + } + + if (g_staticParams.options.iDecoySearch) + { + if (g_staticParams.enzymeInformation.iSearchEnzymeOffSet == 1) + { + // last residue stays the same: change ABCDEK to EDCBAK + + for (i = iEndPos - 1; i >= iStartPos; i--) + { + szDecoyPeptide[iEndPos - i - 1] = sDBI.szPeptide[i - iStartPos]; + piVarModSitesDecoy[iEndPos - i - 1] = piVarModSites[i - iStartPos]; + } + + szDecoyPeptide[iEndPos] = sDBI.szPeptide[iEndPos]; // last residue stays same + piVarModSitesDecoy[iLenPeptide - 1] = piVarModSites[iLenPeptide - 1]; + } + else + { + // first residue stays the same: change ABCDEK to AKEDCB + + for (i = iEndPos; i > iStartPos; i--) + { + szDecoyPeptide[iEndPos - i + 1] = sDBI.szPeptide[i - iStartPos]; + piVarModSitesDecoy[iEndPos - i + 1] = piVarModSites[i - iStartPos]; + } + + szDecoyPeptide[iStartPos] = sDBI.szPeptide[iStartPos]; // first residue stays same + piVarModSitesDecoy[iStartPos] = piVarModSites[iStartPos]; + } + + piVarModSitesDecoy[iLenPeptide] = piVarModSites[iLenPeptide]; // N-term + piVarModSitesDecoy[iLenPeptide + 1] = piVarModSites[iLenPeptide + 1]; // C-term + + // Now need to recalculate _pdAAforward and _pdAAreverse for decoy entry + dBion = g_staticParams.precalcMasses.dNtermProton; + dYion = g_staticParams.precalcMasses.dCtermOH2Proton; + +/* n/c-term protein mods not supported + // use same protein terminal static mods as target peptide + if (_varModInfo.iStartPos == 0) + dBion += g_staticParams.staticModifications.dAddNterminusProtein; + if (_varModInfo.iEndPos == iLenProteinMinus1) + dYion += g_staticParams.staticModifications.dAddCterminusProtein; +*/ + + // variable N-term + if (piVarModSitesDecoy[iLenPeptide] > 0) + dBion += g_staticParams.variableModParameters.varModList[piVarModSitesDecoy[iLenPeptide] - 1].dVarModMass; + + // variable C-term + if (piVarModSitesDecoy[iLenPeptide + 1] > 0) + dYion += g_staticParams.variableModParameters.varModList[piVarModSitesDecoy[iLenPeptide + 1] - 1].dVarModMass; + + int iDecoyStartPos = iStartPos; + int iDecoyEndPos = iEndPos; + + // Generate pdAAforward for szDecoyPeptide + for (i = iDecoyStartPos; i < iDecoyEndPos; i++) + { + int iPos = i - iDecoyStartPos; + int iPos2 = iDecoyEndPos - i + iDecoyStartPos; + + dBion += g_staticParams.massUtility.pdAAMassFragment[(int)szDecoyPeptide[i]]; + if (piVarModSitesDecoy[iPos] > 0) + { + dBion += g_staticParams.variableModParameters.varModList[piVarModSitesDecoy[iPos] - 1].dVarModMass; + iFoundVariableModDecoy = 1; + } + + dYion += g_staticParams.massUtility.pdAAMassFragment[(int)szDecoyPeptide[iPos2]]; + if (piVarModSitesDecoy[iPos2] > 0) + { + dYion += g_staticParams.variableModParameters.varModList[piVarModSitesDecoy[iPos2] - 1].dVarModMass; + iFoundVariableModDecoy = 1; + } + + _pdAAforwardDecoy[iPos] = dBion; + _pdAAreverseDecoy[iPos] = dYion; + } + + // Now get the set of binned fragment ions once to compare this peptide against all matching spectra. + // First initialize pbDuplFragment and _uiBinnedIonMassesDecoy + for (ctCharge = 1; ctCharge <= g_massRange.iMaxFragmentCharge; ctCharge++) + { + for (ctIonSeries = 0; ctIonSeries < g_staticParams.ionInformation.iNumIonSeriesUsed; ctIonSeries++) + { + iWhichIonSeries = g_staticParams.ionInformation.piSelectedIonSeries[ctIonSeries]; + + for (ctLen = 0; ctLen < iLenMinus1; ctLen++) + { + double dFragMass = CometMassSpecUtils::GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforwardDecoy, _pdAAreverseDecoy); + + pbDuplFragment[BIN(dFragMass)] = false; + _uiBinnedIonMassesDecoy[ctCharge][ctIonSeries][ctLen][0] = 0; + + // initialize fragmentNL + if (g_staticParams.variableModParameters.bUseFragmentNeutralLoss) + { + for (int x = 0; x < VMODS; x++) // should be within this if() because only looking for NL masses from each mod + { + if ((iWhichIonSeries <= 2 && ctLen >= iPositionNLB[x]) // 0/1/2 is a/b/c ions + || (iWhichIonSeries >= 3 && iWhichIonSeries <= 5 && iLenMinus1 - ctLen <= iPositionNLY[x])) // 3/4/5 is x/y/z ions + { + double dNewMass = dFragMass - g_staticParams.variableModParameters.varModList[x].dNeutralLoss / ctCharge; + + if (dNewMass >= 0.0) + { + pbDuplFragment[BIN(dNewMass)] = false; + iFoundVariableModDecoy = 2; + } + + _uiBinnedIonMassesDecoy[ctCharge][ctIonSeries][ctLen][x + 1] = 0; + } + } + } + } + } + } + + for (int ctNL = 0; ctNL < g_staticParams.iPrecursorNLSize; ctNL++) + { + for (ctCharge = g_pvQuery.at(iWhichQuery)->_spectrumInfoInternal.iChargeState; ctCharge >= 1; ctCharge--) + { + double dNLMass = (sDBI.dPepMass - PROTON_MASS - g_staticParams.precursorNLIons[ctNL] + ctCharge * PROTON_MASS) / ctCharge; + int iVal = BIN(dNLMass); + + if (iVal > 0) + { + pbDuplFragment[iVal] = false; + _uiBinnedPrecursorNLDecoy[ctNL][ctCharge] = 0; + } + } + } + + for (ctCharge = 1; ctCharge <= g_massRange.iMaxFragmentCharge; ctCharge++) + { + for (ctIonSeries = 0; ctIonSeries < g_staticParams.ionInformation.iNumIonSeriesUsed; ctIonSeries++) + { + iWhichIonSeries = g_staticParams.ionInformation.piSelectedIonSeries[ctIonSeries]; + + // As both _pdAAforward and _pdAAreverse are increasing, loop through + // iLenPeptide-1 to complete set of internal fragment ions. + for (ctLen = 0; ctLen < iLenMinus1; ctLen++) + { + double dFragMass = CometMassSpecUtils::GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforwardDecoy, _pdAAreverseDecoy); + int iVal = BIN(dFragMass); + + if (pbDuplFragment[iVal] == false) + { + _uiBinnedIonMassesDecoy[ctCharge][ctIonSeries][ctLen][0] = iVal; + pbDuplFragment[iVal] = true; + } + + if (g_staticParams.variableModParameters.bUseFragmentNeutralLoss) + { + for (int x = 0; x < VMODS; x++) + { + if ((iWhichIonSeries <= 2 && ctLen >= iPositionNLB[x]) // 0/1/2 is a/b/c ions + || (iWhichIonSeries >= 3 && iWhichIonSeries <= 5 && iLenMinus1 - ctLen <= iPositionNLY[x])) // 3/4/5 is x/y/z ions + { + double dNewMass = dFragMass - g_staticParams.variableModParameters.varModList[x].dNeutralLoss / ctCharge; + + iVal = BIN(dNewMass); + + if (iVal > 0 && pbDuplFragment[iVal] == false) + { + _uiBinnedIonMassesDecoy[ctCharge][ctIonSeries][ctLen][x + 1] = iVal; + pbDuplFragment[iVal] = true; + } + } + } + } + } + } + } + + // Precursor NL peaks added here + for (int ctNL = 0; ctNL < g_staticParams.iPrecursorNLSize; ctNL++) + { + for (ctCharge = g_pvQuery.at(iWhichQuery)->_spectrumInfoInternal.iChargeState; ctCharge >= 1; ctCharge--) + { + double dNLMass = (sDBI.dPepMass - PROTON_MASS - g_staticParams.precursorNLIons[ctNL] + ctCharge * PROTON_MASS) / ctCharge; + int iVal = BIN(dNLMass); + + if (iVal > 0 && pbDuplFragment[iVal] == false) + { + _uiBinnedPrecursorNLDecoy[ctNL][ctCharge] = iVal; + pbDuplFragment[iVal] = true; + } + } + } + } + } + + XcorrScore(sDBI.szPeptide, iUnused, iUnused, iStartPos, iEndPos, iFoundVariableMod, + sDBI.dPepMass, false, iWhichQuery, iLenPeptide, piVarModSites, dbe); + + if (g_staticParams.options.iDecoySearch) + { + XcorrScore(szDecoyPeptide, iUnused, iUnused, iStartPos, iEndPos, iFoundVariableModDecoy, + sDBI.dPepMass, true, iWhichQuery, iLenPeptide, piVarModSitesDecoy, dbe); + } + } + iWhichQuery++; + } +} + + // Compare MSMS data to peptide with szProteinSeq from the input database. // iNtermPeptideOnly==0 specifies normal sequence // iNtermPeptideOnly==1 specifies clipped methionine sequence @@ -1729,7 +2550,8 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, if (iLenPeptide <= g_staticParams.options.peptideLengthRange.iEnd) { - if (g_staticParams.options.bCreateIndex) // && !g_staticParams.variableModParameters.bRequireVarMod) + if ((g_staticParams.options.bCreatePeptideIndex && !g_staticParams.variableModParameters.iRequireVarMod) + || g_staticParams.options.bCreateFragmentIndex) { int iPepLen = iEndPos - iStartPos + 1; @@ -1916,7 +2738,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, char szDecoyPeptide[MAX_PEPTIDE_LEN_P2]; // Allow for prev/next AA in string. // Calculate ion series just once to compare against all relevant query spectra. - if (bFirstTimeThroughLoopForPeptide && !g_staticParams.options.bCreateIndex) + if (bFirstTimeThroughLoopForPeptide && !(g_staticParams.options.bCreatePeptideIndex || g_staticParams.options.bCreateFragmentIndex)) { int iLenMinus1 = iEndPos - iStartPos; // Equals iLenPeptide minus 1. double dBion = g_staticParams.precalcMasses.dNtermProton; @@ -1956,7 +2778,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, for (ctLen = 0; ctLen < iLenMinus1; ++ctLen) { - pbDuplFragment[BIN(GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforward, _pdAAreverse))] = false; + pbDuplFragment[BIN(CometMassSpecUtils::GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforward, _pdAAreverse))] = false; _uiBinnedIonMasses[ctCharge][ctIonSeries][ctLen][0] = 0; // note no need to initialize fragment NL positions as no mods here } @@ -1990,7 +2812,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, // iLenPeptide-1 to complete set of internal fragment ions. for (ctLen = 0; ctLen < iLenMinus1; ++ctLen) { - int iVal = BIN(GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforward, _pdAAreverse)); + int iVal = BIN(CometMassSpecUtils::GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforward, _pdAAreverse)); if (pbDuplFragment[iVal] == false) { @@ -2105,7 +2927,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, for (ctLen = 0; ctLen < iLenMinus1; ++ctLen) { - pbDuplFragment[BIN(GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforwardDecoy, _pdAAreverseDecoy))] = false; + pbDuplFragment[BIN(CometMassSpecUtils::GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforwardDecoy, _pdAAreverseDecoy))] = false; _uiBinnedIonMassesDecoy[ctCharge][ctIonSeries][ctLen][0] = 0; } } @@ -2137,7 +2959,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, // iLenPeptide-1 to complete set of internal fragment ions. for (ctLen = 0; ctLen < iLenMinus1; ++ctLen) { - double dFragMass = GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforwardDecoy, _pdAAreverseDecoy); + double dFragMass = CometMassSpecUtils::GetFragmentIonMass(iWhichIonSeries, ctLen, ctCharge, _pdAAforwardDecoy, _pdAAreverseDecoy); int iVal = BIN(dFragMass); if (pbDuplFragment[iVal] == false) @@ -2186,7 +3008,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, { dCalcPepMass += (double)g_staticParams.massUtility.pdAAMassParent[(int)szProteinSeq[iEndPos]]; - if (g_staticParams.variableModParameters.bVarModSearch && !g_staticParams.options.bCreateIndex) + if (g_staticParams.variableModParameters.bVarModSearch && !g_staticParams.options.bCreateFragmentIndex) CountVarMods(piVarModCounts, szProteinSeq[iEndPos], iEndPos); if (iEndPos == iProteinSeqLengthMinus1) @@ -2197,13 +3019,13 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, else if (dCalcPepMass > g_massRange.dMaxMass || iEndPos==iProteinSeqLengthMinus1 || iLenPeptide == g_staticParams.options.peptideLengthRange.iEnd) { // Run variable mod search before incrementing iStartPos. - if (g_staticParams.variableModParameters.bVarModSearch && !g_staticParams.options.bCreateIndex) + if (g_staticParams.variableModParameters.bVarModSearch && !g_staticParams.options.bCreateFragmentIndex) { // If any variable mod mass is negative, consider adding to iEndPos as long // as peptide minus all possible negative mods is less than the dMaxMass???? // // Otherwise, at this point, peptide mass is too big which means should be ok for varmod search. - if (!g_staticParams.options.bCreateIndex && HasVariableMod(piVarModCounts, iStartPos, iEndPos, &dbe)) + if (HasVariableMod(piVarModCounts, iStartPos, iEndPos, &dbe)) { // if variable mod protein filter applied, set residue mod count to 0 for the // particular variable mod if current protein not on the protein filter list @@ -2223,7 +3045,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, VariableModSearch(szProteinSeq, piVarModCounts, iStartPos, iEndPos, iClipNtermMetOffset, pbDuplFragment, &dbe); } - if (!g_staticParams.options.bCreateIndex && g_massRange.bNarrowMassRange) + if (g_massRange.bNarrowMassRange) SubtractVarMods(piVarModCounts, szProteinSeq[iStartPos], iStartPos); } @@ -2249,7 +3071,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, { dCalcPepMass -= (double)g_staticParams.massUtility.pdAAMassParent[(int)szProteinSeq[iEndPos]]; - if (g_staticParams.variableModParameters.bVarModSearch && !g_staticParams.options.bCreateIndex) + if (g_staticParams.variableModParameters.bVarModSearch && !g_staticParams.options.bCreateFragmentIndex) SubtractVarMods(piVarModCounts, szProteinSeq[iEndPos], iEndPos); if (iEndPos == iProteinSeqLengthMinus1) dCalcPepMass -= g_staticParams.staticModifications.dAddCterminusProtein; @@ -2263,7 +3085,7 @@ bool CometSearch::SearchForPeptides(struct sDBEntry dbe, dCalcPepMass = g_staticParams.precalcMasses.dOH2ProtonCtermNterm + g_staticParams.massUtility.pdAAMassParent[(int)szProteinSeq[iStartPos]]; - if (g_staticParams.variableModParameters.bVarModSearch && !g_staticParams.options.bCreateIndex) + if (g_staticParams.variableModParameters.bVarModSearch && !g_staticParams.options.bCreateFragmentIndex) { for (int x = 0; x < VMODS; ++x) //reset variable mod counts piVarModCounts[x] = 0; @@ -2438,7 +3260,7 @@ int CometSearch::WithinMassTolerance(double dCalcPepMass, && CheckEnzymeTermini(szProteinSeq, iStartPos, iEndPos)) { // if creating indexed database, only care of peptide is within global mass range - if (g_staticParams.options.bCreateIndex) + if (g_staticParams.options.bCreatePeptideIndex || g_staticParams.options.bCreateFragmentIndex) { return 1; } @@ -3281,7 +4103,7 @@ void CometSearch::XcorrScore(char *szProteinSeq, dXcorr *= 0.005; // Scale intensities to 50 and divide score by 1E4. - dXcorr= std::round(dXcorr* 1000.0) / 1000.0; // round to 3 decimal points + dXcorr = std::round(dXcorr* 1000.0) / 1000.0; // round to 3 decimal points Threading::LockMutex(pQuery->accessMutex); @@ -3325,13 +4147,13 @@ void CometSearch::XcorrScore(char *szProteinSeq, if (dXcorr + 0.00005 >= dLowestXcorrScore && iLenPeptide <= g_staticParams.options.peptideLengthRange.iEnd) { // no need to check duplicates if indexed database search and !g_staticParams.options.bTreatSameIL and no internal decoys - if (g_staticParams.bIndexDb && !g_staticParams.options.bTreatSameIL) + if (g_staticParams.iIndexDb && !g_staticParams.options.bTreatSameIL) { StorePeptide(iWhichQuery, iStartResidue, iStartPos, iEndPos, iFoundVariableMod, szProteinSeq, dCalcPepMass, dXcorr, bDecoyPep, piVarModSites, dbe); } else if (!CheckDuplicate(iWhichQuery, iStartResidue, iEndResidue, iStartPos, iEndPos, iFoundVariableMod, dCalcPepMass, - szProteinSeq, bDecoyPep, piVarModSites, dbe)) + szProteinSeq, bDecoyPep, piVarModSites, dbe)) { StorePeptide(iWhichQuery, iStartResidue, iStartPos, iEndPos, iFoundVariableMod, szProteinSeq, dCalcPepMass, dXcorr, bDecoyPep, piVarModSites, dbe); @@ -3476,6 +4298,7 @@ void CometSearch::XcorrScoreI(char *szProteinSeq, } +/* double CometSearch::GetFragmentIonMass(int iWhichIonSeries, int i, int ctCharge, @@ -3511,6 +4334,7 @@ double CometSearch::GetFragmentIonMass(int iWhichIonSeries, return (dFragmentIonMass + (ctCharge - 1.0) * PROTON_MASS) / ctCharge; } +*/ void CometSearch::StorePeptide(int iWhichQuery, @@ -3622,10 +4446,10 @@ void CometSearch::StorePeptide(int iWhichQuery, pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].fXcorr = (float)dXcorr; - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb) //FIX { - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cPrevAA = _proteinInfo.cPrevAA; - pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cNextAA = _proteinInfo.cNextAA; +// pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cPrevAA = _proteinInfo.cPrevAA; +// pQuery->_pDecoys[siLowestDecoyXcorrScoreIndex].cNextAA = _proteinInfo.cNextAA; } else { @@ -3744,7 +4568,8 @@ void CometSearch::StorePeptide(int iWhichQuery, { siLowestXcorrScoreIndex = siA; } - else if (pQuery->_pResults[siLowestXcorrScoreIndex].fXcorr == pQuery->_pResults[siA].fXcorr) + else if (pQuery->_pResults[siLowestXcorrScoreIndex].fXcorr == pQuery->_pResults[siA].fXcorr + && pQuery->_pResults[siLowestXcorrScoreIndex].fXcorr > g_staticParams.options.dMinimumXcorr) { // if current lowest score is the same as current siA peptide, // determine if need to point to siA peptide as the one to replace @@ -3807,7 +4632,7 @@ void CometSearch::StorePeptide(int iWhichQuery, memcpy(pQuery->_pResults[siLowestXcorrScoreIndex].szPeptide, szProteinSeq+iStartPos, iLenPeptide*sizeof(char)); pQuery->_pResults[siLowestXcorrScoreIndex].szPeptide[iLenPeptide]='\0'; pQuery->_pResults[siLowestXcorrScoreIndex].dPepMass = dCalcPepMass; - + if (pQuery->_spectrumInfoInternal.iChargeState > 2) { pQuery->_pResults[siLowestXcorrScoreIndex].iTotalIons = (iLenPeptide - 1) @@ -3822,7 +4647,7 @@ void CometSearch::StorePeptide(int iWhichQuery, pQuery->_pResults[siLowestXcorrScoreIndex].fXcorr = (float)dXcorr; - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb) { pQuery->_pResults[siLowestXcorrScoreIndex].cPrevAA = _proteinInfo.cPrevAA; pQuery->_pResults[siLowestXcorrScoreIndex].cNextAA = _proteinInfo.cNextAA; @@ -3899,8 +4724,7 @@ void CometSearch::StorePeptide(int iWhichQuery, if (iVal > 0) { - pQuery->_pResults[siLowestXcorrScoreIndex].pdVarModSites[i] - = g_staticParams.variableModParameters.varModList[iVal-1].dVarModMass; + pQuery->_pResults[siLowestXcorrScoreIndex].pdVarModSites[i] = g_staticParams.variableModParameters.varModList[iVal-1].dVarModMass; } else if (iVal < 0) { @@ -5821,7 +6645,7 @@ bool CometSearch::MergeVarMods(char *szProteinSeq, // add each single PEFF mod to these existing variable mods. // Now that normal variable mods are taken care of, add in PEFF mods if pertinent - if (*bDoPeffAnalysis && !g_staticParams.options.bCreateIndex) + if (*bDoPeffAnalysis && !g_staticParams.options.bCreatePeptideIndex) { int piTmpVarModSites[MAX_PEPTIDE_LEN_P2]; memcpy(piTmpVarModSites, piVarModSites, _iSizepiVarModSites); @@ -5926,7 +6750,31 @@ bool CometSearch::MergeVarMods(char *szProteinSeq, if (bHasVarMod) { - CalcVarModIons(szProteinSeq, iWhichQuery, pbDuplFragment, piVarModSites, dCalcPepMass, iLenPeptide, dbe); + if (g_staticParams.options.bCreatePeptideIndex) + { + Threading::LockMutex(g_pvDBIndexMutex); + + // add to DBIndex vector + DBIndex sDBTmp; + sDBTmp.dPepMass = dCalcPepMass; //MH+ mass + strncpy(sDBTmp.szPeptide, szProteinSeq + _varModInfo.iStartPos, iLenPeptide); + sDBTmp.szPeptide[iLenPeptide]='\0'; + + sDBTmp.lIndexProteinFilePosition = _proteinInfo.lProteinFilePosition; + + memset(sDBTmp.pcVarModSites, 0, sizeof(sDBTmp.pcVarModSites)); + + for (int x=0; x struct SearchThreadData { @@ -83,6 +81,11 @@ class CometSearch int iStartPos); bool CheckEnzymeEndTermini(char *szProteinSeq, int iEndPos); + int BinarySearchMass(int start, + int end, + double dCalcPepMass); + bool CheckMassMatch(int iWhichQuery, + double dCalcPepMass); struct ProteinInfo { @@ -114,9 +117,6 @@ class CometSearch int end, string strMod, vector& vectorPeffOBO); - int BinarySearchMass(int start, - int end, - double dCalcPepMass); static int BinarySearchIndexMass(int iWhichThread, int iPrecursorBin, int start, @@ -166,13 +166,13 @@ class CometSearch unsigned int uiBinnedIonMasses[MAX_FRAGMENT_CHARGE+1][NUM_ION_SERIES][MAX_PEPTIDE_LEN][FRAGINDEX_VMODS+1], unsigned int uiBinnedPrecursorNL[MAX_PRECURSOR_NL_SIZE][MAX_PRECURSOR_CHARGE], int iNumMatchedFragmentIons); - bool CheckMassMatch(int iWhichQuery, - double dCalcPepMass); +/* static double GetFragmentIonMass(int iWhichIonSeries, int i, int ctCharge, double *pdAAforward, double *pdAAreverse); +*/ int CheckDuplicate(int iWhichQuery, int iStartResidue, int iEndResidue, @@ -239,6 +239,11 @@ class CometSearch struct sDBEntry *dbe); static void SearchFragmentIndex(size_t iWhichQuery, ThreadPool *tp); + bool SearchPeptideIndex(void); + void AnalyzePeptideIndex(int iWhichQuery, + DBIndex sDBI, + bool *pbDuplFragment, + struct sDBEntry *dbe); bool SearchForPeptides(struct sDBEntry dbe, char *szProteinSeq, int iNtermPeptideOnly, // used in clipped methionine sequence diff --git a/CometSearch/CometSearch.vcxproj b/CometSearch/CometSearch.vcxproj index 7577b2fb..e6c8c3f1 100644 --- a/CometSearch/CometSearch.vcxproj +++ b/CometSearch/CometSearch.vcxproj @@ -177,6 +177,7 @@ + @@ -199,6 +200,7 @@ + diff --git a/CometSearch/CometSearch.vcxproj.filters b/CometSearch/CometSearch.vcxproj.filters index 834ed90c..d2d17e36 100644 --- a/CometSearch/CometSearch.vcxproj.filters +++ b/CometSearch/CometSearch.vcxproj.filters @@ -84,6 +84,9 @@ Header Files + + Header Files + @@ -134,5 +137,8 @@ Source Files + + Source Files + \ No newline at end of file diff --git a/CometSearch/CometSearchManager.cpp b/CometSearch/CometSearchManager.cpp index 3cc21efc..ebf98db8 100644 --- a/CometSearch/CometSearchManager.cpp +++ b/CometSearch/CometSearchManager.cpp @@ -28,7 +28,7 @@ #include "CometSearchManager.h" #include "CometStatus.h" #include "CometFragmentIndex.h" - +#include "CometPeptideIndex.h" #include @@ -54,7 +54,8 @@ bool* g_bIndexPrecursors; // array for BIN(pre vector g_vFragmentPeptides; // each peptide is represented here iWhichPeptide, which mod if any, calculated mass vector g_vRawPeptides; // list of unmodified peptides and their proteins as file pointers bool g_bPlainPeptideIndexRead = false; -FILE* fpfasta; +bool g_bPeptideIndexRead = false; +FILE* fpfasta; // file pointer to FASTA; would be same as fpdb if input db was already FASTA but otherwise needed if input is .idx file /****************************************************************************** @@ -474,6 +475,20 @@ static bool ValidateSequenceDatabaseFile() FILE *fpcheck; char szErrorMsg[SIZE_ERROR]; + // open FASTA for retrieving protein names + string sTmpDB = g_staticParams.databaseInfo.szDatabase; + if (!strcmp(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx")) + sTmpDB = sTmpDB.erase(sTmpDB.size() - 4); // need plain fasta if indexdb input + if ((fpfasta = fopen(sTmpDB.c_str(), "r")) == NULL) + { + char szErrorMsg[SIZE_ERROR]; + sprintf(szErrorMsg, " Error (3) - cannot read FASTA file \"%s\".\n", sTmpDB.c_str()); + string strErrorMsg(szErrorMsg); + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(szErrorMsg); + return false; + } + // if .idx database specified but does not exist, first see if corresponding // fasta exists and if it does, create the .idx file if (strstr(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx")) @@ -495,7 +510,7 @@ static bool ValidateSequenceDatabaseFile() else { fclose(fpcheck); - g_staticParams.options.bCreateIndex = true; // set to true to make the index + g_staticParams.options.bCreateFragmentIndex = true; // set to true to make the index return true; } } @@ -517,7 +532,7 @@ static bool ValidateSequenceDatabaseFile() else { fclose(fpcheck); - g_staticParams.options.bCreateIndex = false; + g_staticParams.options.bCreateFragmentIndex = false; return true; } } @@ -561,6 +576,7 @@ static bool ValidateSequenceDatabaseFile() } fclose(fpcheck); + return true; } @@ -860,7 +876,9 @@ bool CometSearchManager::InitializeStaticParams() GetParamValue("scale_fragmentNL", g_staticParams.options.bScaleFragmentNL); - GetParamValue("create_index", g_staticParams.options.bCreateIndex); + GetParamValue("create_fragment_index", g_staticParams.options.bCreateFragmentIndex); + + GetParamValue("create_peptide_index", g_staticParams.options.bCreatePeptideIndex); GetParamValue("max_iterations", g_staticParams.options.lMaxIterations); @@ -1441,7 +1459,7 @@ bool CometSearchManager::InitializeStaticParams() if (g_staticParams.variableModParameters.varModList[i].iMaxNumVarModAAPerMod > g_staticParams.variableModParameters.iMaxVarModPerPeptide) g_staticParams.variableModParameters.varModList[i].iMaxNumVarModAAPerMod = g_staticParams.variableModParameters.iMaxVarModPerPeptide; - if (g_staticParams.options.bCreateIndex) + if (g_staticParams.options.bCreateFragmentIndex) { // limit any user specified modification limits to the max supported by fragment ion indexing if (g_staticParams.variableModParameters.varModList[i].iMaxNumVarModAAPerMod > FRAGINDEX_MAX_MODS_PER_MOD) g_staticParams.variableModParameters.varModList[i].iMaxNumVarModAAPerMod = FRAGINDEX_MAX_MODS_PER_MOD; @@ -1639,6 +1657,7 @@ bool CometSearchManager::InitializeStaticParams() g_staticParams.options.iMaxDuplicateProteins = INT_MAX; g_staticParams.iPrecursorNLSize = (int)g_staticParams.precursorNLIons.size(); + if (g_staticParams.iPrecursorNLSize > MAX_PRECURSOR_NL_SIZE) g_staticParams.iPrecursorNLSize = MAX_PRECURSOR_NL_SIZE; @@ -1651,15 +1670,50 @@ bool CometSearchManager::InitializeStaticParams() // At this point, check extension to set whether index database or not if (!strcmp(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx")) { - g_staticParams.bIndexDb = 1; + // Has .idx extension. Now parse first line ot see if peptide index or fragment index. + // either "Comet peptide index" or "Comet fragment ion index" + char szTmp[512]; + FILE *fp; - // if searching fragment index database, limit load of query spectra as no - // need to load all spectra into memory since querying spectra sequentially - if (g_staticParams.options.iSpectrumBatchSize > FRAGINDEX_MAX_BATCHSIZE || g_staticParams.options.iSpectrumBatchSize == 0) - g_staticParams.options.iSpectrumBatchSize = FRAGINDEX_MAX_BATCHSIZE; + if ( (fp=fopen(g_staticParams.databaseInfo.szDatabase, "r")) == NULL) + { + char szErrorMsg[SIZE_ERROR]; + sprintf(szErrorMsg, " Error - cannot open database index file \"%s\".\n", g_staticParams.databaseInfo.szDatabase); + string strErrorMsg(szErrorMsg); + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(szErrorMsg); + return false; + } + + fgets(szTmp, 512, fp); + fclose(fp); + + if (!strncmp(szTmp, "Comet peptide index", 19)) + { + g_staticParams.iIndexDb = 2; // peptide index + } + else if (!strncmp(szTmp, "Comet fragment ion index", 24)) + { + g_staticParams.iIndexDb = 1; // fragment ion index + + // if searching fragment index database, limit load of query spectra as no + // need to load all spectra into memory since querying spectra sequentially + if (g_staticParams.options.iSpectrumBatchSize > FRAGINDEX_MAX_BATCHSIZE || g_staticParams.options.iSpectrumBatchSize == 0) + g_staticParams.options.iSpectrumBatchSize = FRAGINDEX_MAX_BATCHSIZE; + } + else + { + char szErrorMsg[SIZE_ERROR]; + sprintf(szErrorMsg, " Error - first line of database index file \"%s\" contains:\n", g_staticParams.databaseInfo.szDatabase); + sprintf(szErrorMsg+strlen(szErrorMsg), "%s\n", szTmp); + string strErrorMsg(szErrorMsg); + g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); + logerr(szErrorMsg); + return false; + } } - if (g_staticParams.options.bCreateIndex && g_staticParams.bIndexDb) + if (g_staticParams.options.bCreateFragmentIndex && g_staticParams.iIndexDb) { char szErrorMsg[SIZE_ERROR]; sprintf(szErrorMsg, " Error - input database already indexed: \"%s\".\n", g_staticParams.databaseInfo.szDatabase); @@ -1669,7 +1723,7 @@ bool CometSearchManager::InitializeStaticParams() return false; } - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb == 1) { g_bIndexPrecursors = (bool*) malloc(BIN(g_staticParams.options.dPeptideMassHigh)); if (g_bIndexPrecursors == NULL) @@ -1983,16 +2037,23 @@ void CometSearchManager::ResetSearchStatus() g_cometStatus.ResetStatus(); } - -bool CometSearchManager::CreateIndex() +bool CometSearchManager::CreateFragmentIndex() { - // Override the Create Index flag to force it to create - g_staticParams.options.bCreateIndex = true; + // Override the Create Index flag to force it to create + g_staticParams.options.bCreateFragmentIndex = 1; - // The DoSearch will create the index and exit - return DoSearch(); + // The DoSearch will create the index and exit + return DoSearch(); } +bool CometSearchManager::CreatePeptideIndex() +{ + // Override the Create Index flag to force it to create + g_staticParams.options.bCreatePeptideIndex = 1; + + // The DoSearch will create the index and exit + return DoSearch(); +} bool CometSearchManager::DoSearch() { @@ -2017,6 +2078,18 @@ bool CometSearchManager::DoSearch() bool bSucceeded = true; + #ifdef PERF_DEBUG + // print set search parameters + std::map mapParams = GetParamsMap(); + for (std::map::iterator it = mapParams.begin(); it != mapParams.end(); ++it) + { + if (it->first != "[COMET_ENZYME_INFO]") + { + printf("OK parameter name=\"%s\" value=\"%s\"\n", it->first.c_str(), it->second->GetStringValue().c_str()); + } + } +#endif + // add git hash to version string if present // repeated here from Comet main() as main() is skipped when search invoked via DLL if (strlen(GITHUBSHA) > 0) @@ -2029,7 +2102,7 @@ bool CometSearchManager::DoSearch() else g_sCometVersion = std::string(comet_version); - if (!g_staticParams.options.bOutputSqtStream) // && !g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream) // && !g_staticParams.iIndexDb) { strOut = "\n Comet version \"" + g_sCometVersion + "\"\n\n"; @@ -2037,7 +2110,13 @@ bool CometSearchManager::DoSearch() fflush(stdout); } - if (g_staticParams.options.bCreateIndex || !g_staticParams.bIndexDb) + if (g_staticParams.options.bCreatePeptideIndex) + { + bSucceeded = CometPeptideIndex::WritePeptideIndex(tp); + return bSucceeded; + } + + if (g_staticParams.options.bCreateFragmentIndex || !g_staticParams.iIndexDb) { // If specified, read in the protein variable mod filter file content. // Do this here only for classic search or if creating the plain peptide index. @@ -2072,7 +2151,7 @@ bool CometSearchManager::DoSearch() tp->fillPool( g_staticParams.options.iNumThreads < 0 ? 0 : g_staticParams.options.iNumThreads-1); - if (g_staticParams.options.bCreateIndex) //index + if (g_staticParams.options.bCreateFragmentIndex) //index { // write out .idx file containing unmodified peptides and protein refs; // this calls RunSearch just to query fasta and generate uniq peptide list @@ -2091,7 +2170,7 @@ bool CometSearchManager::DoSearch() bool bBlankSearchFile = false; - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb == 1) { if (!g_staticParams.options.iFragIndexSkipReadPrecursors) { @@ -2140,7 +2219,7 @@ bool CometSearchManager::DoSearch() time(&tStartTime); strftime(g_staticParams.szDate, 26, "%m/%d/%Y, %I:%M:%S %p", localtime(&tStartTime)); - if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.iIndexDb) { strOut = " Search start: " + string(g_staticParams.szDate) + "\n"; strOut += " - Input file: " + string(g_staticParams.inputFile.szFileName) + "\n"; @@ -2539,7 +2618,7 @@ bool CometSearchManager::DoSearch() FILE *fpdb; // need FASTA file again to grab headers for output (currently just store file positions) string sTmpDB = g_staticParams.databaseInfo.szDatabase; - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb > 0) sTmpDB = sTmpDB.erase(sTmpDB.size()-4); // need plain fasta if indexdb input if ((fpdb=fopen(sTmpDB.c_str(), "r")) == NULL) { @@ -2551,7 +2630,7 @@ bool CometSearchManager::DoSearch() return false; } - if (g_staticParams.options.iSpectrumBatchSize == 0 && !g_staticParams.bIndexDb) + if (g_staticParams.options.iSpectrumBatchSize == 0 && !g_staticParams.iIndexDb) { logout(" - Reading all spectra into memory; set \"spectrum_batch_size\" if search terminates here.\n"); fflush(stdout); @@ -2559,12 +2638,12 @@ bool CometSearchManager::DoSearch() CometFragmentIndex sqSearch; - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb == 1) { if (!g_bPlainPeptideIndexRead) { auto tStartTime = chrono::steady_clock::now(); - if (!g_staticParams.options.bOutputSqtStream && g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream) { cout << " - read .idx ... "; fflush(stdout); @@ -2572,7 +2651,7 @@ bool CometSearchManager::DoSearch() sqSearch.ReadPlainPeptideIndex(); - if (!g_staticParams.options.bOutputSqtStream && g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream) { cout << CometFragmentIndex::ElapsedTime(tStartTime) << endl; } @@ -2582,7 +2661,7 @@ bool CometSearchManager::DoSearch() } auto tBeginTime = chrono::steady_clock::now(); - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb) { printf(" - searching \"%s\" ... ", g_staticParams.inputFile.szBaseName); fflush(stdout); @@ -2605,9 +2684,8 @@ bool CometSearchManager::DoSearch() char szTimeBuffer[32]; szTimeBuffer[0] = '\0'; #endif - // Load and preprocess all the spectra. - if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.iIndexDb) { logout(" - Load spectra:"); @@ -2661,7 +2739,7 @@ bool CometSearchManager::DoSearch() { // need strStatusMsg in it's own scope due to goto statement above string strStatusMsg = " " + to_string(g_pvQuery.size()) + string("\n"); - if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.iIndexDb) { logout(strStatusMsg.c_str()); } @@ -2750,7 +2828,7 @@ bool CometSearchManager::DoSearch() if (!bSucceeded) goto cleanup_results; - if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.iIndexDb) { logout(" - Post analysis:"); fflush(stdout); @@ -2778,12 +2856,11 @@ bool CometSearchManager::DoSearch() fflush(stdout); } #endif - // Sort g_pvQuery vector by scan. std::sort(g_pvQuery.begin(), g_pvQuery.end(), compareByScanNumber); // Get flanking amino acid residues - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb) { for (int iWhichQuery = 0; iWhichQuery < (int)g_pvQuery.size(); ++iWhichQuery) { @@ -2796,7 +2873,7 @@ bool CometSearchManager::DoSearch() for (int iWhichResult = 0; iWhichResult < iNumPrintLines; ++iWhichResult) { - if (g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].iLenPeptide > 0 && g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].fXcorr > XCORR_CUTOFF) + if (g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].iLenPeptide > 0 && g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].fXcorr > g_staticParams.options.dMinimumXcorr) { int iNtermMod = g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].piVarModSites[0]; int iCtermMod = g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].piVarModSites[g_pvQuery.at(iWhichQuery)->_pResults[iWhichResult].iLenPeptide - 1]; @@ -2808,20 +2885,19 @@ bool CometSearchManager::DoSearch() && g_staticParams.variableModParameters.varModList[iNtermMod - 1].iWhichTerm == 0) { // only match to peptides at the N-terminus of proteins as protein terminal mod applied - CometMassSpecUtils::GetPrevNextAA(fpdb, iWhichQuery, iWhichResult, iPrintTargetDecoy, 1); + CometMassSpecUtils::GetPrevNextAA(fpfasta, iWhichQuery, iWhichResult, iPrintTargetDecoy, 1); } else if (iCtermMod > 0 && g_staticParams.variableModParameters.varModList[iCtermMod - 1].iVarModTermDistance == 0 && g_staticParams.variableModParameters.varModList[iCtermMod - 1].iWhichTerm == 1) { // only match to peptides at the C-terminus of proteins as protein terminal mod applied - CometMassSpecUtils::GetPrevNextAA(fpdb, iWhichQuery, iWhichResult, iPrintTargetDecoy, 2); + CometMassSpecUtils::GetPrevNextAA(fpfasta, iWhichQuery, iWhichResult, iPrintTargetDecoy, 2); } else { - // peptide can be anywhere in sequence - CometMassSpecUtils::GetPrevNextAA(fpdb, iWhichQuery, iWhichResult, iPrintTargetDecoy, 0); + CometMassSpecUtils::GetPrevNextAA(fpfasta, iWhichQuery, iWhichResult, iPrintTargetDecoy, 0); } } } @@ -2829,7 +2905,7 @@ bool CometSearchManager::DoSearch() } } - if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.iIndexDb) { logout(" done\n"); fflush(stdout); @@ -2860,7 +2936,9 @@ bool CometSearchManager::DoSearch() } if (g_staticParams.options.bOutputTxtFile) + { CometWriteTxt::WriteTxt(fpout_txt, fpoutd_txt, fpdb); + } // Write SQT last as I destroy the g_staticParams.szMod string during that process if (g_staticParams.options.bOutputSqtStream || g_staticParams.options.bOutputSqtFile) @@ -2870,8 +2948,8 @@ bool CometSearchManager::DoSearch() // Deleting each Query object in the vector calls its destructor, which // frees the spectral memory (see definition for Query in CometData.h). - for (std::vector::iterator it = g_pvQuery.begin(); it != g_pvQuery.end(); ++it) - delete *it; + for (auto it = g_pvQuery.begin(); it != g_pvQuery.end(); ++it) + delete (*it); g_pvQuery.clear(); @@ -2879,7 +2957,7 @@ bool CometSearchManager::DoSearch() break; } - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb) cout << CometFragmentIndex::ElapsedTime(tBeginTime) << endl; if (bSucceeded) @@ -2935,7 +3013,7 @@ bool CometSearchManager::DoSearch() remove(szOutputDecoyMzIdentMLtmp); } - if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.bIndexDb) + if (!g_staticParams.options.bOutputSqtStream && !g_staticParams.iIndexDb) { time_t tEndTime; @@ -3060,7 +3138,7 @@ bool CometSearchManager::DoSearch() break; } - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb == 1) // clean fragment ion index { int iNumIndexingThreads = g_staticParams.options.iNumThreads; if (iNumIndexingThreads > FRAGINDEX_MAX_THREADS) @@ -3086,6 +3164,10 @@ bool CometSearchManager::DoSearch() printf(" - done.\n\n"); } + else if (g_staticParams.iIndexDb == 2) + { + printf(" - done.\n\n"); + } if (bBlankSearchFile) return false; @@ -3127,26 +3209,12 @@ bool CometSearchManager::InitializeSingleSpectrumSearch() // Load databases CometFragmentIndex sqSearch; - if (!g_bPlainPeptideIndexRead) + if (g_staticParams.iIndexDb == 1 && !g_bPlainPeptideIndexRead) { sqSearch.ReadPlainPeptideIndex(); sqSearch.CreateFragmentIndex(tp); } - // open FASTA for retrieving protein names - string sTmpDB = g_staticParams.databaseInfo.szDatabase; - if (!strcmp(g_staticParams.databaseInfo.szDatabase + strlen(g_staticParams.databaseInfo.szDatabase) - 4, ".idx")) - sTmpDB = sTmpDB.erase(sTmpDB.size() - 4); // need plain fasta if indexdb input - if ((fpfasta = fopen(sTmpDB.c_str(), "r")) == NULL) - { - char szErrorMsg[SIZE_ERROR]; - sprintf(szErrorMsg, " Error (3) - cannot read database file \"%s\".\n", sTmpDB.c_str()); - string strErrorMsg(szErrorMsg); - g_cometStatus.SetStatus(CometResult_Failed, strErrorMsg); - logerr(szErrorMsg); - return false; - } - singleSearchInitializationComplete = true; return true; @@ -3197,6 +3265,9 @@ bool CometSearchManager::DoSingleSpectrumSearch(int iPrecursorCharge, if (!InitializeSingleSpectrumSearch()) return false; + // tRealTimeStart used to track elapsed search time and to exit if g_staticParams.options.iMaxIndexRunTime is surpased + g_staticParams.tRealTimeStart = std::chrono::high_resolution_clock::now(); + // We need to reset some of the static variables in-between input files CometPreprocess::Reset(); @@ -3526,15 +3597,30 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, if (iNumPeaks == 0) return false; - if (dMZ * iPrecursorCharge - (iPrecursorCharge - 1) * PROTON_MASS > g_staticParams.options.dPeptideMassHigh) + if (dMZ * iPrecursorCharge - (iPrecursorCharge - 1.0) * PROTON_MASS > g_staticParams.options.dPeptideMassHigh) return false; // this assumes dPeptideMassHigh is set correctly in the calling program if (!InitializeSingleSpectrumSearch()) return false; + // tRealTimeStart used to track elapsed search time and to exit if g_staticParams.options.iMaxIndexRunTime is surpased + g_staticParams.tRealTimeStart = std::chrono::high_resolution_clock::now(); + // We need to reset some of the static variables in-between input files CometPreprocess::Reset(); +#ifdef PERF_DEBUG + // print set search parameters + std::map mapParams = GetParamsMap(); + for (std::map::iterator it = mapParams.begin(); it != mapParams.end(); ++it) + { + if (it->first != "[COMET_ENZYME_INFO]") + { + printf("OK parameter name=\"%s\" value=\"%s\"\n", it->first.c_str(), it->second->GetStringValue().c_str()); + } + } +#endif + // IMPORTANT: From this point onwards, because we've loaded some // spectra, we MUST "goto cleanup_results" before exiting the loop, // or we will create a memory leak! @@ -3584,6 +3670,7 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, } takeSearchResultsN = topN; // return up to the top N results, or iSize + if (takeSearchResultsN > iSize) takeSearchResultsN = iSize; @@ -3602,20 +3689,20 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, goto cleanup_results; Query* pQuery; - pQuery = g_pvQuery.at(0); // return info for top hit only + pQuery = g_pvQuery.at(0); // there's only a single query spectrum for (int idx = 0; idx < takeSearchResultsN; ++idx) { Scores score; score.dCn = 0; - score.xCorr = 0; + score.xCorr = g_staticParams.options.dMinimumXcorr; score.matchedIons = 0; score.totalIons = 0; std::string eachStrReturnPeptide; std::string eachStrReturnProtein; vector eachMatchedFragments; - if (iSize > 0 && pQuery->_pResults[idx].fXcorr > XCORR_CUTOFF && pQuery->_pResults[idx].iLenPeptide > 0) + if (iSize > 0 && pQuery->_pResults[idx].fXcorr > g_staticParams.options.dMinimumXcorr && pQuery->_pResults[idx].iLenPeptide > 0) { Results* pOutput = pQuery->_pResults; @@ -3628,29 +3715,29 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, // n-term variable mod if (pOutput[idx].piVarModSites[pOutput[idx].iLenPeptide] != 0) { - std::stringstream ss; - ss << "n[" << std::fixed << std::setprecision(4) << pOutput[idx].pdVarModSites[pOutput[idx].iLenPeptide] << "]"; - eachStrReturnPeptide += ss.str(); + std::stringstream ss; + ss << "n[" << std::fixed << std::setprecision(4) << pOutput[idx].pdVarModSites[pOutput[idx].iLenPeptide] << "]"; + eachStrReturnPeptide += ss.str(); } for (int i = 0; i < pOutput[idx].iLenPeptide; ++i) { - eachStrReturnPeptide += pOutput[idx].szPeptide[i]; + eachStrReturnPeptide += pOutput[idx].szPeptide[i]; - if (pOutput[idx].piVarModSites[i] != 0) - { - std::stringstream ss; - ss << "[" << std::fixed << std::setprecision(4) << pOutput[idx].pdVarModSites[i] << "]"; - eachStrReturnPeptide += ss.str(); - } + if (pOutput[idx].piVarModSites[i] != 0) + { + std::stringstream ss; + ss << "[" << std::fixed << std::setprecision(4) << pOutput[idx].pdVarModSites[i] << "]"; + eachStrReturnPeptide += ss.str(); + } } // c-term variable mod if (pOutput[idx].piVarModSites[pOutput[idx].iLenPeptide + 1] != 0) { - std::stringstream ss; - ss << "c[" << std::fixed << std::setprecision(4) << pOutput[idx].pdVarModSites[pOutput[idx].iLenPeptide + 1] << "]"; - eachStrReturnPeptide += ss.str(); + std::stringstream ss; + ss << "c[" << std::fixed << std::setprecision(4) << pOutput[idx].pdVarModSites[pOutput[idx].iLenPeptide + 1] << "]"; + eachStrReturnPeptide += ss.str(); } eachStrReturnPeptide += "." + std::string(1, pOutput[idx].cNextAA); @@ -3661,13 +3748,13 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, szProtein[511] = '\0'; eachStrReturnProtein = szProtein; //protein - score.xCorr = pOutput[idx].fXcorr; // xcorr + score.xCorr = pOutput[idx].fXcorr; // xcorr score.dCn = pOutput[idx].fDeltaCn; // deltaCn score.dSp = pOutput[idx].fScoreSp; // prelim score - score.dExpect = pOutput[idx].dExpect; // E-value - score.mass = pOutput[idx].dPepMass - PROTON_MASS; // calc neutral pep mass - score.matchedIons = pOutput[idx].iMatchedIons; // ions matched - score.totalIons = pOutput[idx].iTotalIons; // ions tot + score.dExpect = pOutput[idx].dExpect; // E-value + score.mass = pOutput[idx].dPepMass - PROTON_MASS; // calc neutral pep mass + score.matchedIons = pOutput[idx].iMatchedIons; // ions matched + score.totalIons = pOutput[idx].iTotalIons; // ions tot int iMinLength = g_staticParams.options.peptideLengthRange.iEnd; for (int x = 0; x < iSize; ++x) @@ -3733,114 +3820,114 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, // Generate pdAAforward for pQuery->_pResults[idx].szPeptide. for (int i = 0; i < pQuery->_pResults[idx].iLenPeptide - 1; ++i) { - int iPos = pQuery->_pResults[idx].iLenPeptide - i - 1; + int iPos = pQuery->_pResults[idx].iLenPeptide - i - 1; - dBion += g_staticParams.massUtility.pdAAMassFragment[(int)pQuery->_pResults[idx].szPeptide[i]]; - dYion += g_staticParams.massUtility.pdAAMassFragment[(int)pQuery->_pResults[idx].szPeptide[iPos]]; + dBion += g_staticParams.massUtility.pdAAMassFragment[(int)pQuery->_pResults[idx].szPeptide[i]]; + dYion += g_staticParams.massUtility.pdAAMassFragment[(int)pQuery->_pResults[idx].szPeptide[iPos]]; - if (g_staticParams.variableModParameters.bVarModSearch) - { - if (pQuery->_pResults[idx].piVarModSites[i] != 0) - dBion += pQuery->_pResults[idx].pdVarModSites[i]; + if (g_staticParams.variableModParameters.bVarModSearch) + { + if (pQuery->_pResults[idx].piVarModSites[i] != 0) + dBion += pQuery->_pResults[idx].pdVarModSites[i]; - if (pQuery->_pResults[idx].piVarModSites[iPos] != 0) - dYion += pQuery->_pResults[idx].pdVarModSites[iPos]; - } + if (pQuery->_pResults[idx].piVarModSites[iPos] != 0) + dYion += pQuery->_pResults[idx].pdVarModSites[iPos]; + } - map::iterator it; - for (int ctCharge = 1; ctCharge <= pQuery->_spectrumInfoInternal.iMaxFragCharge; ++ctCharge) + map::iterator it; + for (int ctCharge = 1; ctCharge <= pQuery->_spectrumInfoInternal.iMaxFragCharge; ++ctCharge) + { + // calculate every ion series the user specified + for (int ionSeries = 0; ionSeries < NUM_ION_SERIES; ++ionSeries) { - // calculate every ion series the user specified - for (int ionSeries = 0; ionSeries < NUM_ION_SERIES; ++ionSeries) + // skip ion series that are not enabled. + if (!g_staticParams.ionInformation.iIonVal[ionSeries]) { - // skip ion series that are not enabled. - if (!g_staticParams.ionInformation.iIonVal[ionSeries]) - { - continue; - } + continue; + } - bool isNTerm = (ionSeries <= ION_SERIES_C); + bool isNTerm = (ionSeries <= ION_SERIES_C); - // get the fragment mass if it is n- or c-terimnus - double mass = (isNTerm) ? dBion : dYion; - int fragNumber = i + 1; + // get the fragment mass if it is n- or c-terimnus + double mass = (isNTerm) ? dBion : dYion; + int fragNumber = i + 1; - // Add any conversion factor from different ion series (e.g. b -> a, or y -> z) - mass += ionMassesRelative[ionSeries]; + // Add any conversion factor from different ion series (e.g. b -> a, or y -> z) + mass += ionMassesRelative[ionSeries]; - double mz = (mass + (ctCharge - 1) * PROTON_MASS) / ctCharge; - iTmp = BIN(mz); - if (iTmp < g_staticParams.iArraySizeGlobal && pdTmpSpectrum[iTmp] > 0.0) + double mz = (mass + (ctCharge - 1) * PROTON_MASS) / ctCharge; + iTmp = BIN(mz); + if (iTmp < g_staticParams.iArraySizeGlobal && pdTmpSpectrum[iTmp] > 0.0) + { + Fragment frag; + frag.intensity = pdTmpSpectrum[iTmp]; + frag.mass = mass; + frag.type = ionSeries; + frag.number = fragNumber; + frag.charge = ctCharge; + frag.neutralLoss = false; + frag.neutralLossMass = 0.0; + eachMatchedFragments.push_back(frag); + } + + if (g_staticParams.variableModParameters.bUseFragmentNeutralLoss) + { + for (int iMod = 0; iMod < VMODS; ++iMod) { + double dNLmass = g_staticParams.variableModParameters.varModList[iMod].dNeutralLoss; + + if (dNLmass == 0.0 || g_staticParams.variableModParameters.varModList[iMod].dVarModMass == 0.0) + { + continue; // continue if this iMod entry has no mod mass or no NL mass specified + } + + if (isNTerm) + { + // if have not already come across n-term mod residue for variable mod iMod, see if position i contains the variable mod + if (!bAddNtermFragmentNeutralLoss[iMod] && pOutput[idx].piVarModSites[i] == iMod + 1) + { + bAddNtermFragmentNeutralLoss[iMod] = true; + } + } + else + { + if (!bAddCtermFragmentNeutralLoss[iMod] && pOutput[idx].piVarModSites[iPos] == iMod + 1) + { + bAddCtermFragmentNeutralLoss[iMod] = true; + } + } + + if ((isNTerm && !bAddNtermFragmentNeutralLoss[iMod]) + || (!isNTerm && !bAddCtermFragmentNeutralLoss[iMod])) + { + continue; // no fragment NL yet in peptide so continue + } + + double dNLfragMz = mz - (dNLmass / ctCharge); + iTmp = BIN(dNLfragMz); + if (iTmp < g_staticParams.iArraySizeGlobal && iTmp >= 0 && pdTmpSpectrum[iTmp] > 0.0) + { Fragment frag; frag.intensity = pdTmpSpectrum[iTmp]; - frag.mass = mass; + frag.mass = mass - dNLmass; frag.type = ionSeries; frag.number = fragNumber; frag.charge = ctCharge; - frag.neutralLoss = false; - frag.neutralLossMass = 0.0; + frag.neutralLoss = true; + frag.neutralLossMass = dNLmass; eachMatchedFragments.push_back(frag); - } - - if (g_staticParams.variableModParameters.bUseFragmentNeutralLoss) - { - for (int iMod = 0; iMod < VMODS; ++iMod) - { - double dNLmass = g_staticParams.variableModParameters.varModList[iMod].dNeutralLoss; - - if (dNLmass == 0.0 || g_staticParams.variableModParameters.varModList[iMod].dVarModMass == 0.0) - { - continue; // continue if this iMod entry has no mod mass or no NL mass specified - } - - if (isNTerm) - { - // if have not already come across n-term mod residue for variable mod iMod, see if position i contains the variable mod - if (!bAddNtermFragmentNeutralLoss[iMod] && pOutput[idx].piVarModSites[i] == iMod + 1) - { - bAddNtermFragmentNeutralLoss[iMod] = true; - } - } - else - { - if (!bAddCtermFragmentNeutralLoss[iMod] && pOutput[idx].piVarModSites[iPos] == iMod + 1) - { - bAddCtermFragmentNeutralLoss[iMod] = true; - } - } - - if ((isNTerm && !bAddNtermFragmentNeutralLoss[iMod]) - || (!isNTerm && !bAddCtermFragmentNeutralLoss[iMod])) - { - continue; // no fragment NL yet in peptide so continue - } - - double dNLfragMz = mz - (dNLmass / ctCharge); - iTmp = BIN(dNLfragMz); - if (iTmp < g_staticParams.iArraySizeGlobal && iTmp >= 0 && pdTmpSpectrum[iTmp] > 0.0) - { - Fragment frag; - frag.intensity = pdTmpSpectrum[iTmp]; - frag.mass = mass - dNLmass; - frag.type = ionSeries; - frag.number = fragNumber; - frag.charge = ctCharge; - frag.neutralLoss = true; - frag.neutralLossMass = dNLmass; - eachMatchedFragments.push_back(frag); - } - } + } } } } + } } } else { eachStrReturnPeptide = ""; // peptide eachStrReturnProtein = ""; // protein - score.xCorr = -1; // xcorr + score.xCorr = -999 ; // xcorr score.dSp = 0; // prelim score score.dExpect = 999; // E-value score.mass = 0; // calc neutral pep mass @@ -3858,8 +3945,8 @@ bool CometSearchManager::DoSingleSpectrumSearchMultiResults(const int topN, // Deleting each Query object in the vector calls its destructor, which // frees the spectral memory (see definition for Query in CometDataInternal.h). - if (g_pvQuery.size() > 0) - delete g_pvQuery.at(0); + for (auto it = g_pvQuery.begin(); it != g_pvQuery.end(); ++it) + delete (*it); g_pvQuery.clear(); diff --git a/CometSearch/CometSearchManager.h b/CometSearch/CometSearchManager.h index e3889759..75f6615c 100644 --- a/CometSearch/CometSearchManager.h +++ b/CometSearch/CometSearchManager.h @@ -50,7 +50,8 @@ class CometSearchManager : public ICometSearchManager std::map& GetParamsMap(); // Methods inherited from ICometSearchManager - virtual bool CreateIndex(); + virtual bool CreateFragmentIndex(); + virtual bool CreatePeptideIndex(); virtual bool DoSearch(); virtual bool InitializeSingleSpectrumSearch(); virtual void FinalizeSingleSpectrumSearch(); diff --git a/CometSearch/CometWriteMzIdentML.cpp b/CometSearch/CometWriteMzIdentML.cpp index deadbb76..3a4c450b 100644 --- a/CometSearch/CometWriteMzIdentML.cpp +++ b/CometSearch/CometWriteMzIdentML.cpp @@ -296,7 +296,7 @@ bool CometWriteMzIdentML::ParseTmpFile(FILE *fpout, bool bPrintSequences = false; if (g_staticParams.options.bOutputMzIdentMLFile == 2) // print sequences in DBSequence { - if (g_staticParams.bIndexDb) + if (g_staticParams.iIndexDb) bPrintSequences = false; else bPrintSequences = true; diff --git a/CometSearch/CometWriteOut.cpp b/CometSearch/CometWriteOut.cpp index 61c5797f..cbd377b8 100644 --- a/CometSearch/CometWriteOut.cpp +++ b/CometSearch/CometWriteOut.cpp @@ -251,7 +251,7 @@ bool CometWriteOut::PrintResults(int iWhichQuery, } } - if (g_staticParams.bIndexDb) //index database + if (g_staticParams.iIndexDb) //index database { uiNumTotProteins = (unsigned int)g_pvProteinsList.at(pOutput[i].lProteinFilePosition).size(); } diff --git a/CometSearch/Common.h b/CometSearch/Common.h index 2792d0f0..41a6cf40 100644 --- a/CometSearch/Common.h +++ b/CometSearch/Common.h @@ -64,12 +64,13 @@ using namespace std; #include #include #include +#include #ifndef GITHUBSHA // value passed thru at compile time #define GITHUBSHA "" #endif -#define comet_version "2024.02 rev. 0" +#define comet_version "2024.02 rev. 1" #define copyright "(c) University of Washington" extern string g_sCometVersion; // version string including git hash diff --git a/CometSearch/Makefile b/CometSearch/Makefile index 68f4dd44..4af7032a 100644 --- a/CometSearch/Makefile +++ b/CometSearch/Makefile @@ -14,15 +14,15 @@ endif UNAME_S := $(shell uname -s) ifeq ($(UNAME_S),Darwin) - override CXXFLAGS += -O3 -static -std=c++14 -fpermissive -Wall -Wextra -Wno-write-strings -DGITHUBSHA='"$(GITHUB_SHA)"' -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DGCC -D_NOSQLITE -D__int64=off64_t -I. -I$(MSTPATH)/include -I$(MSTPATH)/src/expat-2.2.9/lib -I$(MSTPATH)/src/zlib-1.2.11 + override CXXFLAGS += -O3 -static -std=c++14 -fpermissive -Wall -Wextra -Wno-write-strings -DGITHUBSHA='"$(GITHUB_SHA)"' -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DGCC -D_NOSQLITE -D__int64=off64_t -I. -I$(MSTPATH)/include -I$(MSTPATH)/src/expat-2.2.9/lib -I$(MSTPATH)/src/zlib-1.2.11 else - override CXXFLAGS += -O3 -static -std=c++14 -fconcepts -fpermissive -Wall -Wextra -Wno-write-strings -DGITHUBSHA='"$(GITHUB_SHA)"' -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DGCC -D_NOSQLITE -D__int64=off64_t -I. -I$(MSTPATH)/include -I$(MSTPATH)/src/expat-2.2.9/lib -I$(MSTPATH)/src/zlib-1.2.11 + override CXXFLAGS += -O3 -static -std=c++14 -fpermissive -Wall -Wextra -Wno-write-strings -DGITHUBSHA='"$(GITHUB_SHA)"' -D_LARGEFILE_SOURCE -D_FILE_OFFSET_BITS=64 -DGCC -D_NOSQLITE -D__int64=off64_t -I. -I$(MSTPATH)/include -I$(MSTPATH)/src/expat-2.2.9/lib -I$(MSTPATH)/src/zlib-1.2.11 endif COMETSEARCH = Threading.o CometInterfaces.o CometSearch.o CometPreprocess.o CometPostAnalysis.o CometMassSpecUtils.o CometWriteOut.o\ CometWriteSqt.o CometWritePepXML.o CometWriteMzIdentML.o CometWritePercolator.o CometWriteTxt.o CometSearchManager.o\ - CombinatoricsUtils.o ModificationsPermuter.o CometFragmentIndex.o + CombinatoricsUtils.o ModificationsPermuter.o CometFragmentIndex.o CometPeptideIndex.o all: $(COMETSEARCH) @@ -33,7 +33,7 @@ clean: Threading.o: Threading.cpp Threading.h ${CXX} ${CXXFLAGS} Threading.cpp -c -CometSearch.o: CometSearch.cpp Common.h CometData.h CometDataInternal.h CometSearch.h CometInterfaces.h ThreadPool.h CometFragmentIndex.h +CometSearch.o: CometSearch.cpp Common.h CometData.h CometDataInternal.h CometSearch.h CometInterfaces.h ThreadPool.h CometFragmentIndex.h CometPeptideIndex.h ${CXX} ${CXXFLAGS} CometSearch.cpp -c CometPreprocess.o: CometPreprocess.cpp Common.h CometData.h CometDataInternal.h CometPreprocess.h CometInterfaces.h $(MSTPATH) ${CXX} ${CXXFLAGS} CometPreprocess.cpp -c @@ -65,3 +65,5 @@ ModificationsPermuter.o: ModificationsPermuter.cpp ModificationsPermuter.h Com ${CXX} ${CXXFLAGS} ModificationsPermuter.cpp -c CometFragmentIndex.o: CometFragmentIndex.cpp Common.h CometData.h CometDataInternal.h CometSearch.h CometInterfaces.h ThreadPool.h ${CXX} ${CXXFLAGS} CometFragmentIndex.cpp -c +CometPeptideIndex.o: CometPeptideIndex.cpp Common.h CometData.h CometDataInternal.h CometSearch.h CometInterfaces.h ThreadPool.h + ${CXX} ${CXXFLAGS} CometPeptideIndex.cpp -c diff --git a/CometSearch/ModificationsPermuter.cpp b/CometSearch/ModificationsPermuter.cpp index 76ad9b30..7a652db5 100644 --- a/CometSearch/ModificationsPermuter.cpp +++ b/CometSearch/ModificationsPermuter.cpp @@ -22,10 +22,11 @@ #include #include #include -#include "CombinatoricsUtils.h" +#include "Common.h" +#include "CometDataInternal.h" #include "ModificationsPermuter.h" +#include "CombinatoricsUtils.h" #include "CometFragmentIndex.h" -#include "Common.h" //using namespace std; diff --git a/CometSearch/ModificationsPermuter.h b/CometSearch/ModificationsPermuter.h index 5ff6bf22..114de74b 100644 --- a/CometSearch/ModificationsPermuter.h +++ b/CometSearch/ModificationsPermuter.h @@ -16,9 +16,6 @@ #ifndef _MODIFICATIONSPERMUTER_H_ #define _MODIFICATIONSPERMUTER_H_ -#include "Common.h" -#include "CometDataInternal.h" - class ModificationsPermuter { public: diff --git a/CometSearch/ThreadPool.h b/CometSearch/ThreadPool.h index 1714c460..e97a3293 100644 --- a/CometSearch/ThreadPool.h +++ b/CometSearch/ThreadPool.h @@ -24,7 +24,6 @@ #include #include #include - #include #ifdef TPP_WIN32THREADS diff --git a/CometWrapper/CometWrapper.cpp b/CometWrapper/CometWrapper.cpp index 30b2e9db..e2921114 100644 --- a/CometWrapper/CometWrapper.cpp +++ b/CometWrapper/CometWrapper.cpp @@ -52,13 +52,22 @@ CometSearchManagerWrapper::~CometSearchManagerWrapper() } } -bool CometSearchManagerWrapper::CreateIndex() +bool CometSearchManagerWrapper::CreatePeptideIndex() { if (!_pSearchMgr) { return false; } - return _pSearchMgr->CreateIndex(); + return _pSearchMgr->CreatePeptideIndex(); +} + +bool CometSearchManagerWrapper::CreateFragmentIndex() +{ + if (!_pSearchMgr) + { + return false; + } + return _pSearchMgr->CreateFragmentIndex(); } bool CometSearchManagerWrapper::InitializeSingleSpectrumSearch() diff --git a/CometWrapper/CometWrapper.h b/CometWrapper/CometWrapper.h index 1f0fa191..82200ef8 100644 --- a/CometWrapper/CometWrapper.h +++ b/CometWrapper/CometWrapper.h @@ -35,7 +35,8 @@ namespace CometWrapper { CometSearchManagerWrapper(); virtual ~CometSearchManagerWrapper(); - bool CreateIndex(); + bool CreateFragmentIndex(); + bool CreatePeptideIndex(); bool DoSearch(); bool InitializeSingleSpectrumSearch(); void FinalizeSingleSpectrumSearch(); diff --git a/Makefile b/Makefile index c410a622..912c9140 100644 --- a/Makefile +++ b/Makefile @@ -20,7 +20,8 @@ DEPS = CometSearch/CometData.h CometSearch/CometDataInternal.h CometSearch/Comet CometSearch/CometPostAnalysis.cpp CometSearch/CometSearchManager.cpp CometSearch/CometWritePercolator.cpp CometSearch/Threading.cpp\ CometSearch/CometPreprocess.cpp CometSearch/CometWriteOut.cpp CometSearch/CometWriteSqt.cpp CometSearch/CombinatoricsUtils.cpp\ CometSearch/ModificationsPermuter.cpp CometSearch/CometInterfaces.h CometSearch/CometInterfaces.cpp\ - CometSearch/CometFragmentIndex.cpp CometSearch/CometFragmentIndex.h + CometSearch/CometFragmentIndex.cpp CometSearch/CometFragmentIndex.h\ + CometSearch/CometPeptideIndex.cpp CometSearch/CometPeptideIndex.h LIBPATHS = -L$(MSTOOLKIT) -L$(COMETSEARCH) LIBS = -lcometsearch -lmstoolkitlite -lm -lpthread diff --git a/RealtimeSearch/Search.cs b/RealtimeSearch/Search.cs index 885fab4c..ef5c9873 100644 --- a/RealtimeSearch/Search.cs +++ b/RealtimeSearch/Search.cs @@ -126,6 +126,7 @@ static void Main(string[] args) { double dTmp = double.Parse(trailerData.Values[i]); double dMassDiff = Math.Abs(dTmp - dPrecursorMZ); + if (dTmp != 0.0 && dMassDiff < 10.0) dPrecursorMZ = dTmp; } @@ -143,41 +144,45 @@ static void Main(string[] args) // these next variables store return value from search List vPeptide = new List(); List vProtein = new List(); - List> vMatchingFragments; - List vScores; int topN = 5; // report up to topN hits per query watch.Start(); SearchMgr.DoSingleSpectrumSearchMultiResults(topN, iPrecursorCharge, dPrecursorMZ, pdMass, pdInten, iNumPeaks, - out vPeptide, out vProtein, out vMatchingFragments, out vScores); + out vPeptide, out vProtein, out List> vMatchingFragments, out List vScores); watch.Stop(); int iProteinLengthCutoff = 30; if (vPeptide.Count > 0 && (iScanNumber % 1) == 0) { - for (int x = 0; x < vPeptide.Count; ++x) + if (vPeptide[0].Length > 0) { - string protein = vProtein[x]; - if (protein.Length > iProteinLengthCutoff) - protein = protein.Substring(0, iProteinLengthCutoff); // trim to avoid printing long protein description string - - Console.WriteLine("{0}\t{12}\t{1}\t{2}\t{3:0.0000}\t{11:0.0000}\t{10:0.0}\t{4:E4}\t{5:0.0000}\t{6:0.0000}\t{7}\t{8}\t{9}", - iScanNumber, vPeptide[x], protein, vScores[x].xCorr, vScores[x].dExpect, dExpPepMass - 1.00727646688, vScores[x].mass, - vScores[x].MatchedIons, vScores[x].TotalIons, iPass, vScores[x].dSp, vScores[x].dCn, x + 1); -/* - foreach (var myFragment in vMatchingFragments[x]) // print matched fragment ions + for (int x = 0; x < vPeptide.Count; ++x) { - Console.WriteLine("\t{0:0.0000}\t{1:0.0}\t{2}+\t{3}-ion", - myFragment.Mass, - myFragment.Intensity, - myFragment.Charge, - myFragment.Type); - } + if (vPeptide[x].Length > 0) + { + string protein = vProtein[x]; + if (protein.Length > iProteinLengthCutoff) + protein = protein.Substring(0, iProteinLengthCutoff); // trim to avoid printing long protein description string + + Console.WriteLine("{0}\t{12}\t{1}\t{2}\t{3:0.0000}\t{11:0.0000}\t{10:0.0}\t{4:E4}\t{5:0.0000}\t{6:0.0000}\t{7}\t{8}\t{9}", + iScanNumber, vPeptide[x], protein, vScores[x].xCorr, vScores[x].dExpect, dExpPepMass - 1.00727646688, vScores[x].mass, + vScores[x].MatchedIons, vScores[x].TotalIons, iPass, vScores[x].dSp, vScores[x].dCn, x + 1); +/* + foreach (var myFragment in vMatchingFragments[x]) // print matched fragment ions + { + Console.WriteLine("\t{0:0.0000}\t{1:0.0}\t{2}+\t{3}-ion", + myFragment.Mass, + myFragment.Intensity, + myFragment.Charge, + myFragment.Type); + } */ + } + } + Console.WriteLine(""); } - Console.WriteLine(""); } if (vPeptide.Count > 0) @@ -237,8 +242,8 @@ public bool ConfigureInputSettings(CometSearchManagerWrapper SearchMgr, String sTmp; int iTmp; double dTmp; - DoubleRangeWrapper doubleRangeParam = new DoubleRangeWrapper(); - IntRangeWrapper intRangeParam = new IntRangeWrapper(); +// DoubleRangeWrapper doubleRangeParam = new DoubleRangeWrapper(); +// IntRangeWrapper intRangeParam = new IntRangeWrapper(); SearchMgr.SetParam("database_name", sDB, sDB); @@ -274,6 +279,7 @@ public bool ConfigureInputSettings(CometSearchManagerWrapper SearchMgr, sTmp = dTmp.ToString(); SearchMgr.SetParam("fragment_bin_tol", sTmp, dTmp); + dTmp = 0.0; // fragment bin offst sTmp = dTmp.ToString(); SearchMgr.SetParam("fragment_bin_offset", sTmp, dTmp);