00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034 #include <ncbi_pch.hpp>
00035
00036 #include <util/miscmath.h>
00037 #include <algo/blast/core/ncbi_math.h>
00038 #include <util/compress/bzip2.hpp>
00039
00040
00041
00042 #include "SpectrumSet.hpp"
00043 #include "omssa.hpp"
00044 #include "pepxml.hpp"
00045
00046 #include <fstream>
00047 #include <string>
00048 #include <list>
00049 #include <deque>
00050 #include <algorithm>
00051
00052 #include <math.h>
00053
00054 USING_NCBI_SCOPE;
00055 USING_SCOPE(objects);
00056 USING_SCOPE(omssa);
00057
00058
00059
00060 int
00061 CSearchHelper::ReadModFiles(const string& ModFileName,
00062 const string& UserModFileName,
00063 const string& Path,
00064 CRef <CMSModSpecSet> Modset)
00065 {
00066 CDirEntry DirEntry(Path);
00067 string FileName;
00068 try {
00069 if(ModFileName == "")
00070 ERR_POST(Critical << "modification filename is blank!");
00071 if(!CDirEntry::IsAbsolutePath(ModFileName))
00072 FileName = DirEntry.GetDir() + ModFileName;
00073 else FileName = ModFileName;
00074 auto_ptr<CObjectIStream>
00075 modsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
00076 if(modsin->fail()) {
00077 ERR_POST(Fatal << "ommsacl: unable to open modification file" <<
00078 FileName);
00079 return 1;
00080 }
00081 modsin->Read(ObjectInfo(*Modset));
00082 modsin->Close();
00083
00084 } catch (NCBI_NS_STD::exception& e) {
00085 ERR_POST(Fatal << "Unable to read modification file " <<
00086 FileName << " with error " << e.what());
00087 }
00088
00089
00090 if(UserModFileName != "") {
00091 try {
00092 CRef <CMSModSpecSet> UserModset(new CMSModSpecSet);
00093 if(!CDirEntry::IsAbsolutePath(UserModFileName))
00094 FileName = DirEntry.GetDir() + UserModFileName;
00095 else FileName = UserModFileName;
00096 auto_ptr<CObjectIStream>
00097 usermodsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
00098 if(usermodsin->fail()) {
00099 ERR_POST(Warning << "ommsacl: unable to open user modification file" <<
00100 ModFileName);
00101 return 0;
00102 }
00103 usermodsin->Read(ObjectInfo(*UserModset));
00104 usermodsin->Close();
00105 Modset->Append(*UserModset);
00106 } catch (NCBI_NS_STD::exception& e) {
00107 ERR_POST(Fatal << "Unable to read user modification file " <<
00108 FileName << " with error " << e.what());
00109 }
00110 }
00111 return 0;
00112 }
00113
00114
00115 void
00116 CSearchHelper::ReadTaxFile(string& Filename, TTaxNameMap& TaxNameMap)
00117 {
00118 ifstream taxnames(Filename.c_str());
00119 string line;
00120 list<string> linelist;
00121 list<string>::iterator ilist;
00122 while(taxnames && !taxnames.eof()) {
00123 getline(taxnames, line);
00124 linelist.clear();
00125 NStr::Split(line, ",", linelist);
00126 if(!linelist.empty()) {
00127 ilist = linelist.begin();
00128 ilist++;
00129 TaxNameMap[NStr::StringToInt(*ilist)] = *(linelist.begin());
00130 }
00131 }
00132 }
00133
00134 void
00135 CSearchHelper::ConditionXMLStream(CObjectOStreamXml *xml_out)
00136 {
00137 if(!xml_out) return;
00138
00139 xml_out->SetReferenceSchema();
00140
00141 xml_out->SetWriteNamedIntegersByValue(true);
00142 }
00143
00144
00145
00146 int
00147 CSearchHelper::ReadFile(const string& Filename,
00148 const EMSSpectrumFileType FileType,
00149 CMSSearch& MySearch)
00150 {
00151 CRef <CMSRequest> Request (new CMSRequest);
00152 MySearch.SetRequest().push_back(Request);
00153
00154
00155
00156 CNcbiIfstream PeakFile(Filename.c_str());
00157 if(!PeakFile) {
00158 ERR_POST(Fatal <<" omssacl: not able to open spectrum file " <<
00159 Filename);
00160 return 1;
00161 }
00162
00163 CRef <CSpectrumSet> SpectrumSet(new CSpectrumSet);
00164 (*MySearch.SetRequest().begin())->SetSpectra(*SpectrumSet);
00165 return SpectrumSet->LoadFile(FileType, PeakFile);
00166 }
00167
00168 int
00169 CSearchHelper::ReadSearchRequest(const string& Filename,
00170 const ESerialDataFormat DataFormat,
00171 CMSSearch& MySearch)
00172 {
00173 CRef <CMSRequest> Request (new CMSRequest);
00174 MySearch.SetRequest().push_back(Request);
00175
00176
00177
00178 auto_ptr<CObjectIStream>
00179 in(CObjectIStream::Open(Filename.c_str(), DataFormat));
00180 in->Open(Filename.c_str(), DataFormat);
00181 if(in->fail()) {
00182 ERR_POST(Warning << "omssacl: unable to search file" <<
00183 Filename);
00184 return 1;
00185 }
00186 in->Read(ObjectInfo(*Request));
00187 in->Close();
00188 return 0;
00189 }
00190
00191
00192 int
00193 CSearchHelper::ReadCompleteSearch(const string& Filename,
00194 const ESerialDataFormat DataFormat,
00195 bool bz2,
00196 CMSSearch& MySearch)
00197 {
00198 auto_ptr <CNcbiIfstream> raw_in;
00199 auto_ptr <CCompressionIStream> compress_in;
00200 auto_ptr <CObjectIStream> in;
00201
00202 if( bz2 ) {
00203 raw_in.reset(new CNcbiIfstream(Filename.c_str()));
00204 compress_in.reset( new CCompressionIStream (*raw_in,
00205 new CBZip2StreamDecompressor(),
00206 CCompressionStream::fOwnProcessor));
00207 in.reset(CObjectIStream::Open(DataFormat, *compress_in));
00208 }
00209 else {
00210 in.reset(CObjectIStream::Open(Filename.c_str(), DataFormat));
00211 }
00212 if(in->fail()) {
00213 ERR_POST(Warning << "omssacl: unable to search file" <<
00214 Filename);
00215 return 1;
00216 }
00217 in->Read(ObjectInfo(MySearch));
00218 in->Close();
00219 return 0;
00220 }
00221
00222
00223 int
00224 CSearchHelper::LoadAnyFile(CMSSearch& MySearch,
00225 CConstRef <CMSInFile> InFile,
00226 bool* SearchEngineIterative)
00227 {
00228 string Filename(InFile->GetInfile());
00229 EMSSpectrumFileType DataFormat =
00230 static_cast <EMSSpectrumFileType> (InFile->GetInfiletype());
00231
00232 switch (DataFormat) {
00233 case eMSSpectrumFileType_dta:
00234 case eMSSpectrumFileType_dtablank:
00235 case eMSSpectrumFileType_dtaxml:
00236 case eMSSpectrumFileType_pkl:
00237 case eMSSpectrumFileType_mgf:
00238 return CSearchHelper::ReadFile(Filename, DataFormat, MySearch);
00239 break;
00240 case eMSSpectrumFileType_oms:
00241 if(SearchEngineIterative) *SearchEngineIterative = true;
00242 return CSearchHelper::ReadCompleteSearch(Filename, eSerial_AsnBinary, false, MySearch);
00243 break;
00244 case eMSSpectrumFileType_omx:
00245 if(SearchEngineIterative) *SearchEngineIterative = true;
00246 return CSearchHelper::ReadCompleteSearch(Filename, eSerial_Xml, false, MySearch);
00247 break;
00248 case eMSSpectrumFileType_xml:
00249 return CSearchHelper::ReadSearchRequest(Filename, eSerial_Xml, MySearch);
00250 break;
00251 case eMSSpectrumFileType_omxbz2 :
00252 return CSearchHelper::ReadCompleteSearch(Filename, eSerial_Xml, true, MySearch);
00253 break;
00254 case eMSSpectrumFileType_asc:
00255 case eMSSpectrumFileType_pks:
00256 case eMSSpectrumFileType_sciex:
00257 case eMSSpectrumFileType_unknown:
00258 default:
00259 break;
00260 }
00261 return 1;
00262 }
00263
00264
00265 void CSearchHelper::SaveOneFile(CMSSearch &MySearch,
00266 const string Filename,
00267 ESerialDataFormat FileFormat,
00268 bool IncludeRequest,
00269 bool bz2)
00270 {
00271 auto_ptr <CNcbiOfstream> raw_out;
00272 auto_ptr <CCompressionOStream> compress_out;
00273 auto_ptr <CObjectOStream> txt_out;
00274
00275 if( bz2 ) {
00276 raw_out.reset(new CNcbiOfstream(Filename.c_str()));
00277 compress_out.reset( new CCompressionOStream (*raw_out,
00278 new CBZip2StreamCompressor(),
00279 CCompressionStream::fOwnProcessor));
00280 txt_out.reset(CObjectOStream::Open(FileFormat, *compress_out));
00281 }
00282 else {
00283 txt_out.reset(CObjectOStream::Open(Filename.c_str(), FileFormat));
00284 }
00285
00286 if(FileFormat == eSerial_Xml) {
00287 CObjectOStreamXml *xml_out = dynamic_cast <CObjectOStreamXml *> (txt_out.get());
00288 CSearchHelper::ConditionXMLStream(xml_out);
00289 }
00290 if(IncludeRequest)
00291 txt_out->Write(ObjectInfo(MySearch));
00292 else
00293 txt_out->Write(ObjectInfo(**MySearch.SetResponse().begin()));
00294 }
00295
00296
00297 int
00298 CSearchHelper::SaveAnyFile(CMSSearch& MySearch,
00299 CMSSearchSettings::TOutfiles OutFiles,
00300 CRef <CMSModSpecSet> Modset)
00301 {
00302 CMSSearchSettings::TOutfiles::const_iterator iOutFile;
00303
00304 for(iOutFile = OutFiles.begin(); iOutFile != OutFiles.end(); ++iOutFile) {
00305 string Filename((*iOutFile)->GetOutfile());
00306 EMSSerialDataFormat DataFormat =
00307 static_cast <EMSSerialDataFormat> ((*iOutFile)->GetOutfiletype());
00308 ESerialDataFormat FileFormat(eSerial_AsnText);
00309
00310 auto_ptr <CObjectOStream> txt_out;
00311 if(DataFormat == eMSSerialDataFormat_asntext)
00312 FileFormat = eSerial_AsnText;
00313 if(DataFormat == eMSSerialDataFormat_asnbinary)
00314 FileFormat = eSerial_AsnBinary;
00315 if(DataFormat == eMSSerialDataFormat_xml)
00316 FileFormat = eSerial_Xml;
00317 if(DataFormat == eMSSerialDataFormat_xmlbz2)
00318 FileFormat = eSerial_Xml;
00319
00320 switch (DataFormat) {
00321 case eMSSerialDataFormat_asntext:
00322 case eMSSerialDataFormat_asnbinary:
00323 case eMSSerialDataFormat_xml:
00324 CSearchHelper::SaveOneFile(MySearch,
00325 Filename,
00326 FileFormat,
00327 (*iOutFile)->GetIncluderequest(),
00328 false);
00329 break;
00330 case eMSSerialDataFormat_xmlbz2:
00331 CSearchHelper::SaveOneFile(MySearch,
00332 Filename,
00333 FileFormat,
00334 (*iOutFile)->GetIncluderequest(),
00335 true);
00336 break;
00337 case eMSSerialDataFormat_pepxml:
00338 {
00339 CPepXML outPepXML;
00340 outPepXML.ConvertFromOMSSA(MySearch, Modset, Filename, Filename);
00341 auto_ptr<CObjectOStream> file_out(CObjectOStream::Open(Filename, eSerial_Xml));
00342 *file_out << outPepXML;
00343 }
00344 break;
00345 case eMSSerialDataFormat_csv:
00346 {
00347 CNcbiOfstream oscsv;
00348 oscsv.open(Filename.c_str());
00349 (*MySearch.SetResponse().begin())->PrintCSV(oscsv, Modset);
00350 oscsv.close();
00351 }
00352 break;
00353 case eMSSerialDataFormat_none:
00354 default:
00355 {
00356 ERR_POST(Error << "Unknown output file format " << DataFormat);
00357 }
00358 return 1;
00359 break;
00360 }
00361 }
00362 return 0;
00363 }
00364
00365 void
00366 CSearchHelper::ValidateSearchSettings(CRef<CMSSearchSettings> &Settings)
00367 {
00368 list <string> ValidError;
00369 if(Settings->Validate(ValidError) != 0) {
00370 list <string>::iterator iErr;
00371 for(iErr = ValidError.begin(); iErr != ValidError.end(); iErr++)
00372 ERR_POST(Warning << *iErr);
00373 ERR_POST(Fatal << "Unable to validate settings");
00374 }
00375 }
00376
00377
00378 void
00379 CSearchHelper::CreateSearchSettings(string FileName,
00380 CRef<CMSSearchSettings> &Settings)
00381 {
00382 if(FileName != "" ) {
00383 try {
00384 auto_ptr<CObjectIStream>
00385 paramsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
00386 if(paramsin->fail()) {
00387 ERR_POST(Fatal << "ommsacl: unable to open parameter file" <<
00388 FileName);
00389 return;
00390 }
00391 paramsin->Read(ObjectInfo(*Settings));
00392 paramsin->Close();
00393
00394 } catch (NCBI_NS_STD::exception& e) {
00395 ERR_POST(Fatal << "Unable to read parameter file " <<
00396 FileName << " with error " << e.what());
00397 }
00398 }
00399 }
00400
00401
00402
00403
00404
00405
00406
00407
00408
00409
00410
00411
00412 CSearch::CSearch(int tNum):
00413 UseRankScore(false),
00414 Iterative(false),
00415 RestrictedSearch(false)
00416 {
00417 ThreadNum = tNum;
00418 }
00419
00420
00421 void CSearch::ResetGlobals(void)
00422 {
00423 iSearchGlobal = -1;
00424 MaxMZ = 0;
00425 SharedPeakSet.Reset(0);
00426 }
00427
00428
00429 int CSearch::InitBlast(const char *blastdb, bool use_mmap)
00430 {
00431 if (!blastdb) return 0;
00432 rdfp.Reset(new CSeqDB(blastdb, CSeqDB::eProtein,
00433 0, 0, use_mmap));
00434 numseq = rdfp->GetNumOIDs();
00435 return 0;
00436 }
00437
00438
00439
00440
00441 int CSearch::CreateLadders(const char *Sequence,
00442 int iSearch,
00443 int position,
00444 int endposition,
00445 int *Masses,
00446 int iMissed,
00447 CAA& AA,
00448 int iMod,
00449 CMod ModList[],
00450 int NumMod)
00451 {
00452 TLadderMap::iterator Iter;
00453 SetLadderContainer().Begin(Iter);
00454 while(Iter != SetLadderContainer().SetLadderMap().end()) {
00455 bool NoProline = find(GetSettings()->GetNoprolineions().begin(),
00456 GetSettings()->GetNoprolineions().end(),
00457 CMSMatchedPeakSetMap::Key2Series(Iter->first)) !=
00458 GetSettings()->GetNoprolineions().end();
00459 if (!(*(Iter->second))[iMod]->
00460 CreateLadder(CMSMatchedPeakSetMap::Key2Series(Iter->first),
00461 CMSMatchedPeakSetMap::Key2Charge(Iter->first),
00462 Sequence,
00463 iSearch,
00464 position,
00465 endposition,
00466 Masses[iMissed],
00467 MassArray,
00468 AA,
00469 SetMassAndMask(iMissed, iMod).Mask,
00470 ModList,
00471 NumMod,
00472 *SetSettings(),
00473 NoProline
00474 )) return 1;
00475 SetLadderContainer().Next(Iter);
00476 }
00477
00478 return 0;
00479 }
00480
00481
00482
00483 int CSearch::CompareLadders(int iMod,
00484 CMSPeak *Peaks,
00485 bool OrLadders,
00486 const TMassPeak *MassPeak)
00487 {
00488 EMSPeakListTypes Which = Peaks->GetWhich(MassPeak->Charge);
00489
00490 int ChargeLimitLo(0), ChargeLimitHi(0);
00491 if (MassPeak) {
00492 if(MassPeak->Charge < Peaks->GetConsiderMult()) {
00493 ChargeLimitLo = 1;
00494 ChargeLimitHi = 1;
00495 }
00496 else {
00497 ChargeLimitLo = 0;
00498 ChargeLimitHi = 0;
00499 }
00500 }
00501
00502 TLadderMap::iterator Iter;
00503 SetLadderContainer().Begin(Iter, ChargeLimitLo, ChargeLimitHi);
00504 vector<bool> usedPeaks(Peaks->SetPeakLists()[Which]->GetNum(), false);
00505 while(Iter != SetLadderContainer().SetLadderMap().end()) {
00506 Peaks->CompareSortedRank(*((*(Iter->second))[iMod]), Which, usedPeaks);
00507 SetLadderContainer().Next(Iter, ChargeLimitLo, ChargeLimitHi);
00508 }
00509 return 0;
00510 }
00511
00512
00513
00514 bool CSearch::CompareLaddersTop(int iMod,
00515 CMSPeak *Peaks,
00516 const TMassPeak *MassPeak)
00517 {
00518 int ChargeLimitLo(0), ChargeLimitHi(0);
00519 if (MassPeak) {
00520 if(MassPeak->Charge < Peaks->GetConsiderMult()) {
00521 ChargeLimitLo = 1;
00522 ChargeLimitHi = 1;
00523 }
00524 else {
00525 ChargeLimitLo = 0;
00526 ChargeLimitHi = 0;
00527 }
00528 }
00529
00530 TLadderMap::iterator Iter;
00531 SetLadderContainer().Begin(Iter, ChargeLimitLo, ChargeLimitHi);
00532 while(Iter != SetLadderContainer().SetLadderMap().end()) {
00533 if(Peaks->CompareTop(*((*(Iter->second))[iMod]))) return true;
00534 SetLadderContainer().Next(Iter, ChargeLimitLo, ChargeLimitHi);
00535 }
00536 return false;
00537 }
00538
00539
00540 const bool
00541 CSearch::ReSearch(const int Number) const
00542 {
00543 if ( GetSettings()->GetIterativesettings().GetResearchthresh() != 0.0) {
00544
00545 CRef <CMSHitSet> HitSet;
00546 HitSet = GetResponse()->FindHitSet(Number);
00547 if (HitSet.IsNull()) return true;
00548 if (HitSet->GetHits().empty()) return true;
00549 if ((*HitSet->GetHits().begin())->GetEvalue() <=
00550 GetSettings()->GetIterativesettings().GetResearchthresh())
00551 return false;
00552 else return true;
00553 }
00554 return true;
00555 }
00556
00557
00558
00559 void CSearch::Spectrum2Peak(CRef<CMSPeakSet> PeakSet)
00560 {
00561 CSpectrumSet::Tdata::const_iterator iSpectrum;
00562 CMSPeak* Peaks;
00563
00564 iSpectrum = GetRequest()->GetSpectra().Get().begin();
00565 for (; iSpectrum != GetRequest()->GetSpectra().Get().end(); iSpectrum++) {
00566 CRef <CMSSpectrum> Spectrum = *iSpectrum;
00567 if (!Spectrum) {
00568 ERR_POST(Error << "omssa: unable to find spectrum");
00569 return;
00570 }
00571
00572
00573 if (GetIterative() && !ReSearch(Spectrum->GetNumber()))
00574 continue;
00575
00576 Peaks = new CMSPeak(GetSettings()->GetHitlistlen());
00577 if (!Peaks) {
00578 ERR_POST(Error << "omssa: unable to allocate CMSPeak");
00579 return;
00580 }
00581
00582 Peaks->ReadAndProcess(*Spectrum, *GetSettings());
00583 #if 0
00584 {
00585 ofstream os("test.dta");
00586 Peaks->Write(os, eMSSpectrumFileType_dta, eMSPeakListCharge1);
00587 }
00588 #endif
00589 PeakSet->AddPeak(Peaks);
00590
00591 }
00592 MaxMZ = PeakSet->SortPeaks(MSSCALE2INT(GetSettings()->GetPeptol()),
00593 GetSettings()->GetZdep());
00594
00595 }
00596
00597
00598 struct CMassMaskCompare {
00599 bool operator() (const TMassMask& x, const TMassMask& y)
00600 {
00601 if (x.Mass < y.Mass) return true;
00602 return false;
00603 }
00604 };
00605
00606
00607
00608
00609
00610
00611 void CSearch::DeleteVariableOverlap(int& NumMod,
00612 CMod ModList[])
00613 {
00614 int i, j;
00615 for (i = 0; i < NumMod; i++) {
00616
00617 if (ModList[i].GetFixed() != 1) {
00618
00619 for (j = 0; j < NumMod; j++) {
00620
00621 if (ModList[j].GetFixed() == 1 &&
00622 ModList[i].GetSite() == ModList[j].GetSite()) {
00623
00624 ModList[i].SetFixed() = -1;
00625 }
00626 }
00627 }
00628 }
00629
00630
00631 for (i = 0; i < NumMod;) {
00632 if (ModList[i].GetFixed() == -1) {
00633 NumMod--;
00634
00635 if (i == NumMod) return;
00636
00637 for (j=i; j < NumMod; ++j) {
00638 ModList[j] = ModList[j+1];
00639 }
00640 }
00641 else i++;
00642 }
00643 return;
00644 }
00645
00646
00647 void CSearch::UpdateWithNewPep(int Missed,
00648 const char *PepStart[],
00649 const char *PepEnd[],
00650 int NumMod[],
00651 CMod ModList[][MAXMOD],
00652 int Masses[],
00653 int EndMasses[],
00654 int NumModSites[],
00655 CRef <CMSModSpecSet> &Modset)
00656 {
00657
00658 int iMissed;
00659
00660
00661
00662 int iMod;
00663
00664
00665
00666 for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
00667
00668 if (PepStart[iMissed] == (const char *)-1) continue;
00669
00670 PepEnd[iMissed] = PepEnd[Missed - 1];
00671
00672
00673
00674
00675
00676
00677
00678
00679
00680 const char *OldSite(0);
00681 int NumModSitesCount(0), NumModCount(0);
00682 for (iMod = 0; iMod < NumMod[Missed-1]; iMod++) {
00683
00684
00685 if (NumModCount + NumMod[iMissed] >= MAXMOD) break;
00686
00687
00688 if ((Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnp ||
00689 Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnpaa) &&
00690 PepStart[iMissed] != ModList[Missed-1][iMod].GetSite()) {
00691 continue;
00692 }
00693
00694
00695 if (Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modn ||
00696 Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnaa) {
00697 continue;
00698 }
00699
00700
00701 ModList[iMissed][NumModCount + NumMod[iMissed]] =
00702 ModList[Missed-1][iMod];
00703
00704
00705 if (OldSite != ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite() &&
00706 ModList[iMissed][NumModCount + NumMod[iMissed]].GetFixed() != 1) {
00707 NumModSitesCount++;
00708 OldSite = ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite();
00709 }
00710
00711
00712 NumModCount++;
00713
00714
00715 }
00716
00717
00718 Masses[iMissed] += Masses[Missed - 1];
00719
00720
00721 EndMasses[iMissed] = EndMasses[Missed - 1];
00722
00723
00724 NumMod[iMissed] += NumModCount;
00725
00726
00727 NumModSites[iMissed] += NumModSitesCount;
00728 }
00729 }
00730
00731
00732
00733
00734
00735
00736
00737
00738
00739 void CSearch::CountModSites(int &NumModSites,
00740 int NumMod,
00741 CMod ModList[])
00742 {
00743 NumModSites = 0;
00744 int i;
00745 const char *OldSite(0);
00746
00747 for (i = 0; i < NumMod; i++) {
00748
00749 if (ModList[i].GetSite() != OldSite && ModList[i].GetFixed() != 1 ) {
00750 NumModSites++;
00751 OldSite = ModList[i].GetSite();
00752 }
00753 }
00754 }
00755
00756
00757
00758 void CSearch::CreateModCombinations(int Missed,
00759 const char *PepStart[],
00760 int Masses[],
00761 int EndMasses[],
00762 int NumMod[],
00763 int NumMassAndMask[],
00764 int NumModSites[],
00765 CMod ModList[][MAXMOD]
00766 )
00767 {
00768
00769
00770
00771
00772
00773
00774
00775
00776 unsigned Mask, MassOfMask;
00777
00778 int iiMod;
00779
00780 int iModCount;
00781
00782 int iMissed;
00783
00784 int iMod;
00785
00786 int ModIndex[MAXMOD];
00787
00788
00789 for (iMissed = 0; iMissed < Missed; iMissed++) {
00790
00791 if (PepStart[iMissed] == (const char *)-1) continue;
00792 iModCount = 0;
00793
00794
00795 SetMassAndMask(iMissed, iModCount).Mass =
00796 Masses[iMissed] + EndMasses[iMissed];
00797 SetMassAndMask(iMissed, iModCount).Mask = 0;
00798
00799 int NumVariable(NumMod[iMissed]);
00800 int NumFixed;
00801
00802 for (iMod = 0; iMod < NumMod[iMissed]; iMod++) {
00803 if (ModList[iMissed][iMod].GetFixed()) {
00804 SetMassAndMask(iMissed, iModCount).Mass += ModList[iMissed][iMod].GetPrecursorDelta();
00805 SetMassAndMask(iMissed, iModCount).Mask |= 1 << iMod;
00806 NumVariable--;
00807 }
00808 }
00809 iModCount++;
00810 NumFixed = NumMod[iMissed] - NumVariable;
00811
00812
00813
00814 for (iMod = 0; iMod < NumModSites[iMissed] && iModCount < MaxModPerPep; iMod++) {
00815
00816
00817
00818
00819
00820 InitModIndex(ModIndex, iMod, NumMod[iMissed],
00821 NumModSites[iMissed], ModList[iMissed]);
00822 do {
00823
00824
00825 MassOfMask = SetMassAndMask(iMissed, 0).Mass;
00826 for (iiMod = 0; iiMod <= iMod; iiMod++ )
00827 MassOfMask += ModList[iMissed][ModIndex[iiMod + NumFixed]].GetPrecursorDelta();
00828
00829 Mask = MakeBoolMask(ModIndex, iMod + NumFixed);
00830
00831 SetMassAndMask(iMissed, iModCount).Mass = MassOfMask;
00832 SetMassAndMask(iMissed, iModCount).Mask = Mask;
00833 #if 0
00834 printf("NumMod = %d iMod = %d, Mask = \n", NumMod[iMissed], iMod);
00835 int iii;
00836 for (iii=NumMod[iMissed]-1; iii >= 0; iii--) {
00837 if (Mask & 1 << iii) printf("1");
00838 else printf("0");
00839 }
00840 printf("\n");
00841 #endif
00842
00843 iModCount++;
00844
00845 } while (iModCount < MaxModPerPep &&
00846 CalcModIndex(ModIndex, iMod, NumMod[iMissed], NumFixed,
00847 NumModSites[iMissed], ModList[iMissed]));
00848 }
00849
00850
00851 if (SetSettings()->GetPrecursorsearchtype() == eMSSearchType_exact) {
00852 int ii;
00853 for (ii = 0; ii < iModCount; ++ii) {
00854 SetMassAndMask(iMissed, ii).Mass +=
00855 SetMassAndMask(iMissed, ii).Mass /
00856 MSSCALE2INT(GetSettings()->GetExactmass()) *
00857 MSSCALE2INT(kNeutron);
00858 }
00859 }
00860
00861
00862
00863 sort(MassAndMask.get() + iMissed*MaxModPerPep, MassAndMask.get() + iMissed*MaxModPerPep + iModCount,
00864 CMassMaskCompare());
00865
00866 NumMassAndMask[iMissed] = iModCount;
00867
00868 }
00869 }
00870
00871
00872 void CSearch::SetIons(list <EMSIonSeries> & Ions)
00873 {
00874 if (GetSettings()->GetIonstosearch().size() < 1) {
00875 ERR_POST(Fatal << "omssa: at least one ions series to search need to be specified");
00876 }
00877 CMSSearchSettings::TIonstosearch::const_iterator i;
00878 i = GetSettings()->GetIonstosearch().begin();
00879 for(; i != GetSettings()->GetIonstosearch().end(); ++i) {
00880 Ions.push_back(static_cast <EMSIonSeries> (*i));
00881 }
00882 }
00883
00884
00885 void CSearch::InitLadders(list <EMSIonSeries> & Ions)
00886 {
00887
00888 int MaxLadderSize = GetSettings()->GetMaxproductions();
00889 if (MaxLadderSize == 0) MaxLadderSize = kMSLadderMax;
00890
00891 int i;
00892 SetLadderContainer().SetSeriesChargePairList().clear();
00893 list <EMSIonSeries> ::const_iterator iIons;
00894
00895 for (iIons = Ions.begin(); iIons != Ions.end(); ++iIons) {
00896 for(i = 1; i <= GetSettings()->GetChargehandling().GetMaxproductcharge(); ++i) {
00897 SetLadderContainer().SetSeriesChargePairList().
00898 push_back(TSeriesChargePairList::value_type(i, *iIons));
00899 }
00900 }
00901 SetLadderContainer().CreateLadderArrays(MaxModPerPep, MaxLadderSize);
00902 }
00903
00904
00905 void CSearch::MakeOidSet(void)
00906 {
00907 SetOidSet().clear();
00908 if (GetSettings()->GetIterativesettings().GetSubsetthresh() != 0.0) {
00909 SetRestrictedSearch() = true;
00910 GetResponse()->
00911 GetOidsBelowThreshold(
00912 SetOidSet(),
00913 GetSettings()->GetIterativesettings().GetSubsetthresh());
00914 }
00915 }
00916
00917 int CSearch::iSearchGlobal = -1;
00918 int CSearch::MaxMZ = 0;
00919 CRef<CMSPeakSet> CSearch::SharedPeakSet = null;
00920 DEFINE_STATIC_FAST_MUTEX(iSearchMutex);
00921 DEFINE_STATIC_FAST_MUTEX(PeakSetMutex);
00922 DEFINE_STATIC_FAST_MUTEX(PeaksExaminedMutex);
00923
00924 void CSearch::SetupSearch(CRef <CMSRequest> MyRequestIn,
00925 CRef <CMSResponse> MyResponseIn,
00926 CRef <CMSModSpecSet> Modset,
00927 CRef <CMSSearchSettings> SettingsIn,
00928 TOMSSACallback Callback,
00929 void *CallbackData)
00930 {
00931 initRequestIn = MyRequestIn;
00932 initResponseIn = MyResponseIn;
00933 initModset = Modset;
00934 initSettingsIn = SettingsIn;
00935 initCallback = Callback;
00936 initCallbackData = CallbackData;
00937 }
00938
00939 void* CSearch::Main(void)
00940 {
00941 Search(initRequestIn,
00942 initResponseIn,
00943 initModset,
00944 initSettingsIn,
00945 initCallback);
00946
00947 return new bool(true);
00948 }
00949
00950 void CSearch::OnExit(void)
00951 {
00952 }
00953
00954 void CSearch::CopySettings(CRef <CSearch> fromObj)
00955 {
00956 initRequestIn = fromObj->initRequestIn;
00957 initResponseIn = fromObj->initResponseIn;
00958 initModset = fromObj->initModset;
00959 initSettingsIn = fromObj->initSettingsIn;
00960 initCallback = fromObj->initCallback;
00961 initCallbackData = fromObj->initCallbackData;
00962 UseRankScore = fromObj->UseRankScore;
00963 Iterative = fromObj->Iterative;
00964 numseq = fromObj->numseq;
00965 rdfp = fromObj->rdfp;
00966
00967 }
00968
00969 void CSearch::Search(CRef <CMSRequest> MyRequestIn,
00970 CRef <CMSResponse> MyResponseIn,
00971 CRef <CMSModSpecSet> Modset,
00972 CRef <CMSSearchSettings> SettingsIn,
00973 TOMSSACallback Callback,
00974 void *CallbackData)
00975 {
00976 try {
00977 SetSettings().Reset(SettingsIn);
00978 SetRequest().Reset(MyRequestIn);
00979 SetResponse().Reset(MyResponseIn);
00980
00981
00982 SetSettings()->SetScale(MSSCALE);
00983 SetResponse()->SetScale(MSSCALE);
00984
00985 SetEnzyme() = CCleaveFactory::CleaveFactory(static_cast <EMSEnzymes>
00986 (GetSettings()->GetEnzyme()));
00987
00988
00989 if (GetIterative()) {
00990
00991 if (GetResponse()->GetDbversion() != Getnumseq())
00992 ERR_POST(Fatal <<
00993 "number of sequences in search library is not the same as previously searched. Unable to do iterative search.");
00994
00995
00996 MakeOidSet();
00997 }
00998
00999
01000 MaxModPerPep = GetSettings()->GetMaxmods();
01001 if (MaxModPerPep > MAXMOD2) MaxModPerPep = MAXMOD2;
01002
01003 list <EMSIonSeries> Ions;
01004 SetIons(Ions);
01005 InitLadders(Ions);
01006
01007 LadderCalc.reset(new Int1[MaxModPerPep]);
01008 CAA AA;
01009
01010 int Missed;
01011 if (GetEnzyme()->GetNonSpecific()) Missed = 1;
01012 else Missed = GetSettings()->GetMissedcleave()+1;
01013
01014 int iMissed;
01015
01016 int iSearch, hits;
01017 int endposition, position;
01018
01019
01020 FixedMods.Init(GetSettings()->GetFixed(), Modset);
01021 MassArray.Init(FixedMods, GetSettings()->GetProductsearchtype(), Modset);
01022 PrecursorMassArray.Init(FixedMods,
01023 GetSettings()->GetPrecursorsearchtype(), Modset);
01024
01025 SetEnzyme()->SetNMethionine() =
01026 VariableMods.Init(GetSettings()->GetVariable(), Modset) ||
01027 SetSettings()->GetNmethionine();
01028
01029 const int *IntMassArray = MassArray.GetIntMass();
01030 const int *PrecursorIntMassArray = PrecursorMassArray.GetIntMass();
01031 const char *PepStart[MAXMISSEDCLEAVE];
01032 const char *PepEnd[MAXMISSEDCLEAVE];
01033
01034
01035 CMod ModList[MAXMISSEDCLEAVE][MAXMOD];
01036
01037 int NumMod[MAXMISSEDCLEAVE];
01038
01039 int NumModSites[MAXMISSEDCLEAVE];
01040
01041
01042
01043 MassAndMask.reset(new TMassMask[MAXMISSEDCLEAVE*MaxModPerPep]);
01044
01045
01046 int NumMassAndMask[MAXMISSEDCLEAVE];
01047
01048
01049
01050
01051 int Masses[MAXMISSEDCLEAVE];
01052 int EndMasses[MAXMISSEDCLEAVE];
01053
01054 int iMod;
01055
01056 bool SequenceDone;
01057
01058 const CMSSearchSettings::TTaxids& Tax = GetSettings()->GetTaxids();
01059 CMSSearchSettings::TTaxids::const_iterator iTax;
01060
01061 CMSHit NewHit;
01062 CMSHit *NewHitOut;
01063
01064 const TMassPeak *MassPeak;
01065 CMSPeak* Peaks;
01066 CIntervalTree::const_iterator im;
01067
01068
01069 TLadderMap::iterator Iter;
01070
01071 {{
01072 CFastMutexGuard guard(PeakSetMutex);
01073 if (SharedPeakSet == null) {
01074 SharedPeakSet = new CMSPeakSet();
01075 Spectrum2Peak(SharedPeakSet);
01076 }
01077 }}
01078 vector <int> taxids;
01079 vector <int>::iterator itaxids;
01080 bool TaxInfo(false);
01081 bool iSearchNotDone(true);
01082
01083
01084
01085 while (iSearchNotDone) {
01086 {{
01087 CFastMutexGuard guard(iSearchMutex);
01088 iSearchGlobal++;
01089 if (!rdfp->CheckOrFindOID(iSearchGlobal)) {
01090 iSearchNotDone = false;
01091 continue;
01092 }
01093 iSearch = iSearchGlobal;
01094 if (iSearch % 10000 == 0) {
01095 if(Callback) Callback(Getnumseq(), iSearch, CallbackData);
01096 }
01097 }}
01098
01099
01100 if (GetRestrictedSearch() && SetOidSet().find(iSearch) == SetOidSet().end())
01101 continue;
01102
01103 if (SetSettings()->IsSetTaxids()) {
01104 rdfp->GetTaxIDs(iSearch, taxids, false);
01105 for (itaxids = taxids.begin(); itaxids != taxids.end(); ++itaxids) {
01106 if (*itaxids == 0) continue;
01107 TaxInfo = true;
01108 for (iTax = Tax.begin(); iTax != Tax.end(); ++iTax) {
01109 if (*itaxids == *iTax) goto TaxContinue;
01110 }
01111 }
01112 continue;
01113 }
01114 TaxContinue:
01115 CSeqDBSequence Sequence(rdfp.GetPointer(), iSearch);
01116 SequenceDone = false;
01117
01118
01119 for (iMissed = 0; iMissed < Missed; iMissed++) {
01120 PepStart[iMissed] = (const char *)-1;
01121 PepEnd[iMissed] = Sequence.GetData();
01122 Masses[iMissed] = 0;
01123 EndMasses[iMissed] = 0;
01124 NumMod[iMissed] = 0;
01125 NumModSites[iMissed] = 0;
01126
01127 ModList[iMissed][0].Reset();
01128 }
01129 PepStart[Missed - 1] = Sequence.GetData();
01130
01131
01132 if (SetEnzyme()->GetNonSpecific()) {
01133 SetEnzyme()->SetStop() = Sequence.GetData() + SetSettings()->GetMinnoenzyme() - 1;
01134 }
01135
01136
01137 while (!SequenceDone) {
01138
01139
01140
01141
01142
01143 Masses[Missed - 1] = 0;
01144 EndMasses[Missed - 1] = 0;
01145 NumMod[Missed - 1] = 0;
01146 NumModSites[Missed - 1] = 0;
01147
01148 ModList[Missed - 1][0].Reset();
01149
01150
01151 SequenceDone =
01152 SetEnzyme()->CalcAndCut(Sequence.GetData(),
01153 Sequence.GetData() + Sequence.GetLength() - 1,
01154 &(PepEnd[Missed - 1]),
01155 &(Masses[Missed - 1]),
01156 NumMod[Missed - 1],
01157 MAXMOD,
01158 &(EndMasses[Missed - 1]),
01159 VariableMods, FixedMods,
01160 ModList[Missed - 1],
01161 IntMassArray,
01162 PrecursorIntMassArray,
01163 Modset,
01164 SetSettings()->GetMaxproductions()
01165 );
01166
01167
01168 DeleteVariableOverlap(NumMod[Missed - 1],
01169 ModList[Missed - 1]);
01170
01171
01172 CountModSites(NumModSites[Missed - 1],
01173 NumMod[Missed - 1],
01174 ModList[Missed - 1]);
01175
01176 UpdateWithNewPep(Missed, PepStart, PepEnd, NumMod, ModList,
01177 Masses, EndMasses, NumModSites, Modset);
01178
01179 CreateModCombinations(Missed, PepStart, Masses,
01180 EndMasses, NumMod, NumMassAndMask,
01181 NumModSites, ModList);
01182
01183
01184 int OldMass;
01185 bool NoMassMatch;
01186
01187 for (iMissed = 0; iMissed < Missed; iMissed++) {
01188 if (PepStart[iMissed] == (const char *)-1) continue;
01189
01190
01191 position = PepStart[iMissed] - Sequence.GetData();
01192 endposition = PepEnd[iMissed] - Sequence.GetData();
01193
01194
01195 ClearLadderCalc(NumMassAndMask[iMissed]);
01196
01197 OldMass = 0;
01198 NoMassMatch = true;
01199
01200
01201 for (iMod = 0; iMod < NumMassAndMask[iMissed]; iMod++) {
01202
01203
01204 if (SetMassAndMask(iMissed, iMod).Mass == OldMass &&
01205 NoMassMatch) continue;
01206 NoMassMatch = true;
01207 OldMass = SetMassAndMask(iMissed, iMod).Mass;
01208
01209
01210
01211 if (!SetEnzyme()->GetTopDown())
01212 im = SharedPeakSet->SetIntervalTree().IntervalsContaining(OldMass);
01213
01214 else
01215 im = SharedPeakSet->SetIntervalTree().AllIntervals();
01216
01217 for (; im; ++im ) {
01218 MassPeak = static_cast <const TMassPeak *> (im.GetValue().GetPointerOrNull());
01219
01220 Peaks = MassPeak->Peak;
01221
01222 NoMassMatch = false;
01223
01224 if (!GetLadderCalc(iMod)) {
01225 if (CreateLadders(Sequence.GetData(),
01226 iSearch,
01227 position,
01228 endposition,
01229 Masses,
01230 iMissed,
01231 AA,
01232 iMod,
01233 ModList[iMissed],
01234 NumMod[iMissed]) != 0) continue;
01235 SetLadderCalc(iMod) = true;
01236
01237 }
01238 else {
01239 TLadderMap::iterator Iter;
01240 SetLadderContainer().Begin(Iter);
01241 while(Iter != SetLadderContainer().SetLadderMap().end()) {
01242 (*(Iter->second))[iMod]->ClearHits();
01243 SetLadderContainer().Next(Iter);
01244 }
01245 }
01246
01247 if (UseRankScore) {
01248 {{
01249 CFastMutexGuard guard(PeaksExaminedMutex);
01250 Peaks->SetPeptidesExamined(MassPeak->Charge)++;
01251 }}
01252 }
01253 if (CompareLaddersTop(iMod,
01254 Peaks,
01255 MassPeak)
01256 ) {
01257 if (!UseRankScore) {
01258 {{
01259 CFastMutexGuard guard(PeaksExaminedMutex);
01260 Peaks->SetPeptidesExamined(MassPeak->Charge)++;
01261 }}
01262 }
01263 CompareLadders(iMod,
01264 Peaks,
01265 false,
01266 MassPeak);
01267 hits = 0;
01268 SetLadderContainer().Begin(Iter);
01269 while(Iter != SetLadderContainer().SetLadderMap().end()) {
01270 hits += (*(Iter->second))[iMod]->HitCount();
01271 SetLadderContainer().Next(Iter);
01272 }
01273
01274
01275 {{
01276 CFastMutexGuard guard(PeakSetMutex);
01277 if (hits >= SetSettings()->GetMinhit()) {
01278
01279 NewHit.SetHits() = hits;
01280 NewHit.SetCharge() = MassPeak->Charge;
01281
01282 if (Peaks->AddHit(NewHit, NewHitOut)) {
01283 NewHitOut->SetStart() = position;
01284 NewHitOut->SetStop() = endposition;
01285 NewHitOut->SetSeqIndex() = iSearch;
01286 NewHitOut->SetExpMass() = MassPeak->Mass;
01287
01288 NewHitOut->
01289 RecordMatches(SetLadderContainer(),
01290 iMod,
01291 Peaks,
01292 SetMassAndMask(iMissed, iMod).Mask,
01293 ModList[iMissed],
01294 NumMod[iMissed],
01295 PepStart[iMissed],
01296 SetSettings()->GetSearchctermproduct(),
01297 SetSettings()->GetSearchb1(),
01298 SetMassAndMask(iMissed, iMod).Mass
01299 );
01300 }
01301 }
01302 }}
01303 }
01304 }
01305 }
01306 }
01307 if (SetEnzyme()->GetNonSpecific()) {
01308 int NonSpecificMass(Masses[0] + EndMasses[0]);
01309 PartialLoop:
01310
01311
01312
01313
01314
01315
01316
01317
01318
01319
01320
01321 if (NonSpecificMass < MaxMZ &&
01322 SetEnzyme()->GetStop() < Sequence.GetData() + Sequence.GetLength() - 1 &&
01323 (SetSettings()->GetMaxnoenzyme() == 0 ||
01324 SetEnzyme()->GetStop() - PepStart[0] + 1 < SetSettings()->GetMaxnoenzyme())
01325 ) {
01326 SetEnzyme()->SetStop()++;
01327 NonSpecificMass += PrecursorIntMassArray[AA.GetMap()[*(SetEnzyme()->GetStop())]];
01328 }
01329
01330 else if ( PepStart[0] < Sequence.GetData() + Sequence.GetLength() -
01331 SetSettings()->GetMinnoenzyme()) {
01332 PepStart[0]++;
01333 SetEnzyme()->SetStop() = PepStart[0] + SetSettings()->GetMinnoenzyme() - 1;
01334
01335
01336 NonSpecificMass = 0;
01337 const char *iSeqChar;
01338 for (iSeqChar = PepStart[0]; iSeqChar <= SetEnzyme()->GetStop(); iSeqChar++)
01339 NonSpecificMass += PrecursorIntMassArray[AA.GetMap()[*iSeqChar]];
01340
01341 SequenceDone = false;
01342 }
01343 else SequenceDone = true;
01344
01345
01346
01347
01348 if (!SequenceDone && SetEnzyme()->GetCleaveNum() > 0 &&
01349 PepStart[0] != Sequence.GetData() &&
01350 SetEnzyme()->GetStop() != Sequence.GetData() + Sequence.GetLength() - 1 ) {
01351 if (!SetEnzyme()->CheckCleaveChar(PepStart[0]-1) &&
01352 !SetEnzyme()->CheckCleaveChar(SetEnzyme()->GetStop()))
01353 goto PartialLoop;
01354 }
01355
01356 PepEnd[0] = PepStart[0];
01357 }
01358 else {
01359 if (!SequenceDone) {
01360 int NumModCount;
01361 const char *OldSite;
01362 int NumModSitesCount;
01363
01364 for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
01365
01366 Masses[iMissed] = Masses[iMissed + 1];
01367
01368
01369
01370 NumModCount = 0;
01371 OldSite = 0;
01372 NumModSitesCount = 0;
01373 for (iMod = 0; iMod < NumMod[iMissed + 1]; iMod++) {
01374
01375 if (Modset->GetModType(ModList[iMissed + 1][iMod].GetEnum()) != eMSModType_modcp &&
01376 Modset->GetModType(ModList[iMissed + 1][iMod].GetEnum()) != eMSModType_modcpaa) {
01377 ModList[iMissed][NumModCount] = ModList[iMissed + 1][iMod];
01378 NumModCount++;
01379
01380 if (OldSite != ModList[iMissed + 1][iMod].GetSite() &&
01381 ModList[iMissed + 1][iMod].GetFixed() != 1) {
01382 NumModSitesCount++;
01383 OldSite = ModList[iMissed + 1][iMod].GetSite();
01384 }
01385 }
01386 }
01387 NumMod[iMissed] = NumModCount;
01388 NumModSites[iMissed] = NumModSitesCount;
01389
01390
01391 PepStart[iMissed] = PepStart[iMissed + 1];
01392 }
01393
01394
01395 PepEnd[Missed-1] += 1;
01396 PepStart[Missed-1] = PepEnd[Missed-1];
01397 }
01398 }
01399
01400 }
01401
01402
01403 }
01404
01405
01406 if (GetSettings()->IsSetTaxids() && !TaxInfo)
01407 ERR_POST(Warning <<
01408 "Taxonomically restricted search specified and no matching organisms found in sequence library. Did you use a sequence library with taxonomic information?");
01409
01410 }
01411 catch (NCBI_NS_STD::exception& e) {
01412 ERR_POST(Info << "Exception caught in CSearch::Search: " << e.what());
01413 throw;
01414 }
01415
01416
01417 }
01418
01419
01420
01421
01422
01423 void CSearch::AddModsToHit(CMSHits *Hit, CMSHit *MSHit)
01424 {
01425 int i;
01426 for (i = 0; i < MSHit->GetNumModInfo(); i++) {
01427
01428 if (MSHit->GetModInfo(i).GetIsFixed() == 1) continue;
01429 CRef< CMSModHit > ModHit(new CMSModHit);
01430 ModHit->SetSite() = MSHit->GetModInfo(i).GetSite();
01431 ModHit->SetModtype() = MSHit->GetModInfo(i).GetModEnum() ;
01432 Hit->SetMods().push_back(ModHit);
01433 }
01434 }
01435
01436
01437
01438
01439
01440
01441 void CSearch::AddIonsToHit(CMSHits *Hit, CMSHit *MSHit)
01442 {
01443 int i;
01444 for (i = 0; i < MSHit->GetHits(); i++) {
01445 CRef<CMSMZHit> IonHit(new CMSMZHit);
01446 IonHit->SetIon() = MSHit->GetHitInfo(i).GetIonSeries();
01447 IonHit->SetCharge() = MSHit->GetHitInfo(i).GetCharge();
01448 IonHit->SetNumber() = MSHit->GetHitInfo(i).GetNumber();
01449 IonHit->SetMz() = MSHit->GetHitInfo(i).GetMZ();
01450 Hit->SetMzhits().push_back(IonHit);
01451 }
01452 }
01453
01454
01455
01456
01457
01458
01459 void CSearch::MakeModString(string& seqstring, string& modseqstring, CMSHit *MSHit)
01460 {
01461 int i;
01462 modseqstring = seqstring;
01463 for (i = 0; i < MSHit->GetNumModInfo(); i++) {
01464 modseqstring += NStr::IntToString(MSHit->GetModInfo(i).GetSite()) + ":" +
01465 NStr::IntToString(MSHit->GetModInfo(i).GetModEnum()) + ",";
01466 }
01467 }
01468
01469
01470 void CSearch::CreateSequence(int Start,
01471 int Stop,
01472 string &seqstring,
01473 CSeqDBSequence &Sequence)
01474 {
01475 int iseq;
01476 seqstring.erase();
01477
01478 for (iseq = Start; iseq <= Stop; iseq++) {
01479 seqstring += UniqueAA[Sequence.GetData()[iseq]];
01480 }
01481 }
01482
01483
01484 void CSearch::SetResult(CRef<CMSPeakSet> PeakSet)
01485 {
01486
01487 double ThreshStart = GetSettings()->GetCutlo();
01488 double ThreshEnd = GetSettings()->GetCuthi();
01489 double ThreshInc = GetSettings()->GetCutinc();
01490 double Evalcutoff = GetSettings()->GetCutoff();
01491
01492 CMSPeak* Peaks;
01493
01494 TScoreList ScoreList;
01495 TScoreList::iterator iScoreList;
01496 CMSHit * MSHit;
01497
01498
01499 SetResponse()->SetDbversion(Getnumseq());
01500
01501
01502 SetOidSet().clear();
01503
01504 while(!PeakSet->GetPeaks().empty()) {
01505 Peaks = *(PeakSet->GetPeaks().begin());
01506
01507
01508 CRef< CMSHitSet > HitSet(null);
01509
01510
01511 if (GetIterative()) {
01512 HitSet = SetResponse()->FindHitSet(Peaks->GetNumber());
01513 if (HitSet.IsNull())
01514 ERR_POST(Warning << "unable to find matching hitset");
01515 }
01516
01517
01518 if (HitSet.IsNull()) {
01519 HitSet = new CMSHitSet;
01520 if (!HitSet) {
01521 ERR_POST(Error << "omssa: unable to allocate hitset");
01522 return;
01523 }
01524 HitSet->SetNumber(Peaks->GetNumber());
01525 HitSet->SetIds() = Peaks->GetName();
01526 SetResponse()->SetHitsets().push_back(HitSet);
01527 }
01528 HitSet->SetSettingid() = GetSettings()->GetSettingid();
01529
01530
01531 if (Peaks->GetError() == eMSHitError_notenuffpeaks) {
01532 _TRACE("empty set");
01533 HitSet->SetError(eMSHitError_notenuffpeaks);
01534 ScoreList.clear();
01535 delete *(PeakSet->GetPeaks().begin());
01536 PeakSet->GetPeaks().pop_front();
01537 continue;
01538 }
01539
01540 double Threshold, MinThreshold(ThreshStart), MinEval(1000000.0L);
01541 if (!UseRankScore) {
01542
01543 for (Threshold = ThreshStart; Threshold <= ThreshEnd;
01544 Threshold += ThreshInc) {
01545 CalcNSort(ScoreList, Threshold, Peaks);
01546 if (!ScoreList.empty()) {
01547 _TRACE("Threshold = " << Threshold <<
01548 "EVal = " << ScoreList.begin()->first);
01549 }
01550 if (!ScoreList.empty() && ScoreList.begin()->first < MinEval) {
01551 MinEval = ScoreList.begin()->first;
01552 MinThreshold = Threshold;
01553 }
01554 ScoreList.clear();
01555 }
01556 }
01557 _TRACE("Min Threshold = " << MinThreshold);
01558 CalcNSort(ScoreList,
01559 MinThreshold,
01560 Peaks);
01561
01562
01563 if (GetIterative() && !ScoreList.empty()) {
01564 if ((GetSettings()->GetIterativesettings().GetReplacethresh() == 0.0 &&
01565 (HitSet->GetHits().empty() ||
01566 ScoreList.begin()->first <= (*HitSet->GetHits().begin())->GetEvalue())) ||
01567 (GetSettings()->GetIterativesettings().GetReplacethresh() != 0.0 &&
01568 ScoreList.begin()->first <= GetSettings()->GetIterativesettings().GetReplacethresh())) {
01569 HitSet->SetHits().clear();
01570 }
01571 else {
01572 ScoreList.clear();
01573 delete *(PeakSet->GetPeaks().begin());
01574 PeakSet->GetPeaks().pop_front();
01575 continue;
01576 }
01577 }
01578
01579 const CMSSearchSettings::TTaxids& Tax = GetSettings()->GetTaxids();
01580 CMSSearchSettings::TTaxids::const_iterator iTax;
01581
01582
01583 map <string, CMSHits * > PepDone;
01584
01585 for (iScoreList = ScoreList.begin();
01586 iScoreList != ScoreList.end();
01587 iScoreList++) {
01588
01589 double Score = iScoreList->first;
01590 if (Score > Evalcutoff) continue;
01591 CMSHits * Hit;
01592 CMSPepHit * Pephit;
01593
01594 MSHit = iScoreList->second;
01595
01596 CBlast_def_line_set::Tdata::const_iterator iDefLine;
01597 CRef<CBlast_def_line_set> Hdr = rdfp->GetHdr(MSHit->GetSeqIndex());
01598
01599 for (iDefLine = Hdr->Get().begin();
01600 iDefLine != Hdr->Get().end();
01601 ++iDefLine) {
01602 if (GetSettings()->IsSetTaxids()) {
01603 for (iTax = Tax.begin(); iTax != Tax.end(); iTax++) {
01604 if ((*iDefLine)->GetTaxid() == *iTax) goto TaxContinue2;
01605 }
01606 continue;
01607 }
01608 TaxContinue2:
01609 string seqstring, modseqstring;
01610
01611
01612 SetOidSet().insert(MSHit->GetSeqIndex());
01613
01614 CSeqDBSequence Sequence(rdfp.GetPointer(), MSHit->GetSeqIndex());
01615
01616 string tempstartstop;
01617 CreateSequence(MSHit->GetStart(), MSHit->GetStop(),
01618 seqstring, Sequence);
01619 MakeModString(seqstring, modseqstring, MSHit);
01620
01621 if (PepDone.find(modseqstring) != PepDone.end()) {
01622 Hit = PepDone[modseqstring];
01623 }
01624 else {
01625 Hit = new CMSHits;
01626 Hit->SetTheomass(MSHit->GetTheoreticalMass());
01627 Hit->SetPepstring(seqstring);
01628
01629 if (MSHit->GetStart() > 0) {
01630 tempstartstop = UniqueAA[Sequence.GetData()[MSHit->GetStart()-1]];
01631 Hit->SetPepstart(tempstartstop);
01632 }
01633 else Hit->SetPepstart("");
01634
01635
01636 if (MSHit->GetStop() < Sequence.GetLength() - 1) {
01637 tempstartstop = UniqueAA[Sequence.GetData()[MSHit->GetStop()+1]];
01638 Hit->SetPepstop(tempstartstop);
01639 }
01640 else Hit->SetPepstop("");
01641
01642 if (isnan(Score)) {
01643 ERR_POST(Info << "Not a number in hitset " <<
01644 HitSet->GetNumber() <<
01645 " peptide " << modseqstring);
01646 Score = kHighEval;
01647 }
01648 else if (!finite(Score)) {
01649 ERR_POST(Info << "Infinite number in hitset " <<
01650 HitSet->GetNumber() <<
01651 " peptide " << modseqstring);
01652 Score = kHighEval;
01653 }
01654 Hit->SetEvalue(Score);
01655 Hit->SetPvalue(Score/Peaks->
01656 GetPeptidesExamined(MSHit->
01657 GetCharge()));
01658 Hit->SetCharge(MSHit->GetCharge());
01659 Hit->SetMass(MSHit->GetExpMass());
01660
01661 AddModsToHit(Hit, MSHit);
01662
01663 AddIonsToHit(Hit, MSHit);
01664 CRef<CMSHits> hitref(Hit);
01665 HitSet->SetHits().push_back(hitref);
01666 PepDone[modseqstring] = Hit;
01667
01668 }
01669
01670 Pephit = new CMSPepHit;
01671
01672 if ((*iDefLine)->CanGetSeqid()) {
01673
01674 ITERATE(list< CRef<CSeq_id> >, seqid, (*iDefLine)->GetSeqid()) {
01675 if ((**seqid).IsGi()) {
01676 Pephit->SetGi((**seqid).GetGi());
01677 break;
01678 }
01679 }
01680
01681 Pephit->SetAccession(
01682 FindBestChoice((*iDefLine)->GetSeqid(), CSeq_id::Score)->
01683 GetSeqIdString(false));
01684 }
01685
01686
01687 Pephit->SetStart(MSHit->GetStart());
01688 Pephit->SetStop(MSHit->GetStop());;
01689 Pephit->SetDefline((*iDefLine)->GetTitle());
01690 Pephit->SetProtlength(Sequence.GetLength());
01691 Pephit->SetOid(MSHit->GetSeqIndex());
01692 CRef<CMSPepHit> pepref(Pephit);
01693 Hit->SetPephits().push_back(pepref);
01694
01695 }
01696 }
01697 ScoreList.clear();
01698 delete *(PeakSet->GetPeaks().begin());
01699 PeakSet->GetPeaks().pop_front();
01700 }
01701
01702 WriteBioseqs();
01703 }
01704
01705
01706 void CSearch::WriteBioseqs(void)
01707 {
01708 ITERATE(CMSResponse::TOidSet, iOids, GetOidSet()) {
01709 CConstRef <CMSBioseq::TSeq> Bioseq(SetResponse()->SetBioseqs().GetBioseqByOid(*iOids));
01710 if (Bioseq.IsNull()) {
01711 CRef <CMSBioseq> MSBioseq (new CMSBioseq);
01712 MSBioseq->SetSeq(*rdfp->GetBioseq(*iOids));
01713 MSBioseq->SetOid() = *iOids;
01714 SetResponse()->SetBioseqs().Set().push_back(MSBioseq);
01715 }
01716 }
01717 }
01718
01719
01720 CMSMatchedPeakSet * CSearch::PepCharge(CMSHit& Hit,
01721 int SeriesCharge,
01722 int Ion,
01723 int minintensity,
01724 int Which,
01725 CMSPeak *Peaks,
01726 int Maxproductions)
01727 {
01728 int iii;
01729 int lowmz(0), highmz;
01730
01731 unsigned Size = Hit.GetStop() - Hit.GetStart();
01732 if (Maxproductions == 0) Maxproductions = kMSLadderMax;
01733
01734
01735
01736 EMSTerminalBias TerminalBias(eMSNoTerminalBias);
01737
01738 for(iii = 0; iii < GetEnzyme()->GetCleaveNum(); ++iii) {
01739
01740 if(GetEnzyme()->GetCleaveOffset()[iii] == 1 ) {
01741
01742 if(TerminalBias == eMSNTerminalBias || TerminalBias == eMSNoTerminalBias)
01743 TerminalBias = eMSNTerminalBias;
01744 else
01745 TerminalBias = eMSBothTerminalBias;
01746 }
01747
01748 else if (GetEnzyme()->GetCleaveOffset()[iii] == 0 ) {
01749
01750 if(TerminalBias == eMSCTerminalBias || TerminalBias == eMSNoTerminalBias)
01751 TerminalBias = eMSCTerminalBias;
01752 else
01753 TerminalBias = eMSBothTerminalBias;
01754 }
01755 }
01756
01757
01758
01759 CSeqDBSequence Sequence(rdfp.GetPointer(), Hit.GetSeqIndex());
01760 string seqstring;
01761 CreateSequence(Hit.GetStart(),
01762 Hit.GetStop(),
01763 seqstring,
01764 Sequence);
01765
01766 bool NoProline = find(GetSettings()->GetNoprolineions().begin(),
01767 GetSettings()->GetNoprolineions().end(),
01768 Ion) !=
01769 GetSettings()->GetNoprolineions().end();
01770
01771 Hit.FillMatchedPeaks(SeriesCharge,
01772 Ion,
01773 Size,
01774 minintensity,
01775 false,
01776 TerminalBias,
01777 SeriesCharge*Maxproductions
01778
01779 ,
01780 seqstring,
01781 NoProline
01782
01783 );
01784 CMSMatchedPeakSet *MatchPeakSet = Hit.SetIonSeriesMatchMap().SetSeries(SeriesCharge, Ion);
01785 TMatchedPeakSet::iterator bin, prev, next;
01786
01787 for ( bin = MatchPeakSet->SetMatchedPeakSet().begin(); bin != MatchPeakSet->SetMatchedPeakSet().end(); ++bin) {
01788
01789 if(bin != MatchPeakSet->SetMatchedPeakSet().begin()) {
01790 lowmz = ((*bin)->GetMZ() + (*prev)->GetMZ())/2;
01791 }
01792 next = bin;
01793 ++next;
01794 if(next != MatchPeakSet->SetMatchedPeakSet().end()) {
01795 highmz = ((*bin)->GetMZ() + (*next)->GetMZ())/2;
01796 }
01797 else highmz = Hit.GetExpMass()/SeriesCharge;
01798 (*bin)->SetExpIons() =
01799 Peaks->CountMZRange(lowmz,
01800 highmz,
01801 minintensity,
01802 Which) /
01803 (double)(highmz - lowmz);
01804
01805 (*bin)->SetMassTolerance() = (Peaks->GetTol())/SeriesCharge;
01806 prev = bin;
01807 }
01808 return MatchPeakSet;
01809 }
01810
01811
01812
01813
01814 void CSearch::MatchAndSort(CMSPeak * Peaks,
01815 CMSHit& Hit,
01816 EMSPeakListTypes Which,
01817 int minintensity,
01818 const TSeriesChargePairList::const_iterator &iPairList,
01819 list<CMSMatchedPeakSet *> &Forward,
01820 list<CMSMatchedPeakSet *> &Backward)
01821 {
01822 CMSMatchedPeakSet * current;
01823
01824 current = PepCharge(Hit,
01825 iPairList->first,
01826 iPairList->second,
01827 minintensity,
01828 Which,
01829 Peaks,
01830 GetSettings()->GetMaxproductions());
01831
01832 if (kIonDirection[iPairList->second] == 1)
01833 Forward.push_back(current);
01834 else if (kIonDirection[iPairList->second] == -1)
01835 Backward.push_back(current);
01836 }
01837
01838
01839 void CSearch::DoubleCompare(list<CMSMatchedPeakSet *> &SingleForward,
01840 list<CMSMatchedPeakSet *> &SingleBackward,
01841 list<CMSMatchedPeakSet *> &Double,
01842 bool DoubleForward)
01843 {
01844 list<CMSMatchedPeakSet *>::iterator iDouble, iFront, iBack;
01845
01846 for (iDouble = Double.begin(); iDouble != Double.end(); ++iDouble) {
01847
01848 for(iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
01849 (*iDouble)->Compare(*iFront, DoubleForward);
01850 }
01851
01852 for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
01853 (*iDouble)->Compare(*iBack, !DoubleForward);
01854 }
01855 }
01856 }
01857
01858
01859 void CSearch::CalcNSort(TScoreList& ScoreList,
01860 double Threshold,
01861 CMSPeak* Peaks
01862 )
01863 {
01864 int iCharges;
01865 int iHitList;
01866 int Tophitnum = GetSettings()->GetTophitnum();
01867
01868 for (iCharges = 0; iCharges < Peaks->GetNumCharges(); iCharges++) {
01869
01870 TMSHitList& HitList = Peaks->GetHitList(iCharges);
01871 for (iHitList = 0; iHitList != Peaks->GetHitListIndex(iCharges);
01872 iHitList++) {
01873
01874 int tempMass = HitList[iHitList].GetExpMass();
01875 int Charge = HitList[iHitList].GetCharge();
01876 EMSPeakListTypes Which = Peaks->GetWhich(Charge);
01877
01878
01879
01880
01881
01882 int minintensity = static_cast <int> (Threshold * Peaks->GetMaxI(Which));
01883
01884
01885 TSeriesChargePairList::const_iterator iPairList;
01886 list <CMSMatchedPeakSet *> SingleForward, SingleBackward, DoubleForward, DoubleBackward;
01887
01888 for (iPairList = SetLadderContainer().GetSeriesChargePairList().begin();
01889 iPairList != SetLadderContainer().GetSeriesChargePairList().end();
01890 ++iPairList) {
01891
01892
01893 if (iPairList->first == 1) {
01894 MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
01895 iPairList, SingleForward, SingleBackward);
01896 }
01897 else if (Charge >= Peaks->GetConsiderMult()) {
01898 MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
01899 iPairList, DoubleForward, DoubleBackward);
01900 }
01901 }
01902
01903 list <CMSMatchedPeakSet *> ::iterator iFront, iBack, iDouble;
01904
01905 if(GetSettings()->GetNocorrelationscore() == 0) {
01906
01907 for (iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
01908 for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
01909 (*iFront)->Compare(*iBack, false);
01910 }
01911 }
01912 if (Charge >= Peaks->GetConsiderMult()) {
01913 DoubleCompare(SingleForward, SingleBackward, DoubleForward, true);
01914 DoubleCompare(SingleForward, SingleBackward, DoubleBackward, false);
01915 }
01916 }
01917
01918
01919 double adjust = HitList[iHitList].GetMaxDelta() /
01920 MSSCALE2INT(GetSettings()->GetMsmstol());
01921 if(adjust < GetSettings()->GetAutomassadjust())
01922 adjust = GetSettings()->GetAutomassadjust();
01923 if(adjust > 1.0)
01924 adjust = 1.0;
01925 double a =
01926 HitList[iHitList].CalcPoissonMean(GetSettings()->GetProbfollowingion(),
01927 GetEnzyme()->GetCleaveNum(),
01928 GetSettings()->GetProbfollowingion(),
01929 19,
01930 adjust);
01931
01932 if (a == 0) {
01933
01934 continue;
01935 }
01936 if (a < 0 ) {
01937 _TRACE("poisson mean is < 0");
01938 continue;
01939 }
01940 else if (isnan(a) || !finite(a)) {
01941 ERR_POST(Info << "poisson mean is NaN or is infinite");
01942 continue;
01943 }
01944
01945
01946 if (HitList[iHitList].GetHits() < a) continue;
01947
01948 double pval;
01949 int N;
01950 N = Peaks->GetPeptidesExamined(Charge) +
01951 (GetSettings()->GetZdep() * (Charge - 1) + 1) *
01952 GetSettings()->GetPseudocount();
01953
01954 if (!UseRankScore) {
01955 int High, Low, NumPeaks, NumLo, NumHi;
01956 Peaks->HighLow(High, Low, NumPeaks, tempMass, Charge, Threshold, NumLo, NumHi);
01957
01958 double TopHitProb = ((double)Tophitnum)/NumPeaks;
01959
01960 if (TopHitProb > 1.0) TopHitProb = 1.0;
01961 int numhits = HitList[iHitList].CountHits(Threshold, Peaks->GetMaxI(Which));
01962 double Normal = HitList[iHitList].CalcNormalTopHit(a, TopHitProb);
01963 pval = HitList[iHitList].CalcPvalueTopHit(a, numhits, Normal, TopHitProb);
01964 }
01965 else {
01966 pval = HitList[iHitList].CalcPvalue(a, HitList[iHitList].CountHits(Threshold, Peaks->GetMaxI(Which)));
01967 }
01968 if (UseRankScore && !GetPoissonOnly()) {
01969 if (HitList[iHitList].GetM() != 0.0) {
01970 double Perf = HitList[iHitList].CalcRankProb();
01971 _TRACE( "Perf=" << Perf << " pval=" << pval << " N=" << N );
01972 pval *= Perf;
01973 pval *= 10.0;
01974 }
01975 else ERR_POST(Info << "M is zero");
01976 }
01977 double eval = 3e3 * pval * N;
01978
01979 ScoreList.insert(pair<const double, CMSHit *>
01980 (eval, &(HitList[iHitList])));
01981 }
01982 }
01983 }
01984
01985 CSearch::~CSearch()
01986 {
01987 }
01988
01989
01990
01991