src/algo/ms/omssa/omssa.cpp

Go to the documentation of this file.
00001 /* 
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the authors in any work or product based on this material.
00023 *
00024 * ===========================================================================
00025 *
00026 * Authors:  Lewis Y. Geer, Douglas J. Slotta
00027 *  
00028 * File Description:
00029 *    code to do the ms/ms search and score matches
00030 *
00031 * ===========================================================================
00032 */
00033 
00034 #include <ncbi_pch.hpp>
00035 
00036 #include <util/miscmath.h>
00037 #include <algo/blast/core/ncbi_math.h>
00038 #include <util/compress/bzip2.hpp> 
00039 
00040 
00041 
00042 #include "SpectrumSet.hpp"
00043 #include "omssa.hpp"
00044 #include "pepxml.hpp"
00045 
00046 #include <fstream>
00047 #include <string>
00048 #include <list>
00049 #include <deque>
00050 #include <algorithm>
00051 
00052 #include <math.h>
00053 
00054 USING_NCBI_SCOPE;
00055 USING_SCOPE(objects);
00056 USING_SCOPE(omssa);
00057 
00058 
00059 
00060 int 
00061 CSearchHelper::ReadModFiles(const string& ModFileName,
00062                           const string& UserModFileName,
00063                           const string& Path,
00064                           CRef <CMSModSpecSet> Modset)
00065 {  
00066     CDirEntry DirEntry(Path);
00067     string FileName;
00068     try {
00069         if(ModFileName == "")
00070             ERR_POST(Critical << "modification filename is blank!");
00071         if(!CDirEntry::IsAbsolutePath(ModFileName))
00072             FileName = DirEntry.GetDir() + ModFileName;
00073         else FileName = ModFileName;
00074         auto_ptr<CObjectIStream> 
00075             modsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
00076         if(modsin->fail()) {        
00077             ERR_POST(Fatal << "ommsacl: unable to open modification file" << 
00078                      FileName);
00079             return 1;
00080         }
00081         modsin->Read(ObjectInfo(*Modset));
00082         modsin->Close();
00083     
00084     } catch (NCBI_NS_STD::exception& e) {
00085         ERR_POST(Fatal << "Unable to read modification file " <<
00086                  FileName << " with error " << e.what());
00087     }
00088 
00089     // read in user mod file, if any
00090     if(UserModFileName != "") {
00091         try {
00092             CRef <CMSModSpecSet> UserModset(new CMSModSpecSet);
00093             if(!CDirEntry::IsAbsolutePath(UserModFileName))
00094                 FileName = DirEntry.GetDir() + UserModFileName;
00095             else FileName = UserModFileName;
00096             auto_ptr<CObjectIStream> 
00097              usermodsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
00098             if(usermodsin->fail()) {        
00099                  ERR_POST(Warning << "ommsacl: unable to open user modification file" << 
00100                           ModFileName);
00101                  return 0;
00102              }
00103             usermodsin->Read(ObjectInfo(*UserModset));
00104             usermodsin->Close();
00105             Modset->Append(*UserModset);
00106         } catch (NCBI_NS_STD::exception& e) {
00107              ERR_POST(Fatal << "Unable to read user modification file " <<
00108                       FileName << " with error " << e.what());
00109         }
00110     }
00111     return 0;
00112 }
00113 
00114 
00115 void 
00116 CSearchHelper::ReadTaxFile(string& Filename, TTaxNameMap& TaxNameMap)
00117 {
00118     ifstream taxnames(Filename.c_str());
00119     string line;
00120     list<string> linelist;
00121     list<string>::iterator ilist;
00122     while(taxnames && !taxnames.eof()) {
00123         getline(taxnames, line);
00124         linelist.clear();
00125         NStr::Split(line, ",", linelist);
00126         if(!linelist.empty()) {
00127             ilist = linelist.begin();
00128             ilist++;
00129             TaxNameMap[NStr::StringToInt(*ilist)] = *(linelist.begin());
00130         }
00131     }
00132 }   
00133 
00134 void 
00135 CSearchHelper::ConditionXMLStream(CObjectOStreamXml *xml_out)
00136 {
00137     if(!xml_out) return;
00138     // turn on xml schema
00139     xml_out->SetReferenceSchema();
00140     // turn off names in named integers
00141     xml_out->SetWriteNamedIntegersByValue(true);
00142 }
00143 
00144 
00145 
00146 int 
00147 CSearchHelper::ReadFile(const string& Filename,
00148                      const EMSSpectrumFileType FileType,
00149                      CMSSearch& MySearch)
00150 {
00151     CRef <CMSRequest> Request (new CMSRequest);
00152     MySearch.SetRequest().push_back(Request);
00153 //    CRef <CMSResponse> Response (new CMSResponse);
00154 //    MySearch.SetResponse().push_back(Response);
00155 
00156     CNcbiIfstream PeakFile(Filename.c_str());
00157     if(!PeakFile) {
00158         ERR_POST(Fatal <<" omssacl: not able to open spectrum file " <<
00159                  Filename);
00160         return 1;
00161     }
00162 
00163     CRef <CSpectrumSet> SpectrumSet(new CSpectrumSet);
00164     (*MySearch.SetRequest().begin())->SetSpectra(*SpectrumSet);
00165     return SpectrumSet->LoadFile(FileType, PeakFile);
00166 }   
00167 
00168 int 
00169 CSearchHelper::ReadSearchRequest(const string& Filename,
00170                                  const ESerialDataFormat DataFormat,
00171                                  CMSSearch& MySearch)
00172 {   
00173     CRef <CMSRequest> Request (new CMSRequest);
00174     MySearch.SetRequest().push_back(Request);
00175 //    CRef <CMSResponse> Response (new CMSResponse);
00176 //    MySearch.SetResponse().push_back(Response);
00177 
00178     auto_ptr<CObjectIStream> 
00179         in(CObjectIStream::Open(Filename.c_str(), DataFormat));
00180     in->Open(Filename.c_str(), DataFormat);
00181     if(in->fail()) {        
00182         ERR_POST(Warning << "omssacl: unable to search file" << 
00183                  Filename);
00184         return 1;
00185     }
00186     in->Read(ObjectInfo(*Request));
00187     in->Close();
00188     return 0;
00189 }
00190 
00191 
00192 int 
00193 CSearchHelper::ReadCompleteSearch(const string& Filename,
00194                                const ESerialDataFormat DataFormat,
00195                                bool bz2,
00196                                CMSSearch& MySearch)
00197 {
00198     auto_ptr <CNcbiIfstream> raw_in;
00199     auto_ptr <CCompressionIStream> compress_in;
00200     auto_ptr <CObjectIStream> in;
00201 
00202     if( bz2 ) {
00203         raw_in.reset(new CNcbiIfstream(Filename.c_str()));
00204         compress_in.reset( new CCompressionIStream (*raw_in, 
00205                                                     new CBZip2StreamDecompressor(), 
00206                                                     CCompressionStream::fOwnProcessor)); 
00207         in.reset(CObjectIStream::Open(DataFormat, *compress_in)); 
00208     }
00209     else {
00210         in.reset(CObjectIStream::Open(Filename.c_str(), DataFormat));
00211     }
00212     if(in->fail()) {        
00213         ERR_POST(Warning << "omssacl: unable to search file" << 
00214                  Filename);
00215         return 1;
00216     }
00217     in->Read(ObjectInfo(MySearch));
00218     in->Close();
00219     return 0;
00220 }
00221 
00222 
00223 int 
00224 CSearchHelper::LoadAnyFile(CMSSearch& MySearch, 
00225                            CConstRef <CMSInFile> InFile,
00226                            bool* SearchEngineIterative)
00227 {
00228     string Filename(InFile->GetInfile());
00229     EMSSpectrumFileType DataFormat =
00230         static_cast <EMSSpectrumFileType> (InFile->GetInfiletype());
00231                                
00232     switch (DataFormat) {
00233     case eMSSpectrumFileType_dta:
00234     case eMSSpectrumFileType_dtablank:
00235     case eMSSpectrumFileType_dtaxml:
00236     case eMSSpectrumFileType_pkl:
00237     case eMSSpectrumFileType_mgf:
00238     return CSearchHelper::ReadFile(Filename, DataFormat, MySearch);
00239     break;
00240     case eMSSpectrumFileType_oms:
00241     if(SearchEngineIterative) *SearchEngineIterative = true;
00242     return CSearchHelper::ReadCompleteSearch(Filename, eSerial_AsnBinary, false, MySearch);
00243     break;
00244     case eMSSpectrumFileType_omx:
00245     if(SearchEngineIterative) *SearchEngineIterative = true;
00246     return CSearchHelper::ReadCompleteSearch(Filename, eSerial_Xml, false, MySearch);
00247     break;
00248     case eMSSpectrumFileType_xml:
00249     return CSearchHelper::ReadSearchRequest(Filename, eSerial_Xml, MySearch);
00250     break;
00251     case eMSSpectrumFileType_omxbz2 :
00252     return CSearchHelper::ReadCompleteSearch(Filename, eSerial_Xml, true, MySearch);
00253     break;
00254     case eMSSpectrumFileType_asc:
00255     case eMSSpectrumFileType_pks:
00256     case eMSSpectrumFileType_sciex:
00257     case eMSSpectrumFileType_unknown:
00258     default:
00259         break;
00260     }
00261     return 1;  // not supported
00262 }
00263 
00264 
00265 void CSearchHelper::SaveOneFile(CMSSearch &MySearch,
00266                                 const string Filename, 
00267                                 ESerialDataFormat FileFormat,
00268                                 bool IncludeRequest,
00269                                 bool bz2) 
00270 {
00271     auto_ptr <CNcbiOfstream> raw_out;
00272     auto_ptr <CCompressionOStream> compress_out;
00273     auto_ptr <CObjectOStream> txt_out;
00274 
00275     if( bz2 ) {
00276         raw_out.reset(new CNcbiOfstream(Filename.c_str()));
00277         compress_out.reset( new CCompressionOStream (*raw_out, 
00278                                                     new CBZip2StreamCompressor(), 
00279                                                     CCompressionStream::fOwnProcessor)); 
00280         txt_out.reset(CObjectOStream::Open(FileFormat, *compress_out)); 
00281     }
00282     else {
00283         txt_out.reset(CObjectOStream::Open(Filename.c_str(), FileFormat));
00284     }
00285 
00286     if(FileFormat == eSerial_Xml) {
00287         CObjectOStreamXml *xml_out = dynamic_cast <CObjectOStreamXml *> (txt_out.get());
00288         CSearchHelper::ConditionXMLStream(xml_out);
00289     }
00290     if(IncludeRequest)
00291         txt_out->Write(ObjectInfo(MySearch));
00292     else
00293         txt_out->Write(ObjectInfo(**MySearch.SetResponse().begin()));
00294 }   
00295 
00296 
00297 int 
00298 CSearchHelper::SaveAnyFile(CMSSearch& MySearch, 
00299                            CMSSearchSettings::TOutfiles OutFiles,
00300                            CRef <CMSModSpecSet> Modset)
00301 {
00302     CMSSearchSettings::TOutfiles::const_iterator iOutFile;
00303 
00304     for(iOutFile = OutFiles.begin(); iOutFile != OutFiles.end(); ++iOutFile) {
00305         string Filename((*iOutFile)->GetOutfile());
00306         EMSSerialDataFormat DataFormat =
00307             static_cast <EMSSerialDataFormat> ((*iOutFile)->GetOutfiletype());
00308         ESerialDataFormat FileFormat(eSerial_AsnText);
00309 
00310         auto_ptr <CObjectOStream> txt_out;
00311         if(DataFormat == eMSSerialDataFormat_asntext)
00312             FileFormat = eSerial_AsnText;
00313         if(DataFormat == eMSSerialDataFormat_asnbinary)
00314               FileFormat = eSerial_AsnBinary;
00315         if(DataFormat == eMSSerialDataFormat_xml)
00316                FileFormat = eSerial_Xml;
00317         if(DataFormat == eMSSerialDataFormat_xmlbz2)
00318                 FileFormat = eSerial_Xml;
00319 
00320         switch (DataFormat) {
00321         case eMSSerialDataFormat_asntext:
00322         case eMSSerialDataFormat_asnbinary:
00323         case eMSSerialDataFormat_xml:
00324         CSearchHelper::SaveOneFile(MySearch,
00325                                    Filename, 
00326                                    FileFormat, 
00327                                    (*iOutFile)->GetIncluderequest(),
00328                                    false);
00329         break;
00330         case eMSSerialDataFormat_xmlbz2:
00331         CSearchHelper::SaveOneFile(MySearch,
00332                                    Filename, 
00333                                    FileFormat, 
00334                                    (*iOutFile)->GetIncluderequest(),
00335                                    true);
00336         break;
00337         case eMSSerialDataFormat_pepxml:
00338         {
00339             CPepXML outPepXML;
00340             outPepXML.ConvertFromOMSSA(MySearch, Modset, Filename, Filename);
00341             auto_ptr<CObjectOStream> file_out(CObjectOStream::Open(Filename, eSerial_Xml));
00342             *file_out << outPepXML;
00343         }
00344         break;
00345         case eMSSerialDataFormat_csv:    
00346         {
00347             CNcbiOfstream oscsv;
00348             oscsv.open(Filename.c_str());
00349             (*MySearch.SetResponse().begin())->PrintCSV(oscsv, Modset);
00350             oscsv.close();
00351         }
00352         break;
00353         case eMSSerialDataFormat_none:
00354         default:
00355         {
00356             ERR_POST(Error << "Unknown output file format " << DataFormat);
00357         }
00358         return 1;
00359         break;
00360         }
00361     }
00362     return 0;
00363 }
00364 
00365 void 
00366 CSearchHelper::ValidateSearchSettings(CRef<CMSSearchSettings> &Settings)
00367 {
00368     list <string> ValidError;
00369     if(Settings->Validate(ValidError) != 0) {
00370         list <string>::iterator iErr;
00371         for(iErr = ValidError.begin(); iErr != ValidError.end(); iErr++)
00372         ERR_POST(Warning << *iErr);
00373         ERR_POST(Fatal << "Unable to validate settings");
00374     }
00375 }
00376 
00377 
00378 void 
00379 CSearchHelper::CreateSearchSettings(string FileName,
00380                                     CRef<CMSSearchSettings> &Settings)
00381 {
00382     if(FileName != "" ) {
00383         try {
00384             auto_ptr<CObjectIStream> 
00385                 paramsin(CObjectIStream::Open(FileName.c_str(), eSerial_Xml));
00386             if(paramsin->fail()) {      
00387                 ERR_POST(Fatal << "ommsacl: unable to open parameter file" << 
00388                          FileName);
00389                 return;
00390             }
00391             paramsin->Read(ObjectInfo(*Settings));
00392             paramsin->Close();
00393 
00394         } catch (NCBI_NS_STD::exception& e) {
00395             ERR_POST(Fatal << "Unable to read parameter file " <<
00396                      FileName << " with error " << e.what());
00397         }
00398     }
00399 }
00400 
00401 
00402 
00403 
00404 /////////////////////////////////////////////////////////////////////////////
00405 //
00406 //  CSearch::
00407 //
00408 //  Performs the ms/ms search
00409 //
00410 
00411 
00412 CSearch::CSearch(int tNum): 
00413 UseRankScore(false),
00414 Iterative(false),
00415 RestrictedSearch(false)
00416 {
00417     ThreadNum = tNum;
00418 }
00419 
00420 
00421 void CSearch::ResetGlobals(void)
00422 {
00423     iSearchGlobal = -1;
00424     MaxMZ = 0;
00425     SharedPeakSet.Reset(0);
00426 }
00427 
00428 
00429 int CSearch::InitBlast(const char *blastdb, bool use_mmap)
00430 {
00431     if (!blastdb) return 0;
00432     rdfp.Reset(new CSeqDB(blastdb, CSeqDB::eProtein, 
00433     0, 0, use_mmap));
00434     numseq = rdfp->GetNumOIDs();
00435     return 0;   
00436 }
00437 
00438 
00439 // create the ladders from sequence
00440 
00441 int CSearch::CreateLadders(const char *Sequence,
00442                            int iSearch,
00443                            int position,
00444                            int endposition,
00445                            int *Masses, 
00446                            int iMissed,
00447                            CAA& AA, 
00448                            int iMod,
00449                            CMod ModList[],
00450                            int NumMod)
00451 {
00452     TLadderMap::iterator Iter;
00453     SetLadderContainer().Begin(Iter);
00454     while(Iter != SetLadderContainer().SetLadderMap().end()) {
00455         bool NoProline = find(GetSettings()->GetNoprolineions().begin(),
00456                               GetSettings()->GetNoprolineions().end(),
00457                               CMSMatchedPeakSetMap::Key2Series(Iter->first)) != 
00458             GetSettings()->GetNoprolineions().end();
00459         if (!(*(Iter->second))[iMod]->
00460             CreateLadder(CMSMatchedPeakSetMap::Key2Series(Iter->first),
00461                          CMSMatchedPeakSetMap::Key2Charge(Iter->first),
00462                          Sequence,
00463                          iSearch,
00464                          position,
00465                          endposition,
00466                          Masses[iMissed], 
00467                          MassArray,
00468                          AA,
00469                          SetMassAndMask(iMissed, iMod).Mask,
00470                          ModList,
00471                          NumMod,
00472                          *SetSettings(),
00473                          NoProline
00474                          )) return 1;
00475         SetLadderContainer().Next(Iter);
00476     }
00477 
00478     return 0;
00479 }
00480 
00481 
00482 // compare ladders to experiment
00483 int CSearch::CompareLadders(int iMod,
00484                             CMSPeak *Peaks,
00485                             bool OrLadders,
00486                             const TMassPeak *MassPeak)
00487 {
00488     EMSPeakListTypes Which = Peaks->GetWhich(MassPeak->Charge);
00489 
00490     int ChargeLimitLo(0), ChargeLimitHi(0);
00491     if (MassPeak) {
00492         if(MassPeak->Charge < Peaks->GetConsiderMult()) { 
00493             ChargeLimitLo = 1;
00494             ChargeLimitHi = 1;
00495         }
00496         else {
00497             ChargeLimitLo = 0;
00498             ChargeLimitHi = 0;
00499         }
00500     }
00501 
00502     TLadderMap::iterator Iter;
00503     SetLadderContainer().Begin(Iter, ChargeLimitLo, ChargeLimitHi);
00504     vector<bool> usedPeaks(Peaks->SetPeakLists()[Which]->GetNum(), false);
00505     while(Iter != SetLadderContainer().SetLadderMap().end()) {
00506         Peaks->CompareSortedRank(*((*(Iter->second))[iMod]), Which, usedPeaks);
00507         SetLadderContainer().Next(Iter, ChargeLimitLo, ChargeLimitHi);
00508     }
00509     return 0;
00510 }
00511 
00512 
00513 // compare ladders to experiment
00514 bool CSearch::CompareLaddersTop(int iMod,
00515                                 CMSPeak *Peaks,
00516                                 const TMassPeak *MassPeak)
00517 {
00518     int ChargeLimitLo(0), ChargeLimitHi(0);
00519     if (MassPeak) {
00520         if(MassPeak->Charge < Peaks->GetConsiderMult()) { 
00521             ChargeLimitLo = 1;
00522             ChargeLimitHi = 1;
00523         }
00524         else {
00525             ChargeLimitLo = 0;
00526             ChargeLimitHi = 0;
00527         }
00528     }
00529 
00530     TLadderMap::iterator Iter;
00531     SetLadderContainer().Begin(Iter, ChargeLimitLo, ChargeLimitHi);
00532     while(Iter != SetLadderContainer().SetLadderMap().end()) {
00533         if(Peaks->CompareTop(*((*(Iter->second))[iMod]))) return true;
00534         SetLadderContainer().Next(Iter, ChargeLimitLo, ChargeLimitHi);
00535     }
00536     return false;
00537 }
00538 
00539 
00540 const bool
00541 CSearch::ReSearch(const int Number) const
00542 {
00543     if ( GetSettings()->GetIterativesettings().GetResearchthresh() != 0.0) {
00544         // look for hitset
00545         CRef <CMSHitSet> HitSet;
00546         HitSet = GetResponse()->FindHitSet(Number);
00547         if (HitSet.IsNull()) return true;
00548         if (HitSet->GetHits().empty()) return true;
00549         if ((*HitSet->GetHits().begin())->GetEvalue() <= 
00550             GetSettings()->GetIterativesettings().GetResearchthresh())
00551             return false;
00552         else return true;
00553     }
00554     return true;
00555 }
00556 
00557 // loads spectra into peaks
00558 //void CSearch::Spectrum2Peak(CMSPeakSet& PeakSet)
00559 void CSearch::Spectrum2Peak(CRef<CMSPeakSet> PeakSet)
00560 {
00561     CSpectrumSet::Tdata::const_iterator iSpectrum;
00562     CMSPeak* Peaks;
00563 
00564     iSpectrum = GetRequest()->GetSpectra().Get().begin();
00565     for (; iSpectrum != GetRequest()->GetSpectra().Get().end(); iSpectrum++) {
00566         CRef <CMSSpectrum> Spectrum =  *iSpectrum;
00567         if (!Spectrum) {
00568             ERR_POST(Error << "omssa: unable to find spectrum");
00569             return;
00570         }
00571 
00572         // if iterative search and spectrum should not be re-search, skip
00573         if (GetIterative() && !ReSearch(Spectrum->GetNumber()))
00574             continue;
00575 
00576         Peaks = new CMSPeak(GetSettings()->GetHitlistlen());
00577         if (!Peaks) {
00578             ERR_POST(Error << "omssa: unable to allocate CMSPeak");
00579             return;
00580         }
00581 
00582         Peaks->ReadAndProcess(*Spectrum, *GetSettings());
00583 #if 0
00584         {
00585             ofstream os("test.dta");
00586             Peaks->Write(os, eMSSpectrumFileType_dta, eMSPeakListCharge1);
00587         }
00588 #endif
00589         PeakSet->AddPeak(Peaks);
00590 
00591     }
00592     MaxMZ = PeakSet->SortPeaks(MSSCALE2INT(GetSettings()->GetPeptol()),
00593                               GetSettings()->GetZdep());
00594 
00595 }
00596 
00597 // compares TMassMasks.  Lower m/z first in sort.
00598 struct CMassMaskCompare {
00599     bool operator() (const TMassMask& x, const TMassMask& y)
00600     {
00601         if (x.Mass < y.Mass) return true;
00602         return false;
00603     }
00604 };
00605 
00606 /**
00607  *  delete variable mods that overlap with fixed mods
00608  * @param NumMod the number of modifications
00609  * @param ModList modification information
00610  */
00611 void CSearch::DeleteVariableOverlap(int& NumMod,
00612                                     CMod ModList[])
00613 {
00614     int i, j;
00615     for (i = 0; i < NumMod; i++) {
00616         // if variable mod
00617         if (ModList[i].GetFixed() != 1) {
00618             // iterate thru all mods for comparison
00619             for (j = 0; j < NumMod; j++) {
00620                 // if fixed and at same site
00621                 if (ModList[j].GetFixed() == 1 && 
00622                     ModList[i].GetSite() == ModList[j].GetSite()) {
00623                     // mark mod for deletion
00624                     ModList[i].SetFixed() = -1;
00625                 }
00626             } // j loop
00627         } // IsFixed
00628     } // i loop
00629 
00630     // now do the deletion
00631     for (i = 0; i < NumMod;) {
00632         if (ModList[i].GetFixed() == -1) {
00633             NumMod--;
00634             // if last mod, then just return
00635             if (i == NumMod) return;
00636             // otherwise, delete the modification
00637             for (j=i; j < NumMod; ++j) {
00638                 ModList[j] = ModList[j+1];
00639             }
00640         }
00641         else i++;
00642     }
00643     return;
00644 }
00645 
00646 // update sites and masses for new peptide
00647 void CSearch::UpdateWithNewPep(int Missed,
00648                                const char *PepStart[],
00649                                const char *PepEnd[], 
00650                                int NumMod[], 
00651                                CMod ModList[][MAXMOD],
00652                                int Masses[],
00653                                int EndMasses[],
00654                                int NumModSites[],
00655                                CRef <CMSModSpecSet> &Modset)
00656 {
00657     // iterate over missed cleavages
00658     int iMissed;
00659     // maximum mods allowed
00660     //int ModMax; 
00661     // iterate over mods
00662     int iMod;
00663 
00664 
00665     // update the longer peptides to add the new peptide (Missed-1) on the end
00666     for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
00667         // skip start
00668         if (PepStart[iMissed] == (const char *)-1) continue;
00669         // reset the end sequences
00670         PepEnd[iMissed] = PepEnd[Missed - 1];
00671 
00672         // update new mod masses to add in any new mods from new peptide
00673 
00674         // first determine the maximum value for updated mod list
00675         //if(NumMod[iMissed] + NumMod[Missed-1] >= MAXMOD)
00676         //    ModMax = MAXMOD - NumMod[iMissed];
00677         //else ModMax = NumMod[Missed-1];
00678 
00679         // now interate thru the new entries
00680         const char *OldSite(0);
00681         int NumModSitesCount(0), NumModCount(0);
00682         for (iMod = 0; iMod < NumMod[Missed-1]; iMod++) {
00683 
00684             // don't do more than the maximum number of modifications
00685             if (NumModCount + NumMod[iMissed] >= MAXMOD) break;
00686 
00687             // if n-term peptide mod and not at the start of the peptide, don't copy
00688             if ((Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnp || 
00689                  Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnpaa) &&
00690                 PepStart[iMissed] != ModList[Missed-1][iMod].GetSite()) {
00691                 continue;
00692             }
00693 
00694             // if n-term protein mod, don't copy
00695              if (Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modn || 
00696                   Modset->GetModType(ModList[Missed-1][iMod].GetEnum()) == eMSModType_modnaa) {
00697                  continue;
00698              }
00699 
00700             // copy the mod to the old peptide
00701             ModList[iMissed][NumModCount + NumMod[iMissed]] = 
00702             ModList[Missed-1][iMod];
00703 
00704             // increment site count if not fixed mod and not the same site
00705             if (OldSite != ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite() &&
00706                 ModList[iMissed][NumModCount + NumMod[iMissed]].GetFixed() != 1) {
00707                 NumModSitesCount++;
00708                 OldSite = ModList[iMissed][NumModCount + NumMod[iMissed]].GetSite();
00709             }
00710 
00711             // increment number of mods
00712             NumModCount++;
00713 
00714 
00715         }
00716 
00717         // update old masses
00718         Masses[iMissed] += Masses[Missed - 1];
00719 
00720         // update end masses
00721         EndMasses[iMissed] = EndMasses[Missed - 1];
00722 
00723         // update number of Mods
00724         NumMod[iMissed] += NumModCount;
00725 
00726         // update number of Modification Sites
00727         NumModSites[iMissed] += NumModSitesCount;
00728     }       
00729 }
00730 
00731 
00732 /**
00733  *  count the number of unique sites modified
00734  * 
00735  * @param NumModSites the number of unique mod sites
00736  * @param NumMod the number of mods
00737  * @param ModList modification information
00738  */
00739 void CSearch::CountModSites(int &NumModSites,
00740                             int NumMod,
00741                             CMod ModList[])
00742 {
00743     NumModSites = 0;
00744     int i;
00745     const char *OldSite(0);
00746 
00747     for (i = 0; i < NumMod; i++) {
00748         // skip repeated sites and fixed mods
00749         if (ModList[i].GetSite() != OldSite && ModList[i].GetFixed() != 1 ) {
00750             NumModSites++;
00751             OldSite = ModList[i].GetSite();
00752         }
00753     }
00754 }
00755 
00756 
00757 // create the various combinations of mods
00758 void CSearch::CreateModCombinations(int Missed,
00759                                     const char *PepStart[],
00760                                     int Masses[],
00761                                     int EndMasses[],
00762                                     int NumMod[],
00763                                     int NumMassAndMask[],
00764                                     int NumModSites[],
00765                                     CMod ModList[][MAXMOD]
00766                                    )
00767 {
00768     // need to iterate thru combinations that have iMod.
00769     // i.e. iMod = 3 and NumMod=5
00770     // 00111, 01011, 10011, 10101, 11001, 11010, 11100, 01101,
00771     // 01110
00772     // i[0] = 0 --> 5-3, i[1] = i[0]+1 -> 5-2, i[3] = i[1]+1 -> 5-1
00773     // then construct bool mask
00774 
00775     // holders for calculated modification mask and modified peptide masses
00776     unsigned Mask, MassOfMask;
00777     // iterate thru active mods
00778     int iiMod;
00779     // keep track of the number of unique masks created.  each corresponds to a ladder
00780     int iModCount;
00781     // missed cleavage
00782     int iMissed;
00783     // number of mods to consider
00784     int iMod;
00785     // positions of mods
00786     int ModIndex[MAXMOD];
00787 
00788     // go thru missed cleaves
00789     for (iMissed = 0; iMissed < Missed; iMissed++) {
00790         // skip start
00791         if (PepStart[iMissed] == (const char *)-1) continue;
00792         iModCount = 0;
00793 
00794         // set up non-modified mass
00795         SetMassAndMask(iMissed, iModCount).Mass = 
00796         Masses[iMissed] + EndMasses[iMissed];
00797         SetMassAndMask(iMissed, iModCount).Mask = 0;
00798 
00799         int NumVariable(NumMod[iMissed]);  // number of variable mods
00800         int NumFixed;
00801         // add in fixed mods
00802         for (iMod = 0; iMod < NumMod[iMissed]; iMod++) {
00803             if (ModList[iMissed][iMod].GetFixed()) {
00804                 SetMassAndMask(iMissed, iModCount).Mass += ModList[iMissed][iMod].GetPrecursorDelta();
00805                 SetMassAndMask(iMissed, iModCount).Mask |= 1 << iMod;
00806                 NumVariable--;
00807             }
00808         }
00809         iModCount++;
00810         NumFixed = NumMod[iMissed] - NumVariable;
00811 
00812         // go thru number of mods allowed
00813 //  for(iMod = 0; iMod < NumVariable && iModCount < MaxModPerPep; iMod++) {
00814         for (iMod = 0; iMod < NumModSites[iMissed] && iModCount < MaxModPerPep; iMod++) {
00815 
00816             // initialize ModIndex that points to mod sites
00817 
00818             // todo: ModIndex must always include fixed mods
00819 
00820             InitModIndex(ModIndex, iMod, NumMod[iMissed],
00821                          NumModSites[iMissed], ModList[iMissed]);
00822             do {
00823 
00824                 // calculate mass
00825                 MassOfMask = SetMassAndMask(iMissed, 0).Mass;
00826                 for (iiMod = 0; iiMod <= iMod; iiMod++ )
00827                     MassOfMask += ModList[iMissed][ModIndex[iiMod + NumFixed]].GetPrecursorDelta();
00828                 // make bool mask
00829                 Mask = MakeBoolMask(ModIndex, iMod + NumFixed);
00830                 // put mass and mask into storage
00831                 SetMassAndMask(iMissed, iModCount).Mass = MassOfMask;
00832                 SetMassAndMask(iMissed, iModCount).Mask = Mask;
00833 #if 0
00834                 printf("NumMod = %d iMod = %d, Mask = \n", NumMod[iMissed], iMod);
00835                 int iii;
00836                 for (iii=NumMod[iMissed]-1; iii >= 0; iii--) {
00837                     if (Mask & 1 << iii) printf("1");
00838                     else printf("0");
00839                 }
00840                 printf("\n");
00841 #endif
00842                 // keep track of the  number of ladders
00843                 iModCount++;
00844 
00845             } while (iModCount < MaxModPerPep &&
00846                      CalcModIndex(ModIndex, iMod, NumMod[iMissed], NumFixed,
00847                                   NumModSites[iMissed], ModList[iMissed]));
00848         } // iMod
00849 
00850         // if exact mass, add neutrons as appropriate
00851         if (SetSettings()->GetPrecursorsearchtype() == eMSSearchType_exact) {
00852             int ii;
00853             for (ii = 0; ii < iModCount; ++ii) {
00854                 SetMassAndMask(iMissed, ii).Mass += 
00855                 SetMassAndMask(iMissed, ii).Mass /
00856                 MSSCALE2INT(GetSettings()->GetExactmass()) * 
00857                 MSSCALE2INT(kNeutron);
00858             }
00859         }
00860 
00861 
00862         // sort mask and mass by mass
00863         sort(MassAndMask.get() + iMissed*MaxModPerPep, MassAndMask.get() + iMissed*MaxModPerPep + iModCount,
00864              CMassMaskCompare());
00865         // keep track of number of MassAndMask
00866         NumMassAndMask[iMissed] = iModCount;
00867 
00868     } // iMissed
00869 }
00870 
00871 
00872 void CSearch::SetIons(list <EMSIonSeries> & Ions)
00873 {
00874     if (GetSettings()->GetIonstosearch().size() < 1) {
00875         ERR_POST(Fatal << "omssa: at least one ions series to search need to be specified");
00876     }
00877     CMSSearchSettings::TIonstosearch::const_iterator i;
00878     i = GetSettings()->GetIonstosearch().begin();
00879     for(; i != GetSettings()->GetIonstosearch().end(); ++i) {
00880         Ions.push_back(static_cast <EMSIonSeries> (*i));
00881     }
00882 }
00883 
00884 
00885 void CSearch::InitLadders(list <EMSIonSeries> & Ions)
00886 {
00887 
00888     int MaxLadderSize = GetSettings()->GetMaxproductions();
00889     if (MaxLadderSize == 0) MaxLadderSize = kMSLadderMax;
00890 
00891     int i;
00892     SetLadderContainer().SetSeriesChargePairList().clear();
00893     list <EMSIonSeries> ::const_iterator iIons;
00894 
00895     for (iIons = Ions.begin(); iIons != Ions.end(); ++iIons) {
00896         for(i = 1; i <= GetSettings()->GetChargehandling().GetMaxproductcharge(); ++i) {
00897             SetLadderContainer().SetSeriesChargePairList().
00898                 push_back(TSeriesChargePairList::value_type(i, *iIons));
00899          }
00900     }
00901     SetLadderContainer().CreateLadderArrays(MaxModPerPep, MaxLadderSize);
00902 }
00903 
00904 
00905 void CSearch::MakeOidSet(void)
00906 {
00907     SetOidSet().clear();
00908     if (GetSettings()->GetIterativesettings().GetSubsetthresh() != 0.0) {
00909         SetRestrictedSearch() = true;
00910         GetResponse()->
00911         GetOidsBelowThreshold(
00912                              SetOidSet(),
00913                              GetSettings()->GetIterativesettings().GetSubsetthresh());
00914     }
00915 }
00916 
00917 int CSearch::iSearchGlobal = -1;
00918 int CSearch::MaxMZ = 0;
00919 CRef<CMSPeakSet> CSearch::SharedPeakSet = null;
00920 DEFINE_STATIC_FAST_MUTEX(iSearchMutex);
00921 DEFINE_STATIC_FAST_MUTEX(PeakSetMutex);
00922 DEFINE_STATIC_FAST_MUTEX(PeaksExaminedMutex);
00923  
00924 void CSearch::SetupSearch(CRef <CMSRequest> MyRequestIn,
00925               CRef <CMSResponse> MyResponseIn,
00926               CRef <CMSModSpecSet> Modset,
00927               CRef <CMSSearchSettings> SettingsIn,
00928               TOMSSACallback Callback,
00929               void *CallbackData)
00930 {
00931   initRequestIn = MyRequestIn;
00932   initResponseIn = MyResponseIn;
00933   initModset = Modset;
00934   initSettingsIn = SettingsIn;
00935   initCallback = Callback;
00936   initCallbackData = CallbackData;
00937 }
00938  
00939 void* CSearch::Main(void)
00940 {
00941    Search(initRequestIn,
00942           initResponseIn,
00943           initModset,
00944           initSettingsIn,
00945           initCallback);
00946 
00947     return new bool(true);
00948 }
00949  
00950 void CSearch::OnExit(void)
00951 {
00952 }
00953 
00954 void CSearch::CopySettings(CRef <CSearch> fromObj)
00955 {
00956   initRequestIn = fromObj->initRequestIn;
00957   initResponseIn = fromObj->initResponseIn;
00958   initModset = fromObj->initModset;
00959   initSettingsIn = fromObj->initSettingsIn;
00960   initCallback = fromObj->initCallback;
00961   initCallbackData = fromObj->initCallbackData;
00962   UseRankScore = fromObj->UseRankScore;
00963   Iterative = fromObj->Iterative;
00964   numseq = fromObj->numseq;
00965   rdfp = fromObj->rdfp;
00966   
00967 }
00968 
00969 void CSearch::Search(CRef <CMSRequest> MyRequestIn,
00970                                  CRef <CMSResponse> MyResponseIn,
00971                                  CRef <CMSModSpecSet> Modset,
00972                                  CRef <CMSSearchSettings> SettingsIn,
00973                                  TOMSSACallback Callback,
00974                                  void *CallbackData)
00975 {
00976     try {
00977         SetSettings().Reset(SettingsIn);
00978         SetRequest().Reset(MyRequestIn);
00979         SetResponse().Reset(MyResponseIn);
00980 
00981         // force the mass scale settings to what is currently used.
00982         SetSettings()->SetScale(MSSCALE);
00983         SetResponse()->SetScale(MSSCALE);
00984 
00985         SetEnzyme() = CCleaveFactory::CleaveFactory(static_cast <EMSEnzymes> 
00986                                                     (GetSettings()->GetEnzyme()));
00987 
00988         // do iterative search setup
00989         if (GetIterative()) {
00990             // check to see if the same sequence library
00991             if (GetResponse()->GetDbversion() != Getnumseq())
00992                 ERR_POST(Fatal << 
00993                          "number of sequences in search library is not the same as previously searched. Unable to do iterative search.");
00994             // if restricted sequence search
00995             // scan thru hits and make map of oids
00996             MakeOidSet();
00997         }
00998 
00999         // set maximum number of ladders to calculate per peptide
01000         MaxModPerPep = GetSettings()->GetMaxmods();
01001         if (MaxModPerPep > MAXMOD2) MaxModPerPep = MAXMOD2;
01002 
01003         list <EMSIonSeries> Ions;
01004         SetIons(Ions);
01005         InitLadders(Ions);
01006 
01007         LadderCalc.reset(new Int1[MaxModPerPep]);
01008         CAA AA;
01009 
01010         int Missed;  // number of missed cleaves allowed + 1
01011         if (GetEnzyme()->GetNonSpecific()) Missed = 1;
01012         else Missed = GetSettings()->GetMissedcleave()+1;
01013 
01014         int iMissed; // iterate thru missed cleavages
01015 
01016         int iSearch, hits;
01017         int endposition, position;
01018 
01019         // initialize fixed mods
01020         FixedMods.Init(GetSettings()->GetFixed(), Modset);
01021         MassArray.Init(FixedMods, GetSettings()->GetProductsearchtype(), Modset);
01022         PrecursorMassArray.Init(FixedMods, 
01023                                 GetSettings()->GetPrecursorsearchtype(), Modset);
01024         // initialize variable mods and set enzyme to use n-term methionine cleavage
01025         SetEnzyme()->SetNMethionine() = 
01026             VariableMods.Init(GetSettings()->GetVariable(), Modset) ||
01027             SetSettings()->GetNmethionine();
01028 
01029         const int *IntMassArray = MassArray.GetIntMass();
01030         const int *PrecursorIntMassArray = PrecursorMassArray.GetIntMass();
01031         const char *PepStart[MAXMISSEDCLEAVE];
01032         const char *PepEnd[MAXMISSEDCLEAVE];
01033 
01034         // contains informations on individual mod sites
01035         CMod ModList[MAXMISSEDCLEAVE][MAXMOD];
01036 
01037         int NumMod[MAXMISSEDCLEAVE];
01038         // the number of modification sites.  always less than NumMod.
01039         int NumModSites[MAXMISSEDCLEAVE];
01040 
01041 
01042         // calculated masses and masks
01043         MassAndMask.reset(new TMassMask[MAXMISSEDCLEAVE*MaxModPerPep]);
01044 
01045         // the number of masses and masks for each peptide
01046         int NumMassAndMask[MAXMISSEDCLEAVE];
01047 
01048         // set up mass array, indexed by missed cleavage
01049         // note that EndMasses is the end mass of peptide, kept separate to allow
01050         // reuse of Masses array in missed cleavage calc
01051         int Masses[MAXMISSEDCLEAVE];
01052         int EndMasses[MAXMISSEDCLEAVE];
01053 
01054         int iMod;   // used to iterate thru modifications
01055 
01056         bool SequenceDone;  // are we done iterating through the sequences?
01057 
01058         const CMSSearchSettings::TTaxids& Tax = GetSettings()->GetTaxids();
01059         CMSSearchSettings::TTaxids::const_iterator iTax;
01060 
01061         CMSHit NewHit;  // a new hit of a ladder to an m/z value
01062         CMSHit *NewHitOut;  // copy of new hit
01063 
01064         const TMassPeak *MassPeak; // peak currently in consideration
01065         CMSPeak* Peaks;
01066         CIntervalTree::const_iterator im; // iterates over interval tree
01067 
01068         // iterates over ladders
01069         TLadderMap::iterator Iter;
01070 
01071         {{
01072            CFastMutexGuard guard(PeakSetMutex);
01073            if (SharedPeakSet == null) {
01074               SharedPeakSet = new CMSPeakSet();
01075               Spectrum2Peak(SharedPeakSet);
01076            }
01077         }}
01078         vector <int> taxids;
01079         vector <int>::iterator itaxids;
01080         bool TaxInfo(false);  // check to see if any tax information in blast library
01081         bool iSearchNotDone(true);
01082 
01083         // iterate through sequences
01084         //for (iSearch = 0; rdfp->CheckOrFindOID(iSearch); iSearch++) {
01085         while (iSearchNotDone) {
01086             {{
01087                 CFastMutexGuard guard(iSearchMutex);
01088                 iSearchGlobal++;
01089                 if (!rdfp->CheckOrFindOID(iSearchGlobal)) {
01090                     iSearchNotDone = false;
01091                     continue;
01092                 }                
01093                 iSearch = iSearchGlobal;
01094                 if (iSearch % 10000 == 0) {
01095                    if(Callback) Callback(Getnumseq(), iSearch, CallbackData);
01096                 }
01097             }}
01098             
01099             // if oid restricted search, check to see if oid is in set
01100             if (GetRestrictedSearch() && SetOidSet().find(iSearch) == SetOidSet().end())
01101                 continue;
01102             
01103             if (SetSettings()->IsSetTaxids()) {
01104                 rdfp->GetTaxIDs(iSearch, taxids, false);
01105                 for (itaxids = taxids.begin(); itaxids != taxids.end(); ++itaxids) {
01106                     if (*itaxids == 0) continue;
01107                     TaxInfo = true;
01108                     for (iTax = Tax.begin(); iTax != Tax.end(); ++iTax) {
01109                         if (*itaxids == *iTax) goto TaxContinue;
01110                     } 
01111                 }
01112                 continue;
01113             }
01114             TaxContinue:
01115             CSeqDBSequence Sequence(rdfp.GetPointer(), iSearch);
01116             SequenceDone = false;
01117 
01118             // initialize missed cleavage matrix
01119             for (iMissed = 0; iMissed < Missed; iMissed++) {
01120                 PepStart[iMissed] = (const char *)-1; // mark start
01121                 PepEnd[iMissed] = Sequence.GetData();
01122                 Masses[iMissed] = 0;
01123                 EndMasses[iMissed] = 0;
01124                 NumMod[iMissed] = 0;
01125                 NumModSites[iMissed] = 0;
01126 
01127                 ModList[iMissed][0].Reset();
01128             }
01129             PepStart[Missed - 1] = Sequence.GetData();
01130 
01131             // if non-specific enzyme, set stop point
01132             if (SetEnzyme()->GetNonSpecific()) {
01133                 SetEnzyme()->SetStop() = Sequence.GetData() + SetSettings()->GetMinnoenzyme() - 1;
01134             }
01135 
01136             // iterate thru the sequence by digesting it
01137             while (!SequenceDone) {
01138 
01139 
01140                 // zero out no missed cleavage peptide mass and mods
01141                 // note that Masses and EndMass are separate to reuse
01142                 // masses during the missed cleavage calculation
01143                 Masses[Missed - 1] = 0;
01144                 EndMasses[Missed - 1] = 0;
01145                 NumMod[Missed - 1] = 0;
01146                 NumModSites[Missed - 1] = 0;
01147                 // init no modification elements
01148                 ModList[Missed - 1][0].Reset();
01149 
01150                 // calculate new stop and mass
01151                 SequenceDone = 
01152                 SetEnzyme()->CalcAndCut(Sequence.GetData(),
01153                                    Sequence.GetData() + Sequence.GetLength() - 1, 
01154                                    &(PepEnd[Missed - 1]),
01155                                    &(Masses[Missed - 1]),
01156                                    NumMod[Missed - 1],
01157                                    MAXMOD,
01158                                    &(EndMasses[Missed - 1]),
01159                                    VariableMods, FixedMods,
01160                                    ModList[Missed - 1],
01161                                    IntMassArray,
01162                                    PrecursorIntMassArray,
01163                                    Modset,
01164                                    SetSettings()->GetMaxproductions()
01165                                   );
01166 
01167                 // delete variable mods that overlap with fixed mods
01168                 DeleteVariableOverlap(NumMod[Missed - 1],
01169                                       ModList[Missed - 1]);
01170 
01171                 // count the number of unique sites modified
01172                 CountModSites(NumModSites[Missed - 1],
01173                               NumMod[Missed - 1],
01174                               ModList[Missed - 1]);
01175 
01176                 UpdateWithNewPep(Missed, PepStart, PepEnd, NumMod, ModList,
01177                                  Masses, EndMasses, NumModSites, Modset);
01178 
01179                 CreateModCombinations(Missed, PepStart, Masses,
01180                                       EndMasses, NumMod, NumMassAndMask,
01181                                       NumModSites, ModList);
01182 
01183 
01184                 int OldMass;  // keeps the old peptide mass for comparison
01185                 bool NoMassMatch;  // was there a match to the old mass?
01186 
01187                 for (iMissed = 0; iMissed < Missed; iMissed++) {
01188                     if (PepStart[iMissed] == (const char *)-1) continue;  // skip start
01189 
01190                     // get the start and stop position, inclusive, of the peptide
01191                     position =  PepStart[iMissed] - Sequence.GetData();
01192                     endposition = PepEnd[iMissed] - Sequence.GetData();
01193 
01194                     // init bool for "Has ladder been calculated?"
01195                     ClearLadderCalc(NumMassAndMask[iMissed]);
01196 
01197                     OldMass = 0;
01198                     NoMassMatch = true;
01199 
01200                     // go thru total number of mods
01201                     for (iMod = 0; iMod < NumMassAndMask[iMissed]; iMod++) {
01202 
01203                         // have we seen this mass before?
01204                         if (SetMassAndMask(iMissed, iMod).Mass == OldMass &&
01205                             NoMassMatch) continue;
01206                         NoMassMatch = true;
01207                         OldMass = SetMassAndMask(iMissed, iMod).Mass;
01208 
01209                         // return peaks where theoretical mass is <= precursor mass + tol
01210                         // and >= precursor mass - tol
01211                         if (!SetEnzyme()->GetTopDown())
01212                             im = SharedPeakSet->SetIntervalTree().IntervalsContaining(OldMass);
01213                         // if top-down enzyme, skip the interval tree match
01214                         else
01215                             im = SharedPeakSet->SetIntervalTree().AllIntervals();
01216 
01217                         for (; im; ++im ) {
01218                             MassPeak = static_cast <const TMassPeak *> (im.GetValue().GetPointerOrNull());
01219 
01220                             Peaks = MassPeak->Peak;
01221                             // make sure we look thru other mod masks with the same mass
01222                             NoMassMatch = false;
01223 
01224                             if (!GetLadderCalc(iMod)) {
01225                                 if (CreateLadders(Sequence.GetData(), 
01226                                                   iSearch,
01227                                                   position,
01228                                                   endposition,
01229                                                   Masses,
01230                                                   iMissed, 
01231                                                   AA,
01232                                                   iMod,
01233                                                   ModList[iMissed],
01234                                                   NumMod[iMissed]) != 0) continue;
01235                                 SetLadderCalc(iMod) = true; 
01236                                 // continue to next sequence if ladders not successfully made
01237                             }
01238                             else {
01239                                 TLadderMap::iterator Iter;
01240                                 SetLadderContainer().Begin(Iter);
01241                                 while(Iter != SetLadderContainer().SetLadderMap().end()) {
01242                                     (*(Iter->second))[iMod]->ClearHits();
01243                                     SetLadderContainer().Next(Iter);
01244                                 }
01245                             }
01246 
01247                             if (UseRankScore) {
01248                                 {{
01249                                     CFastMutexGuard guard(PeaksExaminedMutex);   
01250                                     Peaks->SetPeptidesExamined(MassPeak->Charge)++;
01251                                 }}
01252                             }
01253                             if (CompareLaddersTop(iMod, 
01254                                                   Peaks,
01255                                                   MassPeak)
01256                                ) {
01257                                 if (!UseRankScore) {
01258                                     {{
01259                                         CFastMutexGuard guard(PeaksExaminedMutex);
01260                                         Peaks->SetPeptidesExamined(MassPeak->Charge)++;
01261                                     }}
01262                                 }
01263                                 CompareLadders(iMod, 
01264                                                Peaks,
01265                                                false,
01266                                                MassPeak);
01267                                 hits = 0;
01268                                 SetLadderContainer().Begin(Iter);
01269                                 while(Iter != SetLadderContainer().SetLadderMap().end()) {
01270                                     hits += (*(Iter->second))[iMod]->HitCount();
01271                                     SetLadderContainer().Next(Iter);
01272                                 }
01273                                 
01274                                     
01275                                 {{
01276                                    CFastMutexGuard guard(PeakSetMutex);
01277                                    if (hits >= SetSettings()->GetMinhit()) {
01278                                       // need to save mods.  bool map?
01279                                       NewHit.SetHits() = hits;   
01280                                       NewHit.SetCharge() = MassPeak->Charge;
01281                                       // only record if hit kept
01282                                       if (Peaks->AddHit(NewHit, NewHitOut)) {
01283                                          NewHitOut->SetStart() = position;
01284                                          NewHitOut->SetStop() = endposition;
01285                                          NewHitOut->SetSeqIndex() = iSearch;
01286                                          NewHitOut->SetExpMass() = MassPeak->Mass;
01287                                          // record the hits
01288                                          NewHitOut->
01289                                             RecordMatches(SetLadderContainer(),
01290                                                           iMod,
01291                                                           Peaks,
01292                                                           SetMassAndMask(iMissed, iMod).Mask,
01293                                                           ModList[iMissed],
01294                                                           NumMod[iMissed],
01295                                                           PepStart[iMissed],
01296                                                           SetSettings()->GetSearchctermproduct(),
01297                                                           SetSettings()->GetSearchb1(),
01298                                                           SetMassAndMask(iMissed, iMod).Mass
01299                                                           );
01300                                       }
01301                                    }
01302                                 }}
01303                             } // new addition
01304                         } // MassPeak
01305                     } //iMod
01306                 } // iMissed
01307                 if (SetEnzyme()->GetNonSpecific()) {
01308                     int NonSpecificMass(Masses[0] + EndMasses[0]);
01309                     PartialLoop:
01310 
01311                     // check that stop is within bounds
01312                     //// upper bound is max precursor mass divided by lightest AA
01313                     ////      if(enzyme->GetStop() - PepStart[0] < MaxMZ/MonoMass[7]/MSSCALE &&
01314                     // upper bound redefined so that minimum mass of existing peptide
01315                     // is less than the max precursor mass minus the mass of glycine
01316                     // assumes that any mods have positive mass
01317 
01318                     // argghh, doesn't work for semi-tryptic, which resets the mass
01319                     // need to use different criterion if semi-tryptic and  start position was
01320                     // moved.  otherwise this criterion is OK
01321                     if (NonSpecificMass < MaxMZ /*- MSSCALE2INT(MonoMass[7]) */&&
01322                         SetEnzyme()->GetStop() < Sequence.GetData() + Sequence.GetLength() - 1 /*-1 added*/ &&
01323                         (SetSettings()->GetMaxnoenzyme() == 0 ||
01324                          SetEnzyme()->GetStop() - PepStart[0] + 1 < SetSettings()->GetMaxnoenzyme())
01325                        ) {
01326                         SetEnzyme()->SetStop()++;
01327                         NonSpecificMass += PrecursorIntMassArray[AA.GetMap()[*(SetEnzyme()->GetStop())]];
01328                     }
01329                     // reset to new start with minimum size
01330                     else if ( PepStart[0] < Sequence.GetData() + Sequence.GetLength() - 
01331                               SetSettings()->GetMinnoenzyme()) {
01332                         PepStart[0]++;
01333                         SetEnzyme()->SetStop() = PepStart[0] + SetSettings()->GetMinnoenzyme() - 1;
01334 
01335                         // reset mass
01336                         NonSpecificMass = 0;
01337                         const char *iSeqChar;
01338                         for (iSeqChar = PepStart[0]; iSeqChar <= SetEnzyme()->GetStop(); iSeqChar++)
01339                            NonSpecificMass += PrecursorIntMassArray[AA.GetMap()[*iSeqChar]];
01340                         // reset sequence done flag if at end of sequence
01341                         SequenceDone = false;
01342                     }
01343                     else SequenceDone = true;
01344 
01345                     // if this is partial tryptic, loop back if one end or the other is not tryptic
01346                     // for start, need to check sequence before (check for start of seq)
01347                     // for end, need to deal with end of protein case
01348                     if (!SequenceDone && SetEnzyme()->GetCleaveNum() > 0 &&
01349                         PepStart[0] != Sequence.GetData() &&
01350                         SetEnzyme()->GetStop() != Sequence.GetData() + Sequence.GetLength() - 1 /* -1 added */ ) {
01351                         if (!SetEnzyme()->CheckCleaveChar(PepStart[0]-1) &&
01352                             !SetEnzyme()->CheckCleaveChar(SetEnzyme()->GetStop()))
01353                             goto PartialLoop;
01354                     }
01355 
01356                     PepEnd[0] = PepStart[0];
01357                 }
01358                 else {
01359                     if (!SequenceDone) {
01360                         int NumModCount;
01361                         const char *OldSite;
01362                         int NumModSitesCount;
01363                         // get rid of longest peptide and move the other peptides down the line
01364                         for (iMissed = 0; iMissed < Missed - 1; iMissed++) {
01365                             // move masses to next missed cleavage
01366                             Masses[iMissed] = Masses[iMissed + 1];
01367                             // don't move EndMasses as they are recalculated
01368 
01369                             // move the modification data
01370                             NumModCount = 0;
01371                             OldSite = 0;
01372                             NumModSitesCount = 0;
01373                             for (iMod = 0; iMod < NumMod[iMissed + 1]; iMod++) {
01374                                 // throw away the c term peptide mods as we have a new c terminus
01375                                 if (Modset->GetModType(ModList[iMissed + 1][iMod].GetEnum()) != eMSModType_modcp  && 
01376                                     Modset->GetModType(ModList[iMissed + 1][iMod].GetEnum()) != eMSModType_modcpaa) {
01377                                     ModList[iMissed][NumModCount] = ModList[iMissed + 1][iMod];
01378                                     NumModCount++;
01379                                     // increment mod site count if new site and not fixed mod
01380                                     if (OldSite != ModList[iMissed + 1][iMod].GetSite() &&
01381                                         ModList[iMissed + 1][iMod].GetFixed() != 1) {
01382                                         NumModSitesCount++;
01383                                         OldSite = ModList[iMissed + 1][iMod].GetSite();
01384                                     }
01385                                 }
01386                             }
01387                             NumMod[iMissed] = NumModCount;
01388                             NumModSites[iMissed] = NumModSitesCount;
01389 
01390                             // copy starts to next missed cleavage
01391                             PepStart[iMissed] = PepStart[iMissed + 1];
01392                         }
01393 
01394                         // init new start from old stop
01395                         PepEnd[Missed-1] += 1;
01396                         PepStart[Missed-1] = PepEnd[Missed-1];
01397                     }
01398                 }
01399 
01400             }
01401 
01402 
01403         }
01404 
01405 
01406         if (GetSettings()->IsSetTaxids() && !TaxInfo)
01407             ERR_POST(Warning << 
01408                      "Taxonomically restricted search specified and no matching organisms found in sequence library.  Did you use a sequence library with taxonomic information?");
01409 
01410     }
01411     catch (NCBI_NS_STD::exception& e) {
01412         ERR_POST(Info << "Exception caught in CSearch::Search: " << e.what());
01413         throw;
01414     }
01415 
01416     //return PeakSet;
01417 }
01418 
01419 ///
01420 ///  Adds modification information to hitset
01421 ///
01422 
01423 void CSearch::AddModsToHit(CMSHits *Hit, CMSHit *MSHit)
01424 {
01425     int i;
01426     for (i = 0; i < MSHit->GetNumModInfo(); i++) {
01427         // screen out fixed mods
01428         if (MSHit->GetModInfo(i).GetIsFixed() == 1) continue;
01429         CRef< CMSModHit > ModHit(new CMSModHit);
01430         ModHit->SetSite() = MSHit->GetModInfo(i).GetSite();
01431         ModHit->SetModtype() = MSHit->GetModInfo(i).GetModEnum() ;
01432         Hit->SetMods().push_back(ModHit);
01433     }
01434 }
01435 
01436 
01437 ///
01438 ///  Adds ion information to hitset
01439 ///
01440 
01441 void CSearch::AddIonsToHit(CMSHits *Hit, CMSHit *MSHit)
01442 {
01443     int i;
01444     for (i = 0; i < MSHit->GetHits(); i++) {
01445         CRef<CMSMZHit> IonHit(new CMSMZHit);
01446         IonHit->SetIon() = MSHit->GetHitInfo(i).GetIonSeries();
01447         IonHit->SetCharge() = MSHit->GetHitInfo(i).GetCharge();
01448         IonHit->SetNumber() = MSHit->GetHitInfo(i).GetNumber();
01449         IonHit->SetMz() = MSHit->GetHitInfo(i).GetMZ();
01450         Hit->SetMzhits().push_back(IonHit);
01451     }
01452 }
01453 
01454 
01455 ///
01456 ///  Makes a string hashed out of the sequence plus mods
01457 ///
01458 
01459 void CSearch::MakeModString(string& seqstring, string& modseqstring, CMSHit *MSHit)
01460 {
01461     int i;
01462     modseqstring = seqstring;
01463     for (i = 0; i < MSHit->GetNumModInfo(); i++) {
01464         modseqstring += NStr::IntToString(MSHit->GetModInfo(i).GetSite()) + ":" +
01465                         NStr::IntToString(MSHit->GetModInfo(i).GetModEnum()) + ",";
01466     }
01467 }
01468 
01469 
01470 void CSearch::CreateSequence(int Start,
01471                              int Stop,
01472                              string &seqstring, 
01473                              CSeqDBSequence &Sequence) 
01474 {
01475     int iseq;
01476     seqstring.erase();
01477     
01478     for (iseq = Start; iseq <= Stop; iseq++) {
01479         seqstring += UniqueAA[Sequence.GetData()[iseq]];
01480     }
01481 }   
01482     
01483 
01484 void CSearch::SetResult(CRef<CMSPeakSet> PeakSet)
01485 {
01486 
01487     double ThreshStart = GetSettings()->GetCutlo(); 
01488     double ThreshEnd = GetSettings()->GetCuthi();
01489     double ThreshInc = GetSettings()->GetCutinc();
01490     double Evalcutoff = GetSettings()->GetCutoff();
01491 
01492     CMSPeak* Peaks;
01493 
01494     TScoreList ScoreList;
01495     TScoreList::iterator iScoreList;
01496     CMSHit * MSHit;
01497 
01498     // set the search library version
01499     SetResponse()->SetDbversion(Getnumseq());
01500 
01501     // Reset the oid set for tracking results
01502     SetOidSet().clear();
01503 
01504     while(!PeakSet->GetPeaks().empty()) {
01505         Peaks = *(PeakSet->GetPeaks().begin());
01506 
01507         // add to hitset
01508         CRef< CMSHitSet > HitSet(null);
01509 
01510         // if iterative search, try to find hitset
01511         if (GetIterative()) {
01512             HitSet = SetResponse()->FindHitSet(Peaks->GetNumber());
01513             if (HitSet.IsNull())
01514                 ERR_POST(Warning << "unable to find matching hitset");
01515         }
01516         
01517         // create a hitset if necessary
01518         if (HitSet.IsNull()) {
01519             HitSet = new CMSHitSet;
01520             if (!HitSet) {
01521                 ERR_POST(Error << "omssa: unable to allocate hitset");
01522                 return;
01523             }
01524             HitSet->SetNumber(Peaks->GetNumber());
01525             HitSet->SetIds() = Peaks->GetName();
01526             SetResponse()->SetHitsets().push_back(HitSet);
01527         }
01528         HitSet->SetSettingid() = GetSettings()->GetSettingid();
01529 
01530         // if there weren't enough peaks to do a search, note in error status
01531         if (Peaks->GetError() == eMSHitError_notenuffpeaks) {
01532             _TRACE("empty set");
01533             HitSet->SetError(eMSHitError_notenuffpeaks);
01534             ScoreList.clear();
01535             delete *(PeakSet->GetPeaks().begin());
01536             PeakSet->GetPeaks().pop_front();
01537             continue;
01538         }
01539 
01540         double Threshold, MinThreshold(ThreshStart), MinEval(1000000.0L);
01541         if (!UseRankScore) {
01542             // now calculate scores and sort
01543             for (Threshold = ThreshStart; Threshold <= ThreshEnd; 
01544                 Threshold += ThreshInc) {
01545                 CalcNSort(ScoreList, Threshold, Peaks);
01546                 if (!ScoreList.empty()) {
01547                     _TRACE("Threshold = " << Threshold <<
01548                            "EVal = " << ScoreList.begin()->first);
01549                 }
01550                 if (!ScoreList.empty() && ScoreList.begin()->first < MinEval) {
01551                     MinEval = ScoreList.begin()->first;
01552                     MinThreshold = Threshold;
01553                 }
01554                 ScoreList.clear();
01555             }
01556         }
01557         _TRACE("Min Threshold = " << MinThreshold);
01558         CalcNSort(ScoreList,
01559                   MinThreshold,
01560                   Peaks);
01561 
01562         // if iterative search, check to see if hitset needs to be replaced
01563         if (GetIterative() && !ScoreList.empty()) {
01564             if ((GetSettings()->GetIterativesettings().GetReplacethresh() == 0.0 &&
01565                  (HitSet->GetHits().empty() ||
01566                   ScoreList.begin()->first <= (*HitSet->GetHits().begin())->GetEvalue()))  || 
01567                 (GetSettings()->GetIterativesettings().GetReplacethresh() != 0.0 &&
01568                  ScoreList.begin()->first <= GetSettings()->GetIterativesettings().GetReplacethresh())) {
01569                 HitSet->SetHits().clear();
01570             }
01571             else {
01572                 ScoreList.clear();
01573                 delete *(PeakSet->GetPeaks().begin());
01574                 PeakSet->GetPeaks().pop_front();
01575                 continue;
01576             }
01577         }
01578 
01579         const CMSSearchSettings::TTaxids& Tax = GetSettings()->GetTaxids();
01580         CMSSearchSettings::TTaxids::const_iterator iTax;
01581 
01582         // keep a list of redundant peptides
01583         map <string, CMSHits * > PepDone;
01584         // add to hitset by score
01585         for (iScoreList = ScoreList.begin();
01586             iScoreList != ScoreList.end();
01587             iScoreList++) {
01588 
01589             double Score = iScoreList->first;
01590             if (Score > Evalcutoff) continue;
01591             CMSHits * Hit;
01592             CMSPepHit * Pephit;
01593 
01594             MSHit = iScoreList->second;
01595 
01596             CBlast_def_line_set::Tdata::const_iterator iDefLine;
01597             CRef<CBlast_def_line_set> Hdr = rdfp->GetHdr(MSHit->GetSeqIndex());
01598             // scan taxids
01599             for (iDefLine = Hdr->Get().begin();
01600                 iDefLine != Hdr->Get().end();
01601                 ++iDefLine) {
01602                 if (GetSettings()->IsSetTaxids()) {
01603                     for (iTax = Tax.begin(); iTax != Tax.end(); iTax++) {
01604                         if ((*iDefLine)->GetTaxid() == *iTax) goto TaxContinue2;
01605                     } 
01606                     continue;
01607                 }
01608                 TaxContinue2:
01609                 string seqstring, modseqstring;
01610 
01611                 // keep a list of the oids
01612                 SetOidSet().insert(MSHit->GetSeqIndex());
01613                 // get the sequence
01614                 CSeqDBSequence Sequence(rdfp.GetPointer(), MSHit->GetSeqIndex());
01615 
01616                 string tempstartstop;
01617                 CreateSequence(MSHit->GetStart(), MSHit->GetStop(),
01618                                seqstring, Sequence);
01619                 MakeModString(seqstring, modseqstring, MSHit);
01620 
01621                 if (PepDone.find(modseqstring) != PepDone.end()) {
01622                     Hit = PepDone[modseqstring];
01623                 }
01624                 else {
01625                     Hit = new CMSHits;
01626                     Hit->SetTheomass(MSHit->GetTheoreticalMass());
01627                     Hit->SetPepstring(seqstring);
01628                     // set the start AA, if there is one
01629                     if (MSHit->GetStart() > 0) {
01630                         tempstartstop = UniqueAA[Sequence.GetData()[MSHit->GetStart()-1]];
01631                         Hit->SetPepstart(tempstartstop);
01632                     }
01633                     else Hit->SetPepstart("");
01634 
01635                     // set the end AA, if there is one
01636                     if (MSHit->GetStop() < Sequence.GetLength() - 1) {
01637                         tempstartstop = UniqueAA[Sequence.GetData()[MSHit->GetStop()+1]];
01638                         Hit->SetPepstop(tempstartstop);
01639                     }
01640                     else Hit->SetPepstop("");
01641 
01642                     if (isnan(Score)) {
01643                         ERR_POST(Info << "Not a number in hitset " << 
01644                                  HitSet->GetNumber() <<
01645                                  " peptide " << modseqstring);
01646                         Score = kHighEval;
01647                     }
01648                     else if (!finite(Score)) {
01649                         ERR_POST(Info << "Infinite number in hitset " << 
01650                                  HitSet->GetNumber() <<
01651                                  " peptide " << modseqstring);
01652                         Score = kHighEval;
01653                     }
01654                     Hit->SetEvalue(Score);
01655                     Hit->SetPvalue(Score/Peaks->
01656                                    GetPeptidesExamined(MSHit->
01657                                                        GetCharge()));      
01658                     Hit->SetCharge(MSHit->GetCharge());
01659                     Hit->SetMass(MSHit->GetExpMass());
01660                     // insert mods here
01661                     AddModsToHit(Hit, MSHit);
01662                     // insert ions here
01663                     AddIonsToHit(Hit, MSHit);
01664                     CRef<CMSHits> hitref(Hit);
01665                     HitSet->SetHits().push_back(hitref);  
01666                     PepDone[modseqstring] = Hit;
01667 
01668                 }
01669 
01670                 Pephit = new CMSPepHit;
01671 
01672                 if ((*iDefLine)->CanGetSeqid()) {
01673                     // find a gi
01674                     ITERATE(list< CRef<CSeq_id> >, seqid, (*iDefLine)->GetSeqid()) {
01675                         if ((**seqid).IsGi()) {
01676                             Pephit->SetGi((**seqid).GetGi());
01677                             break;
01678                         }
01679                     }
01680 
01681                     Pephit->SetAccession(
01682                                         FindBestChoice((*iDefLine)->GetSeqid(), CSeq_id::Score)->
01683                                         GetSeqIdString(false));
01684                 }
01685 
01686 
01687                 Pephit->SetStart(MSHit->GetStart());
01688                 Pephit->SetStop(MSHit->GetStop());;
01689                 Pephit->SetDefline((*iDefLine)->GetTitle());
01690                 Pephit->SetProtlength(Sequence.GetLength());
01691                 Pephit->SetOid(MSHit->GetSeqIndex());
01692                 CRef<CMSPepHit> pepref(Pephit);
01693                 Hit->SetPephits().push_back(pepref);
01694 
01695             }
01696         }
01697         ScoreList.clear();
01698         delete *(PeakSet->GetPeaks().begin());
01699         PeakSet->GetPeaks().pop_front();
01700     }
01701     // write bioseqs to output
01702     WriteBioseqs();
01703 }
01704 
01705 
01706 void CSearch::WriteBioseqs(void)
01707 {
01708     ITERATE(CMSResponse::TOidSet, iOids, GetOidSet()) {
01709         CConstRef <CMSBioseq::TSeq> Bioseq(SetResponse()->SetBioseqs().GetBioseqByOid(*iOids));
01710         if (Bioseq.IsNull()) {
01711             CRef <CMSBioseq> MSBioseq (new CMSBioseq);
01712             MSBioseq->SetSeq(*rdfp->GetBioseq(*iOids));
01713             MSBioseq->SetOid() = *iOids;
01714             SetResponse()->SetBioseqs().Set().push_back(MSBioseq);
01715         }
01716     }
01717 }
01718 
01719 
01720 CMSMatchedPeakSet * CSearch::PepCharge(CMSHit& Hit,
01721                                        int SeriesCharge,
01722                                        int Ion,
01723                                        int minintensity,
01724                                        int Which, 
01725                                        CMSPeak *Peaks,
01726                                        int Maxproductions)
01727 {
01728     int iii;
01729     int lowmz(0), highmz;
01730 
01731     unsigned Size = Hit.GetStop() - Hit.GetStart();
01732     if (Maxproductions == 0) Maxproductions = kMSLadderMax;
01733 
01734 
01735     // decide if there is any terminal bias
01736     EMSTerminalBias TerminalBias(eMSNoTerminalBias);
01737 
01738     for(iii = 0; iii < GetEnzyme()->GetCleaveNum(); ++iii) {
01739         // n term
01740         if(GetEnzyme()->GetCleaveOffset()[iii] == 1 ) {
01741             // check to see if should be biases on both ends
01742             if(TerminalBias == eMSNTerminalBias || TerminalBias == eMSNoTerminalBias)
01743                 TerminalBias = eMSNTerminalBias;
01744             else
01745                 TerminalBias = eMSBothTerminalBias;
01746         }
01747         // c term
01748         else if (GetEnzyme()->GetCleaveOffset()[iii] == 0 ) {            
01749             // check to see if should be biases on both ends
01750             if(TerminalBias == eMSCTerminalBias || TerminalBias == eMSNoTerminalBias)
01751                 TerminalBias = eMSCTerminalBias;
01752             else
01753                 TerminalBias = eMSBothTerminalBias;
01754         }
01755     }
01756 
01757 //#if 0
01758     // make a copy of the peptide sequence
01759     CSeqDBSequence Sequence(rdfp.GetPointer(), Hit.GetSeqIndex());
01760     string seqstring;
01761     CreateSequence(Hit.GetStart(),
01762                    Hit.GetStop(),
01763                    seqstring,
01764                    Sequence);
01765 //#endif
01766     bool NoProline = find(GetSettings()->GetNoprolineions().begin(),
01767                           GetSettings()->GetNoprolineions().end(),
01768                           Ion) != 
01769         GetSettings()->GetNoprolineions().end();
01770     // fill in the matched ions
01771     Hit.FillMatchedPeaks(SeriesCharge,
01772                          Ion,
01773                          Size,
01774                          minintensity,
01775                          false, 
01776                          TerminalBias, 
01777                          SeriesCharge*Maxproductions
01778 //#if 0
01779                          ,
01780                          seqstring,
01781                          NoProline
01782 //#endif
01783                          );
01784     CMSMatchedPeakSet *MatchPeakSet = Hit.SetIonSeriesMatchMap().SetSeries(SeriesCharge, Ion);
01785     TMatchedPeakSet::iterator bin, prev, next;
01786 
01787     for ( bin = MatchPeakSet->SetMatchedPeakSet().begin(); bin != MatchPeakSet->SetMatchedPeakSet().end(); ++bin) {
01788         // need to go thru match info, not hit info.
01789         if(bin != MatchPeakSet->SetMatchedPeakSet().begin()) {
01790             lowmz = ((*bin)->GetMZ() + (*prev)->GetMZ())/2;
01791         }
01792         next = bin;
01793         ++next;
01794         if(next != MatchPeakSet->SetMatchedPeakSet().end()) {
01795             highmz = ((*bin)->GetMZ() + (*next)->GetMZ())/2;
01796         }
01797         else highmz = Hit.GetExpMass()/SeriesCharge;
01798         (*bin)->SetExpIons() = 
01799             Peaks->CountMZRange(lowmz,
01800                                 highmz,
01801                                 minintensity,
01802                                 Which) /
01803             (double)(highmz - lowmz);
01804 
01805         (*bin)->SetMassTolerance() = (Peaks->GetTol())/SeriesCharge;
01806         prev = bin;
01807     }
01808     return MatchPeakSet;
01809 }
01810 
01811 
01812 
01813 
01814 void CSearch::MatchAndSort(CMSPeak * Peaks, 
01815                            CMSHit& Hit, 
01816                            EMSPeakListTypes Which,
01817                            int minintensity,
01818                            const TSeriesChargePairList::const_iterator &iPairList,
01819                            list<CMSMatchedPeakSet *> &Forward, 
01820                            list<CMSMatchedPeakSet *> &Backward)
01821 {
01822     CMSMatchedPeakSet * current;
01823     
01824     current = PepCharge(Hit,
01825                         iPairList->first,
01826                         iPairList->second,
01827                         minintensity,
01828                         Which,
01829                         Peaks,
01830                         GetSettings()->GetMaxproductions());
01831 
01832     if (kIonDirection[iPairList->second] == 1)
01833         Forward.push_back(current);
01834     else if (kIonDirection[iPairList->second] == -1)
01835         Backward.push_back(current);
01836 }       
01837 
01838 
01839 void CSearch::DoubleCompare(list<CMSMatchedPeakSet *> &SingleForward,
01840                             list<CMSMatchedPeakSet *> &SingleBackward,
01841                             list<CMSMatchedPeakSet *> &Double,
01842                             bool DoubleForward) 
01843 {   
01844     list<CMSMatchedPeakSet *>::iterator iDouble, iFront, iBack;
01845 
01846     for (iDouble = Double.begin(); iDouble != Double.end(); ++iDouble) {
01847         
01848         for(iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
01849             (*iDouble)->Compare(*iFront, DoubleForward);
01850         }
01851         
01852         for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
01853             (*iDouble)->Compare(*iBack, !DoubleForward);
01854         }
01855     }             
01856 }
01857 
01858 
01859 void CSearch::CalcNSort(TScoreList& ScoreList,
01860                         double Threshold,
01861                         CMSPeak* Peaks
01862                        )
01863 {
01864     int iCharges;
01865     int iHitList;
01866     int Tophitnum = GetSettings()->GetTophitnum();
01867 
01868     for (iCharges = 0; iCharges < Peaks->GetNumCharges(); iCharges++) {
01869 
01870         TMSHitList& HitList = Peaks->GetHitList(iCharges);   
01871         for (iHitList = 0; iHitList != Peaks->GetHitListIndex(iCharges);
01872             iHitList++) {
01873 
01874             int tempMass = HitList[iHitList].GetExpMass();
01875             int Charge = HitList[iHitList].GetCharge();
01876             EMSPeakListTypes Which = Peaks->GetWhich(Charge);
01877 
01878             // set up new score
01879 
01880  
01881             // minimum intensity
01882             int minintensity = static_cast <int> (Threshold * Peaks->GetMaxI(Which));
01883 
01884 
01885             TSeriesChargePairList::const_iterator iPairList;
01886             list <CMSMatchedPeakSet *> SingleForward, SingleBackward, DoubleForward, DoubleBackward;
01887 
01888             for (iPairList = SetLadderContainer().GetSeriesChargePairList().begin();
01889                 iPairList != SetLadderContainer().GetSeriesChargePairList().end();
01890                 ++iPairList) {
01891 
01892                 // charge 1
01893                 if (iPairList->first == 1) {
01894                     MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
01895                                  iPairList, SingleForward, SingleBackward);
01896                 }
01897                 else if (Charge >= Peaks->GetConsiderMult()) {
01898                     MatchAndSort(Peaks, HitList[iHitList], Which, minintensity,
01899                                   iPairList, DoubleForward, DoubleBackward);
01900                 }
01901             }
01902 
01903             list <CMSMatchedPeakSet *> ::iterator iFront, iBack, iDouble;
01904 
01905             if(GetSettings()->GetNocorrelationscore() == 0) {
01906                 // do the singly charge comparison
01907                 for (iFront = SingleForward.begin(); iFront != SingleForward.end(); ++iFront) {
01908                     for(iBack = SingleBackward.begin(); iBack != SingleBackward.end(); ++iBack) {
01909                     (*iFront)->Compare(*iBack, false);
01910                     }
01911                 }
01912                 if (Charge >= Peaks->GetConsiderMult()) {
01913                     DoubleCompare(SingleForward, SingleBackward, DoubleForward, true);
01914                     DoubleCompare(SingleForward, SingleBackward, DoubleBackward, false); 
01915                 }
01916             }
01917 
01918 
01919             double adjust = HitList[iHitList].GetMaxDelta() / 
01920                 MSSCALE2INT(GetSettings()->GetMsmstol());
01921             if(adjust < GetSettings()->GetAutomassadjust()) 
01922                 adjust = GetSettings()->GetAutomassadjust();
01923             if(adjust > 1.0) 
01924                 adjust = 1.0;
01925             double a = 
01926                 HitList[iHitList].CalcPoissonMean(GetSettings()->GetProbfollowingion(),
01927                                                   GetEnzyme()->GetCleaveNum(),
01928                                                   GetSettings()->GetProbfollowingion(),
01929                                                   19,
01930                                                   adjust);
01931 
01932             if (a == 0) {
01933                 // threshold probably too high
01934                 continue;
01935             }
01936             if (a < 0 ) {
01937                 _TRACE("poisson mean is < 0");
01938                 continue;
01939             }
01940             else if (isnan(a) || !finite(a)) {
01941                 ERR_POST(Info << "poisson mean is NaN or is infinite");
01942                 continue;
01943             }
01944 
01945             // keep going if obviously insignificant
01946             if (HitList[iHitList].GetHits() < a) continue;
01947 
01948             double pval; // statistical p-value
01949             int N; // number of peptides
01950             N = Peaks->GetPeptidesExamined(Charge) + 
01951                 (GetSettings()->GetZdep() * (Charge - 1) + 1) *
01952                 GetSettings()->GetPseudocount();
01953 
01954             if (!UseRankScore) {
01955                 int High, Low, NumPeaks, NumLo, NumHi;
01956                 Peaks->HighLow(High, Low, NumPeaks, tempMass, Charge, Threshold, NumLo, NumHi);
01957 
01958                 double TopHitProb = ((double)Tophitnum)/NumPeaks;
01959                 // correct for situation where more tophits than experimental peaks
01960                 if (TopHitProb > 1.0) TopHitProb = 1.0;
01961                 int numhits = HitList[iHitList].CountHits(Threshold, Peaks->GetMaxI(Which));
01962                 double Normal = HitList[iHitList].CalcNormalTopHit(a, TopHitProb);
01963                 pval = HitList[iHitList].CalcPvalueTopHit(a, numhits, Normal, TopHitProb);
01964             }
01965             else {
01966                 pval = HitList[iHitList].CalcPvalue(a, HitList[iHitList].CountHits(Threshold, Peaks->GetMaxI(Which)));
01967             }
01968             if (UseRankScore && !GetPoissonOnly()) {
01969                 if (HitList[iHitList].GetM() != 0.0) {
01970                     double Perf = HitList[iHitList].CalcRankProb();
01971                     _TRACE( "Perf=" << Perf << " pval=" << pval << " N=" << N );
01972                     pval *= Perf;
01973                     pval *= 10.0;  // correction to scales
01974                 }
01975                 else ERR_POST(Info << "M is zero");
01976             }
01977             double eval = 3e3 * pval * N;
01978 //            _TRACE( " pval=" << pval << " eval=" << eval );
01979             ScoreList.insert(pair<const double, CMSHit *> 
01980                              (eval, &(HitList[iHitList])));
01981         }   
01982     } 
01983 }
01984 
01985 CSearch::~CSearch()
01986 {
01987 }
01988 
01989 
01990 
01991 

Generated on Sun Dec 6 22:18:48 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Mon Dec 07 16:20:52 2009 by modify_doxy.py rev. 173732