NCBI C++ ToolKit
omssamerge.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /* 
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the authors in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Author:  Lewis Y. Geer
00027  *  
00028  * File Description:
00029  *    program for merging omssa search result files
00030  *
00031  *
00032  * ===========================================================================
00033  */
00034 
00035 #include <ncbi_pch.hpp>
00036 #include <corelib/ncbistl.hpp>
00037 #include <corelib/ncbiargs.hpp>
00038 #include <corelib/ncbiapp.hpp>
00039 #include <corelib/ncbienv.hpp>
00040 #include <corelib/ncbistre.hpp>
00041 #include <corelib/ncbifile.hpp>
00042 #include <connect/ncbi_memory_connector.h>
00043 #include <connect/ncbi_conn_stream.hpp>
00044 #include <serial/serial.hpp>
00045 #include <serial/objistrasn.hpp>
00046 #include <serial/objistrasnb.hpp>
00047 #include <serial/objostrasn.hpp>
00048 #include <serial/objostrasnb.hpp>
00049 #include <serial/iterator.hpp>
00050 #include <serial/objostrxml.hpp>
00051 #include <objects/omssa/omssa__.hpp>
00052 #include <util/compress/bzip2.hpp> 
00053 
00054 #include "omssa.hpp"
00055 #include "msmerge.hpp"
00056 
00057 #include <fstream>
00058 #include <string>
00059 #include <list>
00060 
00061 
00062 USING_NCBI_SCOPE;
00063 USING_SCOPE(objects);
00064 USING_SCOPE(omssa);
00065 
00066 /////////////////////////////////////////////////////////////////////////////
00067 //
00068 //  COMSSAMerge
00069 //
00070 //  Main application
00071 //
00072 
00073 class COMSSAMerge : public CNcbiApplication {
00074 public:
00075     COMSSAMerge();
00076 private:
00077     virtual int Run();
00078     virtual void Init();
00079 
00080 };
00081 
00082 COMSSAMerge::COMSSAMerge()
00083 {
00084     SetVersion(CVersionInfo(2, 1, 4));
00085 }
00086 
00087 
00088 
00089 
00090 
00091 void COMSSAMerge::Init()
00092 {
00093 
00094     auto_ptr<CArgDescriptions> argDesc(new CArgDescriptions);
00095 
00096     argDesc->AddDefaultKey("i", "infiles", 
00097                 "file containing list of input files on separate lines",
00098                 CArgDescriptions::eString,
00099                 "");
00100 
00101     argDesc->AddFlag("sw", "output search results without spectra");
00102 
00103     argDesc->AddFlag("it", "input as text asn.1 formatted search results");
00104     argDesc->AddFlag("ib", "input as binary asn.1 formatted search results");
00105     argDesc->AddFlag("ix", "input as xml formatted search results");
00106     argDesc->AddFlag("ibz2", "input as xml formatted search results compressed by bzip2");
00107 
00108     argDesc->AddPositional("o", "output file name", CArgDescriptions::eString);
00109 
00110 
00111     argDesc->AddFlag("ot", "output as text asn.1 formatted search results");
00112     argDesc->AddFlag("ob", "output as binary asn.1 formatted search results");
00113     argDesc->AddFlag("ox", "output as xml formatted search results");
00114     argDesc->AddFlag("obz2", "output as xml formatted search results compressed by bzip2");
00115 
00116     argDesc->AddExtra(0,10000, "input file names", CArgDescriptions::eString);
00117 
00118 
00119     SetupArgDescriptions(argDesc.release());
00120 
00121     // allow info posts to be seen
00122     SetDiagPostLevel(eDiag_Info);
00123 }
00124 
00125 int main(int argc, const char* argv[]) 
00126 {
00127     COMSSAMerge theTestApp;
00128     return theTestApp.AppMain(argc, argv, 0, eDS_Default, 0);
00129 }
00130 
00131 
00132 
00133 
00134 int COMSSAMerge::Run()
00135 {    
00136 
00137     try {
00138 
00139     CArgs args = GetArgs();
00140 
00141 
00142     CRef <COMSSASearch> MySearch(new COMSSASearch);
00143 
00144     ESerialDataFormat InFileType(eSerial_Xml), OutFileType(eSerial_Xml);
00145 
00146     bool obz2(false);  // output bzip2 compressed?
00147     bool ibz2(false);  // input bzip2 compressed?
00148 
00149     if(args["ox"]) OutFileType = eSerial_Xml;
00150     else if(args["ob"]) OutFileType = eSerial_AsnBinary;
00151     else if(args["ot"]) OutFileType = eSerial_AsnText;
00152     else if(args["obz2"]) {
00153         OutFileType = eSerial_Xml;
00154         obz2 = true;
00155     }
00156     else ERR_POST(Fatal << "output file type not given");
00157 
00158     if(args["ix"]) InFileType = eSerial_Xml;
00159     else if(args["ib"]) InFileType = eSerial_AsnBinary;
00160     else if(args["it"]) InFileType = eSerial_AsnText;
00161     else if(args["ibz2"]) {
00162         InFileType = eSerial_Xml;
00163         ibz2 = true;
00164     }
00165     else ERR_POST(Fatal << "input file type not given");
00166 
00167 
00168     // loop thru input files
00169     if ( args["i"].AsString() != "") {
00170         ifstream is(args["i"].AsString().c_str());
00171         bool Begin(true);
00172         if(!is)
00173             ERR_POST(Fatal << "unable to open input file list " << args["i"].AsString());
00174         while(!is.eof()) {
00175             string iFileName;
00176             NcbiGetline(is, iFileName, "\x0d\x0a");
00177             if(iFileName == "" || is.eof()) continue;
00178             try {
00179                 CRef <COMSSASearch> InSearch(new COMSSASearch);
00180                 CSearchHelper::ReadCompleteSearch(iFileName, InFileType, ibz2, *InSearch);
00181 //                InSearch->ReadCompleteSearch(iFileName, InFileType, ibz2);
00182                 if(Begin) {
00183                     Begin = false;
00184                     MySearch->CopyCMSSearch(InSearch);
00185                 }
00186                 else {
00187                     // add
00188                     MySearch->AppendSearch(InSearch);
00189                 }
00190             }
00191             catch(CException& e) {
00192                 ERR_POST(Fatal << "exception: " << e.what());
00193                 return 1;
00194             }
00195         }
00196     }
00197     else if ( args.GetNExtra() ) {
00198         for (size_t extra = 1;  extra <= args.GetNExtra();  extra++) {
00199             CRef <COMSSASearch> InSearch(new COMSSASearch);
00200             CSearchHelper::ReadCompleteSearch(args[extra].AsString(), InFileType, ibz2, *InSearch);
00201             //InSearch->ReadCompleteSearch(args[extra].AsString(), InFileType, ibz2);
00202             try {
00203                 if(extra == 1) {
00204                     // copy
00205                     MySearch->CopyCMSSearch(InSearch);
00206                 }
00207                 else {
00208                     // add
00209                     MySearch->AppendSearch(InSearch);
00210                 }
00211             }
00212             catch(CException& e) {
00213                 ERR_POST(Fatal << "exception: " << e.what());
00214                 return 1;
00215             }
00216         }
00217     }
00218  
00219     // write out the new search
00220 
00221     auto_ptr <CNcbiOfstream> raw_out;
00222     auto_ptr <CCompressionOStream> compress_out;
00223     auto_ptr <CObjectOStream> txt_out;
00224     
00225     if( obz2 ) {
00226         raw_out.reset(new CNcbiOfstream(args["o"].AsString().c_str()));
00227         compress_out.reset( new CCompressionOStream (*raw_out, 
00228                                                      new CBZip2StreamCompressor(), 
00229                                                      CCompressionStream::fOwnProcessor)); 
00230         txt_out.reset(CObjectOStream::Open(OutFileType, *compress_out)); 
00231     }
00232     else {
00233         txt_out.reset(CObjectOStream::Open(args["o"].AsString().c_str(), OutFileType));
00234     }
00235 
00236 
00237 //    auto_ptr <CObjectOStream> txt_out(
00238 //         CObjectOStream::Open(args["o"].AsString(), OutFileType));
00239 
00240     if(txt_out.get()) {
00241         SetUpOutputFile(txt_out.get(), OutFileType);
00242         if (args["sw"]) {
00243             txt_out->Write(ObjectInfo(*(*MySearch->SetResponse().begin())));
00244     }
00245         else {
00246             txt_out->Write(ObjectInfo(*MySearch));
00247         }
00248         txt_out->Flush();
00249         txt_out->Close();
00250     }
00251 
00252 
00253     } catch (NCBI_NS_STD::exception& e) {
00254     ERR_POST(Fatal << "Exception in COMSSAMerge::Run: " << e.what());
00255     }
00256 
00257     return 0;
00258 }
00259 
Modified on Tue Aug 19 16:23:47 2014 by modify_doxy.py rev. 426318