NCBI C++ ToolKit
blastinput_demo.cpp
Go to the documentation of this file.
00001 /*  $Id: blastinput_demo.cpp 38565 2008-07-17 17:42:49Z camacho $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Author: Christiam Camacho
00027  *
00028  */
00029 
00030 /** @file blastinput_demo.cpp
00031  *  Demonstration application of the sequence input functionality of the
00032  *  blastinput library
00033  */
00034 
00035 #ifndef SKIP_DOXYGEN_PROCESSING
/// Revision identifier of this source file, as expanded from the version
/// control system's $Id$ keyword.
static const char rcsid[] =
    "$Id: blastinput_demo.cpp 38565 2008-07-17 17:42:49Z camacho $";
00038 #endif /* SKIP_DOXYGEN_PROCESSING */
00039 
00040 #include <ncbi_pch.hpp>
00041 #include <objmgr/util/sequence.hpp>
00042 #include <algo/blast/blastinput/cmdline_flags.hpp>
00043 #include <algo/blast/blastinput/blast_input.hpp>
00044 #include <algo/blast/blastinput/blast_fasta_input.hpp>
00045 #include <algo/blast/blastinput/blast_input_aux.hpp>
00046 
00047 #ifndef SKIP_DOXYGEN_PROCESSING
00048 USING_NCBI_SCOPE;
00049 USING_SCOPE(blast);
00050 USING_SCOPE(objects);
00051 #endif
00052 
00053 /////////////////////////////////////////////////////////////////////////////
00054 //  CBlastInputDemoApplication::
00055 
00056 
00057 class CBlastInputDemoApplication : public CNcbiApplication
00058 {
00059 private:
00060     virtual void Init(void);
00061     virtual int  Run(void);
00062     virtual void Exit(void);
00063 };
00064 
00065 
00066 /////////////////////////////////////////////////////////////////////////////
00067 //  Init: describe the command-line arguments accepted by the demo
00068 
00069 
00070 void CBlastInputDemoApplication::Init(void)
00071 {
00072     HideStdArgs(fHideLogfile | fHideConffile | fHideVersion);
00073 
00074     // Create command-line argument descriptions class
00075     auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
00076 
00077     // Specify USAGE context
00078     arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
00079       "blastinput library demo application to read sequence input");
00080 
00081     arg_desc->AddDefaultKey(kArgQuery, "input_file", "Input file name",
00082                      CArgDescriptions::eInputFile, kDfltArgQuery);
00083 
00084     arg_desc->AddDefaultKey(kArgOutput, "output_file", "Output file name",
00085                    CArgDescriptions::eOutputFile, "-");
00086 
00087     arg_desc->AddKey("mol_type", "molecule_type",
00088                      "Molecule type of the data being read",
00089                      CArgDescriptions::eString);
00090     arg_desc->SetConstraint("mol_type",
00091                             &(*new CArgAllow_Strings, "prot", "nucl"));
00092 
00093     arg_desc->AddDefaultKey("collect_stats", "boolean_value",
00094                             "Collect statistics about data being read?",
00095                             CArgDescriptions::eBoolean, "true");
00096 
00097     // Setup arg.descriptions for this application
00098     SetupArgDescriptions(arg_desc.release());
00099 }
00100 
00101 class CSequenceInputStats : public CObject {
00102 public:
00103     CSequenceInputStats() : m_NumQueries(0), m_NumLetters(0), m_NumBatches(0) {}
00104 
00105     void AddQueryBatch(const CBlastQueryVector& query_batch) {
00106         m_NumQueries += query_batch.size();
00107 
00108         ITERATE(CBlastQueryVector, query, query_batch) {
00109             m_NumLetters += sequence::GetLength(*(*query)->GetQuerySeqLoc(),
00110                                                 (*query)->GetScope());
00111         }
00112         m_NumBatches++;
00113     }
00114 
00115     unsigned int GetNumQueries() const { return m_NumQueries; }
00116     unsigned int GetNumBatches() const { return m_NumBatches; }
00117     Uint8 GetNumLetters() const { return m_NumLetters; }
00118 
00119     void PrintReport(CNcbiOstream& out, bool is_prot, CStopWatch& sw) const {
00120         out << "Elapsed time: " << sw.AsString() << " seconds" << endl;
00121         out << "Number of queries: " << GetNumQueries() << endl;
00122         out << "Number of " << (is_prot ? "residues" : "bases") << ": " 
00123             << GetNumLetters() << endl;
00124         out << "Number of batches: " << GetNumBatches() << endl;
00125     }
00126 
00127 private:
00128     unsigned int m_NumQueries;
00129     Uint8 m_NumLetters;
00130     unsigned int m_NumBatches;
00131 };
00132 
00133 
00134 /////////////////////////////////////////////////////////////////////////////
00135 //  Run demo
00136 
00137 
00138 int CBlastInputDemoApplication::Run(void)
00139 {
00140     const CArgs& args = GetArgs();
00141     int retval = 0;
00142 
00143     try {
00144 
00145         CNcbiIstream& in = args[kArgQuery].AsInputFile();
00146         CNcbiOstream& out = args[kArgOutput].AsOutputFile();
00147         bool collect_stats = args["collect_stats"].AsBoolean();
00148         bool is_prot = static_cast<bool>(args["mol_type"].AsString() == "prot");
00149         const EProgram kProgram = is_prot ? eBlastp : eBlastn;
00150 
00151         const SDataLoaderConfig dlconfig(is_prot);
00152         CBlastInputSourceConfig iconfig(dlconfig); // use defaults
00153         CBlastFastaInputSource fasta(in, iconfig);
00154         CBlastInput input(&fasta, GetQueryBatchSize(kProgram));
00155         CRef<CScope> scope = CBlastScopeSource(dlconfig).NewScope();
00156         CRef<CSequenceInputStats> stats;
00157         CStopWatch sw;
00158 
00159         if (collect_stats) {
00160             stats.Reset(new CSequenceInputStats);
00161             sw.Start();
00162         }
00163 
00164         // This is the idiomatic use of the CBlastInput class
00165         for (; !input.End(); scope->ResetHistory()) {
00166             CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
00167             
00168             if (collect_stats) {
00169                 stats->AddQueryBatch(*query_batch);
00170             }
00171         }
00172 
00173         if (collect_stats) {
00174             sw.Stop();
00175             stats->PrintReport(out, is_prot, sw);
00176         }
00177 
00178     } catch (const CException& exptn) {
00179         cerr << "Error: " << exptn.GetMsg() << endl;
00180         retval = exptn.GetErrCode();
00181     } catch (const exception& e) {
00182         cerr << "Error: " << e.what() << endl;
00183         retval = -1;
00184     } catch (...) {
00185         cerr << "Unknown exception" << endl;
00186         retval = -1;
00187     }
00188 
00189     return retval;
00190 }
00191 
00192 
00193 /////////////////////////////////////////////////////////////////////////////
00194 //  Cleanup
00195 
00196 
00197 void CBlastInputDemoApplication::Exit(void)
00198 {
00199     SetDiagStream(0);
00200 }
00201 
00202 
00203 /////////////////////////////////////////////////////////////////////////////
00204 //  MAIN
00205 
00206 
00207 #ifndef SKIP_DOXYGEN_PROCESSING
00208 int main(int argc, const char* argv[])
00209 {
00210     // Execute main application function
00211     return CBlastInputDemoApplication().AppMain(argc, argv, 0, eDS_Default, 0);
00212 }
00213 #endif /* SKIP_DOXYGEN_PROCESSING */
Modified on Wed May 23 13:15:18 2012 by modify_doxy.py rev. 337098