|
NCBI C++ ToolKit
|
00001 /* $Id: blastinput_demo.cpp 38565 2008-07-17 17:42:49Z camacho $ 00002 * =========================================================================== 00003 * 00004 * PUBLIC DOMAIN NOTICE 00005 * National Center for Biotechnology Information 00006 * 00007 * This software/database is a "United States Government Work" under the 00008 * terms of the United States Copyright Act. It was written as part of 00009 * the author's official duties as a United States Government employee and 00010 * thus cannot be copyrighted. This software/database is freely available 00011 * to the public for use. The National Library of Medicine and the U.S. 00012 * Government have not placed any restriction on its use or reproduction. 00013 * 00014 * Although all reasonable efforts have been taken to ensure the accuracy 00015 * and reliability of the software and data, the NLM and the U.S. 00016 * Government do not and cannot warrant the performance or results that 00017 * may be obtained by using this software or data. The NLM and the U.S. 00018 * Government disclaim all warranties, express or implied, including 00019 * warranties of performance, merchantability or fitness for any particular 00020 * purpose. 00021 * 00022 * Please cite the author in any work or product based on this material. 00023 * 00024 * =========================================================================== 00025 * 00026 * Author: Christiam Camacho 00027 * 00028 */ 00029 00030 /** @file blastinput_demo.cpp 00031 * Demonstration application of the sequence input functionality of the 00032 * blastinput library 00033 */ 00034 00035 #ifndef SKIP_DOXYGEN_PROCESSING 00036 static char const rcsid[] = 00037 "$Id: blastinput_demo.cpp 38565 2008-07-17 17:42:49Z camacho $"; 00038 #endif /* SKIP_DOXYGEN_PROCESSING */ 00039 00040 #include <ncbi_pch.hpp> 00041 #include <objmgr/util/sequence.hpp> 00042 #include <algo/blast/blastinput/cmdline_flags.hpp> 00043 #include <algo/blast/blastinput/blast_input.hpp> 00044 #include <algo/blast/blastinput/blast_fasta_input.hpp> 00045 #include <algo/blast/blastinput/blast_input_aux.hpp> 00046 00047 #ifndef SKIP_DOXYGEN_PROCESSING 00048 USING_NCBI_SCOPE; 00049 USING_SCOPE(blast); 00050 USING_SCOPE(objects); 00051 #endif 00052 00053 ///////////////////////////////////////////////////////////////////////////// 00054 // CBlastInputDemoApplication:: 00055 00056 00057 class CBlastInputDemoApplication : public CNcbiApplication 00058 { 00059 private: 00060 virtual void Init(void); 00061 virtual int Run(void); 00062 virtual void Exit(void); 00063 }; 00064 00065 00066 ///////////////////////////////////////////////////////////////////////////// 00067 // Init test for all different types of arguments 00068 00069 00070 void CBlastInputDemoApplication::Init(void) 00071 { 00072 HideStdArgs(fHideLogfile | fHideConffile | fHideVersion); 00073 00074 // Create command-line argument descriptions class 00075 auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions); 00076 00077 // Specify USAGE context 00078 arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), 00079 "blastinput library demo application to read sequence input"); 00080 00081 arg_desc->AddDefaultKey(kArgQuery, "input_file", "Input file name", 00082 CArgDescriptions::eInputFile, kDfltArgQuery); 00083 00084 arg_desc->AddDefaultKey(kArgOutput, "output_file", "Output file name", 00085 CArgDescriptions::eOutputFile, "-"); 00086 00087 arg_desc->AddKey("mol_type", "molecule_type", 00088 "Molecule type of the data being read", 00089 CArgDescriptions::eString); 00090 arg_desc->SetConstraint("mol_type", 00091 &(*new CArgAllow_Strings, "prot", "nucl")); 00092 00093 arg_desc->AddDefaultKey("collect_stats", "boolean_value", 00094 "Collect statistics about data being read?", 00095 CArgDescriptions::eBoolean, "true"); 00096 00097 // Setup arg.descriptions for this application 00098 SetupArgDescriptions(arg_desc.release()); 00099 } 00100 00101 class CSequenceInputStats : public CObject { 00102 public: 00103 CSequenceInputStats() : m_NumQueries(0), m_NumLetters(0), m_NumBatches(0) {} 00104 00105 void AddQueryBatch(const CBlastQueryVector& query_batch) { 00106 m_NumQueries += query_batch.size(); 00107 00108 ITERATE(CBlastQueryVector, query, query_batch) { 00109 m_NumLetters += sequence::GetLength(*(*query)->GetQuerySeqLoc(), 00110 (*query)->GetScope()); 00111 } 00112 m_NumBatches++; 00113 } 00114 00115 unsigned int GetNumQueries() const { return m_NumQueries; } 00116 unsigned int GetNumBatches() const { return m_NumBatches; } 00117 Uint8 GetNumLetters() const { return m_NumLetters; } 00118 00119 void PrintReport(CNcbiOstream& out, bool is_prot, CStopWatch& sw) const { 00120 out << "Elapsed time: " << sw.AsString() << " seconds" << endl; 00121 out << "Number of queries: " << GetNumQueries() << endl; 00122 out << "Number of " << (is_prot ? "residues" : "bases") << ": " 00123 << GetNumLetters() << endl; 00124 out << "Number of batches: " << GetNumBatches() << endl; 00125 } 00126 00127 private: 00128 unsigned int m_NumQueries; 00129 Uint8 m_NumLetters; 00130 unsigned int m_NumBatches; 00131 }; 00132 00133 00134 ///////////////////////////////////////////////////////////////////////////// 00135 // Run demo 00136 00137 00138 int CBlastInputDemoApplication::Run(void) 00139 { 00140 const CArgs& args = GetArgs(); 00141 int retval = 0; 00142 00143 try { 00144 00145 CNcbiIstream& in = args[kArgQuery].AsInputFile(); 00146 CNcbiOstream& out = args[kArgOutput].AsOutputFile(); 00147 bool collect_stats = args["collect_stats"].AsBoolean(); 00148 bool is_prot = static_cast<bool>(args["mol_type"].AsString() == "prot"); 00149 const EProgram kProgram = is_prot ? eBlastp : eBlastn; 00150 00151 const SDataLoaderConfig dlconfig(is_prot); 00152 CBlastInputSourceConfig iconfig(dlconfig); // use defaults 00153 CBlastFastaInputSource fasta(in, iconfig); 00154 CBlastInput input(&fasta, GetQueryBatchSize(kProgram)); 00155 CRef<CScope> scope = CBlastScopeSource(dlconfig).NewScope(); 00156 CRef<CSequenceInputStats> stats; 00157 CStopWatch sw; 00158 00159 if (collect_stats) { 00160 stats.Reset(new CSequenceInputStats); 00161 sw.Start(); 00162 } 00163 00164 // This is the idiomatic use of the CBlastInput class 00165 for (; !input.End(); scope->ResetHistory()) { 00166 CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope)); 00167 00168 if (collect_stats) { 00169 stats->AddQueryBatch(*query_batch); 00170 } 00171 } 00172 00173 if (collect_stats) { 00174 sw.Stop(); 00175 stats->PrintReport(out, is_prot, sw); 00176 } 00177 00178 } catch (const CException& exptn) { 00179 cerr << "Error: " << exptn.GetMsg() << endl; 00180 retval = exptn.GetErrCode(); 00181 } catch (const exception& e) { 00182 cerr << "Error: " << e.what() << endl; 00183 retval = -1; 00184 } catch (...) { 00185 cerr << "Unknown exception" << endl; 00186 retval = -1; 00187 } 00188 00189 return retval; 00190 } 00191 00192 00193 ///////////////////////////////////////////////////////////////////////////// 00194 // Cleanup 00195 00196 00197 void CBlastInputDemoApplication::Exit(void) 00198 { 00199 SetDiagStream(0); 00200 } 00201 00202 00203 ///////////////////////////////////////////////////////////////////////////// 00204 // MAIN 00205 00206 00207 #ifndef SKIP_DOXYGEN_PROCESSING 00208 int main(int argc, const char* argv[]) 00209 { 00210 // Execute main application function 00211 return CBlastInputDemoApplication().AppMain(argc, argv, 0, eDS_Default, 0); 00212 } 00213 #endif /* SKIP_DOXYGEN_PROCESSING */
1.7.5.1
Modified on Wed May 23 13:15:18 2012 by modify_doxy.py rev. 337098