NCBI C++ ToolKit
blast_demo.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

00001 /*  $Id: blast_demo.cpp 64223 2014-08-26 13:15:28Z madden $
00002  * ===========================================================================
00003  *
00004  *                            PUBLIC DOMAIN NOTICE
00005  *               National Center for Biotechnology Information
00006  *
00007  *  This software/database is a "United States Government Work" under the
00008  *  terms of the United States Copyright Act.  It was written as part of
00009  *  the author's official duties as a United States Government employee and
00010  *  thus cannot be copyrighted.  This software/database is freely available
00011  *  to the public for use. The National Library of Medicine and the U.S.
00012  *  Government have not placed any restriction on its use or reproduction.
00013  *
00014  *  Although all reasonable efforts have been taken to ensure the accuracy
00015  *  and reliability of the software and data, the NLM and the U.S.
00016  *  Government do not and cannot warrant the performance or results that
00017  *  may be obtained by using this software or data. The NLM and the U.S.
00018  *  Government disclaim all warranties, express or implied, including
00019  *  warranties of performance, merchantability or fitness for any particular
00020  *  purpose.
00021  *
00022  *  Please cite the author in any work or product based on this material.
00023  *
00024  * ===========================================================================
00025  *
00026  * Authors:  Tom Madden
00027  *
00028  * File Description:
00029  *   Sample application for running a BLAST search.
00030  *
00031  */
00032 
00033 #include <ncbi_pch.hpp>
00034 #include <corelib/ncbiapp.hpp>
00035 #include <corelib/ncbienv.hpp>
00036 #include <corelib/ncbiargs.hpp>
00037 
00038 #include <objmgr/object_manager.hpp>
00039 
00040 #include <objects/seqalign/Seq_align_set.hpp>
00041 
00042 #include <algo/blast/api/sseqloc.hpp>
00043 #include <algo/blast/api/local_blast.hpp>
00044 #include <algo/blast/api/uniform_search.hpp>
00045 #include <algo/blast/api/blast_types.hpp>
00046 #include <algo/blast/api/blast_aux.hpp>
00047 #include <algo/blast/api/objmgr_query_data.hpp>
00048 #include <algo/blast/api/blast_options_handle.hpp>
00049 #include <algo/blast/api/blast_nucl_options.hpp>
00050 #include <algo/blast/api/blast_prot_options.hpp>
00051 
00052 #include <algo/blast/blastinput/blast_input.hpp>
00053 #include <algo/blast/blastinput/blast_fasta_input.hpp>
00054 
00055 USING_NCBI_SCOPE;
00056 USING_SCOPE(blast);
00057 
00058 
00059 /////////////////////////////////////////////////////////////////////////////
00060 //  CBlastDemoApplication::
00061 
00062 
00063 class CBlastDemoApplication : public CNcbiApplication
00064 {
00065 private:
00066     virtual void Init(void);
00067     virtual int  Run(void);
00068     virtual void Exit(void);
00069 
00070     void ProcessCommandLineArgs(CRef<CBlastOptionsHandle> opts_handle);
00071 
00072 };
00073 
00074 
00075 /////////////////////////////////////////////////////////////////////////////
00076 //  Init: describe the command-line arguments this application accepts
00077 
00078 
00079 void CBlastDemoApplication::Init(void)
00080 {
00081     // Create command-line argument descriptions class
00082     auto_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
00083 
00084     // Specify USAGE context
00085     arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "BLAST demo program");
00086 
00087     arg_desc->AddKey
00088         ("program", "ProgramName",
00089          "One of blastn, megablast, dc-megablast, blastp, blastx, tblastn, tblastx, rpsblast",
00090          CArgDescriptions::eString);
00091     arg_desc->SetConstraint
00092         ("program", &(*new CArgAllow_Strings,
00093                 "blastn", "megablast", "dc-megablast", "blastp", "blastx", "tblastn", "tblastx", "rpsblast"));
00094 
00095     arg_desc->AddDefaultKey
00096         ("db", "DataBase",
00097          "This is the name of the database",
00098          CArgDescriptions::eString, "nr");
00099 
00100     arg_desc->AddDefaultKey("in", "Queryfile",
00101                         "A file with the query", CArgDescriptions::eInputFile, "stdin");
00102 
00103     arg_desc->AddDefaultKey("out", "Outputfile",
00104                         "The output file", CArgDescriptions::eOutputFile, "stdout");
00105 
00106     arg_desc->AddDefaultKey("evalue", "evalue",
00107                         "E-value threshold for saving hits", CArgDescriptions::eDouble, "0");
00108 
00109     arg_desc->AddDefaultKey("penalty", "penalty", "Penalty score for a mismatch",
00110                             CArgDescriptions::eInteger, "0");
00111 
00112     arg_desc->AddDefaultKey("reward", "reward", "Reward score for a match",
00113                             CArgDescriptions::eInteger, "0");
00114 
00115     arg_desc->AddDefaultKey("matrix", "matrix", "Scoring matrix name",
00116                             CArgDescriptions::eString, "BLOSUM62");
00117 
00118     // Setup arg.descriptions for this application
00119     SetupArgDescriptions(arg_desc.release());
00120 }
00121 
00122 /// Modify BLAST options from defaults based upon command-line args.
00123 ///
00124 /// @param opts_handle already created CBlastOptionsHandle to modify [in]
00125 void CBlastDemoApplication::ProcessCommandLineArgs(CRef<CBlastOptionsHandle> opts_handle)
00126 
00127 {
00128     CArgs args = GetArgs();
00129 
00130         // Expect value is a supported option for all flavors of BLAST.
00131         if(args["evalue"].AsDouble())
00132           opts_handle->SetEvalueThreshold(args["evalue"].AsDouble());
00133         
00134         // The first branch is used if the program is blastn or a flavor of megablast
00135         // as reward and penalty is a valid option.
00136         //
00137         // The second branch is used for all other programs except rpsblast as matrix
00138         // is a valid option for blastp and other programs that perform protein-protein
00139         // comparisons.
00140         //
00141         if (CBlastNucleotideOptionsHandle* nucl_handle =
00142               dynamic_cast<CBlastNucleotideOptionsHandle*>(&*opts_handle)) {
00143 
00144               if (args["reward"].AsInteger())
00145                 nucl_handle->SetMatchReward(args["reward"].AsInteger());
00146             
00147               if (args["penalty"].AsInteger())
00148                 nucl_handle->SetMismatchPenalty(args["penalty"].AsInteger());
00149         }
00150         else if (CBlastProteinOptionsHandle* prot_handle =
00151                dynamic_cast<CBlastProteinOptionsHandle*>(&*opts_handle)) {
00152               if (args["matrix"]) 
00153                 prot_handle->SetMatrixName(args["matrix"].AsString().c_str());
00154         }
00155 
00156         return;
00157 }
00158 
00159 
00160 /////////////////////////////////////////////////////////////////////////////
00161 //  Run: perform the BLAST search and write out the resulting alignments
00162 
00163 
00164 int CBlastDemoApplication::Run(void)
00165 {
00166     // Get arguments
00167     const CArgs& args = GetArgs();
00168 
00169     EProgram program = ProgramNameToEnum(args["program"].AsString());
00170 
00171     bool db_is_aa = (program == eBlastp || program == eBlastx ||
00172                      program == eRPSBlast || program == eRPSTblastn);
00173 
00174     CRef<CBlastOptionsHandle> opts(CBlastOptionsFactory::Create(program));
00175 
00176     ProcessCommandLineArgs(opts);
00177 
00178     opts->Validate();  // Can throw CBlastException::eInvalidOptions for invalid option.
00179 
00180 
00181     // This will dump the options to stderr.
00182     // opts->GetOptions().DebugDumpText(cerr, "opts", 1);
00183 
00184     CRef<CObjectManager> objmgr = CObjectManager::GetInstance();
00185     if (!objmgr) {
00186          throw std::runtime_error("Could not initialize object manager");
00187     }
00188 
00189     const bool is_protein =
00190         !!Blast_QueryIsProtein(opts->GetOptions().GetProgramType());
00191     SDataLoaderConfig dlconfig(is_protein);
00192     CBlastInputSourceConfig iconfig(dlconfig);
00193     CBlastFastaInputSource fasta_input(args["in"].AsInputFile(), iconfig);
00194     CScope scope(*objmgr);
00195 
00196     CBlastInput blast_input(&fasta_input);
00197 
00198     TSeqLocVector query_loc = blast_input.GetAllSeqLocs(scope);
00199 
00200     CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query_loc));
00201 
00202     const CSearchDatabase target_db(args["db"].AsString(),
00203         db_is_aa ? CSearchDatabase::eBlastDbIsProtein : CSearchDatabase::eBlastDbIsNucleotide);
00204 
00205     CLocalBlast blaster(query_factory, opts, target_db);
00206 
00207     CSearchResultSet results = *blaster.Run();
00208 
00209     // Get warning messages.
00210     for (unsigned int i = 0; i < results.GetNumResults(); i++) 
00211     {
00212         TQueryMessages messages = results[i].GetErrors(eBlastSevWarning);
00213         if (messages.size() > 0)
00214         {
00215             CConstRef<CSeq_id> seq_id = results[i].GetSeqId();
00216             if (seq_id.NotEmpty())
00217                 cerr << "ID: " << seq_id->AsFastaString() << endl;
00218             else
00219                 cerr << "ID: " << "Unknown" << endl;
00220 
00221             ITERATE(vector<CRef<CSearchMessage> >, it, messages)
00222                 cerr << (*it)->GetMessage() << endl;
00223         }
00224     }
00225     
00226     CNcbiOstream& out = args["out"].AsOutputFile();
00227 
00228     for (unsigned int i = 0; i < results.GetNumResults(); i++) {
00229          CConstRef<CSeq_align_set> sas = results[i].GetSeqAlign();
00230          out << MSerial_AsnText << *sas;
00231     }
00232 
00233     return 0;
00234 }
00235 
00236 
00237 /////////////////////////////////////////////////////////////////////////////
00238 //  Cleanup
00239 
00240 
00241 void CBlastDemoApplication::Exit(void)
00242 {
00243     // Do your after-Run() cleanup here
00244 }
00245 
00246 
00247 /////////////////////////////////////////////////////////////////////////////
00248 //  MAIN
00249 
00250 
00251 #ifndef SKIP_DOXYGEN_PROCESSING
00252 int main(int argc, const char* argv[])
00253 {
00254     // Execute main application function
00255     return CBlastDemoApplication().AppMain(argc, argv);
00256 }
00257 #endif /* SKIP_DOXYGEN_PROCESSING */
Modified on Wed Dec 17 12:27:42 2014 by modify_doxy.py rev. 426318