src/algo/blast/blastinput/blast_args.cpp

Go to the documentation of this file.
00001 /* $Id: blast_args.cpp 174882 2009-11-02 16:46:46Z madden $
00002 * ===========================================================================
00003 *
00004 *                            PUBLIC DOMAIN NOTICE
00005 *               National Center for Biotechnology Information
00006 *
00007 *  This software/database is a "United States Government Work" under the
00008 *  terms of the United States Copyright Act.  It was written as part of
00009 *  the author's official duties as a United States Government employee and
00010 *  thus cannot be copyrighted.  This software/database is freely available
00011 *  to the public for use. The National Library of Medicine and the U.S.
00012 *  Government have not placed any restriction on its use or reproduction.
00013 *
00014 *  Although all reasonable efforts have been taken to ensure the accuracy
00015 *  and reliability of the software and data, the NLM and the U.S.
00016 *  Government do not and cannot warrant the performance or results that
00017 *  may be obtained by using this software or data. The NLM and the U.S.
00018 *  Government disclaim all warranties, express or implied, including
00019 *  warranties of performance, merchantability or fitness for any particular
00020 *  purpose.
00021 *
00022 *  Please cite the author in any work or product based on this material.
00023 *
00024 * ===========================================================================*/
00025 
00026 /*****************************************************************************
00027 
00028 File name: blast_args.cpp
00029 
00030 Author: Jason Papadopoulos
00031 
00032 ******************************************************************************/
00033 
00034 /** @file blast_args.cpp
00035  * convert blast-related command line
00036  * arguments into blast options
00037 */
00038 
00039 #ifndef SKIP_DOXYGEN_PROCESSING
00040 static char const rcsid[] = "$Id: blast_args.cpp 174882 2009-11-02 16:46:46Z madden $";
00041 #endif
00042 
00043 #include <ncbi_pch.hpp>
00044 #include <algo/blast/api/version.hpp>
00045 #include <algo/blast/blastinput/blast_args.hpp>
00046 #include <algo/blast/api/blast_exception.hpp>
00047 #include <algo/blast/api/blast_aux.hpp>
00048 #include <algo/blast/api/objmgr_query_data.hpp> /* for CObjMgrQueryFactory */
00049 #include <algo/blast/core/blast_nalookup.h>
00050 #include <algo/blast/core/hspfilter_besthit.h>
00051 #include <objects/scoremat/PssmWithParameters.hpp>
00052 #include <util/format_guess.hpp>
00053 #include <objtools/blast/seqdb_reader/seqdb.hpp>
00054 #include <algo/blast/blastinput/blast_input.hpp>    // for CInputException
00055 #include <connect/ncbi_connutil.h>
00056 
00057 #include <algo/blast/api/msa_pssm_input.hpp>    // for CPsiBlastInputClustalW
00058 #include <algo/blast/api/pssm_engine.hpp>       // for CPssmEngine
00059 
00060 BEGIN_NCBI_SCOPE
00061 BEGIN_SCOPE(blast)
00062 USING_SCOPE(objects);
00063 USING_SCOPE(align_format);
00064 
00065 void
00066 IBlastCmdLineArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
00067                                            CBlastOptions& /* options */)
00068 {}
00069 
00070 CProgramDescriptionArgs::CProgramDescriptionArgs(const string& program_name, 
00071                                                  const string& program_desc)
00072     : m_ProgName(program_name), m_ProgDesc(program_desc)
00073 {}
00074 
00075 void
00076 CProgramDescriptionArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00077 {
00078     // program description
00079     arg_desc.SetUsageContext(m_ProgName, m_ProgDesc + " " + 
00080                              CBlastVersion().Print());
00081 }
00082 
00083 CTaskCmdLineArgs::CTaskCmdLineArgs(const set<string>& supported_tasks,
00084                                    const string& default_task)
00085 : m_SupportedTasks(supported_tasks), m_DefaultTask(default_task)
00086 {
00087     _ASSERT( !m_SupportedTasks.empty() );
00088     if ( !m_DefaultTask.empty() ) {
00089         _ASSERT(m_SupportedTasks.find(m_DefaultTask) != m_SupportedTasks.end());
00090     }
00091 }
00092 
00093 void
00094 CTaskCmdLineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00095 {
00096     arg_desc.SetCurrentGroup("General search options");
00097     if ( !m_DefaultTask.empty() ) {
00098         arg_desc.AddDefaultKey(kTask, "task_name", "Task to execute", 
00099                                CArgDescriptions::eString, m_DefaultTask);
00100     } else {
00101         arg_desc.AddKey(kTask, "task_name", "Task to execute",
00102                         CArgDescriptions::eString);
00103     }
00104     arg_desc.SetConstraint(kTask, new CArgAllowStringSet(m_SupportedTasks));
00105     arg_desc.SetCurrentGroup("");
00106    
00107 }
00108 
00109 void
00110 CTaskCmdLineArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
00111                                           CBlastOptions& /* options */)
00112 {
00113     // N.B.: handling of tasks occurs at the application level to ensure that
00114     // only relevant tasks are added (@sa CBlastnAppArgs)
00115 }
00116 
00117 void
00118 CGenericSearchArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00119 {
00120     arg_desc.SetCurrentGroup("General search options");
00121 
00122     // evalue cutoff
00123     arg_desc.AddDefaultKey(kArgEvalue, "evalue", 
00124                      "Expectation value (E) threshold for saving hits ",
00125                      CArgDescriptions::eDouble,
00126                      NStr::DoubleToString(BLAST_EXPECT_VALUE));
00127 
00128     // word size
00129     // Default values: blastn=11, megablast=28, others=3
00130     const string description = m_QueryIsProtein 
00131         ? "Word size for wordfinder algorithm"
00132         : "Word size for wordfinder algorithm (length of best perfect match)";
00133     arg_desc.AddOptionalKey(kArgWordSize, "int_value", description,
00134                             CArgDescriptions::eInteger);
00135     arg_desc.SetConstraint(kArgWordSize, m_QueryIsProtein 
00136                            ? new CArgAllowValuesGreaterThanOrEqual(2)
00137                            : new CArgAllowValuesGreaterThanOrEqual(4));
00138 
00139     if ( !m_IsRpsBlast && !m_IsTblastx) {
00140         // gap open penalty
00141         arg_desc.AddOptionalKey(kArgGapOpen, "open_penalty", 
00142                                 "Cost to open a gap", 
00143                                 CArgDescriptions::eInteger);
00144 
00145         // gap extend penalty
00146         arg_desc.AddOptionalKey(kArgGapExtend, "extend_penalty",
00147                                "Cost to extend a gap", 
00148                                CArgDescriptions::eInteger);
00149     }
00150 
00151 
00152     if (m_ShowPercentIdentity) {
00153         arg_desc.SetCurrentGroup("Restrict search or results");
00154         arg_desc.AddOptionalKey(kArgPercentIdentity, "float_value",
00155                                 "Percent identity",
00156                                 CArgDescriptions::eDouble);
00157         arg_desc.SetConstraint(kArgPercentIdentity,
00158                                new CArgAllow_Doubles(0.0, 100.0));
00159     }
00160 
00161     arg_desc.SetCurrentGroup("Extension options");
00162     // ungapped X-drop
00163     // Default values: blastn=20, megablast=10, others=7
00164     arg_desc.AddOptionalKey(kArgUngappedXDropoff, "float_value", 
00165                             "X-dropoff value (in bits) for ungapped extensions",
00166                             CArgDescriptions::eDouble);
00167 
00168     // Tblastx is ungapped only.
00169     if (!m_IsTblastx) {
00170          // initial gapped X-drop
00171          // Default values: blastn=30, megablast=20, tblastx=0, others=15
00172          arg_desc.AddOptionalKey(kArgGappedXDropoff, "float_value", 
00173                  "X-dropoff value (in bits) for preliminary gapped extensions",
00174                  CArgDescriptions::eDouble);
00175 
00176          // final gapped X-drop
00177          // Default values: blastn/megablast=50, tblastx=0, others=25
00178          arg_desc.AddOptionalKey(kArgFinalGappedXDropoff, "float_value", 
00179                          "X-dropoff value (in bits) for final gapped alignment",
00180                          CArgDescriptions::eDouble);
00181     }
00182 
00183     arg_desc.SetCurrentGroup("Statistical options");
00184     // effective search space
00185     // Default value is the real size
00186     arg_desc.AddOptionalKey(kArgEffSearchSpace, "int_value", 
00187                             "Effective length of the search space",
00188                             CArgDescriptions::eInt8);
00189     arg_desc.SetConstraint(kArgEffSearchSpace, 
00190                            new CArgAllowValuesGreaterThanOrEqual(0));
00191 
00192 #if 0
00193     arg_desc.AddDefaultKey(kArgMaxHSPsPerSubject, "int_value",
00194                            "Maximum number of HPSs per subject to save "
00195                            "( " + NStr::IntToString(kDfltArgMaxHSPsPerSubject)
00196                            + " means infinite)",
00197                            CArgDescriptions::eInteger,
00198                            NStr::IntToString(kDfltArgMaxHSPsPerSubject));
00199     arg_desc.SetConstraint(kArgMaxHSPsPerSubject,
00200                            new CArgAllowValuesGreaterThanOrEqual(0));
00201 #endif
00202 
00203     arg_desc.SetCurrentGroup("");
00204 }
00205 
00206 void
00207 CGenericSearchArgs::ExtractAlgorithmOptions(const CArgs& args, 
00208                                             CBlastOptions& opt)
00209 {
00210     if (args[kArgEvalue]) {
00211         opt.SetEvalueThreshold(args[kArgEvalue].AsDouble());
00212     }
00213 
00214     int gap_open=0, gap_extend=0;
00215     if (args.Exist(kArgMatrixName) && args[kArgMatrixName])
00216          BLAST_GetProteinGapExistenceExtendParams
00217              (args[kArgMatrixName].AsString().c_str(), &gap_open, &gap_extend);
00218 
00219     if (args.Exist(kArgGapOpen) && args[kArgGapOpen]) {
00220         opt.SetGapOpeningCost(args[kArgGapOpen].AsInteger());
00221     }
00222     else if (args.Exist(kArgMatrixName) && args[kArgMatrixName]) {
00223         opt.SetGapOpeningCost(gap_open);
00224     }
00225 
00226     if (args.Exist(kArgGapExtend) && args[kArgGapExtend]) {
00227         opt.SetGapExtensionCost(args[kArgGapExtend].AsInteger());
00228     }
00229     else if (args.Exist(kArgMatrixName) && args[kArgMatrixName]) {
00230         opt.SetGapExtensionCost(gap_extend);
00231     }
00232 
00233     if (args[kArgUngappedXDropoff]) {
00234         opt.SetXDropoff(args[kArgUngappedXDropoff].AsDouble());
00235     }
00236 
00237     if (args.Exist(kArgGappedXDropoff) && args[kArgGappedXDropoff]) {
00238         opt.SetGapXDropoff(args[kArgGappedXDropoff].AsDouble());
00239     }
00240 
00241     if (args.Exist(kArgFinalGappedXDropoff) && args[kArgFinalGappedXDropoff]) {
00242         opt.SetGapXDropoffFinal(args[kArgFinalGappedXDropoff].AsDouble());
00243     }
00244 
00245     if (args[kArgWordSize]) {
00246         if (m_QueryIsProtein && args[kArgWordSize].AsInteger() > 5)
00247            opt.SetLookupTableType(eCompressedAaLookupTable);
00248         opt.SetWordSize(args[kArgWordSize].AsInteger());
00249     }
00250 
00251     if (args[kArgEffSearchSpace]) {
00252         opt.SetEffectiveSearchSpace(args[kArgEffSearchSpace].AsInt8());
00253     }
00254 
00255     if (args.Exist(kArgPercentIdentity) && args[kArgPercentIdentity]) {
00256         opt.SetPercentIdentity(args[kArgPercentIdentity].AsDouble());
00257     }
00258 
00259 #if 0
00260     if (args[kArgMaxHSPsPerSubject]) {
00261         const int value = args[kArgMaxHSPsPerSubject].AsInteger();
00262         if (value != kDfltArgMaxHSPsPerSubject) {
00263             opt.SetMaxNumHspPerSequence(value);
00264         }
00265     }
00266 #endif
00267 }
00268 
00269 void
00270 CFilteringArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00271 {
00272     arg_desc.SetCurrentGroup("Query filtering options");
00273 
00274     if (m_QueryIsProtein) {
00275         arg_desc.AddDefaultKey(kArgSegFiltering, "SEG_options",
00276                         "Filter query sequence with SEG "
00277                         "(Format: '" + kDfltArgApplyFiltering + "', " +
00278                         "'window locut hicut', or '" + kDfltArgNoFiltering +
00279                         "' to disable)",
00280                         CArgDescriptions::eString, m_FilterByDefault
00281                         ? kDfltArgSegFiltering : kDfltArgNoFiltering);
00282         arg_desc.AddDefaultKey(kArgLookupTableMaskingOnly, "soft_masking",
00283                         "Apply filtering locations as soft masks",
00284                         CArgDescriptions::eBoolean, "false");
00285     } else {
00286         arg_desc.AddDefaultKey(kArgDustFiltering, "DUST_options",
00287                         "Filter query sequence with DUST "
00288                         "(Format: '" + kDfltArgApplyFiltering + "', " + 
00289                         "'level window linker', or '" + kDfltArgNoFiltering +
00290                         "' to disable)",
00291                         CArgDescriptions::eString, m_FilterByDefault
00292                         ? kDfltArgDustFiltering : kDfltArgNoFiltering);
00293         arg_desc.AddOptionalKey(kArgFilteringDb, "filtering_database",
00294                 "BLAST database containing filtering elements (i.e.: repeats)",
00295                 CArgDescriptions::eString);
00296         
00297         arg_desc.AddOptionalKey(kArgWindowMaskerTaxId, "window_masker_taxid",
00298                 "Enable WindowMasker filtering using a Taxonomic ID",
00299                 CArgDescriptions::eInteger);
00300 
00301         arg_desc.AddOptionalKey(kArgWindowMaskerDatabase, "window_masker_db",
00302                 "Enable WindowMasker filtering using this repeats database.",
00303                 CArgDescriptions::eString);
00304 
00305         arg_desc.AddDefaultKey(kArgLookupTableMaskingOnly, "soft_masking",
00306                         "Apply filtering locations as soft masks",
00307                         CArgDescriptions::eBoolean, "true");
00308     }
00309 
00310     arg_desc.SetCurrentGroup("");
00311 }
00312 
00313 void 
00314 CFilteringArgs::x_TokenizeFilteringArgs(const string& filtering_args, 
00315                                         vector<string>& output) const
00316 {
00317     output.clear();
00318     NStr::Tokenize(filtering_args, " ", output);
00319     if (output.size() != 3) {
00320         NCBI_THROW(CInputException, eInvalidInput,
00321                    "Invalid number of arguments to filtering option");
00322     }
00323 }
00324 
00325 void
00326 CFilteringArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00327 {
00328     if (args[kArgLookupTableMaskingOnly]) {
00329         opt.SetMaskAtHash(args[kArgLookupTableMaskingOnly].AsBoolean());
00330     }
00331 
00332     vector<string> tokens;
00333 
00334     try {
00335         if (m_QueryIsProtein && args[kArgSegFiltering]) {
00336             const string& seg_opts = args[kArgSegFiltering].AsString();
00337             if (seg_opts == kDfltArgNoFiltering) {
00338                 opt.SetSegFiltering(false);
00339             } else if (seg_opts == kDfltArgApplyFiltering) {
00340                 opt.SetSegFiltering(true);
00341             } else {
00342                 x_TokenizeFilteringArgs(seg_opts, tokens);
00343                 opt.SetSegFilteringWindow(NStr::StringToInt(tokens[0]));
00344                 opt.SetSegFilteringLocut(NStr::StringToDouble(tokens[1]));
00345                 opt.SetSegFilteringHicut(NStr::StringToDouble(tokens[2]));
00346             }
00347         }
00348 
00349         if ( !m_QueryIsProtein && args[kArgDustFiltering]) {
00350             const string& dust_opts = args[kArgDustFiltering].AsString();
00351             if (dust_opts == kDfltArgNoFiltering) {
00352                 opt.SetDustFiltering(false);
00353             } else if (dust_opts == kDfltArgApplyFiltering) {
00354                 opt.SetDustFiltering(true);
00355             } else {
00356                 x_TokenizeFilteringArgs(dust_opts, tokens);
00357                 opt.SetDustFilteringLevel(NStr::StringToInt(tokens[0]));
00358                 opt.SetDustFilteringWindow(NStr::StringToInt(tokens[1]));
00359                 opt.SetDustFilteringLinker(NStr::StringToInt(tokens[2]));
00360             }
00361         }
00362     } catch (const CStringException& e) {
00363         if (e.GetErrCode() == CStringException::eConvert) {
00364             NCBI_THROW(CInputException, eInvalidInput,
00365                        "Invalid input for filtering parameters");
00366         }
00367     }
00368     
00369     int filter_dbs = 0;
00370     
00371     if (args.Exist(kArgFilteringDb) && args[kArgFilteringDb]) {
00372         opt.SetRepeatFilteringDB(args[kArgFilteringDb].AsString().c_str());
00373         filter_dbs++;
00374     }
00375     
00376     if (args.Exist(kArgWindowMaskerTaxId) &&
00377         args[kArgWindowMaskerTaxId]) {
00378         
00379         opt.SetWindowMaskerTaxId
00380             (args[kArgWindowMaskerTaxId].AsInteger());
00381         
00382         filter_dbs++;
00383     }
00384     
00385     if (args.Exist(kArgWindowMaskerDatabase) &&
00386         args[kArgWindowMaskerDatabase]) {
00387         
00388         opt.SetWindowMaskerDatabase
00389             (args[kArgWindowMaskerDatabase].AsString().c_str());
00390         
00391         filter_dbs++;
00392     }
00393     
00394     if (filter_dbs > 1) {
00395         string msg =
00396             string("Please specify at most one of ") + kArgFilteringDb + ", " +
00397             kArgWindowMaskerTaxId + ", or " + kArgWindowMaskerDatabase + ".";
00398         
00399         NCBI_THROW(CInputException, eInvalidInput, msg);
00400     }
00401 }
00402 
00403 void
00404 CWindowSizeArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00405 {
00406     arg_desc.SetCurrentGroup("Extension options");
00407     // 2-hit wordfinder window size
00408     arg_desc.AddOptionalKey(kArgWindowSize, "int_value", 
00409                             "Multiple hits window size, use 0 to specify "
00410                             "1-hit algorithm",
00411                             CArgDescriptions::eInteger);
00412     arg_desc.SetConstraint(kArgWindowSize, 
00413                            new CArgAllowValuesGreaterThanOrEqual(0));
00414     arg_desc.SetCurrentGroup("");
00415 }
00416 
00417 void
00418 CWindowSizeArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00419 {
00420     if (args[kArgWindowSize]) {
00421         opt.SetWindowSize(args[kArgWindowSize].AsInteger());
00422     } else {
00423         int window = -1;
00424         BLAST_GetSuggestedWindowSize(opt.GetProgramType(), 
00425                                      opt.GetMatrixName(), 
00426                                      &window);
00427         if (window != -1) {
00428             opt.SetWindowSize(window);
00429         }
00430     }
00431 }
00432 
00433 void
00434 COffDiagonalRangeArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00435 {
00436     arg_desc.SetCurrentGroup("Extension options");
00437     // 2-hit wordfinder off diagonal range
00438     arg_desc.AddDefaultKey(kArgOffDiagonalRange, "int_value", 
00439                             "Number of off-diagonals to search for the 2nd hit, "
00440                             "use 0 to turn off",
00441                             CArgDescriptions::eInteger,
00442                             NStr::IntToString(kDfltOffDiagonalRange));
00443     arg_desc.SetConstraint(kArgOffDiagonalRange, 
00444                            new CArgAllowValuesGreaterThanOrEqual(0));
00445     arg_desc.SetCurrentGroup("");
00446 }
00447 
00448 void
00449 COffDiagonalRangeArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00450 {
00451     if (args[kArgOffDiagonalRange]) {
00452         opt.SetOffDiagonalRange(args[kArgOffDiagonalRange].AsInteger());
00453     } else {
00454         opt.SetOffDiagonalRange(0);
00455     }
00456 }
00457 
00458 void
00459 CWordThresholdArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00460 {
00461     arg_desc.SetCurrentGroup("General search options");
00462     // lookup table word score threshold
00463     arg_desc.AddOptionalKey(kArgWordScoreThreshold, "float_value", 
00464                  "Minimum word score such that the word is added to the "
00465                  "BLAST lookup table",
00466                  CArgDescriptions::eDouble);
00467     arg_desc.SetConstraint(kArgWordScoreThreshold, 
00468                            new CArgAllowValuesGreaterThanOrEqual(0));
00469     arg_desc.SetCurrentGroup("");
00470 }
00471 
00472 void
00473 CWordThresholdArg::ExtractAlgorithmOptions(const CArgs& args, 
00474                                            CBlastOptions& opt)
00475 {
00476     if (args[kArgWordScoreThreshold]) {
00477         opt.SetWordThreshold(args[kArgWordScoreThreshold].AsDouble());
00478     } else {
00479         double threshold = -1;
00480         BLAST_GetSuggestedThreshold(opt.GetProgramType(),
00481                                     opt.GetMatrixName(),
00482                                     &threshold);
00483         if (threshold != -1) {
00484             opt.SetWordThreshold(threshold);
00485         }
00486     }
00487 }
00488 
00489 void
00490 CMatrixNameArg::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00491 {
00492     arg_desc.SetCurrentGroup("General search options");
00493     arg_desc.AddOptionalKey(kArgMatrixName, "matrix_name",
00494                            "Scoring matrix name (normally BLOSUM62)",
00495                            CArgDescriptions::eString); 
00496     arg_desc.SetCurrentGroup("");
00497 }
00498 
00499 void
00500 CMatrixNameArg::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt)
00501 {
00502     if (args[kArgMatrixName]) {
00503         opt.SetMatrixName(args[kArgMatrixName].AsString().c_str());
00504     }
00505 }
00506 
00507 void
00508 CNuclArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00509 {
00510     // TLM arg_desc.SetCurrentGroup("Nucleotide scoring options");
00511 
00512     arg_desc.SetCurrentGroup("General search options");
00513     // blastn mismatch penalty
00514     arg_desc.AddOptionalKey(kArgMismatch, "penalty", 
00515                            "Penalty for a nucleotide mismatch", 
00516                            CArgDescriptions::eInteger);
00517     arg_desc.SetConstraint(kArgMismatch, 
00518                            new CArgAllowValuesLessThanOrEqual(0));
00519 
00520     // blastn match reward
00521     arg_desc.AddOptionalKey(kArgMatch, "reward", 
00522                            "Reward for a nucleotide match", 
00523                            CArgDescriptions::eInteger); 
00524     arg_desc.SetConstraint(kArgMatch, 
00525                            new CArgAllowValuesGreaterThanOrEqual(0));
00526 
00527 
00528     arg_desc.SetCurrentGroup("Extension options");
00529     arg_desc.AddFlag(kArgNoGreedyExtension,
00530                      "Use non-greedy dynamic programming extension",
00531                      true);
00532 
00533     arg_desc.SetCurrentGroup("");
00534 }
00535 
00536 void
00537 CNuclArgs::ExtractAlgorithmOptions(const CArgs& cmd_line_args,
00538                                    CBlastOptions& options)
00539 {
00540     if (cmd_line_args[kArgMismatch]) {
00541         options.SetMismatchPenalty(cmd_line_args[kArgMismatch].AsInteger());
00542     }
00543     if (cmd_line_args[kArgMatch]) {
00544         options.SetMatchReward(cmd_line_args[kArgMatch].AsInteger());
00545     }
00546 
00547     if (cmd_line_args[kArgNoGreedyExtension]) {
00548         options.SetGapExtnAlgorithm(eDynProgScoreOnly);
00549         options.SetGapTracebackAlgorithm(eDynProgTbck);
00550     }
00551 }
00552 
00553 const string CDiscontiguousMegablastArgs::kTemplType_Coding("coding");
00554 const string CDiscontiguousMegablastArgs::kTemplType_Optimal("optimal");
00555 const string 
00556 CDiscontiguousMegablastArgs::kTemplType_CodingAndOptimal("coding_and_optimal");
00557 
00558 void
00559 CDiscontiguousMegablastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00560 {
00561     arg_desc.SetCurrentGroup("Extension options");
00562     // FIXME: this can be applied to any program, but since it was only offered
00563     // in megablast, we're putting it here 
00564     arg_desc.AddOptionalKey(kArgMinRawGappedScore, "int_value",
00565                             "Minimum raw gapped score to keep an alignment "
00566                             "in the preliminary gapped and traceback stages",
00567                             CArgDescriptions::eInteger);
00568 
00569     arg_desc.SetCurrentGroup("Discontiguous MegaBLAST options");
00570 
00571     arg_desc.AddOptionalKey(kArgDMBTemplateType, "type", 
00572                  "Discontiguous MegaBLAST template type",
00573                  CArgDescriptions::eString);
00574     arg_desc.SetConstraint(kArgDMBTemplateType, &(*new CArgAllow_Strings, 
00575                                                   kTemplType_Coding,
00576                                                   kTemplType_Optimal,
00577                                                   kTemplType_CodingAndOptimal));
00578     arg_desc.SetDependency(kArgDMBTemplateType,
00579                            CArgDescriptions::eRequires,
00580                            kArgDMBTemplateLength);
00581 
00582     arg_desc.AddOptionalKey(kArgDMBTemplateLength, "int_value", 
00583                  "Discontiguous MegaBLAST template length",
00584                  CArgDescriptions::eInteger);
00585     set<int> allowed_values;
00586     allowed_values.insert(16);
00587     allowed_values.insert(18);
00588     allowed_values.insert(21);
00589     arg_desc.SetConstraint(kArgDMBTemplateLength, 
00590                            new CArgAllowIntegerSet(allowed_values));
00591     arg_desc.SetDependency(kArgDMBTemplateLength,
00592                            CArgDescriptions::eRequires,
00593                            kArgDMBTemplateType);
00594 
00595     arg_desc.SetCurrentGroup("");
00596 }
00597 
00598 void
00599 CDiscontiguousMegablastArgs::ExtractAlgorithmOptions(const CArgs& args,
00600                                                      CBlastOptions& options)
00601 {
00602     if (args[kArgMinRawGappedScore]) {
00603         options.SetCutoffScore(args[kArgMinRawGappedScore].AsInteger());
00604     }
00605 
00606     if (args[kArgDMBTemplateType]) {
00607         const string& type = args[kArgDMBTemplateType].AsString();
00608         EDiscWordType temp_type = eMBWordCoding;
00609 
00610         if (type == kTemplType_Coding) {
00611             temp_type = eMBWordCoding;
00612         } else if (type == kTemplType_Optimal) {
00613             temp_type = eMBWordOptimal;
00614         } else if (type == kTemplType_CodingAndOptimal) {
00615             temp_type = eMBWordTwoTemplates;
00616         } else {
00617             abort();
00618         }
00619         options.SetMBTemplateType(static_cast<unsigned char>(temp_type));
00620     }
00621 
00622     if (args[kArgDMBTemplateLength]) {
00623         unsigned char tlen = 
00624             static_cast<unsigned char>(args[kArgDMBTemplateLength].AsInteger());
00625         options.SetMBTemplateLength(tlen);
00626     }
00627 
00628     // FIXME: should the window size be adjusted if this is set?
00629 }
00630 
00631 void
00632 CCompositionBasedStatsArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00633 {
00634     arg_desc.SetCurrentGroup("General search options");
00635     // composition based statistics, keep in sync with ECompoAdjustModes
00636     // documentation in composition_constants.h
00637     arg_desc.AddDefaultKey(kArgCompBasedStats, "compo", 
00638                       "Use composition-based statistics for blastp / tblastn:\n"
00639                       "    D or d: default (equivalent to 2)\n"
00640                       "    0 or F or f: no composition-based statistics\n"
00641                       "    1: Composition-based statistics "
00642                                       "as in NAR 29:2994-3005, 2001\n"
00643                       "    2 or T or t : Composition-based score adjustment as in "
00644                                       "Bioinformatics 21:902-911,\n"
00645                       "    2005, conditioned on sequence properties\n"
00646                       "    3: Composition-based score adjustment as in "
00647                                       "Bioinformatics 21:902-911,\n"
00648                       "    2005, unconditionally\n"
00649                       "For programs other than tblastn, must either be "
00650                       "absent or be D, F or 0",
00651                       CArgDescriptions::eString, "2");
00652 
00653     arg_desc.SetCurrentGroup("Miscellaneous options");
00654     // Use Smith-Waterman algorithm in traceback stage
00655     // FIXME: available only for gapped blastp/tblastn, and with
00656     // composition-based statistics
00657     arg_desc.AddFlag(kArgUseSWTraceback, 
00658                      "Compute locally optimal Smith-Waterman alignments?",
00659                      true);
00660     arg_desc.SetCurrentGroup("");
00661 }
00662 
00663 /** 
00664  * @brief Auxiliary function to set the composition based statistics and smith
00665  * waterman options
00666  * 
00667  * @param opt BLAST options object [in|out]
00668  * @param comp_stat_string command line value for composition based statistics
00669  * [in]
00670  * @param smith_waterman_value command line value for determining the use of
00671  * the smith-waterman algorithm [in]
00672  * @param ungapped pointer to the value which determines whether the search
00673  * should be ungapped or not. It is NULL if ungapped searches are not
00674  * applicable
00675  */
00676 static void
00677 s_SetCompositionBasedStats(CBlastOptions& opt,
00678                            const string& comp_stat_string,
00679                            bool smith_waterman_value,
00680                            bool* ungapped = NULL)
00681 {
00682     const EProgram program = opt.GetProgram();
00683     if (program == eBlastp || program == eTblastn || 
00684         program == ePSIBlast || program == ePSITblastn) {
00685 
00686         ECompoAdjustModes compo_mode = eNoCompositionBasedStats;
00687     
00688         switch (comp_stat_string[0]) {
00689             case '0': case 'F': case 'f':
00690                 compo_mode = eNoCompositionBasedStats;
00691                 break;
00692             case '1':
00693                 compo_mode = eCompositionBasedStats;
00694                 break;
00695             case 'D': case 'd':
00696             case '2': case 'T': case 't':
00697                 compo_mode = eCompositionMatrixAdjust;
00698                 break;
00699             case '3':
00700                 compo_mode = eCompoForceFullMatrixAdjust;
00701                 break;
00702         } 
00703 
00704         if(program == ePSITblastn) {
00705             compo_mode = eNoCompositionBasedStats;
00706         }
00707 
00708         if (ungapped && *ungapped && compo_mode != eNoCompositionBasedStats) {
00709             NCBI_THROW(CInputException, eInvalidInput, 
00710                        "Composition-adjusted searched are not supported with "
00711                        "an ungapped search, please add -comp_based_stats F or "
00712                        "do a gapped search");
00713         }
00714 
00715         opt.SetCompositionBasedStats(compo_mode);
00716         if (program == eBlastp &&
00717             compo_mode != eNoCompositionBasedStats &&
00718             tolower(comp_stat_string[1]) == 'u') {
00719             opt.SetUnifiedP(1);
00720         }
00721         opt.SetSmithWatermanMode(smith_waterman_value);
00722     }
00723 }
00724 
00725 void
00726 CCompositionBasedStatsArgs::ExtractAlgorithmOptions(const CArgs& args,
00727                                                     CBlastOptions& opt)
00728 {
00729     if (args[kArgCompBasedStats]) {
00730         auto_ptr<bool> ungapped(args.Exist(kArgUngapped) 
00731             ? new bool(args[kArgUngapped]) : 0);
00732         s_SetCompositionBasedStats(opt, 
00733                                    args[kArgCompBasedStats].AsString(),
00734                                    args[kArgUseSWTraceback],
00735                                    ungapped.get());
00736     }
00737 
00738 }
00739 
00740 void
00741 CGappedArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00742 {
00743     // perform gapped search
00744 #if 0
00745     arg_desc.AddOptionalKey(ARG_GAPPED, "gapped", 
00746                  "Perform gapped alignment (default T, but "
00747                  "not available for tblastx)",
00748                  CArgDescriptions::eBoolean,
00749                  CArgDescriptions::fOptionalSeparator);
00750     arg_desc.AddAlias("-gapped", ARG_GAPPED);
00751 #endif
00752     arg_desc.SetCurrentGroup("Extension options");
00753     arg_desc.AddFlag(kArgUngapped, "Perform ungapped alignment only?", true);
00754     arg_desc.SetCurrentGroup("");
00755 }
00756 
00757 void
00758 CGappedArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& options)
00759 {
00760 #if 0
00761     if (args[ARG_GAPPED] && options.GetProgram() != eTblastx) {
00762         options.SetGappedMode(args[ARG_GAPPED].AsBoolean());
00763     }
00764 #endif
00765     options.SetGappedMode( !args[kArgUngapped] );
00766 }
00767 
00768 void
00769 CLargestIntronSizeArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00770 {
00771     arg_desc.SetCurrentGroup("General search options");
00772     // largest intron length
00773     arg_desc.AddDefaultKey(kArgMaxIntronLength, "length", 
00774                     "Length of the largest intron allowed in a translated "
00775                     "nucleotide sequence when linking multiple distinct "
00776                     "alignments (a negative value disables linking)",
00777                     CArgDescriptions::eInteger,
00778                     NStr::IntToString(kDfltArgMaxIntronLength));
00779     arg_desc.SetCurrentGroup("");
00780 }
00781 
00782 void
00783 CLargestIntronSizeArgs::ExtractAlgorithmOptions(const CArgs& args,
00784                                                 CBlastOptions& opt)
00785 {
00786     if ( !args[kArgMaxIntronLength] ) {
00787         return;
00788     }
00789 
00790     if (args[kArgMaxIntronLength].AsInteger() < 0) {
00791         opt.SetSumStatisticsMode(false);
00792     } else {
00793         opt.SetSumStatisticsMode();
00794         opt.SetLongestIntronLength(args[kArgMaxIntronLength].AsInteger());
00795     }
00796 }
00797 
00798 void
00799 CFrameShiftArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00800 {
00801     arg_desc.SetCurrentGroup("General search options");
00802     // applicable in blastx/tblastn, off by default
00803     arg_desc.AddOptionalKey(kArgFrameShiftPenalty, "frameshift",
00804                             "Frame shift penalty (for use with out-of-frame "
00805                             "gapped alignment in blastx or tblastn, default "
00806                             "ignored)",
00807                             CArgDescriptions::eInteger);
00808     arg_desc.SetConstraint(kArgFrameShiftPenalty, 
00809                            new CArgAllowValuesGreaterThanOrEqual(1));
00810     arg_desc.SetCurrentGroup("");
00811 }
00812 
00813 void
00814 CFrameShiftArgs::ExtractAlgorithmOptions(const CArgs& args,
00815                                          CBlastOptions& opt)
00816 {
00817     if (args[kArgFrameShiftPenalty]) {
00818         opt.SetOutOfFrameMode();
00819         opt.SetFrameShiftPenalty(args[kArgFrameShiftPenalty].AsInteger());
00820     }
00821 }
00822 
00823 void
00824 CGeneticCodeArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00825 {
00826     if (m_Target == eQuery) {
00827         arg_desc.SetCurrentGroup("Input query options");
00828         // query genetic code
00829         arg_desc.AddDefaultKey(kArgQueryGeneticCode, "int_value", 
00830                                "Genetic code to use to translate query",
00831                                CArgDescriptions::eInteger,
00832                                NStr::IntToString(BLAST_GENETIC_CODE));
00833     } else {
00834         arg_desc.SetCurrentGroup("General search options");
00835         // DB genetic code
00836         arg_desc.AddDefaultKey(kArgDbGeneticCode, "int_value", 
00837                                "Genetic code to use to translate "
00838                                "database/subjects",
00839                                CArgDescriptions::eInteger,
00840                                NStr::IntToString(BLAST_GENETIC_CODE));
00841     }
00842     arg_desc.SetCurrentGroup("");
00843 }
00844 
00845 void
00846 CGeneticCodeArgs::ExtractAlgorithmOptions(const CArgs& args,
00847                                           CBlastOptions& opt)
00848 {
00849     const EProgram program = opt.GetProgram();
00850 
00851     if (m_Target == eQuery && args[kArgQueryGeneticCode]) {
00852         opt.SetQueryGeneticCode(args[kArgQueryGeneticCode].AsInteger());
00853     }
00854   
00855     if (m_Target == eDatabase && args[kArgDbGeneticCode] &&
00856         (program == eTblastn || program == eTblastx) ) {
00857         opt.SetDbGeneticCode(args[kArgDbGeneticCode].AsInteger());
00858     }
00859 }
00860 
00861 void
00862 CGapTriggerArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00863 {
00864     arg_desc.SetCurrentGroup("Extension options");
00865 
00866     const double default_value = m_QueryIsProtein
00867         ? BLAST_GAP_TRIGGER_PROT : BLAST_GAP_TRIGGER_NUCL;
00868     arg_desc.AddDefaultKey(kArgGapTrigger, "float_value", 
00869                            "Number of bits to trigger gapping",
00870                            CArgDescriptions::eDouble,
00871                            NStr::DoubleToString(default_value));
00872     arg_desc.SetCurrentGroup("");
00873 }
00874 
00875 void
00876 CGapTriggerArgs::ExtractAlgorithmOptions(const CArgs& args,
00877                                          CBlastOptions& opt)
00878 {
00879     if (args[kArgGapTrigger]) {
00880         opt.SetGapTrigger(args[kArgGapTrigger].AsDouble());
00881     }
00882 }
00883 
00884 void
00885 CPssmEngineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00886 {
00887     arg_desc.SetCurrentGroup("PSSM engine options");
00888 
00889     // Pseudo count
00890     arg_desc.AddDefaultKey(kArgPSIPseudocount, "pseudocount",
00891                            "Pseudo-count value used when constructing PSSM",
00892                            CArgDescriptions::eInteger,
00893                            NStr::IntToString(PSI_PSEUDO_COUNT_CONST));
00894 
00895     // Evalue inclusion threshold
00896     arg_desc.AddDefaultKey(kArgPSIInclusionEThreshold, "ethresh", 
00897                    "E-value inclusion threshold for pairwise alignments", 
00898                    CArgDescriptions::eDouble,
00899                    NStr::DoubleToString(PSI_INCLUSION_ETHRESH));
00900 
00901     arg_desc.SetCurrentGroup("");
00902 }
00903 
00904 void
00905 CPssmEngineArgs::ExtractAlgorithmOptions(const CArgs& args,
00906                                          CBlastOptions& opt)
00907 {
00908     if (args[kArgPSIPseudocount]) {
00909         opt.SetPseudoCount(args[kArgPSIPseudocount].AsInteger());
00910     }
00911 
00912     if (args[kArgPSIInclusionEThreshold]) {
00913         opt.SetInclusionThreshold(args[kArgPSIInclusionEThreshold].AsDouble());
00914     }
00915 }
00916 
00917 void
00918 CPsiBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
00919 {
00920 
00921     if (m_DbTarget == eNucleotideDb) {
00922         arg_desc.SetCurrentGroup("PSI-TBLASTN options");
00923 
00924         // PSI-tblastn checkpoint
00925         arg_desc.AddOptionalKey(kArgPSIInputChkPntFile, "psi_chkpt_file", 
00926                                 "PSI-TBLASTN checkpoint file",
00927                                 CArgDescriptions::eInputFile);
00928         arg_desc.SetDependency(kArgPSIInputChkPntFile,
00929                                 CArgDescriptions::eExcludes,
00930                                 kArgRemote);
00931     } else {
00932         arg_desc.SetCurrentGroup("PSI-BLAST options");
00933 
00934         // Number of iterations
00935         arg_desc.AddDefaultKey(kArgPSINumIterations, "int_value",
00936                                "Number of iterations to perform",
00937                                CArgDescriptions::eInteger,
00938                                NStr::IntToString(1));
00939         arg_desc.SetConstraint(kArgPSINumIterations, 
00940                                new CArgAllowValuesGreaterThanOrEqual(1));
00941         arg_desc.SetDependency(kArgPSINumIterations,
00942                                CArgDescriptions::eExcludes,
00943                                kArgRemote);
00944         // checkpoint file
00945         arg_desc.AddOptionalKey(kArgPSIOutputChkPntFile, "checkpoint_file",
00946 
00947                                 "File name to store checkpoint file",
00948                                 CArgDescriptions::eOutputFile);
00949         // ASCII matrix file
00950         arg_desc.AddOptionalKey(kArgAsciiPssmOutputFile, "ascii_mtx_file",
00951                                 "File name to store ASCII version of PSSM",
00952                                 CArgDescriptions::eOutputFile);
00953         // MSA restart file
00954         arg_desc.AddOptionalKey(kArgMSAInputFile, "align_restart",
00955                                 "File name of multiple sequence alignment to "
00956                                 "restart PSI-BLAST",
00957                                 CArgDescriptions::eInputFile);
00958         arg_desc.SetDependency(kArgMSAInputFile,
00959                                CArgDescriptions::eExcludes,
00960                                kArgPSIInputChkPntFile);
00961         arg_desc.SetDependency(kArgMSAInputFile,
00962                                CArgDescriptions::eExcludes,
00963                                kArgQuery);
00964         // PSI-BLAST checkpoint
00965         arg_desc.AddOptionalKey(kArgPSIInputChkPntFile, "psi_chkpt_file", 
00966                                 "PSI-BLAST checkpoint file",
00967                                 CArgDescriptions::eInputFile);
00968     }
00969 
00970     arg_desc.SetDependency(kArgPSIInputChkPntFile,
00971                            CArgDescriptions::eExcludes,
00972                            kArgQuery);
00973     arg_desc.SetCurrentGroup("");
00974 }
00975 
00976 /// Auxiliary function to create a PSSM from a multiple sequence alignment file
00977 static CRef<CPssmWithParameters>
00978 s_CreatePssmFromMsa(CNcbiIstream& input_stream, CBlastOptions& opt,
00979                     bool save_ascii_pssm)
00980 {
00981     // FIXME get these from CBlastOptions
00982     CPSIBlastOptions psiblast_opts;
00983     PSIBlastOptionsNew(&psiblast_opts); 
00984 
00985     CPSIDiagnosticsRequest diags(PSIDiagnosticsRequestNewEx(save_ascii_pssm));
00986     // FIXME: if query is provided, pass it in in ncbistdaa + query length!
00987     CPsiBlastInputClustalW pssm_input(input_stream, *psiblast_opts,
00988                                       opt.GetMatrixName(), diags);
00989     CPssmEngine pssm_engine(&pssm_input);
00990     return pssm_engine.Run();
00991 }
00992 
00993 void
00994 CPsiBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
00995                                        CBlastOptions& opt)
00996 {
00997     if (m_DbTarget == eProteinDb) {
00998         if (args[kArgPSINumIterations]) {
00999             m_NumIterations = args[kArgPSINumIterations].AsInteger();
01000         }
01001         if (args.Exist(kArgPSIOutputChkPntFile) &&
01002             args[kArgPSIOutputChkPntFile]) {
01003             m_CheckPointOutput.Reset
01004                 (new CAutoOutputFileReset
01005                  (args[kArgPSIOutputChkPntFile].AsString())); 
01006         }
01007         const bool kSaveAsciiPssm = args[kArgAsciiPssmOutputFile];
01008         if (kSaveAsciiPssm) {
01009             m_AsciiMatrixOutput.Reset
01010                 (new CAutoOutputFileReset
01011                  (args[kArgAsciiPssmOutputFile].AsString()));
01012         }
01013         if (args[kArgMSAInputFile]) {
01014             CNcbiIstream& in = args[kArgMSAInputFile].AsInputFile();
01015             m_Pssm = s_CreatePssmFromMsa(in, opt, kSaveAsciiPssm);
01016         }
01017     }
01018 
01019     if (args.Exist(kArgPSIInputChkPntFile) && args[kArgPSIInputChkPntFile]) {
01020         CNcbiIstream& in = args[kArgPSIInputChkPntFile].AsInputFile();
01021         _ASSERT(m_Pssm.Empty());
01022         m_Pssm.Reset(new CPssmWithParameters);
01023         try {
01024             switch (CFormatGuess().Format(in)) {
01025             case CFormatGuess::eBinaryASN:
01026                 in >> MSerial_AsnBinary >> *m_Pssm;
01027                 break;
01028             case CFormatGuess::eTextASN:
01029                 in >> MSerial_AsnText >> *m_Pssm;
01030                 break;
01031             case CFormatGuess::eXml:
01032                 in >> MSerial_Xml >> *m_Pssm;
01033                 break;
01034             default:
01035                 NCBI_THROW(CInputException, eInvalidInput, 
01036                            "Unsupported format for PSSM");
01037             }
01038         } catch (const CSerialException&) {
01039             string msg("Unrecognized format for PSSM in ");
01040             msg += args[kArgPSIInputChkPntFile].AsString() + " (must be ";
01041             msg += "PssmWithParameters)";
01042             NCBI_THROW(CInputException, eInvalidInput, msg);
01043         }
01044         _ASSERT(m_Pssm.NotEmpty());
01045     }
01046 }
01047 
01048 void
01049 CPhiBlastArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01050 {
01051     arg_desc.SetCurrentGroup("PHI-BLAST options");
01052 
01053     arg_desc.AddOptionalKey(kArgPHIPatternFile, "file",
01054                             "File name containing pattern to search",
01055                             CArgDescriptions::eInputFile);
01056     arg_desc.SetDependency(kArgPHIPatternFile,
01057                            CArgDescriptions::eExcludes,
01058                            kArgPSIInputChkPntFile);
01059 
01060     arg_desc.SetCurrentGroup("");
01061 }
01062 
01063 void
01064 CPhiBlastArgs::ExtractAlgorithmOptions(const CArgs& args,
01065                                        CBlastOptions& opt)
01066 {
01067     if (args.Exist(kArgPHIPatternFile) && args[kArgPHIPatternFile]) {
01068         CNcbiIstream& in = args[kArgPHIPatternFile].AsInputFile();
01069         in.clear();
01070         in.seekg(0);
01071         char buffer[4096];
01072         string line;
01073         string pattern;
01074         string name;
01075         while (in.getline(buffer, 4096)) {
01076            line = buffer;
01077            string ltype = line.substr(0, 2);
01078            if (ltype == "ID") 
01079              name = line.substr(5);
01080            else if (ltype == "PA")
01081              pattern = line.substr(5);
01082         }
01083         if (!pattern.empty())
01084             opt.SetPHIPattern(pattern.c_str(), 
01085                (Blast_QueryIsNucleotide(opt.GetProgramType())
01086                ? true : false));
01087         else
01088             NCBI_THROW(CInputException, eInvalidInput, 
01089                        "PHI pattern not read");
01090     }
01091 }
01092 
01093 void
01094 CQueryOptionsArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01095 {
01096 
01097     arg_desc.SetCurrentGroup("Query filtering options");
01098     // lowercase masking
01099     arg_desc.AddFlag(kArgUseLCaseMasking, 
01100          "Use lower case filtering in query and subject sequence(s)?", true);
01101 
01102     arg_desc.SetCurrentGroup("Input query options");
01103     // query location
01104     arg_desc.AddOptionalKey(kArgQueryLocation, "range", 
01105                             "Location on the query sequence in 1-based offsets "
01106                             "(Format: start-stop)",
01107                             CArgDescriptions::eString);
01108 
01109     if ( !m_QueryCannotBeNucl ) {
01110         // search strands
01111         arg_desc.AddDefaultKey(kArgStrand, "strand", 
01112                          "Query strand(s) to search against database/subject",
01113                          CArgDescriptions::eString, kDfltArgStrand);
01114         arg_desc.SetConstraint(kArgStrand, &(*new CArgAllow_Strings, 
01115                                              kDfltArgStrand, "plus", "minus"));
01116     }
01117 
01118     arg_desc.SetCurrentGroup("Miscellaneous options");
01119     arg_desc.AddFlag(kArgParseDeflines,
01120                  "Should the query and subject defline(s) be parsed?", true);
01121 
01122     arg_desc.SetCurrentGroup("");
01123 }
01124 
01125 void
01126 CQueryOptionsArgs::ExtractAlgorithmOptions(const CArgs& args, 
01127                                            CBlastOptions& opt)
01128 {
01129     // Get the strand
01130     {
01131         m_Strand = eNa_strand_unknown;
01132 
01133         if (!Blast_QueryIsProtein(opt.GetProgramType()) && args[kArgStrand]) {
01134             const string& kStrand = args[kArgStrand].AsString();
01135             if (kStrand == "both") {
01136                 m_Strand = eNa_strand_both;
01137             } else if (kStrand == "plus") {
01138                 m_Strand = eNa_strand_plus;
01139             } else if (kStrand == "minus") {
01140                 m_Strand = eNa_strand_minus;
01141             } else {
01142                 abort();
01143             }
01144         }
01145     }
01146 
01147     // set the sequence range
01148     if (args[kArgQueryLocation]) {
01149         m_Range = ParseSequenceRange(args[kArgQueryLocation].AsString(), 
01150                                      "Invalid specification of query location");
01151     }
01152 
01153     m_UseLCaseMask = static_cast<bool>(args[kArgUseLCaseMasking]);
01154     m_ParseDeflines = static_cast<bool>(args[kArgParseDeflines]);
01155 }
01156 
01157 CBlastDatabaseArgs::CBlastDatabaseArgs(bool request_mol_type /* = false */,
01158                                        bool is_rpsblast /* = false */)
01159     : m_RequestMoleculeType(request_mol_type), m_IsRpsBlast(is_rpsblast),
01160     m_IsProtein(true), m_SupportsDatabaseMasking(false)
01161 {}
01162 
01163 bool
01164 CBlastDatabaseArgs::HasBeenSet(const CArgs& args)
01165 {
01166     if ( (args.Exist(kArgDb) && args[kArgDb].HasValue()) ||
01167          (args.Exist(kArgSubject) && args[kArgSubject].HasValue()) ) {
01168         return true;
01169     }
01170     return false;
01171 }
01172 
01173 void
01174 CBlastDatabaseArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01175 {
01176     arg_desc.SetCurrentGroup("General search options");
01177     // database filename
01178     arg_desc.AddOptionalKey(kArgDb, "database_name", "BLAST database name", 
01179                             CArgDescriptions::eString);
01180     arg_desc.SetCurrentGroup("");
01181 
01182     if (m_RequestMoleculeType) {
01183         arg_desc.AddKey(kArgDbType, "database_type", 
01184                         "BLAST database molecule type",
01185                         CArgDescriptions::eString);
01186         arg_desc.SetConstraint(kArgDbType, 
01187                                &(*new CArgAllow_Strings, "prot", "nucl"));
01188     }
01189 
01190     vector<string> database_args;
01191     database_args.push_back(kArgDb);
01192     database_args.push_back(kArgGiList);
01193     database_args.push_back(kArgNegativeGiList);
01194     if (m_SupportsDatabaseMasking) {
01195         database_args.push_back(kArgDbSoftMask);
01196     }
01197 
01198     // DB size
01199     arg_desc.SetCurrentGroup("Statistical options");
01200     arg_desc.AddOptionalKey(kArgDbSize, "num_letters", 
01201                             "Effective length of the database ",
01202                             CArgDescriptions::eInt8);
01203 
01204     arg_desc.SetCurrentGroup("Restrict search or results");
01205     // GI list
01206     arg_desc.AddOptionalKey(kArgGiList, "filename", 
01207                             "Restrict search of database to list of GI's",
01208                             CArgDescriptions::eString);
01209     // Negative GI list
01210     arg_desc.AddOptionalKey(kArgNegativeGiList, "filename", 
01211         "Restrict search of database to everything except the listed GIs",
01212         CArgDescriptions::eString);
01213     arg_desc.SetDependency(kArgGiList, CArgDescriptions::eExcludes, 
01214                            kArgNegativeGiList);
01215     // Entrez Query
01216     arg_desc.AddOptionalKey(kArgEntrezQuery, "entrez_query", 
01217                             "Restrict search with the given Entrez query",
01218                             CArgDescriptions::eString);
01219 
01220     // For now, disable pairing -remote with either -gilist or
01221     // -negative_gilist as this is not implemented in the BLAST server
01222     arg_desc.SetDependency(kArgGiList, CArgDescriptions::eExcludes, 
01223                            kArgRemote);
01224     arg_desc.SetDependency(kArgNegativeGiList, CArgDescriptions::eExcludes, 
01225                            kArgRemote);
01226 
01227     // Entrez query currently requires the -remote option
01228     arg_desc.SetDependency(kArgEntrezQuery, CArgDescriptions::eRequires, 
01229                            kArgRemote);
01230 
01231 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \
01232      (!defined(NCBI_COMPILER_MIPSPRO)) )
01233     // Masking of database
01234     if (m_SupportsDatabaseMasking) {
01235         arg_desc.AddOptionalKey(kArgDbSoftMask, 
01236                 "filtering_algorithm",
01237                 "Filtering algorithm ID to apply to the BLAST database as soft "
01238                 "masking",
01239                 CArgDescriptions::eInteger);
01240     }
01241 #endif
01242 
01243     // There is no RPS-BLAST 2 sequences
01244     if ( !m_IsRpsBlast ) {
01245         arg_desc.SetCurrentGroup("BLAST-2-Sequences options");
01246         // subject sequence input (for bl2seq)
01247         arg_desc.AddOptionalKey(kArgSubject, "subject_input_file",
01248                                 "Subject sequence(s) to search",
01249                                 CArgDescriptions::eInputFile);
01250         ITERATE(vector<string>, dbarg, database_args) {
01251             arg_desc.SetDependency(kArgSubject, CArgDescriptions::eExcludes, 
01252                                    *dbarg);
01253         }
01254 
01255         // subject location
01256         arg_desc.AddOptionalKey(kArgSubjectLocation, "range", 
01257                         "Location on the subject sequence in 1-based offsets "
01258                         "(Format: start-stop)",
01259                         CArgDescriptions::eString);
01260         ITERATE(vector<string>, dbarg, database_args) {
01261             arg_desc.SetDependency(kArgSubjectLocation, 
01262                                    CArgDescriptions::eExcludes, 
01263                                    *dbarg);
01264         }
01265         // Because Blast4-subject does not support Seq-locs, specifying a
01266         // subject range does not work for remote searches
01267         arg_desc.SetDependency(kArgSubjectLocation, 
01268                                CArgDescriptions::eExcludes, kArgRemote);
01269     }
01270 
01271     arg_desc.SetCurrentGroup("");
01272 }
01273 
01274 /** 
01275  * @brief Process gi lists command line arguments
01276  * 
01277  * @param args CArgs object representing command line arguments read [in]
01278  * @param argument_name name of the command line option [in]
01279  * @param filename the value of the option [out]
01280  * @param gis the contents of the file, if a remote BLAST search is needed (if
01281  * not, this will be empty upon function exit [out]
01282  */
01283 static void
01284 s_ProcessGiListArgument(const CArgs& args, 
01285                         const string& argument_name, 
01286                         string& filename, 
01287                         vector<int>& gis)
01288 {
01289     gis.clear();
01290     if (args.Exist(argument_name) && args[argument_name]) {
01291         filename.assign(args[argument_name].AsString());
01292         /// This is only needed if the gi list is to be submitted remotely as
01293         /// it needs to be sent over the network OR if we need to export the
01294         /// object as a search strategy
01295         if ((args.Exist(kArgRemote) && args[kArgRemote] && 
01296             CFile(filename).Exists()) ||
01297             (args[kArgOutputSearchStrategy].HasValue())) {
01298             SeqDB_ReadGiList(filename, gis);
01299         }
01300     }
01301 }
01302 
01303 void
01304 CBlastDatabaseArgs::ExtractAlgorithmOptions(const CArgs& args,
01305                                             CBlastOptions& opts)
01306 {
01307     EMoleculeType mol_type = Blast_SubjectIsNucleotide(opts.GetProgramType())
01308         ? CSearchDatabase::eBlastDbIsNucleotide
01309         : CSearchDatabase::eBlastDbIsProtein;
01310     m_IsProtein = (mol_type == CSearchDatabase::eBlastDbIsProtein);
01311     
01312     if (args.Exist(kArgDb) && args[kArgDb]) {
01313 
01314         m_SearchDb.Reset(new CSearchDatabase(args[kArgDb].AsString(), 
01315                                              mol_type));
01316 
01317         vector<int> gis;
01318         s_ProcessGiListArgument(args, kArgGiList, m_GiListFileName, gis);
01319         if ( !gis.empty() ) 
01320             m_SearchDb->SetGiListLimitation(gis);
01321 
01322         s_ProcessGiListArgument(args, kArgNegativeGiList,
01323                                 m_NegativeGiListFileName, gis);
01324         if ( !gis.empty() ) 
01325             m_SearchDb->SetNegativeGiListLimitation(gis);
01326 
01327         if (args.Exist(kArgEntrezQuery) && args[kArgEntrezQuery])
01328             m_SearchDb->SetEntrezQueryLimitation(args[kArgEntrezQuery].AsString());
01329 
01330 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION  > 550)) && \
01331      (!defined(NCBI_COMPILER_MIPSPRO)) )
01332         if (args.Exist(kArgDbSoftMask) && args[kArgDbSoftMask]) {
01333             m_SearchDb->SetFilteringAlgorithm(args[kArgDbSoftMask].AsInteger());
01334         }
01335 #endif
01336     } else if (args.Exist(kArgSubject) && args[kArgSubject]) {
01337 
01338         CNcbiIstream& subj_input_stream = args[kArgSubject].AsInputFile();
01339         TSeqRange subj_range;
01340         if (args.Exist(kArgSubjectLocation) && args[kArgSubjectLocation]) {
01341             subj_range = 
01342                 ParseSequenceRange(args[kArgSubjectLocation].AsString(), 
01343                             "Invalid specification of subject location");
01344         }
01345 
01346         const bool parse_deflines = args.Exist(kArgParseDeflines) 
01347             ? args[kArgParseDeflines]
01348             : kDfltArgParseDeflines;
01349         const bool use_lcase_masks = args.Exist(kArgUseLCaseMasking)
01350             ? args[kArgUseLCaseMasking]
01351             : kDfltArgUseLCaseMasking;
01352         CRef<blast::CBlastQueryVector> subjects;
01353         m_Scope = ReadSequencesToBlast(subj_input_stream, IsProtein(),
01354                                        subj_range, parse_deflines,
01355                                        use_lcase_masks, subjects);
01356         m_Subjects.Reset(new blast::CObjMgr_QueryFactory(*subjects));
01357 
01358     } else {
01359         NCBI_THROW(CInputException, eInvalidInput,
01360            "Either a BLAST database or subject sequence(s) must be specified");
01361     }
01362 
01363     if (opts.GetEffectiveSearchSpace() != 0) {
01364         // no need to set any other options, as this trumps them
01365         return;
01366     }
01367 
01368     if (args[kArgDbSize]) {
01369         opts.SetDbLength(args[kArgDbSize].AsInt8());
01370     }
01371 
01372 }
01373 
01374 void
01375 CFormattingArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01376 {
01377     arg_desc.SetCurrentGroup("Formatting options");
01378 
01379     const string kOutputFormatDescription = string(
01380     "alignment view options:\n"
01381     "  0 = pairwise,\n"
01382     "  1 = query-anchored showing identities,\n"
01383     "  2 = query-anchored no identities,\n"
01384     "  3 = flat query-anchored, show identities,\n"
01385     "  4 = flat query-anchored, no identities,\n"
01386     "  5 = XML Blast output,\n"
01387     "  6 = tabular,\n"
01388     "  7 = tabular with comment lines,\n"
01389     "  8 = Text ASN.1,\n"
01390     "  9 = Binary ASN.1\n"
01391     " 10 = Comma-separated values\n\n"
01392     "Options 6, 7, and 10 can be additionally configured to produce\n"
01393     "a custom format specified by space delimited format specifiers.\n"
01394     "The supported format specifiers are:\n") +
01395         DescribeTabularOutputFormatSpecifiers() + 
01396         string("\n");
01397 
01398     // alignment view
01399     arg_desc.AddDefaultKey(kArgOutputFormat, "format", kOutputFormatDescription,
01400                            CArgDescriptions::eString, 
01401                            NStr::IntToString(kDfltArgOutputFormat));
01402 
01403     // show GIs in deflines
01404     arg_desc.AddFlag(kArgShowGIs, "Show NCBI GIs in deflines?", true);
01405 
01406     // number of one-line descriptions to display
01407     arg_desc.AddDefaultKey(kArgNumDescriptions, "int_value",
01408                  "Number of database sequences to show one-line "
01409                  "descriptions for",
01410                  CArgDescriptions::eInteger,
01411                  NStr::IntToString(kDfltArgNumDescriptions));
01412     arg_desc.SetConstraint(kArgNumDescriptions, 
01413                            new CArgAllowValuesGreaterThanOrEqual(0));
01414 
01415     // number of alignments per DB sequence
01416     arg_desc.AddDefaultKey(kArgNumAlignments, "int_value",
01417                  "Number of database sequences to show alignments for",
01418                  CArgDescriptions::eInteger, 
01419                  NStr::IntToString(kDfltArgNumAlignments));
01420     arg_desc.SetConstraint(kArgNumAlignments, 
01421                            new CArgAllowValuesGreaterThanOrEqual(0));
01422 
01423     // Produce HTML?
01424     arg_desc.AddFlag(kArgProduceHtml, "Produce HTML output?", true);
01425 
01426     /// Hit list size, listed here for convenience only
01427     arg_desc.SetCurrentGroup("Restrict search or results");
01428     arg_desc.AddOptionalKey(kArgMaxTargetSequences, "num_sequences",
01429                             "Maximum number of aligned sequences to keep",
01430                             CArgDescriptions::eInteger);
01431     arg_desc.SetConstraint(kArgMaxTargetSequences,
01432                            new CArgAllowValuesGreaterThanOrEqual(1));
01433 
01434     arg_desc.SetCurrentGroup("");
01435 }
01436 
01437 void
01438 CFormattingArgs::ExtractAlgorithmOptions(const CArgs& args,
01439                                          CBlastOptions& opt)
01440 {
01441     if (args[kArgOutputFormat]) {
01442         string fmt_choice = 
01443             NStr::TruncateSpaces(args[kArgOutputFormat].AsString());
01444         string::size_type pos;
01445         if ( (pos = fmt_choice.find_first_of(' ')) != string::npos) {
01446             m_CustomOutputFormatSpec.assign(fmt_choice, pos+1,
01447                                             fmt_choice.size()-(pos+1));
01448             fmt_choice.erase(pos);
01449         }
01450         int val = 0;
01451         try { val =NStr::StringToInt(fmt_choice); }
01452         catch (const CStringException&) {   // probably a conversion error
01453             CNcbiOstrstream os;
01454             os << "'" << fmt_choice << "' is not a valid output format";
01455             string msg = CNcbiOstrstreamToString(os);
01456             NCBI_THROW(CInputException, eInvalidInput, msg);
01457         }
01458         if (val < 0 || val >= static_cast<int>(eEndValue)) {
01459             string msg("Formatting choice is out of range");
01460             throw std::out_of_range(msg);
01461         }
01462         m_OutputFormat = static_cast<EOutputFormat>(val);
01463         if ( !(m_OutputFormat == eTabular ||
01464                m_OutputFormat == eTabularWithComments ||
01465                m_OutputFormat == eCommaSeparatedValues) ) {
01466                m_CustomOutputFormatSpec.clear();
01467         }
01468     }
01469 
01470     m_ShowGis = static_cast<bool>(args[kArgShowGIs]);
01471 
01472     if (args[kArgNumDescriptions]) {
01473         m_NumDescriptions = args[kArgNumDescriptions].AsInteger();
01474     } 
01475 
01476     if (args[kArgNumAlignments]) {
01477         m_NumAlignments = args[kArgNumAlignments].AsInteger();
01478     }
01479 
01480     TSeqPos hitlist_size = 0;
01481     if (args[kArgMaxTargetSequences]) {
01482         hitlist_size = args[kArgMaxTargetSequences].AsInteger();
01483         if (hitlist_size > 0 && m_OutputFormat == ePairwise) {
01484             /* Only non-default values will be overriden */
01485             string warnings = CalculateFormattingParams(hitlist_size,
01486                       m_NumDescriptions != kDfltArgNumDescriptions 
01487                       ? &m_NumDescriptions : 0,
01488                       m_NumAlignments != kDfltArgNumAlignments 
01489                       ? &m_NumAlignments : 0);
01490             if ( !warnings.empty() ) {
01491                 ERR_POST(Warning << warnings);
01492             }
01493         }
01494     }
01495 
01496     if (m_NumDescriptions == 0 && m_NumAlignments == 0 && hitlist_size == 0) {
01497         string msg("Either -");
01498         msg += kArgMaxTargetSequences + ", -";
01499         msg += kArgNumDescriptions + ", or -" + kArgNumAlignments + " must ";
01500         msg += "be non-zero";
01501         NCBI_THROW(CInputException, eInvalidInput, msg);
01502     }
01503     else if (hitlist_size != 0) {
01504         opt.SetHitlistSize(hitlist_size);
01505     } else {
01506         opt.SetHitlistSize(MAX(m_NumDescriptions, m_NumAlignments));
01507     }
01508 
01509     m_Html = static_cast<bool>(args[kArgProduceHtml]);
01510 }
01511 
01512 void
01513 CMTArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01514 {
01515     const int kMinValue = static_cast<int>(CThreadable::kMinNumThreads);
01516 
01517     // number of threads
01518     arg_desc.SetCurrentGroup("Miscellaneous options");
01519     arg_desc.AddDefaultKey(kArgNumThreads, "int_value",
01520                            "Number of threads (CPUs) to use in the BLAST search",
01521                            CArgDescriptions::eInteger, 
01522                            NStr::IntToString(kMinValue));
01523     arg_desc.SetConstraint(kArgNumThreads, 
01524                            new CArgAllowValuesGreaterThanOrEqual(kMinValue));
01525     arg_desc.SetCurrentGroup("");
01526 }
01527 
01528 void
01529 CMTArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
01530 {
01531     if (args.Exist(kArgNumThreads) &&
01532         args[kArgNumThreads].HasValue()) {  // could be cancelled by the exclusion in CRemoteArgs
01533         m_NumThreads = args[kArgNumThreads].AsInteger();
01534     }
01535 }
01536 
01537 void
01538 CRemoteArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01539 {
01540     arg_desc.SetCurrentGroup("Miscellaneous options");
01541     arg_desc.AddFlag(kArgRemote, "Execute search remotely?", true);
01542     arg_desc.SetDependency(kArgRemote,
01543                            CArgDescriptions::eExcludes,
01544                            kArgNumThreads);
01545 
01546     arg_desc.SetCurrentGroup("");
01547 }
01548 
01549 void
01550 CRemoteArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
01551 {
01552     if (args.Exist(kArgRemote)) {
01553         m_IsRemote = static_cast<bool>(args[kArgRemote]);
01554     }
01555 }
01556 
01557 void
01558 CDebugArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01559 {
01560 #if _DEBUG
01561     arg_desc.SetCurrentGroup("Miscellaneous options");
01562     arg_desc.AddFlag("verbose", "Produce verbose output (show BLAST options)",
01563                      true);
01564     arg_desc.AddFlag("remote_verbose", 
01565                      "Produce verbose output for remote searches", true);
01566     arg_desc.AddFlag("use_test_remote_service", 
01567                      "Send remote requests to test servers", true);
01568     arg_desc.SetCurrentGroup("");
01569 #endif /* DEBUG */
01570 }
01571 
01572 void
01573 CDebugArgs::ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */)
01574 {
01575 #if _DEBUG
01576     m_DebugOutput = static_cast<bool>(args["verbose"]);
01577     m_RmtDebugOutput = static_cast<bool>(args["remote_verbose"]);
01578     if (args["use_test_remote_service"]) {
01579         IRWRegistry& reg = CNcbiApplication::Instance()->GetConfig();
01580         reg.Set("BLAST4", DEF_CONN_REG_SECTION "_" REG_CONN_SERVICE_NAME,
01581                 "blast4_test");
01582     }
01583 #endif /* DEBUG */
01584 }
01585 
01586 void
01587 CHspFilteringArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01588 {
01589     // culling limit
01590     arg_desc.SetCurrentGroup("Restrict search or results");
01591     arg_desc.AddOptionalKey(kArgCullingLimit, "int_value",
01592                      "If the query range of a hit is enveloped by that of at "
01593                      "least this many higher-scoring hits, delete the hit",
01594                      CArgDescriptions::eInteger);
01595     arg_desc.SetConstraint(kArgCullingLimit, 
01596     // best hit algorithm arguments
01597                new CArgAllowValuesGreaterThanOrEqual(kDfltArgCullingLimit));
01598 
01599     arg_desc.AddOptionalKey(kArgBestHitOverhang, "float_value", 
01600                             "Best Hit algorithm overhang value "
01601                             "(recommended value: " +
01602                             NStr::DoubleToString(kDfltArgBestHitOverhang) +
01603                             ")",
01604                             CArgDescriptions::eDouble);
01605     arg_desc.SetConstraint(kArgBestHitOverhang, 
01606                            new CArgAllowValuesBetween(kBestHit_OverhangMin, 
01607                                                       kBestHit_OverhangMax));
01608     arg_desc.SetDependency(kArgBestHitOverhang,
01609                            CArgDescriptions::eExcludes,
01610                            kArgCullingLimit);
01611 
01612     arg_desc.AddOptionalKey(kArgBestHitScoreEdge, "float_value", 
01613                             "Best Hit algorithm score edge value "
01614                             "(recommended value: " +
01615                             NStr::DoubleToString(kDfltArgBestHitScoreEdge) +
01616                             ")",
01617                             CArgDescriptions::eDouble);
01618     arg_desc.SetConstraint(kArgBestHitScoreEdge, 
01619                            new CArgAllowValuesBetween(kBestHit_ScoreEdgeMin, 
01620                                                       kBestHit_ScoreEdgeMax));
01621     arg_desc.SetDependency(kArgBestHitScoreEdge,
01622                            CArgDescriptions::eExcludes,
01623                            kArgCullingLimit);
01624     arg_desc.SetCurrentGroup("");
01625 }
01626 
01627 void
01628 CHspFilteringArgs::ExtractAlgorithmOptions(const CArgs& args, 
01629                                       CBlastOptions& opts)
01630 {
01631     if (args[kArgCullingLimit]) {
01632         opts.SetCullingLimit(args[kArgCullingLimit].AsInteger());
01633     }
01634     if (args[kArgBestHitOverhang]) {
01635         opts.SetBestHitOverhang(args[kArgBestHitOverhang].AsDouble());
01636     }
01637     if (args[kArgBestHitScoreEdge]) {
01638         opts.SetBestHitScoreEdge(args[kArgBestHitScoreEdge].AsDouble());
01639     }
01640 }
01641 
01642 void
01643 CMbIndexArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01644 {
01645     arg_desc.SetCurrentGroup("General search options");
01646     arg_desc.AddOptionalKey( 
01647             kArgUseIndex, "boolean",
01648             "Use MegaBLAST database index",
01649             CArgDescriptions::eBoolean );
01650     arg_desc.AddOptionalKey(
01651             kArgIndexName, "string",
01652             "MegaBLAST database index name",
01653             CArgDescriptions::eString );
01654     arg_desc.SetCurrentGroup( "" );
01655 }
01656 
01657 bool
01658 CMbIndexArgs::HasBeenSet(const CArgs& args)
01659 {
01660     if ( (args.Exist(kArgUseIndex) && args[kArgUseIndex].HasValue()) ||
01661          (args.Exist(kArgIndexName) && args[kArgIndexName].HasValue()) ) {
01662         return true;
01663     }
01664     return false;
01665 }
01666 
01667 void
01668 CMbIndexArgs::ExtractAlgorithmOptions(const CArgs& args,
01669                                       CBlastOptions& opts)
01670 {
01671     // MB Index does not apply to Blast2Sequences
01672     if( args.Exist( kArgUseIndex ) &&
01673         !(args.Exist( kArgSubject ) && args[kArgSubject])) {
01674 
01675         bool use_index   = true;
01676         bool force_index = false;
01677 
01678         if( args[kArgUseIndex] ) {
01679             if( args[kArgUseIndex].AsBoolean() ) force_index = true;
01680             else use_index = false;
01681         }
01682 
01683         if( args.Exist( kTask ) && args[kTask] && 
01684                 args[kTask].AsString() != "megablast" ) {
01685             use_index = false;
01686         }
01687 
01688         if( use_index ) {
01689             string index_name;
01690 
01691             if( args.Exist( kArgIndexName ) && args[kArgIndexName] ) {
01692                 index_name = args[kArgIndexName].AsString();
01693             }
01694             else if( args.Exist( kArgDb ) && args[kArgDb] ) {
01695                 index_name = args[kArgDb].AsString();
01696             }
01697             else {
01698                 NCBI_THROW(CInputException, eInvalidInput,
01699                         "Can not deduce database index name" );
01700             }
01701     
01702             opts.SetUseIndex( true, index_name, force_index );
01703         }
01704     }
01705 }
01706 
01707 void
01708 CStdCmdLineArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01709 {
01710     arg_desc.SetCurrentGroup("Input query options");
01711 
01712     // query filename
01713     arg_desc.AddDefaultKey(kArgQuery, "input_file", 
01714                      "Input file name",
01715                      CArgDescriptions::eInputFile, kDfltArgQuery);
01716 
01717     arg_desc.SetCurrentGroup("General search options");
01718 
01719     // report output file
01720     arg_desc.AddDefaultKey(kArgOutput, "output_file", 
01721                    "Output file name",
01722                    CArgDescriptions::eOutputFile, "-");
01723 
01724     arg_desc.SetCurrentGroup("");
01725 }
01726 
01727 void
01728 CStdCmdLineArgs::ExtractAlgorithmOptions(const CArgs& args,
01729                                          CBlastOptions& /* opt */)
01730 {
01731     if (args.Exist(kArgQuery) && args[kArgQuery].HasValue() &&
01732         m_InputStream == NULL) {
01733         m_InputStream = &args[kArgQuery].AsInputFile();
01734     }
01735     m_OutputStream = &args[kArgOutput].AsOutputFile();
01736 }
01737 
01738 CNcbiIstream&
01739 CStdCmdLineArgs::GetInputStream() const
01740 {
01741     // programmer must ensure the ExtractAlgorithmOptions method is called
01742     // before this method is invoked
01743     if ( !m_InputStream ) {
01744         abort();
01745     }
01746     return *m_InputStream;
01747 }
01748 
01749 CNcbiOstream&
01750 CStdCmdLineArgs::GetOutputStream() const
01751 {
01752     // programmer must ensure the ExtractAlgorithmOptions method is called
01753     // before this method is invoked
01754     _ASSERT(m_OutputStream);
01755     return *m_OutputStream;
01756 }
01757 
01758 void
01759 CStdCmdLineArgs::SetInputStream(CRef<CTmpFile> input_file)
01760 {
01761     m_QueryTmpInputFile = input_file;
01762     m_InputStream = &input_file->AsInputFile(CTmpFile::eIfExists_Throw);
01763 }
01764 
01765 void
01766 CSearchStrategyArgs::SetArgumentDescriptions(CArgDescriptions& arg_desc)
01767 {
01768     arg_desc.SetCurrentGroup("Search strategy options");
01769 
01770     arg_desc.AddOptionalKey(kArgInputSearchStrategy,
01771                             "filename",
01772                             "Search strategy to use", 
01773                             CArgDescriptions::eInputFile);
01774     arg_desc.AddOptionalKey(kArgOutputSearchStrategy,
01775                             "filename",
01776                             "File name to record the search strategy used", 
01777                             CArgDescriptions::eOutputFile);
01778     arg_desc.SetDependency(kArgInputSearchStrategy,
01779                            CArgDescriptions::eExcludes,
01780                            kArgOutputSearchStrategy);
01781 
01782     arg_desc.SetCurrentGroup("");
01783 }
01784 
01785 void
01786 CSearchStrategyArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
01787                                              CBlastOptions& /* options */)
01788 {
01789 }
01790 
01791 CNcbiIstream* 
01792 CSearchStrategyArgs::GetImportStream(const CArgs& args) const
01793 {
01794     CNcbiIstream* retval = NULL;
01795     if (args[kArgInputSearchStrategy].HasValue()) {
01796         retval = &args[kArgInputSearchStrategy].AsInputFile();
01797     }
01798     return retval;
01799 }
01800 
01801 CNcbiOstream* 
01802 CSearchStrategyArgs::GetExportStream(const CArgs& args) const
01803 {
01804     CNcbiOstream* retval = NULL;
01805     if (args[kArgOutputSearchStrategy].HasValue()) {
01806         retval = &args[kArgOutputSearchStrategy].AsOutputFile();
01807     }
01808     return retval;
01809 }
01810 
01811 CBlastAppArgs::CBlastAppArgs()
01812 {
01813     m_SearchStrategyArgs.Reset(new CSearchStrategyArgs);
01814     m_Args.push_back(CRef<IBlastCmdLineArgs>(&*m_SearchStrategyArgs));
01815     m_IsUngapped = false;
01816 }
01817 
01818 CArgDescriptions*
01819 CBlastAppArgs::SetCommandLine()
01820 {
01821     return SetUpCommandLineArguments(m_Args);
01822 }
01823 
01824 CRef<CBlastOptionsHandle>
01825 CBlastAppArgs::SetOptions(const CArgs& args)
01826 {
01827     // We're recovering from a saved strategy, so we need to still extract
01828     // certain options from the command line, include overriding query
01829     // and/or database
01830     if (m_OptsHandle.NotEmpty()) {
01831         CBlastOptions& opts = m_OptsHandle->SetOptions();
01832         // invoke ExtractAlgorithmOptions on certain argument classes
01833         m_QueryOptsArgs->ExtractAlgorithmOptions(args, opts);
01834         m_StdCmdLineArgs->ExtractAlgorithmOptions(args, opts);
01835         m_RemoteArgs->ExtractAlgorithmOptions(args, opts);
01836         m_DebugArgs->ExtractAlgorithmOptions(args, opts);
01837         m_FormattingArgs->ExtractAlgorithmOptions(args, opts);
01838         if (CBlastDatabaseArgs::HasBeenSet(args)) {
01839             m_BlastDbArgs->ExtractAlgorithmOptions(args, opts);
01840         }
01841         if (CMbIndexArgs::HasBeenSet(args)) {
01842             NON_CONST_ITERATE(TBlastCmdLineArgs, arg, m_Args) {
01843                 if (dynamic_cast<CMbIndexArgs*>(arg->GetPointer()) != NULL) {
01844                     (*arg)->ExtractAlgorithmOptions(args, opts);
01845                 }
01846             }
01847         }
01848         m_HspFilteringArgs->ExtractAlgorithmOptions(args, opts);
01849         m_IsUngapped = !opts.GetGappedMode();
01850         try { m_OptsHandle->Validate(); }
01851         catch (const CBlastException& e) {
01852             NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
01853         }
01854         return m_OptsHandle;
01855     }
01856 
01857     CBlastOptions::EAPILocality locality = 
01858         (args.Exist(kArgRemote) && args[kArgRemote]) 
01859         ? CBlastOptions::eRemote 
01860         : CBlastOptions::eLocal;
01861 
01862     // This is needed as a CRemoteBlast object and its options are instantiated
01863     // to create the search strategy
01864     if (GetExportSearchStrategyStream(args)) {
01865         locality = CBlastOptions::eBoth;
01866     }
01867 
01868     CRef<CBlastOptionsHandle> retval(x_CreateOptionsHandle(locality, args));
01869     CBlastOptions& opts = retval->SetOptions();
01870     NON_CONST_ITERATE(TBlastCmdLineArgs, arg, m_Args) {
01871         (*arg)->ExtractAlgorithmOptions(args, opts);
01872     }
01873 
01874     m_IsUngapped = !opts.GetGappedMode();
01875     try { retval->Validate(); }
01876     catch (const CBlastException& e) {
01877         NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
01878     }
01879     return retval;
01880 }
01881 
01882 void CBlastAppArgs::SetTask(const string& task)
01883 {
01884 #if _DEBUG
01885     ThrowIfInvalidTask(task);
01886 #endif
01887     m_Task.assign(task);
01888 }
01889 
01890 CArgDescriptions* 
01891 SetUpCommandLineArguments(TBlastCmdLineArgs& args)
01892 {
01893     auto_ptr<CArgDescriptions> retval(new CArgDescriptions);
01894 
01895     // Create the groups so that the ordering is established
01896     retval->SetCurrentGroup("Input query options");
01897     retval->SetCurrentGroup("General search options");
01898     retval->SetCurrentGroup("BLAST database options");
01899     retval->SetCurrentGroup("BLAST-2-Sequences options");
01900     retval->SetCurrentGroup("Formatting options");
01901     retval->SetCurrentGroup("Query filtering options");
01902     retval->SetCurrentGroup("Restrict search or results");
01903     retval->SetCurrentGroup("Discontiguous MegaBLAST options");
01904     retval->SetCurrentGroup("Statistical options");
01905     retval->SetCurrentGroup("Search strategy options");
01906     retval->SetCurrentGroup("Extension options");
01907     retval->SetCurrentGroup("");
01908 
01909 
01910     NON_CONST_ITERATE(TBlastCmdLineArgs, arg, args) {
01911         (*arg)->SetArgumentDescriptions(*retval);
01912     }
01913     return retval.release();
01914 }
01915 
01916 END_SCOPE(blast)
01917 END_NCBI_SCOPE
01918 
01919 

Generated on Wed Dec 9 03:55:36 2009 for NCBI C++ ToolKit by  doxygen 1.4.6
Modified on Wed Dec 09 08:17:45 2009 by modify_doxy.py rev. 173732